네이버 뉴스를 크롤링해서 게시판에 작성하는 방법을 찾고 있습니다.
파이썬을 이용해서 다음과 같은 코드를 작성했는데 오류가 뜨네요.
아마도 추측건대 스팸 게시글 같은 걸 차단하려고 라이믹스에서 자체 보안을 해 놓은 것 같은데
혹시 이런 부분은 어떻게 설정해야 할까요?
import requests
from bs4 import BeautifulSoup
def crawl_naver_news(query, limit=3):
    """Search Naver News for *query* and return the first unique results.

    Args:
        query: Search keyword. It is sent through ``params`` so that
            requests URL-encodes it (spaces, ``&``, ``#`` and non-ASCII
            text would otherwise corrupt a hand-built query string).
        limit: Maximum number of articles to return. Defaults to 3.

    Returns:
        A list of ``{'title': ..., 'link': ...}`` dicts, at most *limit*
        long. An empty list is returned when the request fails, the
        server answers with a non-200 status, or no matching markup is
        found.
    """
    print("Starting to crawl Naver News...")
    url = 'https://search.naver.com/search.naver'
    params = {'where': 'news', 'query': query}
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
    }

    # A timeout keeps the script from hanging forever on a stalled
    # connection; network errors are reported the same way as a bad status.
    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to fetch news. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch news. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    articles = []
    seen_titles = set()

    # NOTE(review): the 'news_wrap' / 'news_tit' class names depend on
    # Naver's page markup; an empty result may mean the markup changed.
    for item in soup.find_all('div', class_='news_wrap'):
        if len(articles) >= limit:  # keep only the top `limit` articles
            break
        title_element = item.find('a', class_='news_tit')
        if not title_element:
            continue
        title = title_element.get_text()
        # .get() avoids a KeyError on an anchor that has no href attribute.
        link = title_element.get('href')
        if not link or title in seen_titles:
            continue
        seen_titles.add(title)
        articles.append({'title': title, 'link': link})

    if not articles:
        print("No articles found after crawling.")
    else:
        print(f"Number of articles found: {len(articles)}")
    print("Finished crawling Naver News.")
    return articles
def post_to_rhymix(title, content):
    """Submit one post to the board and return the HTTP status code.

    The CSRF token is read from the board page and then sent back with
    the POST. Both requests go through a single ``requests.Session`` so
    that the cookies set by the first response accompany the second one;
    a token sent without its session cookie cannot be matched by the
    server.

    Args:
        title: Title of the post.
        content: Body text of the post.

    Returns:
        The HTTP status code of the POST response.

    Raises:
        requests.RequestException: If either request fails at the
            network level (including an invalid ``post_url``).
    """
    print(f"Posting article: {title}")
    post_url = 'est'  # placeholder: must be replaced with the real board URL
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36',
        'Referer': post_url,
    }

    with requests.Session() as session:
        session.headers.update(headers)

        # Fetch the page first to obtain the CSRF token and session cookies.
        csrf_response = session.get(post_url, timeout=10)
        csrf_soup = BeautifulSoup(csrf_response.content, 'html.parser')
        csrf_meta = csrf_soup.find('meta', {'name': 'csrf-token'})
        # find() returns None when the tag is absent; subscripting it
        # directly would raise TypeError.
        csrf_token = csrf_meta.get('content') if csrf_meta else None

        # Form fields for the new post.
        post_data = {
            'title': title,
            'content': content,
            'module': 'board',
            'act': 'procBoardInsert',
            'mid': '459',  # board MID
        }
        post_headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        if csrf_token:
            post_data['csrf_token'] = csrf_token
            # NOTE(review): Rhymix appears to read the token from the
            # X-CSRF-Token request header (or a `_rx_csrf_token` field)
            # rather than `csrf_token` -- confirm against the installed
            # Rhymix version.
            post_headers['X-CSRF-Token'] = csrf_token
        else:
            print("Warning: CSRF token not found on the page; posting without it.")

        response = session.post(
            post_url, data=post_data, headers=post_headers, timeout=10
        )

    if response.status_code == 200:
        print(f"Successfully posted: {title}")
    else:
        print(f"Failed to post: {title}. Status code: {response.status_code}. Response: {response.text}")
    return response.status_code
def main():
    """Crawl news for a fixed keyword and post every result to the board.

    Prints a line for each article found and a success or failure line
    depending on whether the post returned status 200.
    """
    keyword = "배나무"
    found = crawl_naver_news(keyword)

    # Nothing to post: report it and stop.
    if not found:
        print("No articles found.")
        return

    for entry in found:
        headline = entry['title']
        url = entry['link']
        print(f"Article found: {headline} - {url}")

        body = f"{headline}\n\n링크: {url}"
        status = post_to_rhymix(headline, body)

        outcome = (
            f"Successfully posted: {headline}"
            if status == 200
            else f"Failed to post: {headline} with status code: {status}"
        )
        print(outcome)
# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
어디에서 무슨 오류가 뜨는지 써주셔야죠~