빅데이터 김교수의 "AI노마드연구소" AI로 열어가는 노마드 세상!

빅데이터 김교수의 "AI노마드연구소" AI로 열어가는 노마드 세상입니다. AI 코딩작성, SNS 분석, AI업무자동화 컨설팅 0507-1419-0222

자세히보기

교육/파이썬빅데이터분석교육

py-script 실습

빅데이터 김교수 2022. 12. 20. 20:10
import os
import sys
import urllib.parse
import urllib.request
import datetime
import time
import json

# Naver Open API credentials.
# NOTE(review): secrets are hard-coded in this file. Prefer supplying them via
# the NAVER_CLIENT_ID / NAVER_CLIENT_SECRET environment variables and rotating
# the values below; the literals remain only as a backward-compatible fallback.
client_id = os.environ.get('NAVER_CLIENT_ID', 'gdZ5G4cUekGLFcRl98_H')
client_secret = os.environ.get('NAVER_CLIENT_SECRET', 'i0zSLElEiq')

# Largest `start` offset the Naver Search API accepts according to its public
# documentation; requesting beyond it only produces an error response.
NAVER_MAX_START = 1000


# [CODE 1]
def getRequestUrl(url):
    """Fetch *url* with the Naver credential headers and return the body text.

    Args:
        url: Fully built request URL.

    Returns:
        The response body decoded as UTF-8 when the status code is 200,
        otherwise ``None`` (request error or any other status code).
    """
    req = urllib.request.Request(url)
    req.add_header("X-Naver-Client-Id", client_id)
    req.add_header("X-Naver-Client-Secret", client_secret)

    try:
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req) as response:
            status = response.getcode()
            if status == 200:
                print("[%s] Url Request Success" % datetime.datetime.now())
                return response.read().decode('utf-8')
    except Exception as e:
        # Deliberately broad: callers treat any failure as "no data" (None).
        print(e)
        print("[%s] Error for URL : %s" % (datetime.datetime.now(), url))
        return None

    # Reached only for a successful request with a non-200 status code.
    print("[%s] Error for URL : %s (HTTP %s)"
          % (datetime.datetime.now(), url, status))
    return None


def _buildSearchUrl(node, srcText, start, display):
    """Return the search URL for *node* with the query text percent-encoded."""
    base = "https://openapi.naver.com/v1/search"
    endpoint = "/%s.json" % node
    parameters = "?query=%s&start=%s&display=%s&sort=date" % (
        urllib.parse.quote(srcText), start, display)
    return base + endpoint + parameters


# [CODE 2]
def getNaverSearch(node, srcText, start, display):
    """Run one page of a Naver search and return the decoded JSON.

    Args:
        node: Search category used as the endpoint name (e.g. ``'blog'``).
        srcText: Search keyword; percent-encoded before being sent.
        start: 1-based offset of the first result to request.
        display: Number of results to request for this page.

    Returns:
        The parsed JSON response, or ``None`` when the request failed.
    """
    url = _buildSearchUrl(node, srcText, start, display)
    responseDecode = getRequestUrl(url)  # [CODE 1]

    if responseDecode is None:
        return None
    return json.loads(responseDecode)


# [CODE 3]
def getPostData(post, jsonResult, cnt):
    """Append a normalized record for *post* to *jsonResult*.

    Args:
        post: One search result item; must contain the keys ``title``,
            ``description``, ``link``, ``bloggerlink``, ``bloggername`` and
            ``postdate``.
        jsonResult: List that receives the new record (mutated in place).
        cnt: Running sequence number stored with the record.

    Returns:
        None.

    Raises:
        KeyError: If *post* lacks one of the required keys.
    """
    raw_date = post['postdate']
    try:
        # Convert 'YYYYMMDD' into 'YYYY-MM-DD'.
        postdate = datetime.datetime.strptime(
            raw_date, '%Y%m%d').strftime('%Y-%m-%d')
    except (TypeError, ValueError):
        # Keep the raw value so one malformed date does not abort the crawl.
        postdate = raw_date

    jsonResult.append({
        'cnt': cnt,
        'title': post['title'],
        'description': post['description'],
        'bloggerlink': post['bloggerlink'],
        'link': post['link'],
        'postdate': postdate,
        'bloggername': post['bloggername'],
    })
    return


# [CODE 0]
def main():
    """Prompt for a keyword, collect matching blog posts and save them as JSON.

    Results are written to ``<keyword>_naver_<node>.json`` in the current
    working directory.
    """
    node = 'blog'  # search category to crawl
    srcText = input('검색어를 입력하세요: ')
    cnt = 0
    jsonResult = []

    jsonResponse = getNaverSearch(node, srcText, 1, 100)  # [CODE 2]
    if jsonResponse is None:
        # The first request failed, so there is nothing to count or save.
        print('검색 결과를 가져오지 못했습니다.')
        return

    total = jsonResponse['total']

    while jsonResponse is not None and jsonResponse['display'] != 0:
        for post in jsonResponse['items']:
            cnt += 1
            getPostData(post, jsonResult, cnt)  # [CODE 3]

        start = jsonResponse['start'] + jsonResponse['display']
        if start > NAVER_MAX_START:
            # Stop before issuing a request the API is documented to reject.
            break
        jsonResponse = getNaverSearch(node, srcText, start, 100)  # [CODE 2]

    print('전체 검색 : %d 건' % total)

    with open('%s_naver_%s.json' % (srcText, node), 'w',
              encoding='utf8') as outfile:
        json.dump(jsonResult, outfile, indent=4, ensure_ascii=False)

    print("가져온 데이터 : %d 건" % (cnt))
    print('%s_naver_%s.json SAVED' % (srcText, node))


if __name__ == '__main__':
    main()