-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathyoutube_video_crawler.py
54 lines (43 loc) · 1.53 KB
/
youtube_video_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os, urllib.parse, requests, pymysql
# MySQL connection settings, all read from environment variables.
MYSQL_HOST = os.getenv("MYSQL_HOST")
# Fall back to MySQL's standard port: without a default, an unset (or empty)
# MYSQL_PORT makes the int() conversion below fail before any connection
# attempt is made.
MYSQL_PORT = os.getenv("MYSQL_PORT") or "3306"
MYSQL_USER = os.getenv("MYSQL_USER")
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD")
MYSQL_DATABASE = os.getenv("MYSQL_DATABASE")

# Module-level connection used by the rest of the script.
conn = pymysql.connect(
    host=MYSQL_HOST,
    port=int(MYSQL_PORT),  # environment values are strings; PyMySQL expects an int
    user=MYSQL_USER,
    password=MYSQL_PASSWORD,
    db=MYSQL_DATABASE,
    charset='utf8mb4'
)
# Search endpoint of the YouTube Data API v3. The trailing "?" lets an encoded
# query string be appended directly to this URL.
YOUTUBE_API_URL = "https://www.googleapis.com/youtube/v3/search?"
# API key for the requests, read from the environment (None when unset).
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
# Search terms, one request per entry. Each entry embeds "%7C", the
# percent-escaped form of the pipe character "|".
# NOTE(review): the non-ASCII text in these literals looks mis-encoded in this
# copy of the file - confirm the intended keywords against the original source.
keywords = ["κ±°λΆλͺ©%7Cλ°λ₯ΈμμΈ", "λͺ©%7Cμ€νΈλ μΉ", "μλͺ©%7Cμ€νΈλ μΉ", "ν리%7Cμ€νΈλ μΉ"]
def _fetch_video_rows(keyword):
    """Search YouTube for one keyword and build rows for the ``videos`` table.

    Args:
        keyword: Search term as listed in ``keywords``. It may contain
            percent-escapes such as ``%7C`` (the escaped form of ``|``).

    Returns:
        A list of ``(video_id, title, thumbnail_url, category)`` tuples, one
        per search result. ``category`` is the decoded keyword with every
        ``|`` removed.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            API answers with an HTTP error status.
        KeyError: If a result lacks one of the fields read below.
    """
    # Decode before building the query string: urlencode() escapes the value
    # itself, so passing the pre-escaped keyword would send "%257C" and the
    # API would see a literal "%7C" instead of the "|" (OR) operator.
    query = urllib.parse.unquote(keyword)
    params = {
        "key": YOUTUBE_API_KEY,
        "part": "snippet",
        "maxResults": 50,
        "q": query,
        "type": "video",
        "videoDuration": "medium",
    }
    response = requests.get(
        YOUTUBE_API_URL + urllib.parse.urlencode(params),
        timeout=30,  # do not hang forever on a stalled connection
    )
    # Surface API errors (bad key, exhausted quota, ...) as an HTTP error
    # rather than as a KeyError on the missing "items" field.
    response.raise_for_status()

    category = query.replace("|", "")
    return [
        (
            item["id"]["videoId"],
            item["snippet"]["title"],
            item["snippet"]["thumbnails"]["medium"]["url"],
            category,
        )
        for item in response.json()["items"]
    ]


try:
    # Fetch every keyword before touching the table, so that an API failure
    # part-way through leaves the existing rows in place.
    rows = []
    for keyword in keywords:
        rows.extend(_fetch_video_rows(keyword))

    with conn.cursor() as cursor:
        delete_sql = """
        DELETE FROM videos
        """
        cursor.execute(delete_sql)

        # INSERT IGNORE skips rows MySQL would reject as duplicates, e.g. the
        # same video returned for more than one keyword.
        sql = """
        INSERT IGNORE INTO videos
        (video_id, title, thumbnail_url, category)
        VALUES (%s, %s, %s, %s)
        """
        cursor.executemany(sql, rows)

    # A single commit covers the delete and all inserts. This relies on the
    # connection not being in autocommit mode (the connect() call in this
    # file does not enable it).
    conn.commit()
finally:
    # Always release the connection, including when a request or query fails.
    conn.close()