-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkeyword_research.py
86 lines (74 loc) · 2.84 KB
/
keyword_research.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pandas as pd
import requests
import json
import time
import string
import nltk
from stop_words import get_stop_words
from collections import Counter
from json import loads
# Language code and seed keywords. Replace "keyword1" with your first keyword,
# and so on. A field left empty is dropped from the seed list below.
lang_code = "en"  #@param {type:"string"}
keyword1 = "keyword1"  #@param {type:"string"}
keyword2 = "keyword2"  #@param {type:"string"}
keyword3 = "keyword3"  #@param {type:"string"}
keyword4 = "keyword4"  #@param {type:"string"}
keyword5 = ""  #@param {type:"string"}

# All form fields in order, then only the ones that were actually filled in.
keywords = [keyword1, keyword2, keyword3, keyword4, keyword5]
keywordlist = [seed for seed in keywords if seed]
# Suffixes appended to each keyword for Google Suggest: the empty string
# first (the keyword on its own), then every lowercase ASCII letter.
letterlist = [""]
letterlist.extend(string.ascii_lowercase)
# Query Google Suggest for each combination of keyword and letter.
# Every collected row has the form [keyword, letter, suggestion, suggestion, ...].
keywordsuggestions = []
# The same request headers are used for every query, so build them once.
headers = {'User-agent': 'Mozilla/5.0'}
for keyword in keywordlist:
    for letter in letterlist:
        # Hand the query to requests via `params` so it is percent-encoded.
        # Concatenating it into the URL by hand breaks for keywords that
        # contain "&", "+" or "#".
        response = requests.get(
            "https://suggestqueries.google.com/complete/search",
            params={
                "client": "firefox",
                "hl": str(lang_code),
                "q": keyword + " " + letter,
            },
            headers=headers,
            # Without a timeout a stalled connection would hang the script.
            timeout=10,
        )
        # Stop with a clear HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        # Let requests decode the body rather than assuming UTF-8.
        # The second element of the decoded JSON array holds the suggestions.
        result = response.json()
        keywordsuggest = [keyword, letter]
        # Keep every suggestion except one that merely repeats the keyword.
        keywordsuggest.extend(word for word in result[1] if word != keyword)
        keywordsuggestions.append(keywordsuggest)
        # Pause between requests so the endpoint is not queried in a tight loop.
        time.sleep(1)
# Create a dataframe from the collected rows, one row per keyword/letter query.
# Rows differ in length (one entry per suggestion after the keyword and the
# letter); pandas pads the shorter rows with missing values.
#
# The widest row determines how many "SuggestionN" columns are needed. The
# width is taken from the rows rather than from a finished dataframe so that an
# empty result (no keywords entered) still yields a valid, empty dataframe.
# Renaming the columns of an empty dataframe afterwards fails with a
# length-mismatch error.
row_width = max((len(row) for row in keywordsuggestions), default=2)
columnnames = ["Keyword", "Letter"]
for i in range(1, row_width - 1):
    columnnames.append("Suggestion" + str(i))
keywordsuggestions_df = pd.DataFrame(keywordsuggestions, columns=columnnames)
# Make a list of all keywords: the seed keywords first, then every suggestion,
# taken column by column.
# Copy the seed list. A plain assignment would make `allkeywords` and
# `keywordlist` the same list object, so appending suggestions would silently
# grow the seed list as well.
allkeywords = list(keywordlist)
# Every column after "Keyword" and "Letter" holds suggestions.
for column in keywordsuggestions_df.columns[2:]:
    # Rows with fewer suggestions were padded with missing values when the
    # dataframe was built. Skip the padding; otherwise it is stringified
    # downstream and counted as if it were a real keyword.
    allkeywords.extend(
        suggestion
        for suggestion in keywordsuggestions_df[column].tolist()
        if pd.notna(suggestion)
    )
# Collect the words that occur in the keywords, leaving out stop words and the
# words that make up the seed keywords.
# NOTE(review): nltk.word_tokenize relies on NLTK tokenizer data being
# installed; nothing visible in this script downloads it -- confirm that the
# environment provides it.
stop_words = get_stop_words(lang_code)

# Lower-cased tokens of every seed keyword.
seed_words = []
for seed in keywords:
    seed_words.extend(
        token
        for token in nltk.word_tokenize(str(seed).lower())
        if len(token) > 0
    )

# One lookup set for everything that must not be counted.
excluded_words = set(stop_words).union(seed_words)

# Lower-cased tokens of all keywords, minus excluded words and
# single-character tokens. Duplicates are kept on purpose: the list is used
# for frequency counting.
wordlist = []
for entry in allkeywords:
    wordlist.extend(
        token
        for token in nltk.word_tokenize(str(entry).lower())
        if len(token) > 1 and token not in excluded_words
    )
# Keep the (up to) 200 most frequent words from the suggestions, dropping the
# counts and keeping the order returned by Counter.most_common.
most_common_words = [entry[0] for entry in Counter(wordlist).most_common(200)]
# Assign each keyword to every common word it contains. A keyword can appear
# in several clusters; a keyword containing none of the common words is left
# out of the result.
clusters = []
for common_word in most_common_words:
    for keyword in allkeywords:
        # The common words come from lower-cased text, so compare against the
        # lower-cased keyword; otherwise a keyword written with capitals
        # ("Nike shoes") can never match its own word ("nike").
        # This is a substring test, so "art" also matches "party".
        if common_word in str(keyword).lower():
            clusters.append([keyword, common_word])
# Create a dataframe with the keyword/cluster pairs and write it to CSV.
clusterdf = pd.DataFrame(clusters, columns=['Keyword', 'Cluster'])
clusterdf.to_csv("keywords_clustered.csv")