forked from arnauddelaunay/twitter_sentiment_challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo.py
63 lines (49 loc) · 2.11 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import csv
import operator
import os
import sys

import numpy as np
import tweepy
from textblob import TextBlob
#French adaptor
from textblob_fr import PatternTagger, PatternAnalyzer
# Step 1 - Authenticate
# Each credential is read from an environment variable when it is set, so real
# secrets do not have to be written into this file. When a variable is unset
# the original placeholder string is used, exactly as before.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', 'CONSUMER_KEY_HERE')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', 'CONSUMER_SECRET_HERE')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', 'ACCESS_TOKEN_HERE')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', 'ACCESS_TOKEN_SECRET_HERE')

# Build the OAuth handler from the consumer pair, attach the access pair, and
# create the API client used by the rest of the script.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# Step 2 - Prepare query features
# Candidates in the French Republicans primary elections
candidates_names = [
    'Sarkozy',
    'Kosciusko',
    'Cope',
    'Juppe',
    'Fillon',
    'Le Maire',
    'Poisson',
]
# Hashtag related to the debate
name_of_debate = 'PrimaireLeDebat'
# Date bounds passed to the search; the debate took place on October 13th
since_date, until_date = '2016-10-13', '2016-10-14'
# Step 2b - Labelling function for a sentiment analysis
def get_label(analysis, threshold=0):
    """Return the sentiment label for an analysed text.

    Args:
        analysis: Object whose ``sentiment`` attribute is indexable; element 0
            is read as the polarity score (this script passes a TextBlob).
        threshold: Polarity must be strictly greater than this value to be
            labelled positive. Defaults to 0.

    Returns:
        ``'Positive'`` when the polarity is strictly above *threshold*,
        otherwise ``'Negative'``. A polarity equal to the threshold is
        therefore labelled ``'Negative'``.
    """
    if analysis.sentiment[0] > threshold:
        return 'Positive'
    return 'Negative'
# Step 3 - Retrieve Tweets and Save Them
def _open_csv_for_writing(path):
    """Open *path* in the mode the csv module expects on this Python version."""
    if sys.version_info[0] < 3:
        # Python 2: csv emits byte strings, so the file must be binary.
        return open(path, 'wb')
    # Python 3: csv emits text; newline='' leaves line endings to the writer.
    return open(path, 'w', newline='', encoding='utf8')


def _csv_text(text):
    """Return *text* in the string type the csv writer accepts."""
    if sys.version_info[0] < 3:
        return text.encode('utf8')
    return text


# The tagger and analyzer do not depend on the tweet, so build them once
# instead of once per tweet.
french_tagger = PatternTagger()
french_analyzer = PatternAnalyzer()

# Maps each candidate name to the mean polarity of the tweets retrieved for it.
all_polarities = dict()
for candidate in candidates_names:
    this_candidate_polarities = []
    # Get the tweets about the debate and the candidate between the dates.
    # NOTE(review): q is passed as a list rather than a single query string;
    # how tweepy serialises it is not visible here - confirm it produces the
    # intended "hashtag AND candidate" search.
    this_candidate_tweets = api.search(
        q=[name_of_debate, candidate],
        count=100,
        since=since_date,
        until=until_date,
    )
    # Save the tweets in csv. csv.writer quotes fields containing commas,
    # quotes or newlines, which free-form tweet text regularly does; plain
    # string formatting would produce broken rows for such tweets.
    with _open_csv_for_writing('%s_tweets.csv' % candidate) as this_candidate_file:
        # lineterminator='\n' keeps the same line ending the file had before.
        writer = csv.writer(this_candidate_file, lineterminator='\n')
        writer.writerow(['tweet', 'sentiment_label'])
        for tweet in this_candidate_tweets:
            analysis = TextBlob(tweet.text, pos_tagger=french_tagger, analyzer=french_analyzer)
            # Keep the polarity for the mean and store the label with the tweet
            this_candidate_polarities.append(analysis.sentiment[0])
            writer.writerow([_csv_text(tweet.text), get_label(analysis)])
    # Save the mean for final results. With no tweets the mean is undefined:
    # record NaN directly rather than letting np.mean([]) emit a RuntimeWarning.
    if this_candidate_polarities:
        all_polarities[candidate] = np.mean(this_candidate_polarities)
    else:
        all_polarities[candidate] = float('nan')
# Step bonus - Print a Result
# Rank candidates by mean polarity, highest first. A mean can be NaN (np.mean
# of an empty list), and NaN compares false against everything, which makes a
# plain sort on the values unreliable. The key therefore sorts on
# (is-a-number, value) so that NaN entries always come last.
sorted_analysis = sorted(
    all_polarities.items(),
    key=lambda item: (not np.isnan(item[1]), item[1]),
    reverse=True,
)
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('Mean Sentiment Polarity in descending order :')
for candidate, polarity in sorted_analysis:
    print('%s : %0.3f' % (candidate, polarity))