sentiment.py
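# Sentiment analysis of the "discussion" text files (presumably the MD&A sections of
# annual filings) for several companies and years. For each file the script computes an
# NLTK VADER score, a DistilBERT transformer score, and counts of finance-related
# positive/negative keywords, collects everything in a pandas DataFrame, and finally
# renders word clouds for one example filing.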
import os
import pandas as pd
from transformers import pipeline, AutoTokenizer
import sentiment_analysis.sentiment_nltk as n
import sentiment_analysis.sentiment_transformers as t
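# The sentiment_analysis.* helpers are defined outside this file; based on how they are
# used below, n.analyze_sentiment_vader(text) is assumed to return an overall VADER score,
# n.analyze_sentiment_vader_detail(text) a dict with 'pos'/'neg'/'neu' scores,
# t.clean_text(text) a cleaned string, and t.analyze_sentiment(text, tokenizer, pipeline)
# a transformer-based sentiment score.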
positive_keywords = [
    'good', 'great', 'positive', 'successful', 'profitable', 'improved', 'increase',
    'beneficial', 'strong', 'growth', 'upturn', 'bullish', 'booming', 'advantageous',
    'rewarding', 'lucrative', 'surplus', 'expansion', 'upswing', 'thriving', 'yielding',
    'gains', 'outperform', 'optimistic', 'upbeat', 'recovery', 'acceleration', 'enhancement',
    'rally', 'surge', 'boom', 'profitability', 'efficiency', 'superior', 'leadership',
    'innovation', 'breakthrough', 'high-demand', 'competitive edge', 'market leader',
    'dividend increase', 'shareholder value', 'capital gain', 'revenue growth', 'cost reduction',
    'strategic acquisition', 'synergy', 'scalability', 'liquidity'
]
negative_keywords = [
    'bad', 'poor', 'negative', 'loss', 'problem', 'decrease', 'difficult', 'weak', 'decline',
    'losses', 'bearish', 'slump', 'downturn', 'adverse', 'challenging', 'deteriorating',
    'declining', 'recession', 'deficit', 'contraction', 'downgrade', 'volatility', 'risk',
    'uncertainty', 'impairment', 'write-off', 'underperform', 'pessimistic', 'downbeat',
    'stagnation', 'erosion', 'turmoil', 'crisis', 'bankruptcy', 'default', 'devaluation',
    'overleveraged', 'layoffs', 'restructuring', 'downsizing', 'liquidation', 'fraud',
    'scandal', 'litigation', 'regulatory penalty', 'market exit', 'competitive pressure',
    'product recall', 'safety concern'
]
def keyword_analysis(text):
    # Count occurrences of each positive and negative keyword in the text.
    positive_count = sum(text.count(word) for word in positive_keywords)
    negative_count = sum(text.count(word) for word in negative_keywords)
    return positive_count, negative_count
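# Illustration (hypothetical input): keyword_analysis("a risky expansion") returns (1, 1):
# 'expansion' counts as positive, and 'risk' counts as negative because str.count() matches
# substrings, so it also fires inside 'risky'.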
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_analysis = pipeline("sentiment-analysis", model=model_name)
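# Assumption: this DistilBERT model accepts at most 512 tokens per input, so long discussion
# sections presumably have to be truncated or chunked; that is expected to happen inside
# t.analyze_sentiment, which is passed the tokenizer for this purpose.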
# List of companies and years to check
companies = ['NASDAQ_TSLA', 'NASDAQ_AAPL', 'NASDAQ_MSFT', 'NASDAQ_AMZN', 'NYSE_BRK-A', 'NYSE_PFE', 'NASDAQ_CCBG']
years = ['2022', '2021', '2020', '2019']
discussion_dir = os.path.join(os.getcwd(), 'discussion')
data = []
for company in companies:
    for year in years:
        filename = f"{company}_{year}_DISCUSSION.txt"
        filepath = os.path.join(discussion_dir, filename)
        if os.path.exists(filepath):
            with open(filepath, 'r', encoding='utf-8') as file:
                text = file.read()

            # NLTK (VADER) sentiment
            sentiment_score_nltk = n.analyze_sentiment_vader(text)
            print(f"{company} {year}:\n Sentiment Score NLTK = {sentiment_score_nltk}")
            scores = n.analyze_sentiment_vader_detail(text)
            print(f" Positive Score: {scores['pos']}")
            print(f" Negative Score: {scores['neg']}")
            print(f" Neutral Score: {scores['neu']}")

            # Transformer sentiment
            cleaned_text = t.clean_text(text)
            sentiment_score_transformer = t.analyze_sentiment(cleaned_text, tokenizer, sentiment_analysis)
            print(f"{company} {year}: Sentiment Score Transformer = {sentiment_score_transformer}\n")

            # Keyword analysis (reuses the cleaned text from above)
            positive_count, negative_count = keyword_analysis(cleaned_text)

            data.append({
                "Company": company,
                "Year": year,
                "NLTK_Sentiment_Score": sentiment_score_nltk,
                "Positive_Score": scores['pos'],
                "Negative_Score": scores['neg'],
                "Neutral_Score": scores['neu'],
                "Transformer_Sentiment_Score": sentiment_score_transformer,
                "Positive_Keywords": positive_count,
                "Negative_Keywords": negative_count,
            })
df = pd.DataFrame(data)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
print(df)
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def make_word_cloud(text, title=None):
    # Generate a word cloud
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    # Display the word cloud using matplotlib
    plt.figure(figsize=(10, 5))
    if title:
        plt.title(title)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
def make_map(text, positive, negative):
    print("All Words Word Cloud:")
    make_word_cloud(text, "All Words")
    print("Positive Word Cloud:")
    # Note: splitting on whitespace means multi-word keywords such as 'revenue growth'
    # never match here, and trailing punctuation (e.g. 'growth,') also prevents a match.
    positive_text = ' '.join([word for word in text.lower().split() if word in positive])
    make_word_cloud(positive_text, "Positive Words")
    print("Negative Word Cloud:")
    negative_text = ' '.join([word for word in text.lower().split() if word in negative])
    make_word_cloud(negative_text, "Negative Words")
# Example usage
filename = "NYSE_PFE_2021_DISCUSSION.txt"
filepath = os.path.join(discussion_dir, filename)
if os.path.exists(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        text = file.read()
    make_map(text, positive_keywords, negative_keywords)