-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcactor_graphs.py
175 lines (148 loc) · 7.3 KB
/
cactor_graphs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import pandas as pd
import matplotlib.pyplot as plt
import os
from datetime import datetime
# Load the commit log; the path is resolved relative to the current
# working directory.
df = pd.read_csv("commits.csv")

# Each run writes its graphs and text report into a folder named after the
# moment the script started.
current_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
folder_name = "CACTOR_DA_" + current_time
os.makedirs(folder_name, exist_ok=True)
def save_plot(filename):
    """Save the current matplotlib figure into the run's output folder.

    Args:
        filename: File name (not a full path) of the image to write.

    The current figure is closed after saving.
    """
    target_path = os.path.join(folder_name, filename)
    plt.savefig(target_path)
    print(f"Graph saved as: {filename}")
    plt.close()
# Convert 'Date' column to datetime format so the `.dt` accessors used by
# the analyses in this script are available.
df['Date'] = pd.to_datetime(df['Date'])

# Create (or truncate) the text file that collects the analysis results.
# The encoding is pinned to UTF-8 so that writing non-ASCII author or branch
# names does not depend on the platform's locale default.
txt_file_path = os.path.join(folder_name, "analysis_results.txt")
txt_file = open(txt_file_path, "w", encoding="utf-8")
def save_to_txt(title, data, percentages=None):
    """Append one titled section to the shared analysis results file.

    Args:
        title: Heading written on the first line of the section.
        data: Values to record. When ``percentages`` is given it must expose
            ``.index`` and ``.values``; otherwise its string form is written.
        percentages: Optional iterable of percentages paired positionally
            with the entries of ``data``.
    """
    txt_file.write(f"{title}\n")
    if percentages is None:
        body = data
    else:
        # One "label: count (pct%)" string per entry; the list object itself
        # is formatted, so the file shows its Python list representation.
        body = [
            f"{label}: {count} ({share:.1f}%)"
            for label, count, share in zip(data.index, data.values, percentages)
        ]
    txt_file.write(f"{body}\n")
    txt_file.write("----------------------\n")
# 1. Commit Count by Author
# Pie chart of how many commits each author made, with a legend placed
# outside the plot area.
plt.figure(figsize=(12, 7))
author_counts = df['Author'].value_counts()
ax = author_counts.plot.pie(title='Commit Count by Author', autopct='%1.1f%%')
plt.legend(title='Authors', bbox_to_anchor=(1, 1), loc="upper left")
save_plot('commit_count_by_author.png')

# Compute each author's share of all commits to store alongside the counts
# in the text report.
total_commits = author_counts.sum()
author_percentages = (author_counts / total_commits) * 100
save_to_txt('Commit Count by Author', author_counts, author_percentages)
# 2. Commit Distribution Over Time
# Line chart of commits per calendar day, in chronological order.
plt.figure(figsize=(12, 7))
df['Day'] = df['Date'].dt.date  # calendar date of each commit, time dropped
commits_per_day = df['Day'].value_counts()
date_counts = commits_per_day.sort_index()
date_counts.plot.line(title='Commit Distribution Over Time')
save_plot('commit_distribution_over_time.png')
save_to_txt('Commit Distribution Over Time', date_counts)
# 3. Commit Distribution by Branch
# Pie chart of the number of commits recorded for each branch.
plt.figure(figsize=(12, 7))
branch_counts = df['Branch'].value_counts()
branch_counts.plot.pie(title='Commit Distribution by Branch', autopct='%1.1f%%')
save_plot('commit_distribution_by_branch.png')
save_to_txt('Commit Distribution by Branch', branch_counts)
# 4. Most Active Days
# Pie chart of commits grouped by the weekday name of the commit date.
plt.figure(figsize=(12, 7))
df['Weekday'] = df['Date'].dt.day_name()
weekday_counts = df['Weekday'].value_counts()
weekday_counts.plot.pie(title='Most Active Days', autopct='%1.1f%%')
save_plot('most_active_days.png')
save_to_txt('Most Active Days', weekday_counts)
# 5. Commit Message Analysis
# Pie chart of commits whose message mentions "merge". The match is
# case-insensitive, consistent with the other keyword checks in this script,
# so messages such as "Merge branch ..." are counted. Missing messages are
# treated as not containing the word.
plt.figure(figsize=(12, 7))
df['Contains_merge'] = df['Message'].str.contains('merge', case=False, na=False)
view_counts = df['Contains_merge'].value_counts()

# value_counts() orders its rows by frequency, not by False/True, and may
# contain only one of the two classes. Derive every slice label from the
# slice's own index value so a label can never be attached to the wrong slice.
merge_labels = {False: 'Without "merge"', True: 'With "merge"'}
view_counts.plot(
    kind='pie',
    title='Commits Containing "merge"',
    labels=[merge_labels[bool(flag)] for flag in view_counts.index],
    autopct='%1.1f%%',
)
save_plot('commit_message_analysis.png')
save_to_txt('Commits Containing "merge"', view_counts)
# 6. Commit Frequency
# Bar chart of commits per day; uses the 'Day' column derived from 'Date'
# earlier in this script.
plt.figure(figsize=(12, 7))
per_day_totals = df['Day'].value_counts()
commit_frequency = per_day_totals.sort_index()
commit_frequency.plot.bar(title='Commit Frequency')
plt.xticks(rotation=45, ha='right')  # slanted tick labels
plt.tight_layout()
save_plot('commit_frequency.png')
save_to_txt('Commit Frequency', commit_frequency)
# 7. Commit Length Analysis
# Histogram of commit message lengths (in characters) plus summary
# statistics for the text report.
plt.figure(figsize=(12, 7))
df['Message_Length'] = df['Message'].str.len()
message_lengths = df['Message_Length']
message_lengths.plot.hist(bins=20, title='Commit Length Analysis')
save_plot('commit_length_analysis.png')
save_to_txt('Commit Length Analysis', message_lengths.describe())
# 8. Unique Branch Contributors
# Bar chart of how many distinct authors committed to each branch.
plt.figure(figsize=(12, 7))
authors_by_branch = df.groupby('Branch')['Author']
unique_contributors = authors_by_branch.nunique()
unique_contributors.plot.bar(title='Unique Branch Contributors')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
save_plot('unique_branch_contributors.png')
save_to_txt('Unique Branch Contributors', unique_contributors)
# 9. Time Between Commits
# Bar chart of the mean gap, per author, between that author's consecutive
# commits.
plt.figure(figsize=(12, 7))
commits_by_time = df.sort_values('Date')
gap_to_previous = commits_by_time.groupby('Author')['Date'].diff()
# Assignment aligns on the row index, so sorting above does not reorder df.
df['Time_Diff'] = gap_to_previous.dt.total_seconds() / 3600  # in hours
avg_time_diff = df.groupby('Author')['Time_Diff'].mean()
avg_time_diff.plot.bar(title='Average Time Between Commits (in hours)')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
save_plot('time_between_commits.png')
save_to_txt('Average Time Between Commits (in hours)', avg_time_diff)
# 10. Commits Without Issues/Task Reference
# Pie chart splitting commits by whether the message mentions "issue" or
# "task" (case-insensitive). Missing messages are treated as having no
# reference.
plt.figure(figsize=(12, 7))
df['Contains_Issue'] = df['Message'].str.contains('issue|task', case=False, na=False)
issue_counts = df['Contains_Issue'].value_counts()

# value_counts() orders its rows by frequency and may contain only one of the
# two classes, so build the slice labels from the index instead of assuming
# that False always comes first.
issue_labels = {False: 'Without Issues/Task', True: 'With Issues/Task'}
issue_counts.plot(
    kind='pie',
    title='Commits Without Issues/Task Reference',
    labels=[issue_labels[bool(flag)] for flag in issue_counts.index],
    autopct='%1.1f%%',
)
save_plot('commits_without_issues.png')
save_to_txt('Commits Without Issues/Task Reference', issue_counts)
# 11. Author Collaboration
# Bar chart of the ten most frequent author groups, where a group is the set
# of authors who committed to the same branch on the same day. Uses the 'Day'
# column derived from 'Date' earlier in this script.
plt.figure(figsize=(12, 7))
collaboration_df = df.groupby(['Branch', 'Day'])['Author'].unique().reset_index()
# unique() keeps authors in order of first appearance, so the same group could
# show up as both "A, B" and "B, A". Sorting the names gives every group one
# canonical label and keeps its count in a single bucket.
collaboration_df['Collaborators'] = collaboration_df['Author'].apply(
    lambda authors: ', '.join(sorted(authors))
)
collaboration_counts = collaboration_df['Collaborators'].value_counts()
top_collaborations = collaboration_counts.head(10)
top_collaborations.plot(kind='bar', title='Top 10 Author Collaborations')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
save_plot('author_collaboration.png')
save_to_txt('Top 10 Author Collaborations', top_collaborations)
# 12. Commit Time Analysis
# Bar chart of commits per hour of the day, ordered by hour.
plt.figure(figsize=(12, 7))
df['Hour'] = df['Date'].dt.hour
commits_per_hour = df['Hour'].value_counts()
hourly_counts = commits_per_hour.sort_index()
ax = hourly_counts.plot.bar(title='Commit Time Analysis')
ax.legend(["Commits"])
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
save_plot('commit_time_analysis.png')
save_to_txt('Commit Time Analysis', hourly_counts)
# 13. Commit Trend Analysis
# Line chart of commits per day in chronological order; uses the 'Day'
# column derived from 'Date' earlier in this script.
plt.figure(figsize=(12, 7))
day_totals = df['Day'].value_counts()
daily_commits = day_totals.sort_index()
daily_commits.plot.line(title='Commit Trend Over Time')
save_plot('commit_trend_analysis.png')
save_to_txt('Commit Trend Over Time', daily_commits)
# 14. Commit Impact Analysis
# Pie chart bucketing commits by message length: below 50 characters is
# 'Low', below 100 is 'Medium', everything else is 'High'. Uses the
# 'Message_Length' column computed earlier in this script.
def _impact_from_length(length):
    """Map a message length to its 'Low' / 'Medium' / 'High' bucket."""
    if length < 50:
        return 'Low'
    if length < 100:
        return 'Medium'
    return 'High'


plt.figure(figsize=(12, 7))
df['Impact'] = df['Message_Length'].apply(_impact_from_length)
impact_counts = df['Impact'].value_counts()
impact_counts.plot.pie(
    title='Commit Impact Analysis (based on message length)',
    autopct='%1.1f%%',
)
save_plot('commit_impact_analysis.png')
save_to_txt('Commit Impact Analysis (based on message length)', impact_counts)
# 15. Keyword Alerts
# For every keyword, draw a pie chart of commits whose message does / does
# not contain it (case-insensitive substring match) and record the counts.
keywords = ['bug', 'fix', 'error', 'feature', 'update', 'refactor', 'add', 'delete', 'improve', 'docs']
for keyword in keywords:
    plt.figure(figsize=(12, 7))
    # Missing messages are treated as not containing the keyword.
    df[f'Contains_{keyword}'] = df['Message'].str.contains(keyword, case=False, na=False)
    keyword_counts = df[f'Contains_{keyword}'].value_counts()
    with_keyword_count = int(df[f'Contains_{keyword}'].sum())
    without_keyword_count = len(df) - with_keyword_count

    legend_text = {
        False: f'Without "{keyword}" ({without_keyword_count})',
        True: f'With "{keyword}" ({with_keyword_count})',
    }
    keyword_counts.plot(kind='pie', title=f'Commits With/Without "{keyword}"', labels=None, autopct='%1.1f%%', startangle=140)
    # The wedges follow the order of keyword_counts, which is sorted by
    # frequency and may hold a single class. Build the legend entries in that
    # same order so each entry describes the wedge it is attached to.
    plt.legend(labels=[legend_text[bool(flag)] for flag in keyword_counts.index], loc="best")
    save_plot(f'keyword_alerts_{keyword}.png')
    save_to_txt(f'Commits With/Without "{keyword}"', keyword_counts)
# Every section has been written; close the results file before reporting
# completion.
txt_file.close()
print(
    "\n--------------------------------------------\n"
    "CACTOR: Data analysis complete. Graphs and text file saved to directory successfully."
)