-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdiagnostics.py
81 lines (63 loc) · 2.36 KB
/
diagnostics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
import numpy as np
import pickle
import timeit
import ast
import subprocess
import os
import json
##################Load config.json and get environment variables
# Read the project configuration once, at import time.  The functions in
# this module resolve their working folders from the settings below.
with open('config.json') as f:
    config = json.loads(f.read())

# Path settings copied from the configuration.  os.path.join() with a single
# argument returns that argument unchanged, so these are the raw config values.
dataset_csv_path = os.path.join(config['output_folder_path'])
test_data_path = os.path.join(config['test_data_path'])
prod_path = os.path.join(config['prod_deployment_path'])
output_model = os.path.join(config['output_model_path'])
##################Function to get model predictions
def model_predictions(dataset):
    """Predict on ``dataset`` with the pickled model found in the model folder.

    The model folder is ``<current working directory>/<output_model>``.  When
    it contains several ``.pkl`` files, the last one listed by ``os.listdir``
    is the one that is loaded.

    Args:
        dataset: Input handed unchanged to the loaded model's ``predict``.

    Returns:
        str: ``str()`` of whatever ``model.predict(dataset)`` returns.

    Raises:
        FileNotFoundError: If the model folder does not exist or holds no
            ``.pkl`` file.
    """
    model_dir = os.getcwd() + '/' + output_model
    model_files = [name for name in os.listdir(model_dir)
                   if name.endswith('.pkl')]
    if not model_files:
        # Fail with a clear message instead of an unbound-variable error.
        raise FileNotFoundError('no .pkl model file found in ' + model_dir)

    # NOTE: unpickling can execute arbitrary code; the model folder must only
    # ever contain trusted artifacts.
    # The context manager closes the file handle even if loading fails.
    with open(os.path.join(model_dir, model_files[-1]), 'rb') as model_file:
        model = pickle.load(model_file)

    predictions = model.predict(dataset)
    return str(predictions)
##################Function to get summary statistics
def dataframe_summary():
    """Compute and print summary statistics for the dataset CSV.

    Looks in ``<current working directory>/<dataset_csv_path>`` and reads the
    last ``.csv`` entry listed by ``os.listdir``.  If the folder holds no CSV
    file, an empty frame with the expected column names is summarised
    instead.

    Returns:
        str: ``str([[mean, median, std], nan_fraction])`` where ``mean``,
        ``median`` and ``std`` are pandas Series over the numeric columns and
        ``nan_fraction`` is the per-column share of missing values.
    """
    data_dir = os.getcwd() + '/' + dataset_csv_path
    csv_files = [name for name in os.listdir(data_dir)
                 if name.endswith('.csv')]

    if csv_files:
        # Only the last listed CSV is summarised, so only that one is parsed.
        data = pd.read_csv(os.path.join(data_dir, csv_files[-1]))
    else:
        data = pd.DataFrame(
            columns=[
                'corporation',
                'lastmonth_activity',
                'lastyear_activity',
                'number_of_employees',
                'exited',
            ]
        )

    # numeric_only=True restricts the statistics to numeric columns.  Without
    # it, pandas >= 2.0 raises TypeError as soon as the frame contains a
    # non-numeric column (presumably 'corporation' holds text - confirm).
    summary_statistics = [
        data.mean(numeric_only=True, skipna=True),
        data.median(numeric_only=True, skipna=True),
        data.std(numeric_only=True, skipna=True),
    ]
    # Share of missing values per column (all columns, not just numeric).
    nan_perc = data.isna().sum() / data.shape[0]

    print('summary statistics: ', summary_statistics)
    print('Nan values percentage: ', nan_perc)
    return str([summary_statistics, nan_perc])
#################Function to get timings
def execution_time():
    """Time one run of ``training.py`` followed by one run of ``ingestion.py``.

    Each script is launched with ``python <script>`` as a child process and
    timed with ``timeit.default_timer``.  The child's exit status is not
    checked, so a failing script still yields a timing.

    Returns:
        str: ``str([training_seconds, ingestion_seconds])``; the same list is
        also printed.
    """
    elapsed = []
    # Order matters for the result layout: training first, then ingestion.
    for script in ('training.py', 'ingestion.py'):
        started = timeit.default_timer()
        subprocess.run(['python', script])
        elapsed.append(timeit.default_timer() - started)

    print(elapsed)
    return str(elapsed)
##################Function to check dependencies
def outdated_packages_list():
    """Run ``pip list --outdated`` and return the finished process object.

    The command's output is not captured: pip writes its report to the
    inherited stdout/stderr.  The returned ``subprocess.CompletedProcess``
    therefore carries the arguments and return code, not the package list.

    Returns:
        subprocess.CompletedProcess: Result of the pip invocation.
    """
    # NOTE(review): callers that need the package list as data would need
    # captured output - confirm how the return value is consumed.
    pip_command = ['pip', 'list', '--outdated']
    return subprocess.run(pip_command)
# if __name__ == '__main__':
# model_predictions()
# dataframe_summary()
# execution_time()
# outdated_packages_list()