-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_dataset_analysis.py
62 lines (49 loc) · 3.08 KB
/
run_dataset_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python
import os
import sys
__author__ = "Javier Sanz-Cruzado (javier.sanz-cruzadopuig@glasgow.ac.uk)"
# Evaluation periods forwarded to the analysis scripts. Each entry is
# (start_date, end_date, arg_a, arg_b, arg_c, summary_index).
# The three numeric strings are passed through verbatim as positional
# arguments; their meaning is defined by dataset_analysis.py and
# customer_analysis.py, which are not visible from this script.
# NOTE(review): the original description mentions 61 dates split as 29 + 31,
# while the first period passes "28" -- confirm against the analysis scripts.
_PERIODS = (
    ("2019-08-01", "2021-02-26", "28", "13", "6", 1),
    ("2020-09-14", "2022-05-23", "31", "13", "6", 2),
)


def _build_commands(dataset_path, output_directory, period):
    """Return the ``(label, argv)`` pairs to execute for one analysis period.

    Args:
        dataset_path: directory holding ``transactions.csv``,
            ``close_prices.csv`` and ``limit_prices.csv``.
        output_directory: directory forwarded to both analysis scripts.
        period: one entry of ``_PERIODS``.

    Returns:
        A list with two ``(label, argv)`` tuples: the asset analysis first,
        then the customer analysis. ``argv`` is a list of strings suitable
        for ``subprocess.call`` without a shell.
    """
    start, end, arg_a, arg_b, arg_c, index = period

    # Obtain the routes for the interaction and time series files.
    interactions_file = os.path.join(dataset_path, "transactions.csv")
    time_series = os.path.join(dataset_path, "close_prices.csv")
    min_file = os.path.join(dataset_path, "limit_prices.csv")

    # Run the children with the interpreter that runs this script; fall back
    # to whatever "python" is on PATH if the interpreter path is unknown.
    python = sys.executable or "python"

    # Argument order matches the original command line: the output directory
    # is placed *before* the third numeric argument.
    shared_tail = ["range", start, end, arg_a, arg_b, output_directory, arg_c]

    asset_cmd = (
        [python, "./dataset_analysis.py", interactions_file, time_series]
        + shared_tail
        + ["assets_{}.csv".format(index)]
    )
    customer_cmd = (
        [python, "./customer_analysis.py", interactions_file, time_series, min_file]
        + shared_tail
        + ["customers_{}.csv".format(index)]
    )
    return [("asset", asset_cmd), ("customer", customer_cmd)]


def main(argv):
    """Run the basic dataset analysis over every configured period.

    This program takes the FAR-Trans dataset and, for each period in
    ``_PERIODS``, runs ``./dataset_analysis.py`` (assets) followed by
    ``./customer_analysis.py`` (customers). Both scripts are resolved
    relative to the current working directory.

    Input (``argv[1:]``):
        - Dataset path: the path on which the FAR-Trans dataset is stored.
          We assume file names have not been changed.
        - Output directory: the path on which the results will be stored
          (forwarded to the analysis scripts).

    Output (produced by the analysis scripts, as described by the original
    author):
        - For every tested date, two files will be created:
            - File 1: An asset file: assets_date.csv
              Format: col_item \\t current_price \\t future_price \\t ROI
              \\t Annualized ROI \\t Monthly ROI \\t Volatility
            - File 2: A customer file: customers_date.csv
              Format: customerID \\t buy_price \\t sell_price \\t ROI
              \\t Annualized ROI \\t Monthly ROI
        - Summary files: four summary files.
          Format: timestamp \\t stat1 \\t stat2 \\t ... \\t statN
            - assets_1.csv / customers_1.csv: first period summaries.
            - assets_2.csv / customers_2.csv: second period summaries.

    Args:
        argv: full argument vector, including the program name at index 0.

    Returns:
        0 on success, 2 when the arguments are invalid, 1 when any analysis
        step fails (the remaining steps are not executed).
    """
    # Imported here so this block stays self-contained; the file's top-level
    # imports only provide os and sys.
    import subprocess

    if len(argv) < 3:
        sys.stderr.write("ERROR: Invalid arguments\n")
        sys.stderr.write("\tdataset_path: route to the dataset.\n")
        sys.stderr.write("\toutput_dir: directory on which to store the results.\n")
        # Stop here: without both arguments there is nothing to run.
        return 2

    dataset_path = argv[1]
    output_directory = argv[2]

    # Execute the algorithms:
    for period in _PERIODS:
        span = period[0] + " to " + period[1]
        print("Starting analysis for period: " + span)
        # Flush so our progress lines stay ordered relative to child output.
        sys.stdout.flush()

        for label, command in _build_commands(dataset_path, output_directory, period):
            try:
                # argv list + no shell: paths with spaces or shell
                # metacharacters are passed through untouched.
                status = subprocess.call(command)
            except OSError as error:
                sys.stderr.write(
                    "Error when launching {} analysis for date {}: {}\n".format(
                        label, span, error
                    )
                )
                return 1
            if status != 0:
                sys.stderr.write(
                    "Error when executing {} analysis for date {}\n".format(label, span)
                )
                return 1

        print("End analysis for period: " + span)
        sys.stdout.flush()

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))