@@ -23,8 +23,9 @@ def empty_df():
         "adj_close", "volume", "currency", "provider"])


-def get_tickers():
-    ticker_map = pd.read_csv(SIGNALS_TICKER_MAP)
+def get_tickers(ticker_map):
+    if ticker_map is None:
+        ticker_map = pd.read_csv(SIGNALS_TICKER_MAP)
     ticker_map = ticker_map.dropna(subset=['yahoo'])
     logger.info(f'Number of eligible tickers: {ticker_map.shape[0]}')

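For reference, a minimal sketch of how the reworked get_tickers can be called, assuming pandas is imported as pd and that 'bloomberg_ticker' and 'yahoo' are the columns the map carries (as the surrounding code suggests); the example rows are hypothetical:

# Default path: passing None still loads the map from SIGNALS_TICKER_MAP.
ticker_map = get_tickers(None)

# Injection path: a pre-loaded (e.g. filtered) map skips the CSV read entirely.
custom_map = pd.DataFrame({
    'bloomberg_ticker': ['AAPL US Equity', 'MSFT US Equity'],  # hypothetical rows
    'yahoo': ['AAPL', 'MSFT'],
})
ticker_map = get_tickers(custom_map)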
@@ -101,15 +102,18 @@ def get_data(
         db_dir,
         features_generators=None,
         last_friday=datetime.today() - relativedelta(weekday=FR(-1)),
-        target='target_20d'):
+        target='target_20d',
+        ticker_map=None):
     """generate data set"""

     if features_generators is None:
         features_generators = []

     ticker_data = get_ticker_data(db_dir)
-
-    ticker_universe = pd.read_csv(SIGNALS_UNIVERSE)
+    if ticker_map is None:
+        ticker_universe = pd.read_csv(SIGNALS_UNIVERSE)
+    else:
+        ticker_universe = ticker_map
     ticker_data = ticker_data[ticker_data.bloomberg_ticker.isin(
         ticker_universe['bloomberg_ticker'])]

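A short usage sketch of the extended get_data signature (the db path and the one-column universe frame are hypothetical; the keyword name follows the diff):

from pathlib import Path

# ticker_map=None keeps the old behaviour and reads SIGNALS_UNIVERSE;
# a DataFrame with a 'bloomberg_ticker' column restricts the data set instead.
universe = pd.DataFrame({'bloomberg_ticker': ['AAPL US Equity', 'MSFT US Equity']})
data = get_data(Path('db'), ticker_map=universe)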
@@ -216,15 +220,15 @@ def download_tickers(tickers, start, download_ticker):
     return pd.concat(dfs)


-def download_data(db_dir, download_ticker, recreate=False):
+def download_data(db_dir, download_ticker, recreate=False, ticker_map=None):
     if recreate:
         logging.warning(f'Removing dataset {db_dir} to recreate it')
         shutil.rmtree(db_dir, ignore_errors=True)

     db_dir.mkdir(exist_ok=True)

     ticker_data = get_ticker_data(db_dir)
-    ticker_map = get_tickers()
+    ticker_map = get_tickers(ticker_map)
     ticker_missing = get_ticker_missing(ticker_data, ticker_map)

     n_ticker_missing = ticker_missing.shape[0]
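The same pattern closes the loop in download_data; a sketch assuming download_ticker is one of the provider downloaders defined elsewhere in this module, reusing the hypothetical custom_map from above:

# With ticker_map=None, get_tickers falls back to SIGNALS_TICKER_MAP as before;
# passing custom_map downloads only the tickers it contains.
download_data(Path('db'), download_ticker, recreate=False, ticker_map=custom_map)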