-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #114 from prio-data/create_pgm_catalog_01
Create pgm catalog 01
- Loading branch information
Showing
9 changed files
with
275 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Regenerates the cm/pgm model catalogs whenever a model's meta/deployment
# config or a common queryset changes, then commits the result back to the
# branch that triggered the run.
name: Check for new model directories in views_pipeline/models

on:
  push:
    branches:
      - create_pgm_catalog_01 # for testing on this branch
      - production
      - development
    paths:
      - models/*/configs/config_deployment.py
      - models/*/configs/config_meta.py
      # A path filter must match the whole file path, so `**` is needed to
      # trigger on files inside the directory.
      - common_querysets/**
  workflow_dispatch: # for triggering manually

jobs:
  check-new-folder:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Generate catalog if models directory has changed
        run: |
          # The default shell runs with `-e`, so a separate `$?` check after
          # the command would never be reached on failure; test the command
          # directly so the error message is actually printed.
          if ! python documentation/catalogs/generate_model_catalog.py; then
            echo "Generating catalogs failed."
            exit 1
          fi
          echo "Model catalog is updated."
          git status
      - name: Configure Git
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "actions@github.com"
      - name: Commit and Push Changes
        run: |
          git add documentation/catalogs/cm_model_catalog.md documentation/catalogs/pgm_model_catalog.md
          git commit -m "Automated changes by GitHub Actions" || echo "Nothing to commit"
          git push https://${{ secrets.VIEWS_PIPELINE_ACCESS_TOKEN }}:x-oauth-basic@github.com/prio-data/views_pipeline.git
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,3 @@ | ||
| Model Name | Algorithm | Target | Input Features | Non-default Hyperparameters | Forecasting Type | Implementation Status | Implementation Date | Author | | ||
| ---------- | --------- | ------ | -------------- | --------------------------- | ---------------- | --------------------- | ------------------- | ------ | | ||
| fatalities002_baseline_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_baseline](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L24) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_gbm | GradientBoostingRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | n_estimators=200 | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_long_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history_long](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3101) | n_estimators=100, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_vdem_hurdle_xgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_vdem_short](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1213) | clf_name="XGBClassifier", reg_name="XGBRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_wdi_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_wdi_short](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1635) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_topics_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_topics_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | n_estimators=80, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_topics_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_broad_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_joint_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2098) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_broad_hurdle_rf | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2098) | clf_name="RFClassifier", reg_name="RFRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_narrow_xgb | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_narrow_hurdle_xgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | clf_name="XGBClassifier", reg_name="XGBRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_narrow_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_all_pca3_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_all_features](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3199) | n_estimators=100, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_aquastat_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_aquastat](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L647) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_faostat_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_faostat](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2705) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_faoprices_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_faoprices](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2955) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_imfweo_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_imfweo](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3021) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_Markov_glm | rf | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_Markov_rf | glm | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_baseline_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_baseline](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L34) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflictlong_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L110) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflictlong_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L110) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_escwa_drought_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L283) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_escwa_drought_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L283) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_natsoc_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L451) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_natsoc_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L451) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_broad_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L614) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_broad_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L614) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflict_history_xgb | xgb_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L770) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflict_treelag_hurdle | hur_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_treelag](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L1018) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflict_sptime_dist_hurdle | hur_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_sptime_dist](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L1061) | None | Direct multi-step | no | NA | NA | | ||
| electric_relaxation | RandomForestClassifier | ged_sb_dep | - [escwa001_cflong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_electric_relaxation.py) | - [hyperparameters electric_relaxation](https://github.com/prio-data/views_pipeline/blob/main/models/electric_relaxation/configs/config_hyperparameters.py) | None | shadow | NA | Sara | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
import os | ||
import logging | ||
logging.basicConfig( | ||
level=logging.ERROR, format="%(asctime)s %(name)s - %(levelname)s - %(message)s" | ||
) | ||
logger = logging.getLogger(__name__) | ||
|
||
import sys | ||
from pathlib import Path | ||
|
||
# Locate the repository root: the deepest ancestor directory of this file
# that is named "views_pipeline".
PATH = Path(__file__).resolve()
indices = [i for i, x in enumerate(PATH.parts) if x == "views_pipeline"]
if not indices:
    # Fail with an explicit message instead of an opaque IndexError below.
    raise RuntimeError(
        f"Could not find a 'views_pipeline' directory in the path of {PATH}"
    )
PATH_ROOT = Path(*PATH.parts[:indices[-1] + 1])

# Make the repository root and its common_utils package importable, so that
# the `model_path` module imported below can be resolved.
sys.path.insert(0, str(PATH_ROOT))
sys.path.insert(0, str(PATH_ROOT / "common_utils"))
|
||
from model_path import ModelPath | ||
|
||
GITHUB_URL = 'https://github.com/prio-data/views_pipeline/blob/main/' | ||
|
||
|
||
|
||
|
||
|
||
def _call_config_function(config_path, function_name):
    """Execute a config file and return the result of calling one of its functions.

    Parameters:
        config_path: path of the Python config file to execute
        function_name: name of the zero-argument function defined in that file
    Returns:
        Whatever ``function_name()`` returns.
    Raises:
        KeyError: if the file does not define ``function_name``.
    """
    with open(config_path, 'r', encoding='utf-8') as file:
        code = file.read()

    # One dict is used as both globals and locals. With two separate dicts,
    # exec behaves like a class body: functions defined in the file cannot see
    # the file's own top-level imports/constants and fail with NameError.
    namespace = {}
    # NOTE(review): exec runs whatever code the config file contains; this is
    # only appropriate for trusted, repository-local config files.
    exec(compile(code, str(config_path), 'exec'), namespace)
    return namespace[function_name]()


def extract_models(model_class):
    """
    Build a dictionary describing a model by merging the dictionaries returned by
    config_meta.py and config_deployment.py, and adding a link to config_hyperparameters.py.
    Config files that do not exist are skipped.
    Parameters:
        model_class: object exposing `configs`, `queryset_path`, `model_name` and `get_scripts()`
                     (a ModelPath instance in this script)
    Returns:
        model_dict: a dictionary containing:
            -every key returned by get_meta_config() in config_meta.py
            -queryset: markdown link built from the meta 'queryset' value and model_class.queryset_path,
                       or the string 'None' when the meta config has no 'queryset' key
            -every key returned by get_deployment_config() in config_deployment.py
            -hyperparameters: markdown link with marker 'hyperparameters <model_name>' pointing to the
                              model specific config_hyperparameters.py (only when that file exists)
    Raises:
        KeyError: if a config file exists but does not define its expected getter function
    """
    model_dict = {}
    config_meta = os.path.join(model_class.configs, 'config_meta.py')
    config_deployment = os.path.join(model_class.configs, 'config_deployment.py')
    config_hyperparameters = os.path.join(model_class.configs, 'config_hyperparameters.py')

    if os.path.exists(config_meta):
        logging.info(f"Found meta config: {config_meta}")
        model_dict.update(_call_config_function(config_meta, 'get_meta_config'))
        if 'queryset' in model_dict:
            model_dict['queryset'] = create_link(model_dict['queryset'], model_class.queryset_path)
        else:
            model_dict['queryset'] = 'None'

    if os.path.exists(config_deployment):
        logging.info(f"Found deployment config: {config_deployment}")
        model_dict.update(_call_config_function(config_deployment, 'get_deployment_config'))

    if os.path.exists(config_hyperparameters):
        logging.info(f"Found hyperparameters config: {config_hyperparameters}")
        model_dict['hyperparameters'] = create_link(
            f"hyperparameters {model_class.model_name}",
            Path(model_class.get_scripts()['config_hyperparameters.py']),
        )

    return model_dict
|
||
|
||
|
||
def create_link(marker, filepath: Path) -> str:
    """
    Generates a markdown-formatted link to a file, built from GITHUB_URL and the file's path relative to ModelPath.get_root().
    Parameters:
        marker: text displayed as the clickable part of the markdown link
        filepath: absolute path of the file; must be located under ModelPath.get_root()
    Returns:
        str: A markdown link in the format `- [marker](GITHUB_URL + relative_filepath)`
    Raises:
        ValueError: if filepath is not located under ModelPath.get_root()
    """
    relative_path = Path(filepath).relative_to(ModelPath.get_root())
    # as_posix() guarantees forward slashes in the URL; str() of a path would
    # yield backslashes on Windows and produce a broken link.
    return f'- [{marker}]({GITHUB_URL}{relative_path.as_posix()})'
|
||
|
||
|
||
def _format_cell(value):
    """Render one table cell as text: None -> '', list/tuple -> comma separated, else str()."""
    if value is None:
        return ''
    if isinstance(value, (list, tuple)):
        return ', '.join(str(item) for item in value)
    return str(value)


def generate_markdown_table(models_list):
    """
    Function to generate a markdown table from the model dictionaries.
    Parameters:
        models_list: list of model dictionaries. The keys read are 'name', 'algorithm',
                     'depvar' (falling back to 'target(S)' when 'depvar' is missing or empty),
                     'queryset', 'hyperparameters', 'deployment_status' and 'creator'.
                     Missing keys produce empty cells.
    Returns:
        markdown_table: the table as one string (header row, separator row, one row per
                        model), every line terminated by a newline
    """
    headers = ['Model Name', 'Algorithm', 'Target', 'Input Features', 'Non-default Hyperparameters', 'Forecasting Type', 'Implementation Status', 'Implementation Date', 'Author']

    lines = [
        '| ' + ' | '.join(headers) + ' |',
        # Separator row: one run of dashes per column, as wide as its header.
        '| ' + ' | '.join('-' * len(header) for header in headers) + ' |',
    ]

    for model in models_list:
        # 'depvar' holds the target; fall back to 'target(S)' when it is absent or empty.
        target = model.get('depvar') or model.get('target(S)', '')
        row = [
            model.get('name', ''),
            # Keep only the algorithm name, dropping any "(...)" argument list.
            str(model.get('algorithm', '')).split('(')[0],
            target,
            model.get('queryset', ''),
            model.get('hyperparameters', ''),
            'None',  # Forecasting Type is hard-coded for every model
            model.get('deployment_status', ''),
            'NA',  # Implementation Date is hard-coded for every model
            model.get('creator', ''),
        ]
        # Cells are converted to text first so that non-string config values
        # (lists, None, numbers) cannot make str.join raise TypeError.
        lines.append('| ' + ' | '.join(_format_cell(cell) for cell in row) + ' |')

    return '\n'.join(lines) + '\n'
|
||
|
||
|
||
|
||
if __name__ == "__main__":
    # Build one catalog per level of analysis ('pgm' and 'cm') from every
    # model directory found under <repo root>/models.
    models_dir = PATH_ROOT / 'models'
    # Anchor output paths to the repository root so the script writes to the
    # same files regardless of the current working directory.
    catalogs_dir = PATH_ROOT / 'documentation' / 'catalogs'

    models_list_cm = []
    models_list_pgm = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the generated catalogs stable between runs, which avoids
    # spurious diffs in the committed markdown files.
    for model_name in sorted(os.listdir(models_dir)):
        if not os.path.isdir(models_dir / model_name):
            continue

        model_class = ModelPath(model_name, validate=True)
        model = extract_models(model_class)

        # Models without a recognised 'level' are left out of both catalogs.
        level = model.get('level')
        if level == 'pgm':
            models_list_pgm.append(model)
        elif level == 'cm':
            models_list_cm.append(model)

    markdown_table_pgm = generate_markdown_table(models_list_pgm)
    with open(catalogs_dir / 'pgm_model_catalog.md', 'w', encoding='utf-8') as f:
        f.write(markdown_table_pgm)

    markdown_table_cm = generate_markdown_table(models_list_cm)
    with open(catalogs_dir / 'cm_model_catalog.md', 'w', encoding='utf-8') as f:
        f.write(markdown_table_cm)
|
||
|
Oops, something went wrong.