diff --git a/QC_salesval_nbhds_round2.xlsx b/QC_salesval_nbhds_round2.xlsx new file mode 100644 index 00000000..178d804a Binary files /dev/null and b/QC_salesval_nbhds_round2.xlsx differ diff --git a/manual_flagging/initial_flagging.py b/manual_flagging/initial_flagging.py index 12144801..91ee3b55 100755 --- a/manual_flagging/initial_flagging.py +++ b/manual_flagging/initial_flagging.py @@ -84,6 +84,7 @@ sale.doc_no AS meta_sale_document_num, sale.seller_name AS meta_sale_seller_name, sale.buyer_name AS meta_sale_buyer_name, + sale.nbhd as nbhd, sale.sale_filter_ptax_flag AS ptax_flag_original, data.class, data.township_code, @@ -145,12 +146,20 @@ if tri in inputs["run_tri"] } -# Create age column if we will need it later on +# Handle current methodology data manipulation if needed if "current" in tri_stat_groups.values(): # Calculate the building's age current_year = datetime.datetime.now().year df["bldg_age"] = current_year - df["yrblt"] + # Ingest new geographic groups + df_new_groups = pd.read_excel( + os.path.join(root, "QC_salesval_nbhds_round2.xlsx"), + usecols=["Town Nbhd", "Town Grp 1"], + ).rename(columns={"Town Nbhd": "nbhd", "Town Grp 1": "geography_split"}) + + df = pd.merge(df, df_new_groups, on="nbhd", how="left") + dfs_to_feature_creation = {} # Dictionary to store DataFrames