Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Time estimates workflow #48

Merged
merged 9 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
309 changes: 307 additions & 2 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ click = "^8.1.7"
tomlkit = "^0.13.0"
cml-pam = "0.3.2"
gdal = "<=3.8.4"
pandera = "^0.20.4"

[tool.poetry.dev-dependencies]
pytest = ">= 6"
Expand Down
80 changes: 29 additions & 51 deletions scripts/3.1_assign_primary_feasible_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,69 +78,45 @@ def main(config_file):
# are compared to the travel times of the individual's actual trips from the nts
# (`tst`/`TripStart` and `tet`/`TripEnd`)

logger.info("Loading travel time matrix")

travel_times = pd.read_parquet(
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix_acbm.parquet"
)

logger.info("Travel time matrix loaded")

logger.info("Merging travel time matrix with boundaries")

# convert from_id and to_id to int to match the boundaries data type
travel_times = travel_times.astype({"from_id": int, "to_id": int})

# merge travel_times with boundaries
travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="from_id",
right_on="OBJECTID",
how="left",
)
travel_times = travel_times.drop(columns="OBJECTID")

travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="to_id",
right_on="OBJECTID",
how="left",
suffixes=("_from", "_to"),
)
travel_times = travel_times.drop(columns="OBJECTID")

# #### Travel distance matrix
#
# Some areas aren't reachable by specific modes. We create a travel distance matrix
# to fall back on when there are no travel time calculations

logger.info("Creating travel time estimates")

travel_time_estimates = zones_to_time_matrix(
zones=boundaries, id_col=config.zone_id, to_dict=True
# TODO: move to config
travel_time_matrix_path = (
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix.parquet"
)

with open(
acbm.root_path / "data/interim/assigning/travel_time_estimates.pkl", "wb"
) as f:
pkl.dump(travel_time_estimates, f)

logger.info("Travel time estimates created")
if config.parameters.travel_times:
logger.info("Loading travel time matrix")
try:
travel_times = pd.read_parquet(travel_time_matrix_path)
print("Travel time matrix loaded successfully.")
except Exception as e:
logger.info(
f"Failed to load travel time matrix: {e}. Check that you have a "
"travel_times matrix at {travel_time_matrix_path}. Otherwise set "
"travel_times to false in config"
)
raise e
else:
    # If the travel_times config flag is false, create a new travel time matrix
logger.info("No travel time matrix found. Creating a new travel time matrix.")
# Create a new travel time matrix based on distances between zones
travel_times = zones_to_time_matrix(zones=boundaries, id_col="OA21CD")
logger.info("Travel time estimates created")

# --- Intrazonal trip times
#
    # Intrazonal trips all have time = 0. Our `get_possible_zones` function finds zones
    # that are within a specified % threshold from the reported time in the NTS.
    # A threshold percentage of a non-zero number never equals 0, so intrazonal trips
    # are never matched. The problem is also explained in issue #30.
#

# Below, we assign intrazonal trips a non-zero time based on the zone area

# get intrazone travel time estimates per mode

logger.info("Creating intrazonal travel time estimates")

intrazone_times = intrazone_time(boundaries.set_index("OBJECTID"))
# TODO: use config zone_id instead of OA21CD
intrazone_times = intrazone_time(zones=boundaries, key_column="OA21CD")

logger.info("Intrazonal travel time estimates created")

Expand All @@ -150,7 +126,7 @@ def main(config_file):
travel_times = replace_intrazonal_travel_time(
travel_times=travel_times,
intrazonal_estimates=intrazone_times,
column_to_replace="travel_time_p50",
column_to_replace="time",
)

logger.info("Intrazonal travel times replaced")
Expand Down Expand Up @@ -223,10 +199,11 @@ def main(config_file):
activity_chains=activity_chains_edu,
travel_times=travel_times,
activities_per_zone=activities_per_zone,
boundaries=boundaries,
key_col="id",
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="education_type",
zone_id=config.zone_id,
time_tolerance=0.3,
)

Expand All @@ -249,10 +226,11 @@ def main(config_file):
activity_chains=activity_chains_work,
travel_times=travel_times,
activities_per_zone=activities_per_zone,
boundaries=boundaries,
key_col="id",
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="dact",
zone_id=config.zone_id,
time_tolerance=0.3,
)

Expand Down
73 changes: 27 additions & 46 deletions scripts/3.2.3_assign_secondary_zone.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
logger.info("Analysis (matrices): Step 1 - Loading travel time data")

travel_times = pd.read_parquet(
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix_acbm.parquet"
acbm.root_path / "data/external/travel_times/oa/travel_time_matrix.parquet"
)

# Edit modes
Expand All @@ -323,44 +323,25 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

# I will do 2 for now

# keep only the rows that match specific "combination" values
modes_to_use = ["car", "walk", "cycle", "pt_wkday_morning"]

# Filter the DataFrame
travel_times = travel_times[travel_times["combination"].isin(modes_to_use)]

# Rename specific values in "combination" column
travel_times["combination"] = travel_times["combination"].replace(
{"cycle": "bike", "pt_wkday_morning": "pt"}
)

# Add OA21CD
# TODO: move this upstream and delete from here
logger.info("Analysis (matrices): Step 3 - Adding OA21CD to travel time data")

# convert from_id and to_id to int to match the boundaries data type
travel_times = travel_times.astype({"from_id": int, "to_id": int})

# merge travel_times with boundaries
travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="from_id",
right_on="OBJECTID",
how="left",
)
travel_times = travel_times.drop(columns="OBJECTID")
# Check if 'time_of_day' column exists (this implies we have travel times for PT by time of day - ie travel times have not
# been generated by zones_to_time_matrix() function)
# TODO: just replace with time estimates from zones_to_time_matrix() function
if "time_of_day" in travel_times.columns:
# Apply filtering logic
travel_times = travel_times[
(travel_times["mode"] != "pt")
| (
(travel_times["mode"] == "pt")
& (travel_times["time_of_day"] == "morning")
& (travel_times["weekday"] == 1)
)
]

travel_times = travel_times.merge(
boundaries[["OBJECTID", config.zone_id]],
left_on="to_id",
right_on="OBJECTID",
how="left",
suffixes=("_from", "_to"),
)
travel_times = travel_times.drop(columns="OBJECTID")
# Rename specific values in "mode" column
travel_times["mode"] = travel_times["mode"].replace({"cycle": "bike"})

# --- Calculate OD probabilities (probabilities of choosing a destination zone for an activity, given the origin zone)
logger.info("Analysis (matrices): Step 4 - Calculating OD probabilities")
logger.info("Analysis (matrices): Step 3 - Calculating OD probabilities")

activities_per_zone = pd.read_parquet(
acbm.root_path / "data/interim/assigning/activities_per_zone.parquet"
Expand All @@ -385,14 +366,14 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

    # Calculate the visit_probability: it is a function of floor_area and travel time
merged_df["visit_prob"] = np.where(
merged_df["travel_time_p50"] != 0, # avoid division by zero
round(merged_df["floor_area"] / np.sqrt(merged_df["travel_time_p50"])),
merged_df["time"] != 0, # avoid division by zero
round(merged_df["floor_area"] / np.sqrt(merged_df["time"])),
round(merged_df["floor_area"]),
)

# --- Create matrices for travel times and OD probabilities
logger.info(
"Analysis (matrices): Step 5 - Creating matrices for travel times and OD probabilities"
"Analysis (matrices): Step 4 - Creating matrices for travel times and OD probabilities"
)

# Get unique zone labels for matrix
Expand All @@ -409,8 +390,8 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

matrix_travel_times = create_od_matrices(
df=merged_df,
mode_column="combination",
value_column="travel_time_p50",
mode_column="mode",
value_column="time",
zone_labels=zone_labels,
fill_value=300, # replace missing travel times with 6 hours (they are unreachable)
zone_from=config.origin_zone_id(zone_id),
Expand All @@ -419,7 +400,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr

matrix_od_probs = create_od_matrices(
df=merged_df,
mode_column="combination",
mode_column="mode",
value_column="visit_prob",
zone_labels=zone_labels,
# replace missing probabilities with 1. There are no activities so shouldn't be visited
Expand All @@ -431,9 +412,9 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
)

# Create ODMatrix objects
logger.info("Analysis (matrices): Step 6 - Creating ODMatrix objects")
logger.info("Analysis (matrices): Step 5 - Creating ODMatrix objects")

mode_types = travel_times["combination"].unique()
mode_types = travel_times["mode"].unique()

matrices_pam_travel_time = [
ODMatrix("time", mode, zone_labels, zone_labels, matrix_travel_times[mode])
Expand All @@ -449,7 +430,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
matrices_pam_all = matrices_pam_travel_time + matrices_pam_od_probs

# create ODFactory
logger.info("Analysis (matrices): Step 7 - Creating ODFactory object")
logger.info("Analysis (matrices): Step 6 - Creating ODFactory object")

od = ODFactory.from_matrices(matrices=matrices_pam_all)

Expand All @@ -459,7 +440,7 @@ def merge_columns_from_other(df: pd.DataFrame, other: pd.DataFrame) -> pd.DataFr
update_population_plans(population, od)

# --- Save
logger.info("Saving: Step 9 - Saving population")
logger.info("Saving: Step 7 - Saving population")

write.to_csv(population, dir=(acbm.root_path / "data/processed/activities_pam"))

Expand Down
Loading
Loading