From 65ef8b7b9dec25598005db7cdaeee5fcc1a52a87 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Mon, 19 Jun 2023 08:45:03 -0700 Subject: [PATCH 01/16] Updating sampler and zone aggregator to include study area TAZs --- rsm/sampler.py | 2 +- rsm/utility.py | 59 ++++++++++++++++++++++++++++++++++ scripts/rsm_sampler.py | 18 +++++++++++ scripts/rsm_zone_aggregator.py | 11 ++++++- 4 files changed, 88 insertions(+), 2 deletions(-) diff --git a/rsm/sampler.py b/rsm/sampler.py index 15ccfc4..8292070 100644 --- a/rsm/sampler.py +++ b/rsm/sampler.py @@ -138,7 +138,7 @@ def _resolve_out_filename(x): mgra_hh["sampling_rate"] = default_sampling_rate if study_area is not None: - mgra_hh.loc[mgra_hh.index.isin(study_area), "sample_rate"] = 1 + mgra_hh.loc[mgra_hh.index.isin(study_area), "sampling_rate"] = 1 sample_households = [] diff --git a/rsm/utility.py b/rsm/utility.py index 44ae176..86fa66d 100644 --- a/rsm/utility.py +++ b/rsm/utility.py @@ -230,3 +230,62 @@ def _density_function(mgra_in): mgra_data = mgra_data.fillna(0) return mgra_data + + +def scaleup_to_rsm_samplingrate(df, scale_factor): + """ + scales up the tour, trips, household, person data files based on the sampling rate. + + """ + final_df = pd.DataFrame( + np.repeat(df.values, scale_factor, axis=0), + columns=df.columns + ) + + return final_df + +def check_column_names(df, columns): + """ + Check column names of study area file + """ + df_columns = df.columns.tolist() + if set(columns) != set(df_columns): + raise ValueError("Column names do not match the expected column names : taz and group. Please fix the column names") + return True + +def create_list_study_area_taz(study_area_file): + """ + Creates list[int or list] based on the values of the group column + """ + + try: + df = pd.read_csv(study_area_file) + columns_to_check = ['taz', 'group'] + match = check_column_names(df, columns_to_check) + + except ValueError as e: + print("Error:", str(e)) + logger.info("Error:", str(e)) + return None + + grouped_taz = df.groupby('group')['taz'].apply(list).values.tolist() + + return grouped_taz + + +def find_rsm_zone_of_study_area(study_area_file, taz_crosswalk): + """ + finds the RSM zones for the study area using the TAZ crosswalks + """ + + try: + df = pd.read_csv(study_area_file) + taz_cwk = pd.read_csv(taz_crosswalk) + study_area_taz = set(df['taz']) + rsm_zone = set(taz_cwk.loc[taz_cwk['taz'].isin(study_area_taz), 'cluster_id']) + + except Exception as e: + logger.info("Error in identifying RSM zone for study area:", str(e)) + return None + + return list(rsm_zone) \ No newline at end of file diff --git a/scripts/rsm_sampler.py b/scripts/rsm_sampler.py index 5d7229a..666f8b3 100644 --- a/scripts/rsm_sampler.py +++ b/scripts/rsm_sampler.py @@ -31,6 +31,7 @@ ABM_PROPERTIES_FOLDER = os.path.join(rsm_dir, "conf") ABM_PROPERTIES = os.path.join(ABM_PROPERTIES_FOLDER, "sandag_abm.properties") INPUT_RSM_DIR = os.path.join(rsm_dir, "input") +EXPILICT_AGG_TAZ = os.path.join(rsm_dir, "input", "study_area.csv") # output files OUTPUT_RSM_DIR = os.path.join(rsm_dir, "output") @@ -69,11 +70,28 @@ logging.info(f"Current Iteration Accessibility File: {CURR_ITER_ACCESS}") logging.info(f"Previous Iteration Accessibility File: {PREV_ITER_ACCESS}") +if os.path.exists(EXPILICT_AGG_TAZ): + logging.info(f"Study Area file: {EXPILICT_AGG_TAZ}") + study_area_taz = find_rsm_zone_of_study_area(EXPILICT_AGG_TAZ, OUTPUT_TAZ_CROSSWALK) + if study_area_taz is not None: + # Process the result + sa_taz = study_area_taz + logger.info(f"RSM Zone identified for the Study area are : {sa_taz}", ) + else: + # Handle the error + logger.info("Please check the study area file. 'taz' column is expected in the file to find the corresponding RSM zone.") + sa_taz = None + +else: + sa_taz = None + + rsm_household_sampler( input_dir=rsm_dir, output_dir=rsm_dir, prev_iter_access=PREV_ITER_ACCESS, curr_iter_access=CURR_ITER_ACCESS, + study_area=sa_taz, input_household=FULL_ABM_SYNTH_HOUSHOLDS, input_person=FULL_ABM_SYNTH_PERSONS, taz_crosswalk=OUTPUT_TAZ_CROSSWALK, diff --git a/scripts/rsm_zone_aggregator.py b/scripts/rsm_zone_aggregator.py index 3fe8f8d..a061c31 100644 --- a/scripts/rsm_zone_aggregator.py +++ b/scripts/rsm_zone_aggregator.py @@ -47,7 +47,8 @@ FULL_ABM_TRIP_DIR = os.path.join(full_model_dir, "output") FULL_ABM_SYNTH_HOUSHOLDS = os.path.join(full_model_dir, "input", "households.csv") FULL_ABM_SYNTH_PERSONS = os.path.join(full_model_dir, "input", "persons.csv") -EXPLICIT_ZONE_AGG = [] +EXPILICT_AGG_TAZ = os.path.join(rsm_main_dir, "input", "study_area.csv") + #output files RSM_ABM_PROPERTIES = os.path.join(rsm_main_dir, "conf", "sandag_abm.properties") @@ -60,6 +61,14 @@ ) logging.info("start logging rsm_zone_aggregator") + +# prepare list of MGRA that should not be aggregated or grouped together +logging.info("Check if the study area file exists in the RSM input folder") +if os.path.exists(EXPILICT_AGG_TAZ): + EXPLICIT_ZONE_AGG = create_list_study_area_mgra(EXPILICT_AGG_TAZ) +else: + EXPLICIT_ZONE_AGG = [] + # # Zone Aggregation # From b1912b59bf35f898748a2c5077d0ff600b0bd1f7 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Mon, 19 Jun 2023 08:50:34 -0700 Subject: [PATCH 02/16] Scaling up individual, joint, household and person file based on sampling rate --- rsm/assembler.py | 51 +++++++++++++++++++++++++++++++--------- scripts/rsm_assembler.py | 30 ++++++++++++++++++++--- 2 files changed, 67 insertions(+), 14 deletions(-) diff --git a/rsm/assembler.py b/rsm/assembler.py index c7f794a..2f22f84 100644 --- a/rsm/assembler.py +++ b/rsm/assembler.py @@ -3,6 +3,7 @@ from pathlib import Path import numpy as np import pandas as pd +from rsm.utility import * logger = logging.getLogger(__name__) @@ -28,6 +29,10 @@ def rsm_assemble( orig_joint, rsm_indiv, rsm_joint, + rsm_indiv_tour, + rsm_joint_tour, + rsm_household, + rsm_person, households, mgra_crosswalk=None, sample_rate=0.25, @@ -51,6 +56,17 @@ def rsm_assemble( Trips table from RSM model run, should be a simulation of all joint trips for potentially only a subset of all synthetic households (the same sampled households as in `rsm_indiv`). + rsm_indiv_tour : path-like + Tours table from RSM model run, should be a simulation of all individual + tous for potentially only a subset of all synthetic households. + rsm_joint_tour : path-like + Tours table from RSM model run, should be a simulation of all joint + tous for potentially only a subset of all synthetic households (the + same sampled households as in `rsm_indiv_tour`). + rsm_household: path-like + Households from RSM model run, should be sumulation of all sampled households. + rsm_person: path-like + Persons from RSM mdoel run, should be simulation of all sampled persons. households : path-like Synthetic household file, used to get home zones for households. mgra_crosswalk : path-like, optional @@ -80,6 +96,10 @@ def rsm_assemble( orig_joint = Path(orig_joint).expanduser() rsm_indiv = Path(rsm_indiv).expanduser() rsm_joint = Path(rsm_joint).expanduser() + rsm_indiv_tour = Path(rsm_indiv_tour).expanduser() + rsm_joint_tour = Path(rsm_joint_tour).expanduser() + rsm_household = Path(rsm_household).expanduser() + rsm_person = Path(rsm_person).expanduser() households = Path(households).expanduser() assert os.path.isfile(orig_indiv) @@ -97,6 +117,22 @@ def rsm_assemble( ind_trips_rsm = pd.read_csv(rsm_indiv) logger.info("reading jnt_trips_rsm") jnt_trips_rsm = pd.read_csv(rsm_joint) + logger.info("reading ind_tours_rsm") + ind_tours_rsm = pd.read_csv(rsm_indiv_tour) + logger.info("reading jnt_tours_rsm") + jnt_tours_rsm = pd.read_csv(rsm_joint_tour) + logger.info("reading household_rsm") + household_rsm = pd.read_csv(rsm_household) + logger.info("reading person_rsm") + person_rsm = pd.read_csv(rsm_person) + + scale_factor = int(1.0/sample_rate) + + # tours, household and person file are scaled up based on sampling rate + final_ind_tours = scaleup_to_rsm_samplingrate(ind_tours_rsm, scale_factor) + final_jnt_tours = scaleup_to_rsm_samplingrate(jnt_tours_rsm, scale_factor) + final_household = scaleup_to_rsm_samplingrate(household_rsm, scale_factor) + final_person = scaleup_to_rsm_samplingrate(person_rsm, scale_factor) if run_assembler == 1: # load trip data - full simulation of residual/source model @@ -213,14 +249,7 @@ def _agg_by_hhid_and_tripmode(df, name): #final_ind_trips = pd.concat([ind_trips_rsm]*scale_factor, ignore_index=True) #final_jnt_trips = pd.concat([jnt_trips_rsm]*scale_factor, ignore_index=True) - final_ind_trips = pd.DataFrame( - np.repeat(ind_trips_rsm.values, scale_factor, axis=0), - columns=ind_trips_rsm.columns - ) - - final_jnt_trips = pd.DataFrame( - np.repeat(jnt_trips_rsm.values, scale_factor, axis=0), - columns=jnt_trips_rsm.columns - ) - - return final_ind_trips, final_jnt_trips + final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm, scale_factor) + final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, scale_factor) + + return final_ind_trips, final_jnt_trips, final_ind_tours, final_jnt_tours, final_household, final_person diff --git a/scripts/rsm_assembler.py b/scripts/rsm_assembler.py index 85d6481..c44fb55 100644 --- a/scripts/rsm_assembler.py +++ b/scripts/rsm_assembler.py @@ -32,6 +32,12 @@ ORG_JOINT_TRIPS = os.path.join(org_model_dir, "output", "jointTripData_3.csv") RSM_INDIV_TRIPS = os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv") RSM_JOINT_TRIPS = os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv") + +RSM_INDIV_TOURS = os.path.join(rsm_dir, "output", "indivTourData_" + str(iteration) + ".csv") +RSM_JOINT_TOURS = os.path.join(rsm_dir, "output", "jointTourData_" + str(iteration) + ".csv") +RSM_HOUSEHOLD = os.path.join(rsm_dir, "output", "householdData_" + str(iteration) + ".csv") +RSM_PERSON = os.path.join(rsm_dir, "output", "personData_" + str(iteration) + ".csv") + HOUSEHOLDS = os.path.join(org_model_dir, "input", "households.csv") MGRA_CROSSWALK = os.path.join(rsm_dir, "input", "mgra_crosswalk.csv") @@ -39,17 +45,29 @@ shutil.copy(RSM_INDIV_TRIPS, os.path.join(rsm_dir, "output", "indivTripData_abm_"+ str(iteration) + ".csv")) shutil.copy(RSM_JOINT_TRIPS, os.path.join(rsm_dir, "output", "jointTripData_abm_"+ str(iteration) + ".csv")) +#creating copy of individual and joint tours file +shutil.copy(RSM_INDIV_TOURS, os.path.join(rsm_dir, "output", "indivTourData_abm_"+ str(iteration) + ".csv")) +shutil.copy(RSM_JOINT_TOURS, os.path.join(rsm_dir, "output", "jointTourData_abm_"+ str(iteration) + ".csv")) + +#creating copy of person and household file +shutil.copy(RSM_HOUSEHOLD, os.path.join(rsm_dir, "output", "householdData_abm_"+ str(iteration) + ".csv")) +shutil.copy(RSM_PERSON, os.path.join(rsm_dir, "output", "personData_abm_"+ str(iteration) + ".csv")) + ABM_PROPERTIES_FOLDER = os.path.join(rsm_dir, "conf") ABM_PROPERTIES = os.path.join(ABM_PROPERTIES_FOLDER, "sandag_abm.properties") RUN_ASSEMBLER = int(get_property(ABM_PROPERTIES, "run.rsm.assembler")) SAMPLE_RATE = float(get_property(ABM_PROPERTIES, "rsm.default.sampling.rate")) #RSM Assembler -final_ind, final_jnt = rsm_assemble( +final_ind_trips, final_jnt_trips, final_ind_tours, final_jnt_tours, final_household, final_person = rsm_assemble( ORG_INDIV_TRIPS, ORG_JOINT_TRIPS, RSM_INDIV_TRIPS, RSM_JOINT_TRIPS, + RSM_INDIV_TOURS, + RSM_JOINT_TOURS, + RSM_HOUSEHOLD, + RSM_PERSON, HOUSEHOLDS, MGRA_CROSSWALK, SAMPLE_RATE, @@ -57,7 +75,13 @@ ) #save as csv files -final_ind.to_csv(os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv"), index = False) -final_jnt.to_csv(os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv"), index = False) +final_ind_trips.to_csv(os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv"), index = False) +final_jnt_trips.to_csv(os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv"), index = False) + +final_ind_tours.to_csv(os.path.join(rsm_dir, "output", "indivTourData_" + str(iteration) + ".csv"), index = False) +final_jnt_tours.to_csv(os.path.join(rsm_dir, "output", "jointTourData_" + str(iteration) + ".csv"), index = False) + +final_household.to_csv(os.path.join(rsm_dir, "output", "householdData_" + str(iteration) + ".csv"), index = False) +final_person.to_csv(os.path.join(rsm_dir, "output", "personData_" + str(iteration) + ".csv"), index = False) logging.info("finished logging rsm_assembler") \ No newline at end of file From deb87dbb6d0498899d007eabcc8810d105651b2d Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Tue, 20 Jun 2023 12:53:31 -0500 Subject: [PATCH 03/16] Updating docstring --- rsm/zone_agg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rsm/zone_agg.py b/rsm/zone_agg.py index 2446be3..c4faf17 100644 --- a/rsm/zone_agg.py +++ b/rsm/zone_agg.py @@ -229,7 +229,7 @@ def aggregate_zones( given, with no less and no more) explicit_col : str The name of the column containing the ID's from `explicit_agg`, usually - 'mgra' or 'taz' + 'taz' agg_instruction : dict Dictionary passed to pandas `agg` that says how to aggregate data columns. start_cluster_ids : int, default 13 From 366fbe99fe424aca4be42fc2f4761b8b4d4d675a Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Thu, 29 Jun 2023 08:43:16 -0700 Subject: [PATCH 04/16] Removing the scaling updates made --- .../main/python/dataExporter/abmScenario.py | 18 ++++++---- rsm/assembler.py | 35 +------------------ scripts/rsm_assembler.py | 25 +------------ 3 files changed, 13 insertions(+), 65 deletions(-) diff --git a/abm/src/main/python/dataExporter/abmScenario.py b/abm/src/main/python/dataExporter/abmScenario.py index d5874a8..1ecc729 100644 --- a/abm/src/main/python/dataExporter/abmScenario.py +++ b/abm/src/main/python/dataExporter/abmScenario.py @@ -284,6 +284,10 @@ def properties(self) -> dict: "year": { "line": "scenarioYear=", "type": "int", + "value": None}, + "rsmSamplingRate":{ + "line": "rsm.default.sampling.rate=", + "type" : "float", "value": None} } @@ -637,7 +641,7 @@ def mgra_input(self) -> pd.DataFrame: "adultschenrl", "ech_dist", "hch_dist", - "pseudomsa", + #"pseudomsa", "parkarea", "hstallsoth", "hstallssam", @@ -649,7 +653,7 @@ def mgra_input(self) -> pd.DataFrame: "mstallsoth", "mstallssam", "mparkcost", - "zip09", + #"zip09", "parkactive", "openspaceparkpreserve", "beachactive", @@ -657,9 +661,9 @@ def mgra_input(self) -> pd.DataFrame: "truckregiontype", "district27", "milestocoast", - "acres", - "effective_acres", - "land_acres", + #"acres", + #"effective_acres", + #"land_acres", "MicroAccessTime", "remoteAVParking", "refueling_stations", @@ -2833,9 +2837,9 @@ def ie(self) -> pd.DataFrame: 1 / self.properties["pooledTNCPassengers"]] trips["weightTrip"] = pd.Series( - np.select(conditions, choices, default=1) / self.properties["sampleRate"], + np.select(conditions, choices, default=1) / self.properties["rsmSamplingRate"], dtype="float32") - trips["weightPersonTrip"] = 1 / self.properties["sampleRate"] + trips["weightPersonTrip"] = 1 / self.properties["rsmSamplingRate"] trips["weightPersonTrip"] = trips["weightPersonTrip"].astype("float32") # rename columns to standard/generic ABM naming conventions diff --git a/rsm/assembler.py b/rsm/assembler.py index 2f22f84..b96cc23 100644 --- a/rsm/assembler.py +++ b/rsm/assembler.py @@ -29,10 +29,6 @@ def rsm_assemble( orig_joint, rsm_indiv, rsm_joint, - rsm_indiv_tour, - rsm_joint_tour, - rsm_household, - rsm_person, households, mgra_crosswalk=None, sample_rate=0.25, @@ -56,17 +52,6 @@ def rsm_assemble( Trips table from RSM model run, should be a simulation of all joint trips for potentially only a subset of all synthetic households (the same sampled households as in `rsm_indiv`). - rsm_indiv_tour : path-like - Tours table from RSM model run, should be a simulation of all individual - tous for potentially only a subset of all synthetic households. - rsm_joint_tour : path-like - Tours table from RSM model run, should be a simulation of all joint - tous for potentially only a subset of all synthetic households (the - same sampled households as in `rsm_indiv_tour`). - rsm_household: path-like - Households from RSM model run, should be sumulation of all sampled households. - rsm_person: path-like - Persons from RSM mdoel run, should be simulation of all sampled persons. households : path-like Synthetic household file, used to get home zones for households. mgra_crosswalk : path-like, optional @@ -96,10 +81,6 @@ def rsm_assemble( orig_joint = Path(orig_joint).expanduser() rsm_indiv = Path(rsm_indiv).expanduser() rsm_joint = Path(rsm_joint).expanduser() - rsm_indiv_tour = Path(rsm_indiv_tour).expanduser() - rsm_joint_tour = Path(rsm_joint_tour).expanduser() - rsm_household = Path(rsm_household).expanduser() - rsm_person = Path(rsm_person).expanduser() households = Path(households).expanduser() assert os.path.isfile(orig_indiv) @@ -117,23 +98,9 @@ def rsm_assemble( ind_trips_rsm = pd.read_csv(rsm_indiv) logger.info("reading jnt_trips_rsm") jnt_trips_rsm = pd.read_csv(rsm_joint) - logger.info("reading ind_tours_rsm") - ind_tours_rsm = pd.read_csv(rsm_indiv_tour) - logger.info("reading jnt_tours_rsm") - jnt_tours_rsm = pd.read_csv(rsm_joint_tour) - logger.info("reading household_rsm") - household_rsm = pd.read_csv(rsm_household) - logger.info("reading person_rsm") - person_rsm = pd.read_csv(rsm_person) scale_factor = int(1.0/sample_rate) - # tours, household and person file are scaled up based on sampling rate - final_ind_tours = scaleup_to_rsm_samplingrate(ind_tours_rsm, scale_factor) - final_jnt_tours = scaleup_to_rsm_samplingrate(jnt_tours_rsm, scale_factor) - final_household = scaleup_to_rsm_samplingrate(household_rsm, scale_factor) - final_person = scaleup_to_rsm_samplingrate(person_rsm, scale_factor) - if run_assembler == 1: # load trip data - full simulation of residual/source model logger.info("reading ind_trips_full") @@ -252,4 +219,4 @@ def _agg_by_hhid_and_tripmode(df, name): final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm, scale_factor) final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, scale_factor) - return final_ind_trips, final_jnt_trips, final_ind_tours, final_jnt_tours, final_household, final_person + return final_ind_trips, final_jnt_trips diff --git a/scripts/rsm_assembler.py b/scripts/rsm_assembler.py index c44fb55..b2d8c17 100644 --- a/scripts/rsm_assembler.py +++ b/scripts/rsm_assembler.py @@ -33,11 +33,6 @@ RSM_INDIV_TRIPS = os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv") RSM_JOINT_TRIPS = os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv") -RSM_INDIV_TOURS = os.path.join(rsm_dir, "output", "indivTourData_" + str(iteration) + ".csv") -RSM_JOINT_TOURS = os.path.join(rsm_dir, "output", "jointTourData_" + str(iteration) + ".csv") -RSM_HOUSEHOLD = os.path.join(rsm_dir, "output", "householdData_" + str(iteration) + ".csv") -RSM_PERSON = os.path.join(rsm_dir, "output", "personData_" + str(iteration) + ".csv") - HOUSEHOLDS = os.path.join(org_model_dir, "input", "households.csv") MGRA_CROSSWALK = os.path.join(rsm_dir, "input", "mgra_crosswalk.csv") @@ -45,29 +40,17 @@ shutil.copy(RSM_INDIV_TRIPS, os.path.join(rsm_dir, "output", "indivTripData_abm_"+ str(iteration) + ".csv")) shutil.copy(RSM_JOINT_TRIPS, os.path.join(rsm_dir, "output", "jointTripData_abm_"+ str(iteration) + ".csv")) -#creating copy of individual and joint tours file -shutil.copy(RSM_INDIV_TOURS, os.path.join(rsm_dir, "output", "indivTourData_abm_"+ str(iteration) + ".csv")) -shutil.copy(RSM_JOINT_TOURS, os.path.join(rsm_dir, "output", "jointTourData_abm_"+ str(iteration) + ".csv")) - -#creating copy of person and household file -shutil.copy(RSM_HOUSEHOLD, os.path.join(rsm_dir, "output", "householdData_abm_"+ str(iteration) + ".csv")) -shutil.copy(RSM_PERSON, os.path.join(rsm_dir, "output", "personData_abm_"+ str(iteration) + ".csv")) - ABM_PROPERTIES_FOLDER = os.path.join(rsm_dir, "conf") ABM_PROPERTIES = os.path.join(ABM_PROPERTIES_FOLDER, "sandag_abm.properties") RUN_ASSEMBLER = int(get_property(ABM_PROPERTIES, "run.rsm.assembler")) SAMPLE_RATE = float(get_property(ABM_PROPERTIES, "rsm.default.sampling.rate")) #RSM Assembler -final_ind_trips, final_jnt_trips, final_ind_tours, final_jnt_tours, final_household, final_person = rsm_assemble( +final_ind_trips, final_jnt_trips = rsm_assemble( ORG_INDIV_TRIPS, ORG_JOINT_TRIPS, RSM_INDIV_TRIPS, RSM_JOINT_TRIPS, - RSM_INDIV_TOURS, - RSM_JOINT_TOURS, - RSM_HOUSEHOLD, - RSM_PERSON, HOUSEHOLDS, MGRA_CROSSWALK, SAMPLE_RATE, @@ -78,10 +61,4 @@ final_ind_trips.to_csv(os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv"), index = False) final_jnt_trips.to_csv(os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv"), index = False) -final_ind_tours.to_csv(os.path.join(rsm_dir, "output", "indivTourData_" + str(iteration) + ".csv"), index = False) -final_jnt_tours.to_csv(os.path.join(rsm_dir, "output", "jointTourData_" + str(iteration) + ".csv"), index = False) - -final_household.to_csv(os.path.join(rsm_dir, "output", "householdData_" + str(iteration) + ".csv"), index = False) -final_person.to_csv(os.path.join(rsm_dir, "output", "personData_" + str(iteration) + ".csv"), index = False) - logging.info("finished logging rsm_assembler") \ No newline at end of file From 13f38fdea9b51e1b08dffc32d7db2bbc66c0c0e7 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Wed, 5 Jul 2023 11:18:17 -0700 Subject: [PATCH 05/16] Scaling IE trips --- abm/src/main/python/dataExporter/abmScenario.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/abm/src/main/python/dataExporter/abmScenario.py b/abm/src/main/python/dataExporter/abmScenario.py index 1ecc729..25901f0 100644 --- a/abm/src/main/python/dataExporter/abmScenario.py +++ b/abm/src/main/python/dataExporter/abmScenario.py @@ -2836,10 +2836,11 @@ def ie(self) -> pd.DataFrame: 1 / self.properties["nonPooledTNCPassengers"], 1 / self.properties["pooledTNCPassengers"]] + scale_factor = self.properties["rsmSamplingRate"] trips["weightTrip"] = pd.Series( - np.select(conditions, choices, default=1) / self.properties["rsmSamplingRate"], + np.select(conditions, choices, default=1) / (scale_factor*scale_factor), dtype="float32") - trips["weightPersonTrip"] = 1 / self.properties["rsmSamplingRate"] + trips["weightPersonTrip"] = 1 / (scale_factor*scale_factor) trips["weightPersonTrip"] = trips["weightPersonTrip"].astype("float32") # rename columns to standard/generic ABM naming conventions From 9ecede01b0fe2ac8f131103652f1c12a51400750 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Wed, 5 Jul 2023 11:56:41 -0700 Subject: [PATCH 06/16] changing to sampling rate --- abm/src/main/python/dataExporter/abmScenario.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/abm/src/main/python/dataExporter/abmScenario.py b/abm/src/main/python/dataExporter/abmScenario.py index 25901f0..074008a 100644 --- a/abm/src/main/python/dataExporter/abmScenario.py +++ b/abm/src/main/python/dataExporter/abmScenario.py @@ -2836,11 +2836,11 @@ def ie(self) -> pd.DataFrame: 1 / self.properties["nonPooledTNCPassengers"], 1 / self.properties["pooledTNCPassengers"]] - scale_factor = self.properties["rsmSamplingRate"] + sampling_rate = self.properties["rsmSamplingRate"] trips["weightTrip"] = pd.Series( - np.select(conditions, choices, default=1) / (scale_factor*scale_factor), + np.select(conditions, choices, default=1) / (sampling_rate*sampling_rate), dtype="float32") - trips["weightPersonTrip"] = 1 / (scale_factor*scale_factor) + trips["weightPersonTrip"] = 1 / (sampling_rate*sampling_rate) trips["weightPersonTrip"] = trips["weightPersonTrip"].astype("float32") # rename columns to standard/generic ABM naming conventions From 1bf6423965864c2b2930c7fa89957c1480360a94 Mon Sep 17 00:00:00 2001 From: Joe Flood Date: Fri, 7 Jul 2023 10:25:44 -0700 Subject: [PATCH 07/16] Renamed create_list_study_area_mgra() to create_list_study_area_taz() to reflect function name change in rsm\utility.py --- scripts/rsm_zone_aggregator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rsm_zone_aggregator.py b/scripts/rsm_zone_aggregator.py index a061c31..a270449 100644 --- a/scripts/rsm_zone_aggregator.py +++ b/scripts/rsm_zone_aggregator.py @@ -65,7 +65,7 @@ # prepare list of MGRA that should not be aggregated or grouped together logging.info("Check if the study area file exists in the RSM input folder") if os.path.exists(EXPILICT_AGG_TAZ): - EXPLICIT_ZONE_AGG = create_list_study_area_mgra(EXPILICT_AGG_TAZ) + EXPLICIT_ZONE_AGG = create_list_study_area_taz(EXPILICT_AGG_TAZ) else: EXPLICIT_ZONE_AGG = [] From 5c634cb40b0b14fe13c1a13f38df72c2dcda0ff6 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Wed, 12 Jul 2023 18:48:51 -0700 Subject: [PATCH 08/16] Adding households in input aggregator --- scripts/rsm_input_aggregator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rsm_input_aggregator.py b/scripts/rsm_input_aggregator.py index b858226..e8ec496 100644 --- a/scripts/rsm_input_aggregator.py +++ b/scripts/rsm_input_aggregator.py @@ -87,7 +87,7 @@ "TripMatrices.csv", "transponderModelAccessibilities.csv", "crossBorderTours.csv", "internalExternalTrips.csv", "visitorTours.csv", "visitorTrips.csv", "householdAVTrips.csv", "crossBorderTrips.csv", "TNCTrips.csv", "airport_out.SAN.csv", "airport_out.CBX.csv", - "TNCtrips.csv"] + "TNCtrips.csv", "households.csv"] ) logging.info("finished logging rsm_input_aggregator") \ No newline at end of file From 6642edb2c8a62b6e61a26e529bdfec7001d39e0c Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Wed, 12 Jul 2023 18:49:31 -0700 Subject: [PATCH 09/16] Scaling individual and joint trips based on study area TAZs --- rsm/assembler.py | 14 +++++++++++--- rsm/utility.py | 19 ++++++++++++++++--- scripts/rsm_assembler.py | 18 ++++++++++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/rsm/assembler.py b/rsm/assembler.py index b96cc23..c0b5b89 100644 --- a/rsm/assembler.py +++ b/rsm/assembler.py @@ -32,6 +32,7 @@ def rsm_assemble( households, mgra_crosswalk=None, sample_rate=0.25, + study_area_taz=None, run_assembler=1 ): """ @@ -65,6 +66,8 @@ def rsm_assemble( sample_rate : float default/fixed sample rate if sampler was turned off this is used to scale the trips if run_assembler is 0 + study_area_rsm_zones : list + it is list of study area RSM zones Returns ------- @@ -210,13 +213,18 @@ def _agg_by_hhid_and_tripmode(df, name): # then scale the trips in the trip list using the fixed sample rate # trips in the final trip lists will be 100% scale_factor = int(1.0/sample_rate) - + + if study_area_taz: + sa_rsm = study_area_taz + else: + sa_rsm = None + # concat is slow # https://stackoverflow.com/questions/50788508/how-can-i-replicate-rows-of-a-pandas-dataframe #final_ind_trips = pd.concat([ind_trips_rsm]*scale_factor, ignore_index=True) #final_jnt_trips = pd.concat([jnt_trips_rsm]*scale_factor, ignore_index=True) - final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm, scale_factor) - final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, scale_factor) + final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm, households, scale_factor, study_area_tazs=sa_rsm) + final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, households, scale_factor, study_area_tazs=sa_rsm) return final_ind_trips, final_jnt_trips diff --git a/rsm/utility.py b/rsm/utility.py index 86fa66d..0cd95e5 100644 --- a/rsm/utility.py +++ b/rsm/utility.py @@ -232,15 +232,28 @@ def _density_function(mgra_in): return mgra_data -def scaleup_to_rsm_samplingrate(df, scale_factor): +def scaleup_to_rsm_samplingrate(df, household, scale_factor, study_area_tazs=None): """ - scales up the tour, trips, household, person data files based on the sampling rate. + scales up the trips based on the sampling rate. """ + + hh = pd.read_csv(household) + hh = hh[['hhid', 'taz']] + + hh['scale_factor'] = scale_factor + + if study_area_tazs: + hh.loc[hh['taz'].isin(study_area_tazs), 'scale_factor'] = 1 + + df = pd.merge(df, hh, left_on='hh_id', right_on='hhid', how='left') + final_df = pd.DataFrame( - np.repeat(df.values, scale_factor, axis=0), + np.repeat(df.values, df.scale_factor, axis=0), columns=df.columns ) + + final_df = final_df.drop(columns=['hhid', 'scale_factor', 'taz']) return final_df diff --git a/scripts/rsm_assembler.py b/scripts/rsm_assembler.py index b2d8c17..2057f90 100644 --- a/scripts/rsm_assembler.py +++ b/scripts/rsm_assembler.py @@ -35,6 +35,23 @@ HOUSEHOLDS = os.path.join(org_model_dir, "input", "households.csv") MGRA_CROSSWALK = os.path.join(rsm_dir, "input", "mgra_crosswalk.csv") +TAZ_CROSSWALK = os.path.join(rsm_dir, "input", "taz_crosswalk.csv") +STUDY_AREA = os.path.join(rsm_dir, "input", "study_area.csv") + +if os.path.exists(STUDY_AREA): + logging.info(f"Study Area file: {STUDY_AREA}") + study_area_taz = find_rsm_zone_of_study_area(STUDY_AREA, TAZ_CROSSWALK) + if study_area_taz is not None: + # Process the result + SA_TAZ = study_area_taz + logger.info(f"RSM Zone identified for the Study area are : {SA_TAZ}", ) + else: + # Handle the error + logger.info("Please check the study area file. 'taz' column is expected in the file to find the corresponding RSM zone.") + SA_TAZ = None + +else: + SA_TAZ = None #creating copy of individual and joint trips file shutil.copy(RSM_INDIV_TRIPS, os.path.join(rsm_dir, "output", "indivTripData_abm_"+ str(iteration) + ".csv")) @@ -54,6 +71,7 @@ HOUSEHOLDS, MGRA_CROSSWALK, SAMPLE_RATE, + SA_TAZ, RUN_ASSEMBLER ) From bd974ef7bef227bc52932e75b80cdff62cf8bb10 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Mon, 17 Jul 2023 10:27:28 -0700 Subject: [PATCH 10/16] Fixing path to household file --- scripts/rsm_assembler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rsm_assembler.py b/scripts/rsm_assembler.py index 2057f90..e246c37 100644 --- a/scripts/rsm_assembler.py +++ b/scripts/rsm_assembler.py @@ -33,7 +33,7 @@ RSM_INDIV_TRIPS = os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv") RSM_JOINT_TRIPS = os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv") -HOUSEHOLDS = os.path.join(org_model_dir, "input", "households.csv") +HOUSEHOLDS = os.path.join(rsm_dir, "input", "households.csv") MGRA_CROSSWALK = os.path.join(rsm_dir, "input", "mgra_crosswalk.csv") TAZ_CROSSWALK = os.path.join(rsm_dir, "input", "taz_crosswalk.csv") STUDY_AREA = os.path.join(rsm_dir, "input", "study_area.csv") From 271b5e20c917f8eafcf76a1f3144b69b9b9e4cf2 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Mon, 17 Jul 2023 10:28:03 -0700 Subject: [PATCH 11/16] Minor updates to scaling --- rsm/assembler.py | 3 ++- rsm/utility.py | 9 +-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/rsm/assembler.py b/rsm/assembler.py index c0b5b89..42bdcde 100644 --- a/rsm/assembler.py +++ b/rsm/assembler.py @@ -223,7 +223,8 @@ def _agg_by_hhid_and_tripmode(df, name): # https://stackoverflow.com/questions/50788508/how-can-i-replicate-rows-of-a-pandas-dataframe #final_ind_trips = pd.concat([ind_trips_rsm]*scale_factor, ignore_index=True) #final_jnt_trips = pd.concat([jnt_trips_rsm]*scale_factor, ignore_index=True) - + + final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm, households, scale_factor, study_area_tazs=sa_rsm) final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, households, scale_factor, study_area_tazs=sa_rsm) diff --git a/rsm/utility.py b/rsm/utility.py index 0cd95e5..8189258 100644 --- a/rsm/utility.py +++ b/rsm/utility.py @@ -240,21 +240,14 @@ def scaleup_to_rsm_samplingrate(df, household, scale_factor, study_area_tazs=Non hh = pd.read_csv(household) hh = hh[['hhid', 'taz']] - hh['scale_factor'] = scale_factor if study_area_tazs: hh.loc[hh['taz'].isin(study_area_tazs), 'scale_factor'] = 1 df = pd.merge(df, hh, left_on='hh_id', right_on='hhid', how='left') - - final_df = pd.DataFrame( - np.repeat(df.values, df.scale_factor, axis=0), - columns=df.columns - ) - + final_df = df.loc[np.repeat(df.index, df['scale_factor'])] final_df = final_df.drop(columns=['hhid', 'scale_factor', 'taz']) - return final_df def check_column_names(df, columns): From da034770334624b8b7149729a21cd4b648c750bf Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Mon, 24 Jul 2023 07:43:25 -0700 Subject: [PATCH 12/16] Excluding household.csv from input aggregator --- scripts/rsm_input_aggregator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rsm_input_aggregator.py b/scripts/rsm_input_aggregator.py index e8ec496..43c93f7 100644 --- a/scripts/rsm_input_aggregator.py +++ b/scripts/rsm_input_aggregator.py @@ -87,7 +87,7 @@ "TripMatrices.csv", "transponderModelAccessibilities.csv", "crossBorderTours.csv", "internalExternalTrips.csv", "visitorTours.csv", "visitorTrips.csv", "householdAVTrips.csv", "crossBorderTrips.csv", "TNCTrips.csv", "airport_out.SAN.csv", "airport_out.CBX.csv", - "TNCtrips.csv", "households.csv"] + "TNCtrips.csv"] #, "households.csv"] ) logging.info("finished logging rsm_input_aggregator") \ No newline at end of file From f8f86f2151e48c6ca27ecf6f53e28ca4997dbac2 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Mon, 24 Jul 2023 07:46:04 -0700 Subject: [PATCH 13/16] Mapping household file with TAZ crosswalk in RSM Assembler --- rsm/assembler.py | 18 ++++++++++++++++-- rsm/utility.py | 14 ++++++++++++-- scripts/rsm_assembler.py | 1 + 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/rsm/assembler.py b/rsm/assembler.py index 42bdcde..0cf3659 100644 --- a/rsm/assembler.py +++ b/rsm/assembler.py @@ -31,6 +31,7 @@ def rsm_assemble( rsm_joint, households, mgra_crosswalk=None, + taz_crosswalk=None, sample_rate=0.25, study_area_taz=None, run_assembler=1 @@ -96,6 +97,10 @@ def rsm_assemble( mgra_crosswalk = Path(mgra_crosswalk).expanduser() assert os.path.isfile(mgra_crosswalk) + if taz_crosswalk is not None: + taz_crosswalk = Path(taz_crosswalk).expanduser() + assert os.path.isfile(taz_crosswalk) + # load trip data - partial simulation of RSM model logger.info("reading ind_trips_rsm") ind_trips_rsm = pd.read_csv(rsm_indiv) @@ -225,7 +230,16 @@ def _agg_by_hhid_and_tripmode(df, name): #final_jnt_trips = pd.concat([jnt_trips_rsm]*scale_factor, ignore_index=True) - final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm, households, scale_factor, study_area_tazs=sa_rsm) - final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, households, scale_factor, study_area_tazs=sa_rsm) + final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm, + households, + taz_crosswalk, + scale_factor, + study_area_tazs=sa_rsm) + + final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, + households, + taz_crosswalk, + scale_factor, + study_area_tazs=sa_rsm) return final_ind_trips, final_jnt_trips diff --git a/rsm/utility.py b/rsm/utility.py index 8189258..e766e82 100644 --- a/rsm/utility.py +++ b/rsm/utility.py @@ -232,7 +232,11 @@ def _density_function(mgra_in): return mgra_data -def scaleup_to_rsm_samplingrate(df, household, scale_factor, study_area_tazs=None): +def scaleup_to_rsm_samplingrate(df, + household, + taz_crosswalk, + scale_factor, + study_area_tazs=None): """ scales up the trips based on the sampling rate. @@ -240,14 +244,20 @@ def scaleup_to_rsm_samplingrate(df, household, scale_factor, study_area_tazs=Non hh = pd.read_csv(household) hh = hh[['hhid', 'taz']] + + rsm_zones = pd.read_csv(taz_crosswalk) + dict_clusters = dict(zip(rsm_zones["taz"], rsm_zones["cluster_id"])) + + hh["taz"] = hh["taz"].map(dict_clusters) hh['scale_factor'] = scale_factor - if study_area_tazs: + if study_area_tazs: hh.loc[hh['taz'].isin(study_area_tazs), 'scale_factor'] = 1 df = pd.merge(df, hh, left_on='hh_id', right_on='hhid', how='left') final_df = df.loc[np.repeat(df.index, df['scale_factor'])] final_df = final_df.drop(columns=['hhid', 'scale_factor', 'taz']) + return final_df def check_column_names(df, columns): diff --git a/scripts/rsm_assembler.py b/scripts/rsm_assembler.py index e246c37..664070f 100644 --- a/scripts/rsm_assembler.py +++ b/scripts/rsm_assembler.py @@ -70,6 +70,7 @@ RSM_JOINT_TRIPS, HOUSEHOLDS, MGRA_CROSSWALK, + TAZ_CROSSWALK, SAMPLE_RATE, SA_TAZ, RUN_ASSEMBLER From f0135f30678f29cb174f015733364ed159dfd4fd Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Thu, 27 Jul 2023 10:44:39 -0700 Subject: [PATCH 14/16] fix scaling of IE trips based on study area --- .../main/python/dataExporter/abmScenario.py | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/abm/src/main/python/dataExporter/abmScenario.py b/abm/src/main/python/dataExporter/abmScenario.py index 074008a..94b30de 100644 --- a/abm/src/main/python/dataExporter/abmScenario.py +++ b/abm/src/main/python/dataExporter/abmScenario.py @@ -2760,8 +2760,32 @@ def ie(self) -> pd.DataFrame: dtype={"hh_id": "int32", "transponder": "bool"}) + input_hh = pd.read_csv( + os.path.join(self.scenario_path, "input", "households.csv"), + usecols=["hhid", + "taz"], + dtype={"hhid": "int64", + "taz": "int64"}) + + rsm_zones = pd.read_csv( + os.path.join(self.scenario_path, "input", "taz_crosswalk.csv")) + + dict_clusters = dict(zip(rsm_zones["taz"], rsm_zones["cluster_id"])) + + input_hh["taz"] = input_hh["taz"].map(dict_clusters) + input_hh['scale_factor'] = 1/self.properties["rsmSamplingRate"] + + study_area_file = os.path.join(self.scenario_path, "input", "study_area.csv") + + if os.path.exists(study_area_file): + df = pd.read_csv(study_area_file) + study_area_taz = set(df['taz']) + rsm_zone = set(rsm_zones.loc[rsm_zones['taz'].isin(study_area_taz), 'cluster_id']) + input_hh.loc[input_hh['taz'].isin(rsm_zone), 'scale_factor'] = 1 + # if household has a transponder then all trips can use it trips = trips.merge(hh, left_on="hhID", right_on="hh_id") + trips = trips.merge(input_hh, left_on="hhID", right_on="hhid", how="left") # apply exhaustive field mappings where applicable mappings = { @@ -2838,9 +2862,10 @@ def ie(self) -> pd.DataFrame: sampling_rate = self.properties["rsmSamplingRate"] trips["weightTrip"] = pd.Series( - np.select(conditions, choices, default=1) / (sampling_rate*sampling_rate), + np.select(conditions, choices, default=1), #/ (sampling_rate*sampling_rate), dtype="float32") - trips["weightPersonTrip"] = 1 / (sampling_rate*sampling_rate) + trips["weightTrip"] = trips["weightTrip"]*trips['scale_factor'] + trips["weightPersonTrip"] = trips['scale_factor'] #1 / (sampling_rate*sampling_rate) trips["weightPersonTrip"] = trips["weightPersonTrip"].astype("float32") # rename columns to standard/generic ABM naming conventions From 250bc6a520f9ff462690b879c23e2a9bf5e39c04 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Mon, 28 Aug 2023 20:05:33 -0700 Subject: [PATCH 15/16] Adding differential sampling --- .../main/python/dataExporter/abmScenario.py | 6 +++++- abm/src/main/resources/sandag_abm.properties | 1 + rsm/assembler.py | 6 ++++-- rsm/utility.py | 2 +- scripts/rsm_assembler.py | 21 ++++++++++--------- scripts/rsm_sampler.py | 4 +++- scripts/rsm_zone_aggregator.py | 2 ++ 7 files changed, 27 insertions(+), 15 deletions(-) diff --git a/abm/src/main/python/dataExporter/abmScenario.py b/abm/src/main/python/dataExporter/abmScenario.py index 94b30de..c598136 100644 --- a/abm/src/main/python/dataExporter/abmScenario.py +++ b/abm/src/main/python/dataExporter/abmScenario.py @@ -288,6 +288,10 @@ def properties(self) -> dict: "rsmSamplingRate":{ "line": "rsm.default.sampling.rate=", "type" : "float", + "value": None}, + "useDifferentialSampling":{ + "line": "use.differential.sampling=", + "type" : "int", "value": None} } @@ -2777,7 +2781,7 @@ def ie(self) -> pd.DataFrame: study_area_file = os.path.join(self.scenario_path, "input", "study_area.csv") - if os.path.exists(study_area_file): + if useDifferentialSampling & os.path.exists(study_area_file): df = pd.read_csv(study_area_file) study_area_taz = set(df['taz']) rsm_zone = set(rsm_zones.loc[rsm_zones['taz'].isin(study_area_taz), 'cluster_id']) diff --git a/abm/src/main/resources/sandag_abm.properties b/abm/src/main/resources/sandag_abm.properties index 3e9c074..6eb7414 100644 --- a/abm/src/main/resources/sandag_abm.properties +++ b/abm/src/main/resources/sandag_abm.properties @@ -1344,6 +1344,7 @@ rsm.zones = 2000 external.zones = 12 run.rsm.sampling = 0 rsm.default.sampling.rate = 0.25 +use.differential.sampling = 1 rsm.min.sampling.rate = 0.25 run.rsm.assembler = 0 rsm.centroid.connector.start.id = 55000 diff --git a/rsm/assembler.py b/rsm/assembler.py index 0cf3659..f447f69 100644 --- a/rsm/assembler.py +++ b/rsm/assembler.py @@ -34,7 +34,8 @@ def rsm_assemble( taz_crosswalk=None, sample_rate=0.25, study_area_taz=None, - run_assembler=1 + run_assembler=1, + differential_sampling=1 ): """ Assemble and evaluate RSM trip making. @@ -70,6 +71,7 @@ def rsm_assemble( study_area_rsm_zones : list it is list of study area RSM zones + Returns ------- #final_trips_rsm : pd.DataFrame @@ -239,7 +241,7 @@ def _agg_by_hhid_and_tripmode(df, name): final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm, households, taz_crosswalk, - scale_factor, + scale_factor, study_area_tazs=sa_rsm) return final_ind_trips, final_jnt_trips diff --git a/rsm/utility.py b/rsm/utility.py index e766e82..f94b256 100644 --- a/rsm/utility.py +++ b/rsm/utility.py @@ -251,7 +251,7 @@ def scaleup_to_rsm_samplingrate(df, hh["taz"] = hh["taz"].map(dict_clusters) hh['scale_factor'] = scale_factor - if study_area_tazs: + if study_area_tazs: hh.loc[hh['taz'].isin(study_area_tazs), 'scale_factor'] = 1 df = pd.merge(df, hh, left_on='hh_id', right_on='hhid', how='left') diff --git a/scripts/rsm_assembler.py b/scripts/rsm_assembler.py index 664070f..4ce31a7 100644 --- a/scripts/rsm_assembler.py +++ b/scripts/rsm_assembler.py @@ -38,7 +38,17 @@ TAZ_CROSSWALK = os.path.join(rsm_dir, "input", "taz_crosswalk.csv") STUDY_AREA = os.path.join(rsm_dir, "input", "study_area.csv") -if os.path.exists(STUDY_AREA): +#creating copy of individual and joint trips file +shutil.copy(RSM_INDIV_TRIPS, os.path.join(rsm_dir, "output", "indivTripData_abm_"+ str(iteration) + ".csv")) +shutil.copy(RSM_JOINT_TRIPS, os.path.join(rsm_dir, "output", "jointTripData_abm_"+ str(iteration) + ".csv")) + +ABM_PROPERTIES_FOLDER = os.path.join(rsm_dir, "conf") +ABM_PROPERTIES = os.path.join(ABM_PROPERTIES_FOLDER, "sandag_abm.properties") +RUN_ASSEMBLER = int(get_property(ABM_PROPERTIES, "run.rsm.assembler")) +SAMPLE_RATE = float(get_property(ABM_PROPERTIES, "rsm.default.sampling.rate")) +USE_DIFFERENTIAL_SAMPLING = int(get_property(ABM_PROPERTIES, "use.differential.sampling")) + +if USE_DIFFERENTIAL_SAMPLING & os.path.exists(STUDY_AREA): logging.info(f"Study Area file: {STUDY_AREA}") study_area_taz = find_rsm_zone_of_study_area(STUDY_AREA, TAZ_CROSSWALK) if study_area_taz is not None: @@ -53,15 +63,6 @@ else: SA_TAZ = None -#creating copy of individual and joint trips file -shutil.copy(RSM_INDIV_TRIPS, os.path.join(rsm_dir, "output", "indivTripData_abm_"+ str(iteration) + ".csv")) -shutil.copy(RSM_JOINT_TRIPS, os.path.join(rsm_dir, "output", "jointTripData_abm_"+ str(iteration) + ".csv")) - -ABM_PROPERTIES_FOLDER = os.path.join(rsm_dir, "conf") -ABM_PROPERTIES = os.path.join(ABM_PROPERTIES_FOLDER, "sandag_abm.properties") -RUN_ASSEMBLER = int(get_property(ABM_PROPERTIES, "run.rsm.assembler")) -SAMPLE_RATE = float(get_property(ABM_PROPERTIES, "rsm.default.sampling.rate")) - #RSM Assembler final_ind_trips, final_jnt_trips = rsm_assemble( ORG_INDIV_TRIPS, diff --git a/scripts/rsm_sampler.py b/scripts/rsm_sampler.py index 666f8b3..daa1787 100644 --- a/scripts/rsm_sampler.py +++ b/scripts/rsm_sampler.py @@ -47,6 +47,7 @@ sampling_rate = float(get_property(ABM_PROPERTIES, "rsm.default.sampling.rate")) min_sampling_rate = float(get_property(ABM_PROPERTIES, "rsm.min.sampling.rate")) baseline_run_dir = get_property(ABM_PROPERTIES, "rsm.baseline.run.dir") +use_differential_sampling = int(get_property(ABM_PROPERTIES, "use.differential.sampling")) if run_rsm_sampling == 1: CURR_ITER_ACCESS = os.path.join( @@ -70,7 +71,7 @@ logging.info(f"Current Iteration Accessibility File: {CURR_ITER_ACCESS}") logging.info(f"Previous Iteration Accessibility File: {PREV_ITER_ACCESS}") -if os.path.exists(EXPILICT_AGG_TAZ): +if use_differential_sampling & os.path.exists(EXPILICT_AGG_TAZ): logging.info(f"Study Area file: {EXPILICT_AGG_TAZ}") study_area_taz = find_rsm_zone_of_study_area(EXPILICT_AGG_TAZ, OUTPUT_TAZ_CROSSWALK) if study_area_taz is not None: @@ -83,6 +84,7 @@ sa_taz = None else: + logger.info("All RSM zones will be sampled at the deafult sampling rate") sa_taz = None diff --git a/scripts/rsm_zone_aggregator.py b/scripts/rsm_zone_aggregator.py index a270449..8567584 100644 --- a/scripts/rsm_zone_aggregator.py +++ b/scripts/rsm_zone_aggregator.py @@ -66,6 +66,8 @@ logging.info("Check if the study area file exists in the RSM input folder") if os.path.exists(EXPILICT_AGG_TAZ): EXPLICIT_ZONE_AGG = create_list_study_area_taz(EXPILICT_AGG_TAZ) + logging.info("The input folder has a study_area file. The TAZs will be aggregated based on the study area file") + logging.info(EXPLICIT_ZONE_AGG) else: EXPLICIT_ZONE_AGG = [] From 75b94672ea3bf6ce3052d500676e245476f4504e Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Thu, 31 Aug 2023 17:17:51 -0500 Subject: [PATCH 16/16] Docstrings for differential sampler --- rsm/assembler.py | 1 - rsm/sampler.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rsm/assembler.py b/rsm/assembler.py index f447f69..d08de2f 100644 --- a/rsm/assembler.py +++ b/rsm/assembler.py @@ -35,7 +35,6 @@ def rsm_assemble( sample_rate=0.25, study_area_taz=None, run_assembler=1, - differential_sampling=1 ): """ Assemble and evaluate RSM trip making. diff --git a/rsm/sampler.py b/rsm/sampler.py index 8292070..9e87a2e 100644 --- a/rsm/sampler.py +++ b/rsm/sampler.py @@ -45,7 +45,8 @@ def rsm_household_sampler( Accessibility in the latest run is given (preloaded) or read in from here. Give as a relative path (from `input_dir`) or an absolute path. study_area : array-like - Array of RSM zone (these are numbered 1 to N in the RSM) in the study area. These zones are sampled at 100%. + Array of RSM zone (these are numbered 1 to N in the RSM) in the study area. + These zones are sampled at 100% if differential sampling is also turned on. input_household : Path-like or pandas.DataFrame Complete synthetic household file. This data will be filtered to match the sampling of households and written out to a new CSV file.