Merge pull request #41 from SANDAG/develop
Develop
AshishKuls authored Aug 31, 2023
2 parents 5b38149 + 30cc751 commit c3bcadd
Showing 10 changed files with 214 additions and 30 deletions.
48 changes: 41 additions & 7 deletions abm/src/main/python/dataExporter/abmScenario.py
@@ -284,6 +284,14 @@ def properties(self) -> dict:
"year": {
"line": "scenarioYear=",
"type": "int",
"value": None},
"rsmSamplingRate":{
"line": "rsm.default.sampling.rate=",
"type" : "float",
"value": None},
"useDifferentialSampling":{
"line": "use.differential.sampling=",
"type" : "int",
"value": None}
}

@@ -637,7 +645,7 @@ def mgra_input(self) -> pd.DataFrame:
"adultschenrl",
"ech_dist",
"hch_dist",
"pseudomsa",
#"pseudomsa",
"parkarea",
"hstallsoth",
"hstallssam",
@@ -649,17 +657,17 @@
"mstallsoth",
"mstallssam",
"mparkcost",
"zip09",
#"zip09",
"parkactive",
"openspaceparkpreserve",
"beachactive",
"hotelroomtotal",
"truckregiontype",
"district27",
"milestocoast",
"acres",
"effective_acres",
"land_acres",
#"acres",
#"effective_acres",
#"land_acres",
"MicroAccessTime",
"remoteAVParking",
"refueling_stations",
@@ -2756,8 +2764,32 @@ def ie(self) -> pd.DataFrame:
dtype={"hh_id": "int32",
"transponder": "bool"})

input_hh = pd.read_csv(
os.path.join(self.scenario_path, "input", "households.csv"),
usecols=["hhid",
"taz"],
dtype={"hhid": "int64",
"taz": "int64"})

rsm_zones = pd.read_csv(
os.path.join(self.scenario_path, "input", "taz_crosswalk.csv"))

dict_clusters = dict(zip(rsm_zones["taz"], rsm_zones["cluster_id"]))

input_hh["taz"] = input_hh["taz"].map(dict_clusters)
input_hh['scale_factor'] = 1/self.properties["rsmSamplingRate"]

study_area_file = os.path.join(self.scenario_path, "input", "study_area.csv")

if useDifferentialSampling and os.path.exists(study_area_file):
df = pd.read_csv(study_area_file)
study_area_taz = set(df['taz'])
rsm_zone = set(rsm_zones.loc[rsm_zones['taz'].isin(study_area_taz), 'cluster_id'])
input_hh.loc[input_hh['taz'].isin(rsm_zone), 'scale_factor'] = 1

# if household has a transponder then all trips can use it
trips = trips.merge(hh, left_on="hhID", right_on="hh_id")
trips = trips.merge(input_hh, left_on="hhID", right_on="hhid", how="left")

# apply exhaustive field mappings where applicable
mappings = {
Expand Down Expand Up @@ -2832,10 +2864,12 @@ def ie(self) -> pd.DataFrame:
1 / self.properties["nonPooledTNCPassengers"],
1 / self.properties["pooledTNCPassengers"]]

sampling_rate = self.properties["rsmSamplingRate"]
trips["weightTrip"] = pd.Series(
np.select(conditions, choices, default=1) / self.properties["sampleRate"],
np.select(conditions, choices, default=1), #/ (sampling_rate*sampling_rate),
dtype="float32")
trips["weightPersonTrip"] = 1 / self.properties["sampleRate"]
trips["weightTrip"] = trips["weightTrip"]*trips['scale_factor']
trips["weightPersonTrip"] = trips['scale_factor'] #1 / (sampling_rate*sampling_rate)
trips["weightPersonTrip"] = trips["weightPersonTrip"].astype("float32")

# rename columns to standard/generic ABM naming conventions
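The block above replaces the single 1 / sampleRate weight with a per-household scale factor (the inverse of rsm.default.sampling.rate, forced to 1 for households in study-area RSM zones when differential sampling is enabled). The following minimal sketch illustrates that weighting logic with hypothetical zone IDs and weights, not the exporter's actual data:

import pandas as pd

# Illustrative inputs (hypothetical values).
sampling_rate = 0.25                       # rsm.default.sampling.rate
study_area_zones = {12, 13}                # RSM cluster_ids covering the study area
households = pd.DataFrame({"hhid": [1, 2, 3], "taz": [12, 40, 41]})

# Every sampled household stands in for 1/sampling_rate households by default.
households["scale_factor"] = 1 / sampling_rate          # 4.0 at a 25% sample
# Study-area zones were sampled at 100%, so their trips are not scaled up.
households.loc[households["taz"].isin(study_area_zones), "scale_factor"] = 1

trips = pd.DataFrame({"hhid": [1, 2, 3], "weightTrip": [1.0, 1.0, 0.5]})
trips = trips.merge(households, on="hhid", how="left")
trips["weightTrip"] *= trips["scale_factor"]             # 1.0, 4.0, 2.0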
1 change: 1 addition & 0 deletions abm/src/main/resources/sandag_abm.properties
@@ -1344,6 +1344,7 @@ rsm.zones = 2000
external.zones = 12
run.rsm.sampling = 0
rsm.default.sampling.rate = 0.25
use.differential.sampling = 1
rsm.min.sampling.rate = 0.25
run.rsm.assembler = 0
rsm.centroid.connector.start.id = 55000
48 changes: 35 additions & 13 deletions rsm/assembler.py
@@ -3,6 +3,7 @@
from pathlib import Path
import numpy as np
import pandas as pd
from rsm.utility import *

logger = logging.getLogger(__name__)

@@ -30,8 +31,10 @@ def rsm_assemble(
rsm_joint,
households,
mgra_crosswalk=None,
taz_crosswalk=None,
sample_rate=0.25,
run_assembler=1
study_area_taz=None,
run_assembler=1,
):
"""
Assemble and evaluate RSM trip making.
@@ -64,6 +67,11 @@
Flag to indicate whether to run the RSM assembler or not.
1 runs the assembler, 0 turns it off;
setting this to 0 is only an option if the sampler is turned off.
sample_rate : float
Default/fixed sample rate if the sampler was turned off;
this is used to scale the trips if run_assembler is 0.
study_area_taz : list
List of study-area RSM zones.
Returns
-------
@@ -94,12 +102,18 @@
mgra_crosswalk = Path(mgra_crosswalk).expanduser()
assert os.path.isfile(mgra_crosswalk)

if taz_crosswalk is not None:
taz_crosswalk = Path(taz_crosswalk).expanduser()
assert os.path.isfile(taz_crosswalk)

# load trip data - partial simulation of RSM model
logger.info("reading ind_trips_rsm")
ind_trips_rsm = pd.read_csv(rsm_indiv)
logger.info("reading jnt_trips_rsm")
jnt_trips_rsm = pd.read_csv(rsm_joint)

scale_factor = int(1.0/sample_rate)

if run_assembler == 1:
# load trip data - full simulation of residual/source model
logger.info("reading ind_trips_full")
@@ -209,20 +223,28 @@ def _agg_by_hhid_and_tripmode(df, name):
# then scale the trips in the trip list using the fixed sample rate
# trips in the final trip lists will be 100%
scale_factor = int(1.0/sample_rate)


if study_area_taz:
sa_rsm = study_area_taz
else:
sa_rsm = None

# concat is slow
# https://stackoverflow.com/questions/50788508/how-can-i-replicate-rows-of-a-pandas-dataframe
#final_ind_trips = pd.concat([ind_trips_rsm]*scale_factor, ignore_index=True)
#final_jnt_trips = pd.concat([jnt_trips_rsm]*scale_factor, ignore_index=True)

final_ind_trips = pd.DataFrame(
np.repeat(ind_trips_rsm.values, scale_factor, axis=0),
columns=ind_trips_rsm.columns
)

final_jnt_trips = pd.DataFrame(
np.repeat(jnt_trips_rsm.values, scale_factor, axis=0),
columns=jnt_trips_rsm.columns
)



final_ind_trips = scaleup_to_rsm_samplingrate(ind_trips_rsm,
households,
taz_crosswalk,
scale_factor,
study_area_tazs=sa_rsm)

final_jnt_trips = scaleup_to_rsm_samplingrate(jnt_trips_rsm,
households,
taz_crosswalk,
scale_factor,
study_area_tazs=sa_rsm)

return final_ind_trips, final_jnt_trips
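With the revised signature, a call might look like the sketch below; file paths and zone IDs are illustrative, and the first five arguments are passed positionally because their parameter names are not shown in this hunk (the real invocation appears in scripts/rsm_assembler.py further down).

final_ind_trips, final_jnt_trips = rsm_assemble(
    "output/indivTripData_3.csv",    # original (donor) model individual trips
    "output/jointTripData_3.csv",    # original model joint trips
    "output/indivTripData_1.csv",    # RSM individual trips (partial simulation)
    "output/jointTripData_1.csv",    # RSM joint trips
    "input/households.csv",          # synthetic households
    mgra_crosswalk="input/mgra_crosswalk.csv",
    taz_crosswalk="input/taz_crosswalk.csv",
    sample_rate=0.25,                # rsm.default.sampling.rate
    study_area_taz=[12, 13],         # study-area RSM zone IDs, or None
    run_assembler=0,                 # 0 skips the assembler and only scales RSM trips
)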
5 changes: 3 additions & 2 deletions rsm/sampler.py
@@ -47,7 +47,8 @@ def rsm_household_sampler(
Accessibility in the latest run is given (preloaded) or read in from here.
Give as a relative path (from `input_dir`) or an absolute path.
study_area : study_area (array-like)
Array of RSM zone (these are numbered 1 to N in the RSM) in the study area. These zones are sampled at 100%.
Array of RSM zones (these are numbered 1 to N in the RSM) in the study area.
These zones are sampled at 100% if differential sampling is also turned on.
input_household : input_household (Path-like or pandas.DataFrame)
Complete synthetic household file. This data will be filtered to match the
sampling of households and written out to a new CSV file.
@@ -140,7 +141,7 @@ def _resolve_out_filename(x):

mgra_hh["sampling_rate"] = default_sampling_rate
if study_area is not None:
mgra_hh.loc[mgra_hh.index.isin(study_area), "sample_rate"] = 1
mgra_hh.loc[mgra_hh.index.isin(study_area), "sampling_rate"] = 1

sample_households = []

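The one-character column-name fix above matters because assigning to a misspelled column silently creates a new, unused column instead of overriding the intended sampling rate. A small sketch of the corrected behaviour, with illustrative zone IDs:

import pandas as pd

# Zone-level sampling rates indexed by RSM zone (illustrative values).
mgra_hh = pd.DataFrame({"sampling_rate": [0.25, 0.25, 0.25]}, index=[10, 11, 12])
study_area = [12]

# Before the fix, writing to "sample_rate" left "sampling_rate" untouched,
# so study-area households were still drawn at the default 25%.
mgra_hh.loc[mgra_hh.index.isin(study_area), "sampling_rate"] = 1
# mgra_hh.loc[12, "sampling_rate"] is now 1.0, i.e. sampled at 100%.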
75 changes: 75 additions & 0 deletions rsm/utility.py
@@ -230,3 +230,78 @@ def _density_function(mgra_in):
mgra_data = mgra_data.fillna(0)

return mgra_data


def scaleup_to_rsm_samplingrate(df,
household,
taz_crosswalk,
scale_factor,
study_area_tazs=None):
"""
Scales up the trips based on the RSM sampling rate; households in study-area zones keep a scale factor of 1.
"""

hh = pd.read_csv(household)
hh = hh[['hhid', 'taz']]

rsm_zones = pd.read_csv(taz_crosswalk)
dict_clusters = dict(zip(rsm_zones["taz"], rsm_zones["cluster_id"]))

hh["taz"] = hh["taz"].map(dict_clusters)
hh['scale_factor'] = scale_factor

if study_area_tazs:
hh.loc[hh['taz'].isin(study_area_tazs), 'scale_factor'] = 1

df = pd.merge(df, hh, left_on='hh_id', right_on='hhid', how='left')
final_df = df.loc[np.repeat(df.index, df['scale_factor'])]
final_df = final_df.drop(columns=['hhid', 'scale_factor', 'taz'])

return final_df
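The helper expands each trip record scale_factor times via np.repeat on the index, so the output trip list is effectively at 100% even though only a sample of households was simulated; study-area households, already at 100%, are repeated once. A small worked example with hypothetical trips:

import numpy as np
import pandas as pd

trips = pd.DataFrame({"hh_id": [1, 1, 2], "trip_mode": [1, 3, 2]})
hh = pd.DataFrame({"hhid": [1, 2], "scale_factor": [4, 1]})  # household 2 is in the study area

merged = trips.merge(hh, left_on="hh_id", right_on="hhid", how="left")
expanded = merged.loc[np.repeat(merged.index, merged["scale_factor"])]
# Household 1's two trips each appear 4 times, household 2's trip once: 9 rows total.
assert len(expanded) == 9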

def check_column_names(df, columns):
"""
Check that the study area file has the expected column names.
"""
df_columns = df.columns.tolist()
if set(columns) != set(df_columns):
raise ValueError("Column names do not match the expected column names: 'taz' and 'group'. Please fix the column names.")
return True

def create_list_study_area_taz(study_area_file):
"""
Creates a list of TAZ lists, one list per value of the 'group' column.
"""

try:
df = pd.read_csv(study_area_file)
columns_to_check = ['taz', 'group']
check_column_names(df, columns_to_check)

except ValueError as e:
print("Error:", str(e))
logger.info("Error: %s", str(e))
return None

grouped_taz = df.groupby('group')['taz'].apply(list).values.tolist()

return grouped_taz
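For example, a study_area.csv that pairs each TAZ with a group label comes back as one list of TAZs per group (values are illustrative):

import pandas as pd

# Hypothetical study_area.csv contents:
#   taz,group
#   101,1
#   102,1
#   201,2
df = pd.DataFrame({"taz": [101, 102, 201], "group": [1, 1, 2]})
grouped_taz = df.groupby("group")["taz"].apply(list).values.tolist()
# grouped_taz == [[101, 102], [201]]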


def find_rsm_zone_of_study_area(study_area_file, taz_crosswalk):
"""
finds the RSM zones for the study area using the TAZ crosswalks
"""

try:
df = pd.read_csv(study_area_file)
taz_cwk = pd.read_csv(taz_crosswalk)
study_area_taz = set(df['taz'])
rsm_zone = set(taz_cwk.loc[taz_cwk['taz'].isin(study_area_taz), 'cluster_id'])

except Exception as e:
logger.info("Error in identifying RSM zone for study area:", str(e))
return None

return list(rsm_zone)
3 changes: 1 addition & 2 deletions rsm/zone_agg.py
@@ -229,8 +229,7 @@ def aggregate_zones(
or lists of integers (groups of MGRAs that should be aggregated exactly as
given, with no less and no more)
explicit_col : explicit_col (str)
The name of the column containing the ID's from `explicit_agg`, usually
'mgra' or 'taz'
The name of the column containing the IDs from `explicit_agg`, usually 'taz'
agg_instruction : agg_instruction (dict)
Dictionary passed to pandas `agg` that says how to aggregate data columns.
start_cluster_ids : start_cluster_ids (int, default 13)
29 changes: 25 additions & 4 deletions scripts/rsm_assembler.py
@@ -32,8 +32,11 @@
ORG_JOINT_TRIPS = os.path.join(org_model_dir, "output", "jointTripData_3.csv")
RSM_INDIV_TRIPS = os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv")
RSM_JOINT_TRIPS = os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv")
HOUSEHOLDS = os.path.join(org_model_dir, "input", "households.csv")

HOUSEHOLDS = os.path.join(rsm_dir, "input", "households.csv")
MGRA_CROSSWALK = os.path.join(rsm_dir, "input", "mgra_crosswalk.csv")
TAZ_CROSSWALK = os.path.join(rsm_dir, "input", "taz_crosswalk.csv")
STUDY_AREA = os.path.join(rsm_dir, "input", "study_area.csv")

#creating copy of individual and joint trips file
shutil.copy(RSM_INDIV_TRIPS, os.path.join(rsm_dir, "output", "indivTripData_abm_"+ str(iteration) + ".csv"))
@@ -43,21 +46,39 @@
ABM_PROPERTIES = os.path.join(ABM_PROPERTIES_FOLDER, "sandag_abm.properties")
RUN_ASSEMBLER = int(get_property(ABM_PROPERTIES, "run.rsm.assembler"))
SAMPLE_RATE = float(get_property(ABM_PROPERTIES, "rsm.default.sampling.rate"))
USE_DIFFERENTIAL_SAMPLING = int(get_property(ABM_PROPERTIES, "use.differential.sampling"))

if USE_DIFFERENTIAL_SAMPLING and os.path.exists(STUDY_AREA):
logging.info(f"Study Area file: {STUDY_AREA}")
study_area_taz = find_rsm_zone_of_study_area(STUDY_AREA, TAZ_CROSSWALK)
if study_area_taz is not None:
# use the study-area RSM zones for differential scaling
SA_TAZ = study_area_taz
logger.info(f"RSM zones identified for the study area are: {SA_TAZ}")
else:
# fall back to uniform scaling if the study area file could not be processed
logger.info("Please check the study area file. A 'taz' column is expected in the file to find the corresponding RSM zones.")
SA_TAZ = None

else:
SA_TAZ = None

#RSM Assembler
final_ind, final_jnt = rsm_assemble(
final_ind_trips, final_jnt_trips = rsm_assemble(
ORG_INDIV_TRIPS,
ORG_JOINT_TRIPS,
RSM_INDIV_TRIPS,
RSM_JOINT_TRIPS,
HOUSEHOLDS,
MGRA_CROSSWALK,
TAZ_CROSSWALK,
SAMPLE_RATE,
SA_TAZ,
RUN_ASSEMBLER
)

#save as csv files
final_ind.to_csv(os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv"), index = False)
final_jnt.to_csv(os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv"), index = False)
final_ind_trips.to_csv(os.path.join(rsm_dir, "output", "indivTripData_" + str(iteration) + ".csv"), index = False)
final_jnt_trips.to_csv(os.path.join(rsm_dir, "output", "jointTripData_" + str(iteration) + ".csv"), index = False)

logging.info("finished logging rsm_assembler")
2 changes: 1 addition & 1 deletion scripts/rsm_input_aggregator.py
@@ -87,7 +87,7 @@
"TripMatrices.csv", "transponderModelAccessibilities.csv", "crossBorderTours.csv",
"internalExternalTrips.csv", "visitorTours.csv", "visitorTrips.csv", "householdAVTrips.csv",
"crossBorderTrips.csv", "TNCTrips.csv", "airport_out.SAN.csv", "airport_out.CBX.csv",
"TNCtrips.csv"]
"TNCtrips.csv"] #, "households.csv"]
)

logging.info("finished logging rsm_input_aggregator")