diff --git a/applications/tally_household_share_by_taz_subzone.py b/applications/tally_household_share_by_taz_subzone.py
index 700636d..7ae3346 100644
--- a/applications/tally_household_share_by_taz_subzone.py
+++ b/applications/tally_household_share_by_taz_subzone.py
@@ -1,4 +1,4 @@
-USAGE="""
+USAGE = """
 Calculates share households in different TAZ subzones for each TM1454 TAZ

 See asana task: https://app.asana.com/0/403262763383022/1161734609745564/f
@@ -13,58 +13,98 @@
 """
-import numpy,pandas
+import numpy, pandas
 import argparse, os, sys, time

-BLUEPRINT_DIR = "M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint"
+BLUEPRINT_DIR = "M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint"
 LARGE_DATASET_INPUT_DIR = os.path.join(BLUEPRINT_DIR, "Large General Input Data")
-PARCEL_TO_SUBZONE_FILE = os.path.join(LARGE_DATASET_INPUT_DIR, '2018_10_17_parcel_to_taz1454sub.csv')
-
-URBANSIM_RUN_DIR = os.path.join(BLUEPRINT_DIR, 'runs')
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,)
-    parser.add_argument("parcel_data_file", help='Parcel data file relative to "{}". e.g. "Blueprint Plus Crossing (s23)\\v1.5.5\\run998_parcel_data_2050.csv"'.format(URBANSIM_RUN_DIR))
-    parser.add_argument("--parcel_gdb", help='Parcel gdb layer, optional for mapping')
-    parser.add_argument("--parcel_layer", help='Parcel layer in parcel_gdb, optional for mapping')
-    parser.add_argument("--output_gdb", help='Output/working gdb, optional for mapping')
-    parser.add_argument("output_file", help='Output file')
+PARCEL_TO_SUBZONE_FILE = os.path.join(
+    LARGE_DATASET_INPUT_DIR, "2018_10_17_parcel_to_taz1454sub.csv"
+)
+
+URBANSIM_RUN_DIR = os.path.join(BLUEPRINT_DIR, "runs")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description=USAGE,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "parcel_data_file",
+        help='Parcel data file relative to "{}". e.g. "Blueprint Plus Crossing (s23)\\v1.5.5\\run998_parcel_data_2050.csv"'.format(
+            URBANSIM_RUN_DIR
+        ),
+    )
+    parser.add_argument("--parcel_gdb", help="Parcel gdb layer, optional for mapping")
+    parser.add_argument(
+        "--parcel_layer", help="Parcel layer in parcel_gdb, optional for mapping"
+    )
+    parser.add_argument("--output_gdb", help="Output/working gdb, optional for mapping")
+    parser.add_argument("output_file", help="Output file")
     args = parser.parse_args()

-    pandas.set_option('max_columns', 200)
-    pandas.set_option('display.width', 200)
+    pandas.set_option("max_columns", 200)
+    pandas.set_option("display.width", 200)

-    parcel_data_file = os.path.join(URBANSIM_RUN_DIR, args.parcel_data_file)
+    parcel_data_file = os.path.join(URBANSIM_RUN_DIR, args.parcel_data_file)
     print(" {:20}: {}".format("parcel_data_file", parcel_data_file))
-    print(" {:20}: {}".format("parcel_gdb", args.parcel_gdb))
-    print(" {:20}: {}".format("parcel_layer", args.parcel_layer))
-    print(" {:20}: {}".format("output_gdb", args.output_gdb))
-    print(" {:20}: {}".format("output_file", args.output_file))
+    print(" {:20}: {}".format("parcel_gdb", args.parcel_gdb))
+    print(" {:20}: {}".format("parcel_layer", args.parcel_layer))
+    print(" {:20}: {}".format("output_gdb", args.output_gdb))
+    print(" {:20}: {}".format("output_file", args.output_file))

     parcel_to_subzone_df = pandas.read_csv(PARCEL_TO_SUBZONE_FILE)
-    print('Read {} rows from {}; head:\n{}'.format(len(parcel_to_subzone_df), PARCEL_TO_SUBZONE_FILE, parcel_to_subzone_df.head()))
+    print(
+        "Read {} rows from {}; head:\n{}".format(
+            len(parcel_to_subzone_df),
+            PARCEL_TO_SUBZONE_FILE,
+            parcel_to_subzone_df.head(),
+        )
+    )

     # make PARCEL_ID and ZONE_ID an int rather than a float
-    parcel_to_subzone_df['PARCEL_ID'] = parcel_to_subzone_df['PARCEL_ID'].round(0).astype(numpy.int64)
-    parcel_to_subzone_df['ZONE_ID' ] = parcel_to_subzone_df['ZONE_ID' ].round(0).astype(numpy.int64)
+    parcel_to_subzone_df["PARCEL_ID"] = (
+        parcel_to_subzone_df["PARCEL_ID"].round(0).astype(numpy.int64)
+    )
+    parcel_to_subzone_df["ZONE_ID"] = (
+        parcel_to_subzone_df["ZONE_ID"].round(0).astype(numpy.int64)
+    )
     print(parcel_to_subzone_df.dtypes)

-    parcel_data_df = pandas.read_csv(parcel_data_file)
-    parcel_data_df = parcel_data_df[['parcel_id','tothh']]
+    parcel_data_df = pandas.read_csv(parcel_data_file)
+    parcel_data_df = parcel_data_df[["parcel_id", "tothh"]]

-    print('Read {} rows from {}; head:\n{}'.format(len(parcel_data_df), parcel_data_file, parcel_data_df.head()))
+    print(
+        "Read {} rows from {}; head:\n{}".format(
+            len(parcel_data_df), parcel_data_file, parcel_data_df.head()
+        )
+    )

     # filter to parcels with notnull(tothh) and tothh > 0
-    parcel_data_df = parcel_data_df.loc[pandas.notnull(parcel_data_df['tothh'])&(parcel_data_df['tothh']>0), ]
+    parcel_data_df = parcel_data_df.loc[
+        pandas.notnull(parcel_data_df["tothh"]) & (parcel_data_df["tothh"] > 0),
+    ]
     print("Filtered to {} rows with tothh>0".format(len(parcel_data_df)))
     print(parcel_data_df.head())

     # join
-    parcel_data_df = pandas.merge(left=parcel_data_df, right=parcel_to_subzone_df, how="left", left_on="parcel_id", right_on="PARCEL_ID")
+    parcel_data_df = pandas.merge(
+        left=parcel_data_df,
+        right=parcel_to_subzone_df,
+        how="left",
+        left_on="parcel_id",
+        right_on="PARCEL_ID",
+    )
     print("After merge, parcel_data_df.head():\n{}".format(parcel_data_df.head()))

     # summarize to taz and subzone
-    taz_subzone_hh = parcel_data_df.groupby(['ZONE_ID','subzone']).agg({'tothh':'sum'}).reset_index()
+    taz_subzone_hh = (
parcel_data_df.groupby(["ZONE_ID", "subzone"]) + .agg({"tothh": "sum"}) + .reset_index() + ) # pivot subzone to column - taz_subzone_hh = taz_subzone_hh.pivot(index='ZONE_ID', columns='subzone', values='tothh').reset_index() + taz_subzone_hh = taz_subzone_hh.pivot( + index="ZONE_ID", columns="subzone", values="tothh" + ).reset_index() taz_subzone_hh.fillna(0, inplace=True) # subzone ZONE_ID a b c # 0 1.0 55.0 0.0 0.0 @@ -72,31 +112,42 @@ # 2 3.0 358.0 0.0 0.0 # 3 4.0 398.0 0.0 0.0 # 4 5.0 702.0 0.0 0.0 - taz_subzone_hh['tothh'] = taz_subzone_hh['a'] + taz_subzone_hh['b'] + taz_subzone_hh['c'] + taz_subzone_hh["tothh"] = ( + taz_subzone_hh["a"] + taz_subzone_hh["b"] + taz_subzone_hh["c"] + ) # see mapping here: https://github.com/BayAreaMetro/bayarea_urbansim/blob/9f40b58b731a2cb956543948b7bcba74ba1532e9/baus/datasources.py#L511 # a = short walk # b = long walk # c = no walk - taz_subzone_hh['SHRT'] = taz_subzone_hh['a']/taz_subzone_hh['tothh'] - taz_subzone_hh['LONG'] = taz_subzone_hh['b']/taz_subzone_hh['tothh'] - taz_subzone_hh['NONE'] = taz_subzone_hh['c']/taz_subzone_hh['tothh'] - taz_subzone_hh['TAZ'] = taz_subzone_hh['ZONE_ID'].astype(int) - print(taz_subzone_hh.head()) - - taz_subzone_hh[['TAZ','SHRT','LONG']].to_csv(args.output_file, index=False, float_format='%.2f') + taz_subzone_hh["SHRT"] = taz_subzone_hh["a"] / taz_subzone_hh["tothh"] + taz_subzone_hh["LONG"] = taz_subzone_hh["b"] / taz_subzone_hh["tothh"] + taz_subzone_hh["NONE"] = taz_subzone_hh["c"] / taz_subzone_hh["tothh"] + taz_subzone_hh["TAZ"] = taz_subzone_hh["ZONE_ID"].astype(int) + print(taz_subzone_hh.head()) + + taz_subzone_hh[["TAZ", "SHRT", "LONG"]].to_csv( + args.output_file, index=False, float_format="%.2f" + ) print("Wrote {} rows to {}".format(len(taz_subzone_hh), args.output_file)) # save unpivoted for tableau - taz_subzone_hh_unpivot = pandas.melt(taz_subzone_hh, id_vars=['TAZ'], value_vars=['SHRT','LONG','NONE'], value_name="share") + taz_subzone_hh_unpivot = pandas.melt( + taz_subzone_hh, + id_vars=["TAZ"], + value_vars=["SHRT", "LONG", "NONE"], + value_name="share", + ) print("taz_subzone_hh_unpivot.head():\n{}".format(taz_subzone_hh_unpivot.head())) - - unpivot_file = args.output_file.replace(".csv","_unpivot.csv") - taz_subzone_hh_unpivot.to_csv(unpivot_file, index=False, float_format='%.2f') + + unpivot_file = args.output_file.replace(".csv", "_unpivot.csv") + taz_subzone_hh_unpivot.to_csv(unpivot_file, index=False, float_format="%.2f") print("Wrote {} rows to {}".format(len(taz_subzone_hh_unpivot), unpivot_file)) if not args.parcel_gdb or not args.parcel_layer or not args.output_gdb: - print("One or more of parcel_gdb, parcel_layer, output_gdb not specified -- skipping mapping") + print( + "One or more of parcel_gdb, parcel_layer, output_gdb not specified -- skipping mapping" + ) sys.exit(0) # optional mapping @@ -110,8 +161,10 @@ if arcpy.Exists(args.parcel_layer): print("Found {} -- skipping copy".format(args.parcel_layer)) else: - arcpy.CopyFeatures_management(os.path.join(args.parcel_gdb, args.parcel_layer), - os.path.join(arcpy.env.workspace, args.parcel_layer)) + arcpy.CopyFeatures_management( + os.path.join(args.parcel_gdb, args.parcel_layer), + os.path.join(arcpy.env.workspace, args.parcel_layer), + ) print("Created {}\{}".format(arcpy.env.workspace, args.parcel_layer)) # copy parcel subzone table @@ -119,7 +172,9 @@ if arcpy.Exists(parcel_subzone): print("Found {} -- skipping copy".format(parcel_subzone)) else: - arcpy.TableToTable_conversion(PARCEL_TO_SUBZONE_FILE, arcpy.env.workspace, 
parcel_subzone) + arcpy.TableToTable_conversion( + PARCEL_TO_SUBZONE_FILE, arcpy.env.workspace, parcel_subzone + ) print("Created {}\{}".format(arcpy.env.workspace, parcel_subzone)) # join with parcel to subzone @@ -127,9 +182,9 @@ if arcpy.Exists(parcel_w_subzone): print("Found {} -- skipping creation".format(parcel_w_subzone)) else: - joined_layer = arcpy.AddJoin_management(args.parcel_layer, "PARCEL_ID", - parcel_subzone, "PARCEL_ID", - "KEEP_ALL") + joined_layer = arcpy.AddJoin_management( + args.parcel_layer, "PARCEL_ID", parcel_subzone, "PARCEL_ID", "KEEP_ALL" + ) print("Created joined layer {}".format(joined_layer)) # make it real @@ -141,7 +196,9 @@ if arcpy.Exists(parcel_subzone_dissolved): print("Found {} -- skipping dissolve".format(parcel_subzone_dissolved)) else: - arcpy.Dissolve_management(parcel_w_subzone, - parcel_subzone_dissolved, - ["{}_taz_key".format(parcel_subzone), "{}_subzone".format(parcel_subzone)]) + arcpy.Dissolve_management( + parcel_w_subzone, + parcel_subzone_dissolved, + ["{}_taz_key".format(parcel_subzone), "{}_subzone".format(parcel_subzone)], + ) print("Created {}\{}".format(arcpy.env.workspace, parcel_subzone_dissolved)) diff --git a/applications/travel_model_lu_inputs/2015/Employment/NETS_2015_BayAreaFirmSizes.py b/applications/travel_model_lu_inputs/2015/Employment/NETS_2015_BayAreaFirmSizes.py index 2ba3e18..a4cd487 100644 --- a/applications/travel_model_lu_inputs/2015/Employment/NETS_2015_BayAreaFirmSizes.py +++ b/applications/travel_model_lu_inputs/2015/Employment/NETS_2015_BayAreaFirmSizes.py @@ -14,37 +14,58 @@ import os, sys import pandas -NETS_DIR = "M:\\Data\\NETS\\2015" +NETS_DIR = "M:\\Data\\NETS\\2015" NETS_FILE = os.path.join(NETS_DIR, "NETSData2015_CA.txt") -OUT_FILE = os.path.join(NETS_DIR, "BayAreaFirmSizes.csv") +OUT_FILE = os.path.join(NETS_DIR, "BayAreaFirmSizes.csv") BAY_AREA_FIPS = [ - "06001", # Alameda - "06013", # Contra Costa - "06041", # Marin - "06055", # Napa - "06075", # San Francisco - "06081", # San Mateo - "06085", # Santa Clara - "06095", # Solano - "06097", # Sonoma + "06001", # Alameda + "06013", # Contra Costa + "06041", # Marin + "06055", # Napa + "06075", # San Francisco + "06081", # San Mateo + "06085", # Santa Clara + "06095", # Solano + "06097", # Sonoma ] -if __name__ == '__main__': - nets_df = pandas.read_csv(NETS_FILE, sep="\t", usecols=["DunsNumber","Emp15","HQDuns15","FIPS15"], - dtype={"DunsNumber":'str',"HQDuns15":'str',"FIPS15":'str'}) - print("Read {} rows from {}; head:\n{}".format(len(nets_df), NETS_FILE, nets_df.head())) +if __name__ == "__main__": + nets_df = pandas.read_csv( + NETS_FILE, + sep="\t", + usecols=["DunsNumber", "Emp15", "HQDuns15", "FIPS15"], + dtype={"DunsNumber": "str", "HQDuns15": "str", "FIPS15": "str"}, + ) + print( + "Read {} rows from {}; head:\n{}".format( + len(nets_df), NETS_FILE, nets_df.head() + ) + ) # filter to Bay Area counties nets_df = nets_df[nets_df.FIPS15.isin(BAY_AREA_FIPS)] print("Filtered to Bay Area FIP15 to get {} rows".format(len(nets_df))) # how many blank Emp15? 
- print("Count of null Emp15: {}".format(len(nets_df.loc[pandas.isnull(nets_df.Emp15)]))) - print("Count of null HQDuns15: {}".format(len(nets_df.loc[pandas.isnull(nets_df.HQDuns15)]))) + print( + "Count of null Emp15: {}".format( + len(nets_df.loc[pandas.isnull(nets_df.Emp15)]) + ) + ) + print( + "Count of null HQDuns15: {}".format( + len(nets_df.loc[pandas.isnull(nets_df.HQDuns15)]) + ) + ) # groupby HQDuns15 - nets_hq_df = nets_df.groupby(["HQDuns15"]).aggregate({"DunsNumber":"count", "Emp15":"sum"}) - nets_hq_df.rename(columns={"DunsNumber":"BayAreaEstCount", "Emp15":"BayAreaFirmEmp15"}, inplace=True) + nets_hq_df = nets_df.groupby(["HQDuns15"]).aggregate( + {"DunsNumber": "count", "Emp15": "sum"} + ) + nets_hq_df.rename( + columns={"DunsNumber": "BayAreaEstCount", "Emp15": "BayAreaFirmEmp15"}, + inplace=True, + ) nets_hq_df.reset_index(drop=False, inplace=True) print("Grouped to HQDuns15: \n{}".format(nets_hq_df.head())) diff --git a/basemap/create_tazdata_devpipeline_map.py b/basemap/create_tazdata_devpipeline_map.py index 23c5fc3..36707d4 100644 --- a/basemap/create_tazdata_devpipeline_map.py +++ b/basemap/create_tazdata_devpipeline_map.py @@ -6,7 +6,7 @@ # 3) Employment taz data csv (EMPLOYMENT_FILE) # # Outputs -# +# # Notes: # - zone_id and county/county_id aren't always consistent with the TM mapping between zones/county # (https://github.com/BayAreaMetro/travel-model-one/blob/master/utilities/geographies/taz-superdistrict-county.csv) @@ -16,72 +16,81 @@ # for arcpy: # set PATH=C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3 -import logging,os,re,sys,time +import logging, os, re, sys, time import numpy, pandas NOW = time.strftime("%Y%b%d.%H%M") # taz-county file -TAZ_COUNTY_FILE = "X:\\travel-model-one-master\\utilities\\geographies\\taz-superdistrict-county.csv" +TAZ_COUNTY_FILE = ( + "X:\\travel-model-one-master\\utilities\\geographies\\taz-superdistrict-county.csv" +) # taz shapefile TAZ_SHPFILE = "M:\\Data\\GIS layers\\TM1_taz\\bayarea_rtaz1454_rev1_WGS84.shp" -# reference for creation: +# reference for creation: # Create and share 2015 tazdata from basemap plus development pipeline with MTC planners @ # https://app.asana.com/0/385259290425521/1165636787387665/f -if os.getenv("USERNAME")=="lzorn": +if os.getenv("USERNAME") == "lzorn": # use local dir to make things faster - URBANSIM_LOCAL_DIR = "C:\\Users\\lzorn\\Documents\\UrbanSim_InputMapping" + URBANSIM_LOCAL_DIR = "C:\\Users\\lzorn\\Documents\\UrbanSim_InputMapping" # from https://mtcdrive.box.com/s/w0fmrz85l9cti2byd6rjqu9hv0m2edlq URBANSIM_BASEMAP_FILE = "2020_03_20_bayarea_v6.h5" # from https://mtcdrive.box.com/s/wcxlgwov5l6s6p0p0vh2xj1ekdynxxw5 - URBANSIM_PIPELINE_FILE= "pipeline_2020Mar20.1512.csv" + URBANSIM_PIPELINE_FILE = "pipeline_2020Mar20.1512.csv" URBANSIM_PIPELINE_GDB = "devproj_2020Mar20.1512.gdb" # employment data - EMPLOYMENT_FILE = "X:\\petrale\\applications\\travel_model_lu_inputs\\2015\\TAZ1454 2015 Land Use.csv" - OUTPUT_DIR = os.path.join(URBANSIM_LOCAL_DIR, "map_data") - LOG_FILE = os.path.join(OUTPUT_DIR, "create_tazdata_devpipeline_map_{}.log".format(NOW)) + EMPLOYMENT_FILE = "X:\\petrale\\applications\\travel_model_lu_inputs\\2015\\TAZ1454 2015 Land Use.csv" + OUTPUT_DIR = os.path.join(URBANSIM_LOCAL_DIR, "map_data") + LOG_FILE = os.path.join( + OUTPUT_DIR, "create_tazdata_devpipeline_map_{}.log".format(NOW) + ) # building types - BUILDING_TYPE_FILE = "X:\\petrale\\incoming\\dv_buildings_det_type_lu.csv" + BUILDING_TYPE_FILE = "X:\\petrale\\incoming\\dv_buildings_det_type_lu.csv" # with 
activity categories BUILDING_TYPE_ACTIVITY_FILE = "X:\\petrale\\TableauAliases.xlsx" # geodatabase for arcpy and map - WORKSPACE_GDB = "C:\\Users\\lzorn\\Documents\\UrbanSim_InputMapping\\UrbanSim_InputMapping.gdb" - ARCGIS_PROJECT = "C:\\Users\\lzorn\\Documents\\UrbanSim_InputMapping\\UrbanSim_InputMapping.aprx" + WORKSPACE_GDB = ( + "C:\\Users\\lzorn\\Documents\\UrbanSim_InputMapping\\UrbanSim_InputMapping.gdb" + ) + ARCGIS_PROJECT = ( + "C:\\Users\\lzorn\\Documents\\UrbanSim_InputMapping\\UrbanSim_InputMapping.aprx" + ) # year buit categories we care about # name, min, max YEAR_BUILT_CATEGORIES = [ - ("0000-2000", 0,2000), - ("2001-2010",2001,2010), - ("2011-2015",2011,2015), - ("2016-2020",2016,2020), - ("2021-2030",2021,2030), - ("2031-2050",2031,2050), + ("0000-2000", 0, 2000), + ("2001-2010", 2001, 2010), + ("2011-2015", 2011, 2015), + ("2016-2020", 2016, 2020), + ("2021-2030", 2021, 2030), + ("2031-2050", 2031, 2050), ] # aggregate YEAR_BUILT_CATEGORIES_AGG = [ - ("0000-2015", 0,2015), - ("2016-2050",2016,2050), - ] + ("0000-2015", 0, 2015), + ("2016-2050", 2016, 2050), +] COUNTY_ID_NAME = [ - ("Alameda" , 1), - ("Contra Costa" ,13), - ("Marin" ,41), - ("Napa" ,55), - ("San Francisco",75), - ("San Mateo" ,81), - ("Santa Clara" ,85), - ("Solano" ,95), - ("Sonoma" ,97), + ("Alameda", 1), + ("Contra Costa", 13), + ("Marin", 41), + ("Napa", 55), + ("San Francisco", 75), + ("San Mateo", 81), + ("Santa Clara", 85), + ("Solano", 95), + ("Sonoma", 97), ] -COUNTY_ID_NAME_DF = pandas.DataFrame(COUNTY_ID_NAME, columns=["county","county_id"]) +COUNTY_ID_NAME_DF = pandas.DataFrame(COUNTY_ID_NAME, columns=["county", "county_id"]) + def set_year_built_category(df): # set year_built_category, year_built_category_agg columns based on YEAR_BUILT_CATEGORIES and year_built column @@ -91,7 +100,10 @@ def set_year_built_category(df): YEAR_MIN = category[1] YEAR_MAX = category[2] - df.loc[(df.year_built >= YEAR_MIN)&(df.year_built <= YEAR_MAX), "year_built_category"] = CAT_NAME + df.loc[ + (df.year_built >= YEAR_MIN) & (df.year_built <= YEAR_MAX), + "year_built_category", + ] = CAT_NAME df["year_built_category_agg"] = "????-????" 
for category in YEAR_BUILT_CATEGORIES_AGG: @@ -99,34 +111,48 @@ def set_year_built_category(df): YEAR_MIN = category[1] YEAR_MAX = category[2] - df.loc[(df.year_built >= YEAR_MIN)&(df.year_built <= YEAR_MAX), "year_built_category_agg"] = CAT_NAME + df.loc[ + (df.year_built >= YEAR_MIN) & (df.year_built <= YEAR_MAX), + "year_built_category_agg", + ] = CAT_NAME return df + def warn_zone_county_disagreement(df): # check if zone/county mapping disagree with the TM mapping and log issues # TODO pass -if __name__ == '__main__': + +if __name__ == "__main__": # pandas options pandas.options.display.max_rows = 999 - if not os.path.exists(OUTPUT_DIR): os.mkdir(OUTPUT_DIR) + if not os.path.exists(OUTPUT_DIR): + os.mkdir(OUTPUT_DIR) # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("Output dir: {}".format(OUTPUT_DIR)) @@ -135,8 +161,12 @@ def warn_zone_county_disagreement(df): taz_sd_county_df = pandas.read_csv(TAZ_COUNTY_FILE) logger.info("Read {}; head:\n{}".format(TAZ_COUNTY_FILE, taz_sd_county_df.head())) # let's just keep taz/county - taz_sd_county_df = taz_sd_county_df[["ZONE","COUNTY_NAME", "SD_NAME", "SD_NUM_NAME"]] - taz_sd_county_df.rename(columns={"ZONE":"zone_id", "COUNTY_NAME":"county"},inplace=True) + taz_sd_county_df = taz_sd_county_df[ + ["ZONE", "COUNTY_NAME", "SD_NAME", "SD_NUM_NAME"] + ] + taz_sd_county_df.rename( + columns={"ZONE": "zone_id", "COUNTY_NAME": "county"}, inplace=True + ) # and county_id taz_sd_county_df = pandas.merge(left=taz_sd_county_df, right=COUNTY_ID_NAME_DF) logger.debug("taz_sd_county_df head:\n{}".format(taz_sd_county_df.head())) @@ -150,77 +180,155 @@ def warn_zone_county_disagreement(df): BUILDING_TYPE_TO_DESC["all"] = "all" logger.debug("BUILDING_TYPE_TO_DESC: {}".format(BUILDING_TYPE_TO_DESC)) - building_activity_df = pandas.read_excel(BUILDING_TYPE_ACTIVITY_FILE, sheet_name="building_type") - building_types_df = pandas.merge(left=building_types_df, right=building_activity_df, - how="left", left_index=True, right_on="building_type_det") + building_activity_df = pandas.read_excel( + BUILDING_TYPE_ACTIVITY_FILE, sheet_name="building_type" + ) + building_types_df = pandas.merge( + left=building_types_df, + right=building_activity_df, + how="left", + left_index=True, + right_on="building_type_det", + ) logger.debug("building_types_df: \n{}".format(building_types_df)) #################################### tm_lu_df = pandas.read_csv(EMPLOYMENT_FILE) logger.info("Read {}; head:\n{}".format(EMPLOYMENT_FILE, tm_lu_df.head())) - tm_lu_df.rename(columns={"ZONE":"zone_id"}, inplace=True) + tm_lu_df.rename(columns={"ZONE": "zone_id"}, inplace=True) # keep only employment, tothh, totpop, hhpop - tm_lu_df = 
tm_lu_df[["zone_id","TOTHH","TOTPOP","HHPOP","TOTEMP","RETEMPN","FPSEMPN","HEREMPN","AGREMPN","MWTEMPN","OTHEMPN"]] + tm_lu_df = tm_lu_df[ + [ + "zone_id", + "TOTHH", + "TOTPOP", + "HHPOP", + "TOTEMP", + "RETEMPN", + "FPSEMPN", + "HEREMPN", + "AGREMPN", + "MWTEMPN", + "OTHEMPN", + ] + ] #################################### - logger.info("Reading parcels and buildings from {}".format(os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_BASEMAP_FILE))) + logger.info( + "Reading parcels and buildings from {}".format( + os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_BASEMAP_FILE) + ) + ) # use this for parcel_id (index), county_id, zone_id, acres - parcels_df = pandas.read_hdf(os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_BASEMAP_FILE), key='parcels') + parcels_df = pandas.read_hdf( + os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_BASEMAP_FILE), key="parcels" + ) # logger.info(parcels_df.dtypes) - parcels_df = parcels_df[["zone_id","acres"]].reset_index().rename(columns={"acres":"parcel_acres"}) + parcels_df = ( + parcels_df[["zone_id", "acres"]] + .reset_index() + .rename(columns={"acres": "parcel_acres"}) + ) logger.info("parcels_df.head():\n{}".format(parcels_df.head())) # sum parcel acres to zone - parcels_zone_df = parcels_df.groupby(["zone_id"]).agg({"parcel_acres":"sum"}).reset_index() + parcels_zone_df = ( + parcels_df.groupby(["zone_id"]).agg({"parcel_acres": "sum"}).reset_index() + ) logger.info("parcels_zone_df:\n{}".format(parcels_zone_df.head())) - buildings_df = pandas.read_hdf(os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_BASEMAP_FILE), key='buildings') + buildings_df = pandas.read_hdf( + os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_BASEMAP_FILE), key="buildings" + ) logger.info("buildings_df.dtypes:\n{}".format(buildings_df.dtypes)) - #logger.info(buildings_df.head()) + # logger.info(buildings_df.head()) # segment year buit to 0000-2000, 2001-2010, 2011-2015 buildings_df = set_year_built_category(buildings_df) - logger.info("buildings_df by year_built_category:\n{}".format(buildings_df["year_built_category"].value_counts())) + logger.info( + "buildings_df by year_built_category:\n{}".format( + buildings_df["year_built_category"].value_counts() + ) + ) # join buildings to parcel to get the zone - buildings_df = pandas.merge(left=buildings_df, right=parcels_df[["parcel_id","zone_id"]], - how="left", left_on=["parcel_id"], right_on=["parcel_id"]) + buildings_df = pandas.merge( + left=buildings_df, + right=parcels_df[["parcel_id", "zone_id"]], + how="left", + left_on=["parcel_id"], + right_on=["parcel_id"], + ) buildings_no_year_built = buildings_df.loc[pandas.isnull(buildings_df.year_built)] if len(buildings_no_year_built) > 0: - logger.warn("buildings_df has {} rows with no year_built:\n{}".format(len(buildings_no_year_built), buildings_no_year_built)) + logger.warn( + "buildings_df has {} rows with no year_built:\n{}".format( + len(buildings_no_year_built), buildings_no_year_built + ) + ) else: logger.info("buildings_df has 0 rows with no year_built") - buildings_no_building_type = buildings_df.loc[pandas.isnull(buildings_df.building_type)] + buildings_no_building_type = buildings_df.loc[ + pandas.isnull(buildings_df.building_type) + ] if len(buildings_no_building_type) > 0: - logger.warn("buildings_df has {} rows with no building_type:\n{}".format(len(buildings_no_building_type), buildings_no_building_type)) + logger.warn( + "buildings_df has {} rows with no building_type:\n{}".format( + len(buildings_no_building_type), buildings_no_building_type + ) + ) else: logger.info("buildings_df has 0 rows with no 
building_type") #### sum to zone by year_built_category and building_type: residential_units, residential_sqft, non_residential_sqft - buildings_zone_btype_df = buildings_df.groupby(["zone_id","year_built_category_agg","year_built_category","building_type"]).agg( - {"residential_units" :"sum", - "building_sqft" :"sum", - "residential_sqft" :"sum", - "non_residential_sqft":"sum"}) + buildings_zone_btype_df = buildings_df.groupby( + ["zone_id", "year_built_category_agg", "year_built_category", "building_type"] + ).agg( + { + "residential_units": "sum", + "building_sqft": "sum", + "residential_sqft": "sum", + "non_residential_sqft": "sum", + } + ) buildings_zone_btype_df.reset_index(inplace=True) buildings_zone_btype_df["source"] = "buildings" # reorder - buildings_zone_btype_df = buildings_zone_btype_df[["zone_id","source", - "year_built_category_agg","year_built_category","building_type", - "residential_units","building_sqft","residential_sqft","non_residential_sqft"]] + buildings_zone_btype_df = buildings_zone_btype_df[ + [ + "zone_id", + "source", + "year_built_category_agg", + "year_built_category", + "building_type", + "residential_units", + "building_sqft", + "residential_sqft", + "non_residential_sqft", + ] + ] - logger.info("buildings_zone_btype_df.head():\n{}".format(buildings_zone_btype_df.head())) - logger.info("buildings_zone_btype_df.dtypes:\n{}".format(buildings_zone_btype_df.dtypes)) + logger.info( + "buildings_zone_btype_df.head():\n{}".format(buildings_zone_btype_df.head()) + ) + logger.info( + "buildings_zone_btype_df.dtypes:\n{}".format(buildings_zone_btype_df.dtypes) + ) #### sum to zone by year_built_category and NOT building_type: residential_units, residential_sqft, non_residential_sqft - buildings_zone_df = buildings_df.groupby(["zone_id","year_built_category_agg","year_built_category"]).agg( - {"residential_units" :"sum", - "building_sqft" :"sum", - "residential_sqft" :"sum", - "non_residential_sqft":"sum"}) + buildings_zone_df = buildings_df.groupby( + ["zone_id", "year_built_category_agg", "year_built_category"] + ).agg( + { + "residential_units": "sum", + "building_sqft": "sum", + "residential_sqft": "sum", + "non_residential_sqft": "sum", + } + ) buildings_zone_df.reset_index(inplace=True) buildings_zone_df["source"] = "buildings" buildings_zone_df["building_type"] = "all" @@ -233,56 +341,102 @@ def warn_zone_county_disagreement(df): #################################### # read pipeline file - logger.info("Reading pipeline from {}".format(os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_PIPELINE_FILE))) - pipeline_df = pandas.read_csv(os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_PIPELINE_FILE)) + logger.info( + "Reading pipeline from {}".format( + os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_PIPELINE_FILE) + ) + ) + pipeline_df = pandas.read_csv( + os.path.join(URBANSIM_LOCAL_DIR, URBANSIM_PIPELINE_FILE) + ) logger.info("pipeline_df.head():\n{}".format(pipeline_df.head())) logger.info("pipeline_df.dtypes:\n{}".format(pipeline_df.dtypes)) # logger.info("pipeline_df by year_built:\n{}".format(pipeline_df["year_built"].value_counts())) pipeline_df = set_year_built_category(pipeline_df) - logger.info("pipeline_df by year_built_category:\n{}".format(pipeline_df["year_built_category"].value_counts())) - logger.info("pipeline_df by year_built_category_agg:\n{}".format(pipeline_df["year_built_category_agg"].value_counts())) + logger.info( + "pipeline_df by year_built_category:\n{}".format( + pipeline_df["year_built_category"].value_counts() + ) + ) + logger.info( + "pipeline_df by 
year_built_category_agg:\n{}".format( + pipeline_df["year_built_category_agg"].value_counts() + ) + ) pipeline_no_year_built = pipeline_df.loc[pandas.isnull(pipeline_df.year_built)] if len(pipeline_no_year_built) > 0: - logger.warn("pipeline_df has {} rows with no year_built:\n{}".format(len(pipeline_no_year_built), pipeline_no_year_built)) + logger.warn( + "pipeline_df has {} rows with no year_built:\n{}".format( + len(pipeline_no_year_built), pipeline_no_year_built + ) + ) else: logger.info("pipeline_df has 0 rows with no year_built") - pipeline_no_building_type = pipeline_df.loc[pandas.isnull(pipeline_df.building_type)] + pipeline_no_building_type = pipeline_df.loc[ + pandas.isnull(pipeline_df.building_type) + ] if len(pipeline_no_building_type) > 0: - logger.warn("pipeline_df has {} rows with no building_type:\n{}".format(len(pipeline_no_building_type), pipeline_no_building_type)) + logger.warn( + "pipeline_df has {} rows with no building_type:\n{}".format( + len(pipeline_no_building_type), pipeline_no_building_type + ) + ) else: logger.info("pipeline_df has 0 rows with no building_type") # sum to zone by year_built_category and building_type # assume residential_sqft = building_sqft - non_residential_sqft - pipeline_df["residential_sqft"] = pipeline_df["building_sqft"] - pipeline_df["non_residential_sqft"] + pipeline_df["residential_sqft"] = ( + pipeline_df["building_sqft"] - pipeline_df["non_residential_sqft"] + ) #### sum to zone by year_built_category and building_type: residential_units, residential_sqft, non_residential_sqft - pipeline_zone_btype_df = pipeline_df.groupby(["ZONE_ID","year_built_category_agg","year_built_category","building_type"]).agg( - {"residential_units" :"sum", - "building_sqft" :"sum", - "residential_sqft" :"sum", - "non_residential_sqft":"sum"}) + pipeline_zone_btype_df = pipeline_df.groupby( + ["ZONE_ID", "year_built_category_agg", "year_built_category", "building_type"] + ).agg( + { + "residential_units": "sum", + "building_sqft": "sum", + "residential_sqft": "sum", + "non_residential_sqft": "sum", + } + ) pipeline_zone_btype_df.reset_index(inplace=True) - pipeline_zone_btype_df.rename(columns={"ZONE_ID":"zone_id"}, inplace=True) - pipeline_zone_btype_df.loc[ pandas.isnull(pipeline_zone_btype_df.zone_id), "zone_id"] = 0 # null => 0 + pipeline_zone_btype_df.rename(columns={"ZONE_ID": "zone_id"}, inplace=True) + pipeline_zone_btype_df.loc[ + pandas.isnull(pipeline_zone_btype_df.zone_id), "zone_id" + ] = 0 # null => 0 pipeline_zone_btype_df.zone_id = pipeline_zone_btype_df.zone_id.astype(int) pipeline_zone_btype_df["source"] = "pipeline" - pipeline_zone_btype_df = pipeline_zone_btype_df[list(buildings_zone_btype_df.columns)] - logger.info("pipeline_zone_btype_df.head():\n{}".format(pipeline_zone_btype_df.head())) - logger.info("pipeline_zone_btype_df.dtypes:\n{}".format(pipeline_zone_btype_df.dtypes)) + pipeline_zone_btype_df = pipeline_zone_btype_df[ + list(buildings_zone_btype_df.columns) + ] + logger.info( + "pipeline_zone_btype_df.head():\n{}".format(pipeline_zone_btype_df.head()) + ) + logger.info( + "pipeline_zone_btype_df.dtypes:\n{}".format(pipeline_zone_btype_df.dtypes) + ) #### sum to zone by year_built_category and NOT building_type: residential_units, residential_sqft, non_residential_sqft - pipeline_zone_df = pipeline_df.groupby(["ZONE_ID","year_built_category_agg","year_built_category"]).agg( - {"residential_units" :"sum", - "building_sqft" :"sum", - "residential_sqft" :"sum", - "non_residential_sqft":"sum"}) + pipeline_zone_df = 
pipeline_df.groupby( + ["ZONE_ID", "year_built_category_agg", "year_built_category"] + ).agg( + { + "residential_units": "sum", + "building_sqft": "sum", + "residential_sqft": "sum", + "non_residential_sqft": "sum", + } + ) pipeline_zone_df.reset_index(inplace=True) - pipeline_zone_df.rename(columns={"ZONE_ID":"zone_id"}, inplace=True) - pipeline_zone_df.loc[ pandas.isnull(pipeline_zone_df.zone_id), "zone_id"] = 0 # null => 0 + pipeline_zone_df.rename(columns={"ZONE_ID": "zone_id"}, inplace=True) + pipeline_zone_df.loc[ + pandas.isnull(pipeline_zone_df.zone_id), "zone_id" + ] = 0 # null => 0 pipeline_zone_df.zone_id = pipeline_zone_df.zone_id.astype(int) pipeline_zone_df["source"] = "pipeline" @@ -291,88 +445,121 @@ def warn_zone_county_disagreement(df): logger.info("pipeline_zone_df.head():\n{}".format(pipeline_zone_df.head())) logger.info("pipeline_zone_df.dtypes:\n{}".format(pipeline_zone_df.dtypes)) - #################################### # take buildings & pipeline by zone - zone_df = pandas.concat([buildings_zone_btype_df, - buildings_zone_df, - pipeline_zone_btype_df, - pipeline_zone_df], axis="index") + zone_df = pandas.concat( + [ + buildings_zone_btype_df, + buildings_zone_df, + pipeline_zone_btype_df, + pipeline_zone_df, + ], + axis="index", + ) logger.info("zone_df.head():\n{}".format(zone_df.head())) - logger.debug("zone_df for zone_id=1: \n{}".format(zone_df.loc[zone_df.zone_id==1])) + logger.debug( + "zone_df for zone_id=1: \n{}".format(zone_df.loc[zone_df.zone_id == 1]) + ) # pivot on buildings/pipeline including ALL building types - zone_piv_df = zone_df.pivot_table(index ="zone_id", - columns=["source","year_built_category","building_type"], - values =["residential_units", "building_sqft", "residential_sqft", "non_residential_sqft"], - aggfunc=numpy.sum) + zone_piv_df = zone_df.pivot_table( + index="zone_id", + columns=["source", "year_built_category", "building_type"], + values=[ + "residential_units", + "building_sqft", + "residential_sqft", + "non_residential_sqft", + ], + aggfunc=numpy.sum, + ) logger.info("zone_piv_df.head():\n{}".format(zone_piv_df.head())) zone_piv_df.reset_index(inplace=True) - logger.debug("zone_piv_df for zone_id=1: \n{}".format(zone_piv_df.loc[zone_piv_df.zone_id==1].squeeze())) + logger.debug( + "zone_piv_df for zone_id=1: \n{}".format( + zone_piv_df.loc[zone_piv_df.zone_id == 1].squeeze() + ) + ) # convert column names from tuples new_cols = [] for col in zone_piv_df.columns.values: - if col[1] == '': # ('zone_id', '', '', '') + if col[1] == "": # ('zone_id', '', '', '') new_cols.append(col[0]) - else: # ('building_sqft', 'buildings', '0000-2000', 'HM') - new_cols.append(col[1]+" "+col[2]+" "+col[3]+" "+col[0]) + else: # ('building_sqft', 'buildings', '0000-2000', 'HM') + new_cols.append(col[1] + " " + col[2] + " " + col[3] + " " + col[0]) zone_piv_df.columns = new_cols logger.debug("zone_piv_df.head():\n{}".format(zone_piv_df.head())) logger.debug("zone_piv_df.dtypes:\n{}".format(zone_piv_df.dtypes)) logger.debug("zone_piv_df.sum():\n{}".format(zone_piv_df.sum())) # pivot on buildings/pipeline including ALL building types - zone_piv_agg_df = zone_df.pivot_table(index ="zone_id", - columns=["source","year_built_category_agg","building_type"], - values =["residential_units", "building_sqft", "residential_sqft", "non_residential_sqft"], - aggfunc=numpy.sum) + zone_piv_agg_df = zone_df.pivot_table( + index="zone_id", + columns=["source", "year_built_category_agg", "building_type"], + values=[ + "residential_units", + "building_sqft", + 
"residential_sqft", + "non_residential_sqft", + ], + aggfunc=numpy.sum, + ) logger.info("zone_piv_agg_df.head():\n{}".format(zone_piv_agg_df.head())) zone_piv_agg_df.reset_index(inplace=True) - logger.debug("zone_piv_agg_df for zone_id=1: \n{}".format(zone_piv_agg_df.loc[zone_piv_agg_df.zone_id==1].squeeze())) + logger.debug( + "zone_piv_agg_df for zone_id=1: \n{}".format( + zone_piv_agg_df.loc[zone_piv_agg_df.zone_id == 1].squeeze() + ) + ) # convert column names from tuples new_cols = [] for col in zone_piv_agg_df.columns.values: - if col[1] == '': # ('zone_id', '', '', '') + if col[1] == "": # ('zone_id', '', '', '') new_cols.append(col[0]) - else: # ('building_sqft', 'buildings', '0000-2000', 'HM') - new_cols.append(col[1]+" "+col[2]+" "+col[3]+" "+col[0]) + else: # ('building_sqft', 'buildings', '0000-2000', 'HM') + new_cols.append(col[1] + " " + col[2] + " " + col[3] + " " + col[0]) zone_piv_agg_df.columns = new_cols logger.debug("zone_piv_agg_df.head():\n{}".format(zone_piv_agg_df.head())) logger.debug("zone_piv_agg_df.dtypes:\n{}".format(zone_piv_agg_df.dtypes)) logger.debug("zone_piv_agg_df.sum():\n{}".format(zone_piv_agg_df.sum())) # merge zone_piv_df and zone_piv_agg_df - zone_piv_df = pandas.merge(left=zone_piv_df, right=zone_piv_agg_df, left_on="zone_id", right_on="zone_id", how="outer") + zone_piv_df = pandas.merge( + left=zone_piv_df, + right=zone_piv_agg_df, + left_on="zone_id", + right_on="zone_id", + how="outer", + ) # will create 4 datasets KEEP_COLUMNS_BY_DATASET = { - "base_res": ["zone_id","source", + "base_res": [ + "zone_id", + "source", "buildings 0000-2000 DM residential_units", "buildings 0000-2000 HS residential_units", "buildings 0000-2000 HT residential_units", "buildings 0000-2000 HM residential_units", "buildings 0000-2000 MR residential_units", "buildings 0000-2000 all residential_units", - "buildings 2001-2010 DM residential_units", "buildings 2001-2010 HS residential_units", "buildings 2001-2010 HT residential_units", "buildings 2001-2010 HM residential_units", "buildings 2001-2010 MR residential_units", "buildings 2001-2010 all residential_units", - "buildings 2011-2015 DM residential_units", "buildings 2011-2015 HS residential_units", "buildings 2011-2015 HT residential_units", "buildings 2011-2015 HM residential_units", "buildings 2011-2015 MR residential_units", "buildings 2011-2015 all residential_units", - "buildings 0000-2015 DM residential_units", "buildings 0000-2015 HS residential_units", "buildings 0000-2015 HT residential_units", @@ -381,11 +568,12 @@ def warn_zone_county_disagreement(df): # 2015 HU count "buildings 0000-2015 all residential_units", ], - "base_nonres": ["zone_id","source", + "base_nonres": [ + "zone_id", + "source", "buildings 0000-2000 all non_residential_sqft", "buildings 2001-2010 all non_residential_sqft", "buildings 2011-2015 all non_residential_sqft", - # 2015 Commercial Square Feet "buildings 0000-2015 AL non_residential_sqft", "buildings 0000-2015 CM non_residential_sqft", @@ -418,16 +606,15 @@ def warn_zone_county_disagreement(df): "buildings 0000-2015 UN non_residential_sqft", "buildings 0000-2015 VA non_residential_sqft", "buildings 0000-2015 VP non_residential_sqft", - "buildings 0000-2015 all non_residential_sqft", - "buildings 0000-2000 all building_sqft", "buildings 2001-2010 all building_sqft", "buildings 2011-2015 all building_sqft", "buildings 0000-2015 all building_sqft", ], - - "pipe_res": ["zone_id","source", + "pipe_res": [ + "zone_id", + "source", # residential units built from 2016 on "pipeline 
2016-2020 AL residential_units", "pipeline 2016-2020 DM residential_units", @@ -437,7 +624,6 @@ def warn_zone_county_disagreement(df): "pipeline 2016-2020 ME residential_units", "pipeline 2016-2020 MR residential_units", "pipeline 2016-2020 all residential_units", - "pipeline 2021-2030 AL residential_units", "pipeline 2021-2030 DM residential_units", "pipeline 2021-2030 HS residential_units", @@ -446,7 +632,6 @@ def warn_zone_county_disagreement(df): "pipeline 2021-2030 ME residential_units", "pipeline 2021-2030 MR residential_units", "pipeline 2021-2030 all residential_units", - "pipeline 2031-2050 AL residential_units", "pipeline 2031-2050 DM residential_units", "pipeline 2031-2050 HS residential_units", @@ -455,7 +640,6 @@ def warn_zone_county_disagreement(df): "pipeline 2031-2050 ME residential_units", "pipeline 2031-2050 MR residential_units", "pipeline 2031-2050 all residential_units", - "pipeline 2016-2050 AL residential_units", "pipeline 2016-2050 DM residential_units", "pipeline 2016-2050 HS residential_units", @@ -465,13 +649,13 @@ def warn_zone_county_disagreement(df): "pipeline 2016-2050 MR residential_units", "pipeline 2016-2050 all residential_units", ], - - "pipe_nonres": ["zone_id","source", + "pipe_nonres": [ + "zone_id", + "source", # commercial Square Feet Built From 2016 "pipeline 2016-2020 all non_residential_sqft", "pipeline 2021-2030 all non_residential_sqft", "pipeline 2031-2050 all non_residential_sqft", - "pipeline 2016-2050 AL non_residential_sqft", "pipeline 2016-2050 CM non_residential_sqft", "pipeline 2016-2050 DM non_residential_sqft", @@ -503,9 +687,8 @@ def warn_zone_county_disagreement(df): "pipeline 2016-2050 UN non_residential_sqft", "pipeline 2016-2050 VA non_residential_sqft", "pipeline 2016-2050 VP non_residential_sqft", - "pipeline 2016-2050 all non_residential_sqft", - ] + ], } zone_datasets = {} @@ -517,39 +700,64 @@ def warn_zone_county_disagreement(df): # but only if they exist keep_columns_present = [] - for col in keep_columns: - if col in list(zone_piv_df.columns.values): keep_columns_present.append(col) + for col in keep_columns: + if col in list(zone_piv_df.columns.values): + keep_columns_present.append(col) zone_dataset_piv_df = zone_piv_df[keep_columns_present] # fill na with zero zone_dataset_piv_df.fillna(value=0, inplace=True) - - logger.info("zone_dataset_piv_df.dtypes:\n{}".format(zone_dataset_piv_df.dtypes)) - + + logger.info( + "zone_dataset_piv_df.dtypes:\n{}".format(zone_dataset_piv_df.dtypes) + ) + # add parcel acres - zone_dataset_piv_df = pandas.merge(left=zone_dataset_piv_df, right=parcels_zone_df, how="outer") + zone_dataset_piv_df = pandas.merge( + left=zone_dataset_piv_df, right=parcels_zone_df, how="outer" + ) # and employment, if relevant if dataset == "base_nonres": - zone_dataset_piv_df = pandas.merge(left=zone_dataset_piv_df, right=tm_lu_df, how="outer") + zone_dataset_piv_df = pandas.merge( + left=zone_dataset_piv_df, right=tm_lu_df, how="outer" + ) # and 2015 Employee Density - zone_dataset_piv_df["Employee Density 2015"] = zone_dataset_piv_df["TOTEMP"]/zone_dataset_piv_df["parcel_acres"] - zone_dataset_piv_df.loc[ zone_dataset_piv_df["parcel_acres"] == 0, "Employee Density 2015" ] = 0.0 + zone_dataset_piv_df["Employee Density 2015"] = ( + zone_dataset_piv_df["TOTEMP"] / zone_dataset_piv_df["parcel_acres"] + ) + zone_dataset_piv_df.loc[ + zone_dataset_piv_df["parcel_acres"] == 0, "Employee Density 2015" + ] = 0.0 # 2015 Commercial Square Feet per Employee - zone_dataset_piv_df["Commercial Square Feet per Employee 
2015"] = \ - zone_dataset_piv_df["buildings 0000-2015 all non_residential_sqft"]/zone_dataset_piv_df["TOTEMP"] - zone_dataset_piv_df.loc[ zone_dataset_piv_df["TOTEMP"] == 0, "Commercial Square Feet per Employee 2015"] = 0.0 - + zone_dataset_piv_df["Commercial Square Feet per Employee 2015"] = ( + zone_dataset_piv_df["buildings 0000-2015 all non_residential_sqft"] + / zone_dataset_piv_df["TOTEMP"] + ) + zone_dataset_piv_df.loc[ + zone_dataset_piv_df["TOTEMP"] == 0, + "Commercial Square Feet per Employee 2015", + ] = 0.0 + # and 2015 HU Density, if relevant if dataset == "base_res": - zone_dataset_piv_df["HU Density 2015"] = zone_dataset_piv_df["buildings 0000-2015 all residential_units"]/zone_dataset_piv_df["parcel_acres"] - zone_dataset_piv_df.loc[ zone_dataset_piv_df["parcel_acres"] == 0, "HU Density 2015" ] = 0.0 + zone_dataset_piv_df["HU Density 2015"] = ( + zone_dataset_piv_df["buildings 0000-2015 all residential_units"] + / zone_dataset_piv_df["parcel_acres"] + ) + zone_dataset_piv_df.loc[ + zone_dataset_piv_df["parcel_acres"] == 0, "HU Density 2015" + ] = 0.0 # zone pivot: add county/superdistrict - zone_dataset_piv_df = pandas.merge(left=zone_dataset_piv_df, right=taz_sd_county_df, how="outer") - logger.info("zone_dataset_piv_df.head():\n{}".format(zone_dataset_piv_df.head())) - + zone_dataset_piv_df = pandas.merge( + left=zone_dataset_piv_df, right=taz_sd_county_df, how="outer" + ) + logger.info( + "zone_dataset_piv_df.head():\n{}".format(zone_dataset_piv_df.head()) + ) + # write zone_dataset_piv_df zone_dataset_piv_file = os.path.join(OUTPUT_DIR, "{}.csv".format(dataset)) zone_dataset_piv_df.to_csv(zone_dataset_piv_file, index=False) @@ -557,14 +765,16 @@ def warn_zone_county_disagreement(df): # keep it for arcpy zone_datasets[dataset] = zone_dataset_piv_df - + # for tableau, let's not pivot, and let's not keep the all btypes - zone_df = pandas.concat([buildings_zone_btype_df, pipeline_zone_btype_df], axis="index") - + zone_df = pandas.concat( + [buildings_zone_btype_df, pipeline_zone_btype_df], axis="index" + ) + # zone: add county/superdistrict zone_df = pandas.merge(left=zone_df, right=taz_sd_county_df, how="outer") logger.info("zone_df.head():\n{}".format(zone_df.head())) - + # write zone_df zone_file = os.path.join(OUTPUT_DIR, "urbansim_input_zonedata.csv") zone_df.to_csv(zone_file, index=False) @@ -572,16 +782,20 @@ def warn_zone_county_disagreement(df): logger.info("importing arcpy....") import arcpy - arcpy.env.workspace = WORKSPACE_GDB + + arcpy.env.workspace = WORKSPACE_GDB # create metadata - new_metadata = arcpy.metadata.Metadata() - new_metadata.title = "UrbanSim input" - new_metadata.summary = "Data derived from UrbanSim Basemap and Development Pipeline for review" - new_metadata.description = \ - "Basemap source: {}\n".format(URBANSIM_BASEMAP_FILE) + \ - "Pipeline source: {}\n".format(URBANSIM_PIPELINE_FILE) + \ - "Employment source: {}\n".format(EMPLOYMENT_FILE) + new_metadata = arcpy.metadata.Metadata() + new_metadata.title = "UrbanSim input" + new_metadata.summary = ( + "Data derived from UrbanSim Basemap and Development Pipeline for review" + ) + new_metadata.description = ( + "Basemap source: {}\n".format(URBANSIM_BASEMAP_FILE) + + "Pipeline source: {}\n".format(URBANSIM_PIPELINE_FILE) + + "Employment source: {}\n".format(EMPLOYMENT_FILE) + ) logger.info("Metadata description: {}".format(new_metadata.description)) new_metadata.credits = "create_tazdata_devpipeline_map.py" @@ -591,24 +805,36 @@ def warn_zone_county_disagreement(df): # bring in binary of dataset 
since arcpy mangles csv datatypes dataset_table = "{}".format(dataset) - try: arcpy.Delete_management(dataset_table) - except: pass + try: + arcpy.Delete_management(dataset_table) + except: + pass logger.info("Converting dataset to arcpy table {}".format(dataset_table)) - zone_piv_nparr = numpy.array(numpy.rec.fromrecords(zone_datasets[dataset].values)) + zone_piv_nparr = numpy.array( + numpy.rec.fromrecords(zone_datasets[dataset].values) + ) zone_piv_nparr.dtype.names = tuple(zone_datasets[dataset].dtypes.index.tolist()) - arcpy.da.NumPyArrayToTable(zone_piv_nparr, os.path.join(WORKSPACE_GDB, dataset_table)) + arcpy.da.NumPyArrayToTable( + zone_piv_nparr, os.path.join(WORKSPACE_GDB, dataset_table) + ) # create join layer with tazdata and zone_file logger.info("Joining {} with {}".format(TAZ_SHPFILE, dataset_table)) - dataset_joined = arcpy.AddJoin_management(TAZ_SHPFILE, "TAZ1454", - os.path.join(WORKSPACE_GDB, dataset_table), "zone_id") + dataset_joined = arcpy.AddJoin_management( + TAZ_SHPFILE, + "TAZ1454", + os.path.join(WORKSPACE_GDB, dataset_table), + "zone_id", + ) # save it as a feature class -- delete one if it already exists first dataset_fc = "{}_fc".format(dataset) - try: arcpy.Delete_management(dataset_fc) - except: pass + try: + arcpy.Delete_management(dataset_fc) + except: + pass logger.info("Saving it as {}".format(dataset_fc)) arcpy.CopyFeatures_management(dataset_joined, dataset_fc) @@ -616,27 +842,36 @@ def warn_zone_county_disagreement(df): # set aliases ALIASES = { # https://github.com/BayAreaMetro/modeling-website/wiki/TazData - "TOTEMP" : "Total employment", + "TOTEMP": "Total employment", "RETEMPN": "Retail employment", "FPSEMPN": "Financial and prof employment", "HEREMPN": "Health edu and rec employment", "AGREMPN": "Ag and natural res employment", "MWTEMPN": "Manuf wholesale and transp employment", - "OTHEMPN": "Other employment" + "OTHEMPN": "Other employment", } fieldList = arcpy.ListFields(dataset_fc) for field in fieldList: logger.debug("field: [{}]".format(field.name)) if field.name.startswith("{}_".format(dataset_table)): - postfix = field.name[len(dataset_table)+1:] + postfix = field.name[len(dataset_table) + 1 :] logger.debug("postfix: [{}]".format(postfix)) if postfix in ALIASES.keys(): - arcpy.AlterField_management(dataset_fc, field.name, new_field_alias=ALIASES[postfix]) + arcpy.AlterField_management( + dataset_fc, field.name, new_field_alias=ALIASES[postfix] + ) else: match = field_re.match(postfix) if match: - logger.debug("match: {} {} {} {}".format(match.group(1), match.group(2), match.group(3), match.group(4))) + logger.debug( + "match: {} {} {} {}".format( + match.group(1), + match.group(2), + match.group(3), + match.group(4), + ) + ) new_alias = "" if match.group(4) == "residential_units": new_alias = "HU " @@ -645,10 +880,13 @@ def warn_zone_county_disagreement(df): elif match.group(4) == "building_sqft": new_alias = "bldg sqft " - new_alias += match.group(2) + " " # year range - new_alias += BUILDING_TYPE_TO_DESC[match.group(3)] # building type - arcpy.AlterField_management(dataset_fc, field.name, new_field_alias=new_alias) - + new_alias += match.group(2) + " " # year range + new_alias += BUILDING_TYPE_TO_DESC[ + match.group(3) + ] # building type + arcpy.AlterField_management( + dataset_fc, field.name, new_field_alias=new_alias + ) # set metadata logger.info("Setting featureclass metadata") @@ -660,33 +898,49 @@ def warn_zone_county_disagreement(df): # logger.debug("Layout: {}".format(lyt.name)) # aprx.save() - dataset_fc_metadata = 
arcpy.metadata.Metadata(dataset_fc) - logger.debug("feature class metadata isReadOnly? {}".format(dataset_fc_metadata.isReadOnly)) + logger.debug( + "feature class metadata isReadOnly? {}".format( + dataset_fc_metadata.isReadOnly + ) + ) dataset_fc_metadata.copy(new_metadata) dataset_fc_metadata.save() - # copy over pipeline with additional info added from building_types_df building_types_table = "building_types" - try: arcpy.Delete_management(building_types_table) - except: pass + try: + arcpy.Delete_management(building_types_table) + except: + pass building_types_arr = numpy.array(numpy.rec.fromrecords(building_types_df.values)) building_types_arr.dtype.names = tuple(building_types_df.dtypes.index.tolist()) - arcpy.da.NumPyArrayToTable(building_types_arr, os.path.join(WORKSPACE_GDB, building_types_table)) + arcpy.da.NumPyArrayToTable( + building_types_arr, os.path.join(WORKSPACE_GDB, building_types_table) + ) # create join layer with tazdata and zone_file - logger.info("Joining {} with {}".format(os.path.join(URBANSIM_PIPELINE_GDB, "pipeline"), building_types_table)) - dataset_joined = arcpy.AddJoin_management(os.path.join(URBANSIM_PIPELINE_GDB, "pipeline"), "building_type", - os.path.join(WORKSPACE_GDB, building_types_table), "building_type_det") + logger.info( + "Joining {} with {}".format( + os.path.join(URBANSIM_PIPELINE_GDB, "pipeline"), building_types_table + ) + ) + dataset_joined = arcpy.AddJoin_management( + os.path.join(URBANSIM_PIPELINE_GDB, "pipeline"), + "building_type", + os.path.join(WORKSPACE_GDB, building_types_table), + "building_type_det", + ) pipeline_fc = "pipeline" - try: arcpy.Delete_management(pipeline_fc) - except: pass + try: + arcpy.Delete_management(pipeline_fc) + except: + pass logger.info("Saving it as {}".format(pipeline_fc)) arcpy.CopyFeatures_management(dataset_joined, pipeline_fc) - + logger.info("Complete") """ @@ -762,4 +1016,4 @@ def warn_zone_county_disagreement(df): 'RF','retail food or drink', 'unknown') -""" \ No newline at end of file +""" diff --git a/basemap/development_projects.py b/basemap/development_projects.py index 9dde0c9..3c3ab01 100644 --- a/basemap/development_projects.py +++ b/basemap/development_projects.py @@ -3,8 +3,8 @@ # This script brings together many different datasets that each offer some info # on development in the region from 2011 on. 
Overall approach is to: # 1 spatially join parcels to each point file of new buildings -# 2 recompute all fields in each point file so that they exactly the same schema -# 3 clean out old fields +# 2 recompute all fields in each point file so that they exactly the same schema +# 3 clean out old fields # 4 merge point files into one shapefile (pipeline) including only records w incl=1 # 5 merge point file of opportunity sites with pipeline to form development_projects # 6 run diagnostics @@ -37,21 +37,23 @@ # arcpy workspace and log file setup # note: this runs a lot better if these directories are a local/fast disk # Using a box drive or even a network drive tends to result in arcpy exceptions -if os.getenv("USERNAME")=="lzorn": - WORKING_DIR = "C:\\Users\\lzorn\\Documents\\UrbanSim smelt\\2020 07 16" - LOG_FILE = os.path.join(WORKING_DIR,"devproj_{}.log".format(NOW)) - SMELT_GDB = os.path.join(WORKING_DIR,"smelt.gdb") - WORKSPACE_GDB = "workspace_{}.GDB".format(NOW) # scratch -elif os.getenv("USERNAME")=="MTCPB": - WORKING_DIR = "C:\\Users\\MTCPB\\Documents\\ArcGIS\\Projects\\DevelopmentProjects\\2020 07 16" - LOG_FILE = os.path.join(WORKING_DIR,"devproj_{}.log".format(NOW)) - SMELT_GDB = "C:\\Users\\MTCPB\\Documents\\ArcGIS\\Projects\\DevelopmentProjects\\2020 07 16\\smelt.gdb" - WORKSPACE_GDB = "workspace_{}.GDB".format(NOW) # scratch +if os.getenv("USERNAME") == "lzorn": + WORKING_DIR = "C:\\Users\\lzorn\\Documents\\UrbanSim smelt\\2020 07 16" + LOG_FILE = os.path.join(WORKING_DIR, "devproj_{}.log".format(NOW)) + SMELT_GDB = os.path.join(WORKING_DIR, "smelt.gdb") + WORKSPACE_GDB = "workspace_{}.GDB".format(NOW) # scratch +elif os.getenv("USERNAME") == "MTCPB": + WORKING_DIR = ( + "C:\\Users\\MTCPB\\Documents\\ArcGIS\\Projects\\DevelopmentProjects\\2020 07 16" + ) + LOG_FILE = os.path.join(WORKING_DIR, "devproj_{}.log".format(NOW)) + SMELT_GDB = "C:\\Users\\MTCPB\\Documents\\ArcGIS\\Projects\\DevelopmentProjects\\2020 07 16\\smelt.gdb" + WORKSPACE_GDB = "workspace_{}.GDB".format(NOW) # scratch else: - WORKING_DIR = "E:\\baydata" - LOG_FILE = os.path.join(WORKING_DIR,"devproj_{}.log".format(NOW)) - SMELT_GDB = os.path.join(WORKING_DIR,"smelt.gdb") - WORKSPACE_GDB = "workspace_{}.GDB".format(NOW) # scratch + WORKING_DIR = "E:\\baydata" + LOG_FILE = os.path.join(WORKING_DIR, "devproj_{}.log".format(NOW)) + SMELT_GDB = os.path.join(WORKING_DIR, "smelt.gdb") + WORKSPACE_GDB = "workspace_{}.GDB".format(NOW) # scratch ###BL: I am organizing the process by data sources, so that it is easy to replicate the process @@ -60,18 +62,26 @@ # SET VARS # input -p10_pba50 = os.path.join(SMELT_GDB, "p10_pba50") # 2010 parcels, polygon feature class +p10_pba50 = os.path.join(SMELT_GDB, "p10_pba50") # 2010 parcels, polygon feature class ### costar data -cs1620 = os.path.join(SMELT_GDB, "cs1620") # costar data 2016-2020, point feature class -cs1115 = os.path.join(SMELT_GDB, "cs1115") # costar data 2011-2015, point feature class +cs1620 = os.path.join( + SMELT_GDB, "cs1620" +) # costar data 2016-2020, point feature class +cs1115 = os.path.join( + SMELT_GDB, "cs1115" +) # costar data 2011-2015, point feature class ### redfin data -rfsfr1619 = os.path.join(SMELT_GDB,"rf19_sfr1619" ) # redfin SFD data 2016-2019 -rfmu1619 = os.path.join(SMELT_GDB,"rf19_multiunit1619" ) # redin MFD data 2016-2019 -rfsfr1115 = os.path.join(SMELT_GDB,"rf19_sfr1115" ) # redfin SFD data 2011-2015 -rfcondo1115 = os.path.join(SMELT_GDB,"rf19_condounits1115") # redfin condo data 2011-2015 -rfother1115 = 
os.path.join(SMELT_GDB,"rf19_othertypes1115") # redfin other data 2011-2015 +rfsfr1619 = os.path.join(SMELT_GDB, "rf19_sfr1619") # redfin SFD data 2016-2019 +rfmu1619 = os.path.join(SMELT_GDB, "rf19_multiunit1619") # redin MFD data 2016-2019 +rfsfr1115 = os.path.join(SMELT_GDB, "rf19_sfr1115") # redfin SFD data 2011-2015 +rfcondo1115 = os.path.join( + SMELT_GDB, "rf19_condounits1115" +) # redfin condo data 2011-2015 +rfother1115 = os.path.join( + SMELT_GDB, "rf19_othertypes1115" +) # redfin other data 2011-2015 ### BASIS pipleline data basis_pipeline = os.path.join(SMELT_GDB, "basis_pipeline_20200228") @@ -80,100 +90,122 @@ basis_pb_new = os.path.join(SMELT_GDB, "basis_pb_new_20200312") -MTC_ONLINE_MANUAL_URL = 'https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/manual_dp_20200716/\ +MTC_ONLINE_MANUAL_URL = "https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/manual_dp_20200716/\ FeatureServer/0?token=5qV1qgKLXC7Uoum0N1sSTZSV2-eF9SG0PhR682PbAHyrmoc69codzAagWRYOM9Xypcu6KW6\ 1Fh6o_gxarcGObsAf07pS0cbvlK-VgakqgY8-DCShwsQ0v1G_O9JQdMPxYfkR7kr6SfjX-00qoRqCF5KOdLiPsgpzbw6gcm\ -6AWWeGZ_d1Hh2smCJV6ShoeyVo1pKLEki3s0r8gZbhXAn6yPAIsWyoTblgsTYRIZsp2Pk.' +6AWWeGZ_d1Hh2smCJV6ShoeyVo1pKLEki3s0r8gZbhXAn6yPAIsWyoTblgsTYRIZsp2Pk." -MTC_ONLINE_OPPSITE_URL = 'https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/opportunity_sites_20201026/\ +MTC_ONLINE_OPPSITE_URL = "https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/opportunity_sites_20201026/\ FeatureServer/0?token=Pl_DJ4-veoG357sbtVCk4Ui-0dM681BNSTxnQp6k-Dxg_2LjX7NHbDocTGJfXal8DKu35WkzK\ Xr69ker1T5jtgRp4YF0rhNepcTDvuhMNaaDR6iBlJmWJAy86Io2qVJHorl71ZnkT5GfJf2EXblhTMiCNz4AOyA5PtBJKDjJP\ -vXmuqffhgo7R3eaC4G_NRJDDdE7rg2MugcenXCEUw8YtBWYx1G36DreyKq1qXF5mx8.' +vXmuqffhgo7R3eaC4G_NRJDDdE7rg2MugcenXCEUw8YtBWYx1G36DreyKq1qXF5mx8." -MTC_ONLINE_PUBLAND_URL = 'https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/public_land_1110/FeatureServer/0\ +MTC_ONLINE_PUBLAND_URL = "https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/public_land_1110/FeatureServer/0\ ?token=NBTkkzYVt0d1nRuq32xku5sthGmR9yZun5I6cS8zilPvxFLm7EJlQxLTrCIqALKhiKzWK_B5f-i6gGq7kOuHMRbeJ\ eOyB1yifgNUZq952Jv2VnHwhqWydSJ1KW47IDB4E6jKJMl16pLv6jHiYGBOveybXcryQiMn21i6xp5dy7bB29-M9xt4MZ2MEKq\ -fkUD44feBoFg7YMqxtKugx458Lu0eQSpwl_nNnhahh5WoCHk.' +fkUD44feBoFg7YMqxtKugx458Lu0eQSpwl_nNnhahh5WoCHk." -MTC_ONLINE_MALLOFFICE_URL = 'https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/Mall_Office_oppsites/\ +MTC_ONLINE_MALLOFFICE_URL = "https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/Mall_Office_oppsites/\ FeatureServer/0?token=dZp8Gg8Cs4Ntepu4PI8B4Vipcvwlt4RhwFUr0NbGetYZQeHi5vZWneSvNpKtxHWmfm_0ofgUP\ eEj_umtvRm4bFjwf68IYRho3AGjh-cyZaHp8ymP-xOke2jwBHV39L4Im3Fy0VdUf260Ma7yo6SJnYqN9yXic4IxXE8y-AyME\ -mKzaKlzxfWFv34AGUYm6gtyQqpHaD1W0fvLjuw1GCnkxST7KyHiPunm2qaLB8gSl6U.' +mKzaKlzxfWFv34AGUYm6gtyQqpHaD1W0fvLjuw1GCnkxST7KyHiPunm2qaLB8gSl6U." -MTC_ONLINE_GROWTHGEO_URL = 'https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/pba2050_growth_geographies_2020/\ +MTC_ONLINE_GROWTHGEO_URL = "https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/pba2050_growth_geographies_2020/\ FeatureServer/0?token=9eFgZEiet4h-R4Eu8hybXs8kpH_D0SQoFRgpf5XpIUNzYyBnmRd1I6LASP9u3w2mrFHQ\ dhvdIS2fadS2c5O5Qx2UwbmUBdYP3Lo8QNZKqZmobKFkmkScThbUg6Txkj-ZgiaIJv_v4iOdQGe5RsuYEIntxjF2cwD\ -KvzWyO9Y-pep_tePdhFR0SBMByR0N_sWwGldJkadkTch1SLJLaL3aruWoSMTCv7Dkq_I2vRxMZb4.' +KvzWyO9Y-pep_tePdhFR0SBMByR0N_sWwGldJkadkTch1SLJLaL3aruWoSMTCv7Dkq_I2vRxMZb4." 
-MTC_ONLINE_MALLPUB_ALT2_URL = 'https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/mallpub_alt2_Merge_copy/\ +MTC_ONLINE_MALLPUB_ALT2_URL = "https://arcgis.ad.mtc.ca.gov/server/rest/services/Hosted/mallpub_alt2_Merge_copy/\ FeatureServer/0?token=jrR0pAmL_AY0QOs4XW3m9gwz2e5EpTVST_B62xVzOYocmL2zClzhFKg-Up13eLUknbCGY6KfP\ 0mkyETquYBr5CIXngPz3U02O_WkLnY6-q7GVnaI86amPU1eOO_kymNAIZgOEMKbCIfSEbVT4AQWMzLNN79DWPX85dDmsviL-\ -aMtButq-SWeYgwycCGK-DUdxcODH17ZlH_ln9iLkItLzLgTsfx_SR6CfD0_1dncemY.' - -manual_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_MANUAL_URL,'manual_portal') -oppsites_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_OPPSITE_URL,'oppsites_portal') -publicland_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_PUBLAND_URL,'publicland_portal') -malloffice_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_MALLOFFICE_URL,'malloffice_portal') -growth_geo_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_GROWTHGEO_URL,'growth_geo_portal') -mallpub_alt2_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_MALLPUB_ALT2_URL,'mallpub_alt2_portal') +aMtButq-SWeYgwycCGK-DUdxcODH17ZlH_ln9iLkItLzLgTsfx_SR6CfD0_1dncemY." + +manual_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_MANUAL_URL, "manual_portal" +) +oppsites_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_OPPSITE_URL, "oppsites_portal" +) +publicland_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_PUBLAND_URL, "publicland_portal" +) +malloffice_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_MALLOFFICE_URL, "malloffice_portal" +) +growth_geo_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_GROWTHGEO_URL, "growth_geo_portal" +) +mallpub_alt2_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_MALLPUB_ALT2_URL, "mallpub_alt2_portal" +) arcpy.env.workspace = SMELT_GDB arcpy.env.overwriteOutput = True # manually maintained pipeline data -manual_dp = os.path.join(SMELT_GDB, "manual_dp") +manual_dp = os.path.join(SMELT_GDB, "manual_dp") # opportunity sites that keep their scen status from gis file -opp_sites = os.path.join(SMELT_GDB, "opportunity_dp") +opp_sites = os.path.join(SMELT_GDB, "opportunity_dp") # public land sites that keep their scen status from gis file -pub_sites = os.path.join(SMELT_GDB, "pubsites_dp") +pub_sites = os.path.join(SMELT_GDB, "pubsites_dp") # public land sites that keep their scen status from gis file -mall_sites = os.path.join(SMELT_GDB, "mallsites_dp") +mall_sites = os.path.join(SMELT_GDB, "mallsites_dp") # growth geo for filtering mall and pub sites for eir alt -growth_geo = os.path.join(SMELT_GDB, "growth_geo") +growth_geo = os.path.join(SMELT_GDB, "growth_geo") -mallpub_alt2 = os.path.join(SMELT_GDB, "mallpub_alt2") +mallpub_alt2 = os.path.join(SMELT_GDB, "mallpub_alt2") if arcpy.Exists(manual_dp): - arcpy.Delete_management(manual_dp) + arcpy.Delete_management(manual_dp) if arcpy.Exists(opp_sites): - arcpy.Delete_management(opp_sites) + arcpy.Delete_management(opp_sites) if arcpy.Exists(pub_sites): - arcpy.Delete_management(pub_sites) + arcpy.Delete_management(pub_sites) if arcpy.Exists(mall_sites): - arcpy.Delete_management(mall_sites) + arcpy.Delete_management(mall_sites) if arcpy.Exists(growth_geo): - arcpy.Delete_management(growth_geo) + arcpy.Delete_management(growth_geo) if arcpy.Exists(mallpub_alt2): - arcpy.Delete_management(mallpub_alt2) - -arcpy.FeatureClassToFeatureClass_conversion(manual_portal, SMELT_GDB,'manual_dp') -arcpy.FeatureClassToFeatureClass_conversion(oppsites_portal, SMELT_GDB,'opportunity_dp') 
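# ------------------------------------------------------------------------------
# [Editor's note -- illustrative sketch, not part of the patch] The surrounding lines
# repeat the same three-step pattern for every portal layer: make a feature layer from
# the hosted REST URL, delete any stale copy in SMELT_GDB, then snapshot it locally
# with FeatureClassToFeatureClass_conversion. A minimal sketch of those same steps
# driven from one lookup table, assuming the URL constants and SMELT_GDB defined
# earlier in this script (PORTAL_SNAPSHOTS is a hypothetical name):
import os
import arcpy

PORTAL_SNAPSHOTS = {
    MTC_ONLINE_MANUAL_URL:       "manual_dp",
    MTC_ONLINE_OPPSITE_URL:      "opportunity_dp",
    MTC_ONLINE_PUBLAND_URL:      "pubsites_dp",
    MTC_ONLINE_MALLOFFICE_URL:   "mallsites_dp",
    MTC_ONLINE_GROWTHGEO_URL:    "growth_geo",
    MTC_ONLINE_MALLPUB_ALT2_URL: "mallpub_alt2",
}

for url, out_name in PORTAL_SNAPSHOTS.items():
    # in-memory layer pointing at the hosted feature service
    portal_layer = arcpy.MakeFeatureLayer_management(url, out_name + "_portal")
    # drop any stale copy in the working geodatabase before re-exporting
    out_fc = os.path.join(SMELT_GDB, out_name)
    if arcpy.Exists(out_fc):
        arcpy.Delete_management(out_fc)
    # snapshot the portal layer into the local GDB under the name the script expects
    arcpy.FeatureClassToFeatureClass_conversion(portal_layer, SMELT_GDB, out_name)
# ------------------------------------------------------------------------------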
-arcpy.FeatureClassToFeatureClass_conversion(publicland_portal, SMELT_GDB,'pubsites_dp') -arcpy.FeatureClassToFeatureClass_conversion(malloffice_portal, SMELT_GDB,'mallsites_dp') -arcpy.FeatureClassToFeatureClass_conversion(growth_geo_portal, SMELT_GDB,'growth_geo') -arcpy.FeatureClassToFeatureClass_conversion(mallpub_alt2_portal, SMELT_GDB,'mallpub_alt2') - -GGtra = arcpy.SelectLayerByAttribute_management(growth_geo, "NEW_SELECTION", "designatio <> 'High-Resource Area (16-30 min bus)' And designatio <> 'Priority Production Area'") -arcpy.CopyFeatures_management(GGtra, 'GGtra') - -#set up a process to make sure all incl = 1 records are in the results (also need to make sure that the feature class has column "incl") -def countRow (fc): - if arcpy.ListFields(fc, "incl"): - try: - arcpy.Delete_management("fcInc1") - except: - pass - arcpy.MakeTableView_management(fc,"fcInc1","incl = 1") - count = arcpy.GetCount_management("fcInc1") - result = int(count[0]) - return result - else: - print("incl is not a variable in this file") - - -#this is to reorder file before export so that it could be used for urbansim directly -#source: http://joshwerts.com/blog/2014/04/17/arcpy-reorder-fields/ + arcpy.Delete_management(mallpub_alt2) + +arcpy.FeatureClassToFeatureClass_conversion(manual_portal, SMELT_GDB, "manual_dp") +arcpy.FeatureClassToFeatureClass_conversion( + oppsites_portal, SMELT_GDB, "opportunity_dp" +) +arcpy.FeatureClassToFeatureClass_conversion(publicland_portal, SMELT_GDB, "pubsites_dp") +arcpy.FeatureClassToFeatureClass_conversion( + malloffice_portal, SMELT_GDB, "mallsites_dp" +) +arcpy.FeatureClassToFeatureClass_conversion(growth_geo_portal, SMELT_GDB, "growth_geo") +arcpy.FeatureClassToFeatureClass_conversion( + mallpub_alt2_portal, SMELT_GDB, "mallpub_alt2" +) + +GGtra = arcpy.SelectLayerByAttribute_management( + growth_geo, + "NEW_SELECTION", + "designatio <> 'High-Resource Area (16-30 min bus)' And designatio <> 'Priority Production Area'", +) +arcpy.CopyFeatures_management(GGtra, "GGtra") + +# set up a process to make sure all incl = 1 records are in the results (also need to make sure that the feature class has column "incl") +def countRow(fc): + if arcpy.ListFields(fc, "incl"): + try: + arcpy.Delete_management("fcInc1") + except: + pass + arcpy.MakeTableView_management(fc, "fcInc1", "incl = 1") + count = arcpy.GetCount_management("fcInc1") + result = int(count[0]) + return result + else: + print("incl is not a variable in this file") + + +# this is to reorder file before export so that it could be used for urbansim directly +# source: http://joshwerts.com/blog/2014/04/17/arcpy-reorder-fields/ def reorder_fields(table, out_table, field_order, add_missing=True): """ Reorders fields in input featureclass/table @@ -217,2236 +249,3194 @@ def add_mapping(field_name): arcpy.Merge_management(table, out_table, new_mapping) return out_table + # output # pipeline shp # development_projects shp # development_projects csv # demolish csv -if __name__ == '__main__': - - # create logger - logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') - - # console handler - ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) - logger.addHandler(ch) - # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) - logger.addHandler(fh) - - - 
logger.info("WORKING_DIR = {}".format(WORKING_DIR)) - logger.info("WORKSPACE_GDB = {}".format(WORKSPACE_GDB)) - logger.info("SMELT_GDB = {}".format(SMELT_GDB)) - - # list info about SMELT_GDB - arcpy.env.workspace = SMELT_GDB - logger.info("workspace: {}".format(arcpy.env.workspace)) - for dataset in arcpy.ListDatasets(): - logger.info(" dataset: {}".format(dataset)) - logger.info(" feature classes: {} ".format(arcpy.ListFeatureClasses(feature_dataset=dataset))) - - logger.info(" feature classes: {} ".format(arcpy.ListFeatureClasses())) - logger.info(" tables: {} ".format(arcpy.ListTables())) - - arcpy.CreateFileGDB_management(WORKING_DIR, WORKSPACE_GDB) - arcpy.env.workspace = os.path.join(WORKING_DIR, WORKSPACE_GDB) - - #get an empty list to add feature class to so that they can be merged in the end all together - dev_projects_temp_layers = [] - - #create another empty geom_id list to use for checking and removing duplicates, the idea is that, once a dataset has been cleaned - #before it gets added to the development projects temp layers, it needs to check against the geom_ids that are already in this list - #not sure how cumbersome this approach would be - geoList = [] - - #count geom_id is null - gidnull = 'gidnull' - - countOne = countRow(manual_dp) - logger.info("Feature Class {} has {} records with incl = 1".format(manual_dp, countOne)) - joinFN = 'ttt_' + arcpy.Describe(manual_dp).name + '_p10_pba50' - dev_projects_temp_layers.append(joinFN) - #based on Mike's ranking, start with manual dp list - try: - count = arcpy.GetCount_management(joinFN) - if int(count[0]) > 100: - logger.info("Found layer {} with {} rows -- skipping creation".format(joinFN, int(count[0]))) - - except: - # go ahead and create it - ### 1 SPATIAL JOINS - logger.info("Creating layer {} by spatial joining manual pipeline data ({}) and parcels ({})".format(joinFN, manual_dp, p10_pba50)) - arcpy.SpatialJoin_analysis(manual_dp, p10_pba50, joinFN) - # rename any conflicting field names - - arcpy.AlterField_management(joinFN, "building_name", "m_building_name") - arcpy.AlterField_management(joinFN, "site_name", "m_site_name") - arcpy.AlterField_management(joinFN, "year_built", "m_year_built") - arcpy.AlterField_management(joinFN, "parcel_id", "m_parcel_id") - arcpy.AlterField_management(joinFN, "last_sale_price", "m_last_sale_price") - arcpy.AlterField_management(joinFN, "last_sale_year", "m_sale_date") - arcpy.AlterField_management(joinFN, "stories", "m_stories") - arcpy.AlterField_management(joinFN, "residential_units", "m_residential_units") - arcpy.AlterField_management(joinFN, "unit_ave_sqft", "m_unit_ave_sqft") - arcpy.AlterField_management(joinFN, "zip", "m_zips") - arcpy.AlterField_management(joinFN, "Average_Weighted_Rent", "m_average_weighted_rent") - arcpy.AlterField_management(joinFN, "x", "p_x") - arcpy.AlterField_management(joinFN, "y", "p_y") - arcpy.AlterField_management(joinFN, "geom_id", "p_geom_id")#this is from the parcel file - # add fields and calc values - # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, - # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, - # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, - # last_sale_year,last_sale_price,source,edit_date,editor,version - # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, 
{field_is_required}, {field_domain}) - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "raw_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "site_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - arcpy.AddField_management(joinFN, "scen0", "SHORT") - arcpy.AddField_management(joinFN, "scen1", "SHORT") - arcpy.AddField_management(joinFN, "scen2", "SHORT") - arcpy.AddField_management(joinFN, "scen3", "SHORT") - arcpy.AddField_management(joinFN, "scen4", "SHORT") - arcpy.AddField_management(joinFN, "scen5", "SHORT") - arcpy.AddField_management(joinFN, "scen6", "SHORT") - arcpy.AddField_management(joinFN, "scen7", "SHORT") - arcpy.AddField_management(joinFN, "scen10", "SHORT") - arcpy.AddField_management(joinFN, "scen11", "SHORT") - arcpy.AddField_management(joinFN, "scen12", "SHORT") - arcpy.AddField_management(joinFN, "scen15", "SHORT") - arcpy.AddField_management(joinFN, "scen20", "SHORT") - arcpy.AddField_management(joinFN, "scen21", "SHORT") - arcpy.AddField_management(joinFN, "scen22", "SHORT") - arcpy.AddField_management(joinFN, "scen23", "SHORT") - arcpy.AddField_management(joinFN, "scen24", "SHORT") - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "zip", "TEXT","","",50) - arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "year_built", "SHORT") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "stories", "SHORT") - arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") - arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") - ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work - arcpy.AddField_management(joinFN, "last_sale_year", "DATE") - arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") - arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") - arcpy.AddField_management(joinFN, "source", "TEXT","","",15) - arcpy.AddField_management(joinFN, "edit_date", "LONG") - arcpy.AddField_management(joinFN, "editor", "TEXT","","",50) - arcpy.AddField_management(joinFN, "version", "SHORT") - if not arcpy.ListFields(joinFN, "incl"): - arcpy.AddField_management(joinFN, "incl", "SHORT") - - arcpy.CalculateField_management(joinFN, "raw_id", '!manual_dp_id!') - arcpy.CalculateField_management(joinFN, "building_name", '!m_building_name!') - arcpy.CalculateField_management(joinFN, "site_name", '!m_site_name!') - arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!m_parcel_id!') - arcpy.CalculateField_management(joinFN, "scen0", 1) - arcpy.CalculateField_management(joinFN, "scen1", 1) - arcpy.CalculateField_management(joinFN, "scen2", 1) - arcpy.CalculateField_management(joinFN, "scen3", 1) - arcpy.CalculateField_management(joinFN, "scen4", 1) - arcpy.CalculateField_management(joinFN, "scen5", 1) - arcpy.CalculateField_management(joinFN, "scen6", 
1) - arcpy.CalculateField_management(joinFN, "scen7", 1) - arcpy.CalculateField_management(joinFN, "scen10", 1) - arcpy.CalculateField_management(joinFN, "scen11", 1) - arcpy.CalculateField_management(joinFN, "scen12", 1) - arcpy.CalculateField_management(joinFN, "scen15", 1) - arcpy.CalculateField_management(joinFN, "scen20", 1) - arcpy.CalculateField_management(joinFN, "scen21", 1) - arcpy.CalculateField_management(joinFN, "scen22", 1) - arcpy.CalculateField_management(joinFN, "scen23", 1) - arcpy.CalculateField_management(joinFN, "scen24", 1) - arcpy.CalculateField_management(joinFN, "scen25", 1) - arcpy.CalculateField_management(joinFN, "scen26", 1) - arcpy.CalculateField_management(joinFN, "scen27", 1) - arcpy.CalculateField_management(joinFN, "scen28", 1) - arcpy.CalculateField_management(joinFN, "scen29", 1) # these are committed so 1 for all scens - #not sure how to change zip field type - #arcpy.CalculateField_management(joinFN, "zip", '!m_zip!') - arcpy.CalculateField_management(joinFN, "x", '!X_1!') #use spatial info from parcel file - arcpy.CalculateField_management(joinFN, "y", '!Y_1!') #use spatial info from parcel file - arcpy.CalculateField_management(joinFN, "geom_id", '!p_geom_id!') #use spatial info from parcel file - arcpy.CalculateField_management(joinFN, "year_built", '!m_year_built!') - #arcpy.CalculateField_management(joinFN, "duration", ) - arcpy.CalculateField_management(joinFN, "residential_units", '!m_residential_units!') - arcpy.CalculateField_management(joinFN, "unit_ave_sqft", '!m_unit_ave_sqft!') - arcpy.CalculateField_management(joinFN, "stories", '!m_stories!') - arcpy.CalculateField_management(joinFN, "average_weighted_rent", '!m_average_weighted_rent!') - #arcpy.CalculateField_management(joinFN, "rent_ave_sqft", ) - #arcpy.CalculateField_management(joinFN, "rent_ave_unit", ) - arcpy.CalculateField_management(joinFN, "last_sale_year", '!m_sale_date!') #need to make into year - arcpy.CalculateField_management(joinFN, "last_sale_price", '!m_last_sale_price!') - arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0) - arcpy.CalculateField_management(joinFN, "source", "'manual'") - arcpy.CalculateField_management(joinFN, "edit_date", 20200429) - arcpy.CalculateField_management(joinFN, "editor", "'MKR'") - #arcpy.CalculateField_management(joinFN, "version", ) - #remove row where incl != 1 - with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: - for row in cursor: - if row[0] != 1: - cursor.deleteRow() - #check to make sure that the number of remaining records in the temp file (which should still have var incl) is the same as the raw file - countTwo = countRow(joinFN) - if countTwo == countOne: - logger.info("All records with incl = 1 in feature class {} are included in the temp file".format(manual_dp)) - else: - logger.fatal("Something is wrong in the code, please check") - raise - ### 3 DELETE OTHER FIELDS - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25", "scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - 
"tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - ### for costar data - ### create a list of feature class - cs = [cs1115,cs1620] - for fc in cs: - countOne = countRow(fc) - logger.info("Feature Class {} has {} records with incl = 1".format(fc, countOne)) - joinFN = 'ttt_' + arcpy.Describe(fc).name + '__p10_pba50' - dev_projects_temp_layers.append(joinFN) - - ### 1 SPATIAL JOINS - logger.info("Creating layer {} by spatial joining costar ({}) and parcels ({})".format(joinFN, fc, p10_pba50)) - arcpy.SpatialJoin_analysis(fc, p10_pba50, joinFN) - ### 2 VARIABLE CLEANING - - # rename any conflicting field names - arcpy.AlterField_management(joinFN, "building_name", "cs_building_name") - arcpy.AlterField_management(joinFN, "parcel_id", "cs_parcel_id") - arcpy.AlterField_management(joinFN, "city", "cs_city") - arcpy.AlterField_management(joinFN, "Zip", "cs_zip") - arcpy.AlterField_management(joinFN, "rent_type", "cs_rent_type") - arcpy.AlterField_management(joinFN, "year_built", "cs_year_built") - arcpy.AlterField_management(joinFN, "last_sale_price", "cs_last_sale_price") - arcpy.AlterField_management(joinFN, "last_sale_date", "cs_last_sale_date") - arcpy.AlterField_management(joinFN, "Average_Weighted_Rent", "cs_average_weighted_rent") - arcpy.AlterField_management(joinFN, "x", "p_x") - arcpy.AlterField_management(joinFN, "y", "p_y") - arcpy.AlterField_management(joinFN, "geom_id", "p_geom_id") # this is from the parcel - - # add fields and calc values - # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, - # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, - # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, - # last_sale_year,last_sale_price,source,edit_date,editor,version - # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain}) - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "raw_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "site_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - arcpy.AddField_management(joinFN, "action", "TEXT","","",10) - arcpy.AddField_management(joinFN, "scen0", "SHORT") - arcpy.AddField_management(joinFN, "scen1", "SHORT") - arcpy.AddField_management(joinFN, "scen2", "SHORT") - arcpy.AddField_management(joinFN, "scen3", "SHORT") - arcpy.AddField_management(joinFN, "scen4", "SHORT") - arcpy.AddField_management(joinFN, "scen5", "SHORT") - arcpy.AddField_management(joinFN, "scen6", "SHORT") - arcpy.AddField_management(joinFN, "scen7", "SHORT") - 
arcpy.AddField_management(joinFN, "scen10", "SHORT") - arcpy.AddField_management(joinFN, "scen11", "SHORT") - arcpy.AddField_management(joinFN, "scen12", "SHORT") - arcpy.AddField_management(joinFN, "scen15", "SHORT") - arcpy.AddField_management(joinFN, "scen20", "SHORT") - arcpy.AddField_management(joinFN, "scen21", "SHORT") - arcpy.AddField_management(joinFN, "scen22", "SHORT") - arcpy.AddField_management(joinFN, "scen23", "SHORT") - arcpy.AddField_management(joinFN, "scen24", "SHORT") - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "address", "TEXT","","",200) - arcpy.AddField_management(joinFN, "city", "TEXT","","",50) - arcpy.AddField_management(joinFN, "zip", "TEXT","","",50) ## this is changed from LONG to TEXT because cs1115 file has some text formatted zipcode with "-" - arcpy.AddField_management(joinFN, "county", "TEXT","","",50) - arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "year_built", "SHORT") - arcpy.AddField_management(joinFN, "duration", "SHORT") - arcpy.AddField_management(joinFN, "building_type_id", "LONG") - arcpy.AddField_management(joinFN, "building_type", "TEXT","","",4) - arcpy.AddField_management(joinFN, "building_sqft", "LONG") - arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "tenure", "TEXT","","",5) - arcpy.AddField_management(joinFN, "rent_type", "TEXT","","",25) - arcpy.AddField_management(joinFN, "stories", "SHORT") - arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") - arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") - arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") - ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work - arcpy.AddField_management(joinFN, "last_sale_year", "DATE") - arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") - arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") - arcpy.AddField_management(joinFN, "source", "TEXT","","",15) - arcpy.AddField_management(joinFN, "edit_date", "LONG") - arcpy.AddField_management(joinFN, "editor", "TEXT","","",50) - arcpy.AddField_management(joinFN, "version", "SHORT") - if not arcpy.ListFields(joinFN, "incl"): - arcpy.AddField_management(joinFN, "incl", "SHORT") - - arcpy.CalculateField_management(joinFN, "raw_id", '!PropertyID!') - arcpy.CalculateField_management(joinFN, "building_name", '!cs_building_name!') - arcpy.CalculateField_management(joinFN, "site_name", '!Building_Park!') - arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!cs_parcel_id!') - arcpy.CalculateField_management(joinFN, "action", "'build'")# need to quote marks here - arcpy.CalculateField_management(joinFN, "scen0", 1) - arcpy.CalculateField_management(joinFN, "scen1", 1) - arcpy.CalculateField_management(joinFN, "scen2", 1) - arcpy.CalculateField_management(joinFN, "scen3", 1) - arcpy.CalculateField_management(joinFN, "scen4", 1) - 
arcpy.CalculateField_management(joinFN, "scen5", 1) - arcpy.CalculateField_management(joinFN, "scen6", 1) - arcpy.CalculateField_management(joinFN, "scen7", 1) - arcpy.CalculateField_management(joinFN, "scen10", 1) - arcpy.CalculateField_management(joinFN, "scen11", 1) - arcpy.CalculateField_management(joinFN, "scen12", 1) - arcpy.CalculateField_management(joinFN, "scen15", 1) - arcpy.CalculateField_management(joinFN, "scen20", 1) - arcpy.CalculateField_management(joinFN, "scen21", 1) - arcpy.CalculateField_management(joinFN, "scen22", 1) - arcpy.CalculateField_management(joinFN, "scen23", 1) - arcpy.CalculateField_management(joinFN, "scen24", 1) - arcpy.CalculateField_management(joinFN, "scen25", 1) - arcpy.CalculateField_management(joinFN, "scen26", 1) - arcpy.CalculateField_management(joinFN, "scen27", 1) - arcpy.CalculateField_management(joinFN, "scen28", 1) - arcpy.CalculateField_management(joinFN, "scen29", 1) # these are committed so 1 for all scens - arcpy.CalculateField_management(joinFN, "address", '!Building_Address!') - arcpy.CalculateField_management(joinFN, "city", '!cs_city!') - arcpy.CalculateField_management(joinFN, "zip", '!cs_zip!') - arcpy.CalculateField_management(joinFN, "county", '!County_Name!') - arcpy.CalculateField_management(joinFN, "x", '!p_x!') #use spatial info from parcel file - arcpy.CalculateField_management(joinFN, "y", '!p_y!') #use spatial info from parcel file - arcpy.CalculateField_management(joinFN, "geom_id", '!p_geom_id!') #use spatial info from parcel file - arcpy.CalculateField_management(joinFN, "year_built", '!cs_year_built!') - #arcpy.CalculateField_management(joinFN, "duration", ) - #arcpy.CalculateField_management(joinFN, "building_type_id", ) - arcpy.CalculateField_management(joinFN, "building_type", '!det_bldg_type!') - arcpy.CalculateField_management(joinFN, "building_sqft", '!Rentable_Building_Area!') # how often null for res - arcpy.CalculateField_management(joinFN, "non_residential_sqft", '!Rentable_Building_Area!') # need to zero out for res - arcpy.CalculateField_management(joinFN, "residential_units", '!Number_Of_Units!') - arcpy.CalculateField_management(joinFN, "unit_ave_sqft", '!Avg_Unit_SF!') - arcpy.CalculateField_management(joinFN, "tenure", "'Rent'") - arcpy.CalculateField_management(joinFN, "rent_type", '!cs_rent_type!') # need to clean - arcpy.CalculateField_management(joinFN, "stories", '!Number_Of_Stories!') - #there is a worng parking space value is one of the tables, so adding this to work around - with arcpy.da.UpdateCursor(joinFN, ["Number_Of_Parking_Spaces","parking_spaces"]) as cursor: - for row in cursor: - if len(str((row[0]))) <= 5: ##short integer has value less than 32700 - row[1] = row[0] - cursor.updateRow(row) - #arcpy.CalculateField_management(joinFN, "parking_spaces", '!Number_Of_Parking_Spaces!') - arcpy.CalculateField_management(joinFN, "average_weighted_rent", '!cs_average_weighted_rent!') - #arcpy.CalculateField_management(joinFN, "rent_ave_sqft", ) - #arcpy.CalculateField_management(joinFN, "rent_ave_unit", ) - arcpy.CalculateField_management(joinFN, "last_sale_year", '!cs_last_sale_date!') #need to make into year - arcpy.CalculateField_management(joinFN, "last_sale_price", '!cs_last_sale_price!') - - with arcpy.da.UpdateCursor(joinFN, ["cs_rent_type","residential_units","deed_restricted_units"]) as cursor: - for row in cursor: - if row[0] == "Affordable": - row[2] = row[1] - elif row[0] == "Market/Affordable": - row[2] = int(row[1] // 5) - else: - row[2] =0 - cursor.updateRow(row) - - 
arcpy.CalculateField_management(joinFN, "source", "'cs'") - arcpy.CalculateField_management(joinFN, "edit_date", 20200429) - arcpy.CalculateField_management(joinFN, "editor", "'MKR'") - #arcpy.CalculateField_management(joinFN, "version", ) - - #remove row where incl != 1 - with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: - for row in cursor: - if row[0] != 1: - cursor.deleteRow() - - #check all incl = 1 records are included - countTwo = countRow(joinFN) - if countTwo == countOne: - logger.info("All records with incl = 1 in feature class {} is included in the temp file".format(fc)) - else: - logger.fatal("Something is wrong in the code, please check") - raise - - ### 3 DELETE OTHER FIELDS AND TEMP FILES - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - - #zero out non res sqft for residential types (HS, HM, HT) - with arcpy.da.UpdateCursor(joinFN, ["building_type","non_residential_sqft"]) as cursor: - for row in cursor: - if row[0] == 'HT': - row[1] = 0 - elif row[0] == 'HS': - row[1] = 0 - elif row[0] == 'HM': - row[1] = 0 - cursor.updateRow(row) - - gidnull = 'gidnull' - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - - ###4 REMOVE DUPLICATES - #check again existing geomList and remove duplicates - with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: - for row in cursor: - if row[0] in geoList: - cursor.deleteRow() - - #then add the geoms in the geomList - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - - ### for BASIS pipeline data - countOne = countRow(basis_pipeline) - logger.info("Feature Class {} has {} records with incl = 1".format(basis_pipeline, countOne)) - joinFN = 'ttt_basispp_p10_pba50' - dev_projects_temp_layers.append(joinFN) - - ### 1 SPATIAL JOINS - logger.info("Creating layer {} by spatial joining BASIS pipeline data ({}) and parcels ({})".format(joinFN, basis_pipeline, p10_pba50)) - arcpy.SpatialJoin_analysis(basis_pipeline, p10_pba50, joinFN) - - ### 2 VARIABLE CLEANING - # rename any conflicting field names - arcpy.AlterField_management(joinFN, "county", "b_county") - arcpy.AlterField_management(joinFN, "parcel_id", "b_parcel_id") - arcpy.AlterField_management(joinFN, "raw_id", "b_id") - arcpy.AlterField_management(joinFN, "year_built", "b_year_built") - arcpy.AlterField_management(joinFN, "zip", "b_zip") - arcpy.AlterField_management(joinFN, "stories", "b_stories") - 
arcpy.AlterField_management(joinFN, "x", "p_x") # this is from the parcel centroid - arcpy.AlterField_management(joinFN, "y", "p_y") # this is from the parcel centroid - arcpy.AlterField_management(joinFN, "geom_id", "p_geom_id")# this is from the parcel - arcpy.AlterField_management(joinFN, "residential_units", "p_residential_units") - arcpy.AlterField_management(joinFN, "edit_date", "p_edit_date") - # add fields and calc values - # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, - # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, - # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, - # last_sale_year,last_sale_price,source,edit_date,editor,version - # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain}) - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "raw_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "site_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - arcpy.AddField_management(joinFN, "action", "TEXT","","",10) - arcpy.AddField_management(joinFN, "scen0", "SHORT") - arcpy.AddField_management(joinFN, "scen1", "SHORT") - arcpy.AddField_management(joinFN, "scen2", "SHORT") - arcpy.AddField_management(joinFN, "scen3", "SHORT") - arcpy.AddField_management(joinFN, "scen4", "SHORT") - arcpy.AddField_management(joinFN, "scen5", "SHORT") - arcpy.AddField_management(joinFN, "scen6", "SHORT") - arcpy.AddField_management(joinFN, "scen7", "SHORT") - arcpy.AddField_management(joinFN, "scen10", "SHORT") - arcpy.AddField_management(joinFN, "scen11", "SHORT") - arcpy.AddField_management(joinFN, "scen12", "SHORT") - arcpy.AddField_management(joinFN, "scen15", "SHORT") - arcpy.AddField_management(joinFN, "scen20", "SHORT") - arcpy.AddField_management(joinFN, "scen21", "SHORT") - arcpy.AddField_management(joinFN, "scen22", "SHORT") - arcpy.AddField_management(joinFN, "scen23", "SHORT") - arcpy.AddField_management(joinFN, "scen24", "SHORT") - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "address", "TEXT","","",200) - arcpy.AddField_management(joinFN, "city", "TEXT","","",50) - arcpy.AddField_management(joinFN, "zip", "TEXT","","",50) - arcpy.AddField_management(joinFN, "county", "TEXT","","",50) - arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "year_built", "SHORT") - arcpy.AddField_management(joinFN, "duration", "SHORT") - arcpy.AddField_management(joinFN, "building_type_id", "LONG") - arcpy.AddField_management(joinFN, "building_type", "TEXT","","",4) - arcpy.AddField_management(joinFN, "building_sqft", "LONG") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "tenure", "TEXT","","",5) - arcpy.AddField_management(joinFN, "rent_type", 
"TEXT","","",25) - arcpy.AddField_management(joinFN, "stories", "SHORT") - arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") - arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") - arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") - ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work - arcpy.AddField_management(joinFN, "last_sale_year", "DATE") - arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") - arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") - arcpy.AddField_management(joinFN, "source", "TEXT","","",15) - arcpy.AddField_management(joinFN, "edit_date", "LONG") - if not arcpy.ListFields(joinFN, "incl"): - arcpy.AddField_management(joinFN, "incl", "SHORT") - - arcpy.CalculateField_management(joinFN, "building_name", '!project_name!') - arcpy.CalculateField_management(joinFN, "action", "'build'")# need to quote marks here - arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!b_parcel_id!') - arcpy.CalculateField_management(joinFN, "scen0", 1) - arcpy.CalculateField_management(joinFN, "scen1", 1) - arcpy.CalculateField_management(joinFN, "scen2", 1) - arcpy.CalculateField_management(joinFN, "scen3", 1) - arcpy.CalculateField_management(joinFN, "scen4", 1) - arcpy.CalculateField_management(joinFN, "scen5", 1) - arcpy.CalculateField_management(joinFN, "scen6", 1) - arcpy.CalculateField_management(joinFN, "scen7", 1) - arcpy.CalculateField_management(joinFN, "scen10", 1) - arcpy.CalculateField_management(joinFN, "scen11", 1) - arcpy.CalculateField_management(joinFN, "scen12", 1) - arcpy.CalculateField_management(joinFN, "scen15", 1) - arcpy.CalculateField_management(joinFN, "scen20", 1) - arcpy.CalculateField_management(joinFN, "scen21", 1) - arcpy.CalculateField_management(joinFN, "scen22", 1) - arcpy.CalculateField_management(joinFN, "scen23", 1) - arcpy.CalculateField_management(joinFN, "scen24", 1) - arcpy.CalculateField_management(joinFN, "scen25", 1) - arcpy.CalculateField_management(joinFN, "scen26", 1) - arcpy.CalculateField_management(joinFN, "scen27", 1) - arcpy.CalculateField_management(joinFN, "scen28", 1) - arcpy.CalculateField_management(joinFN, "scen29", 1) # these are committed so 1 for all scens - arcpy.CalculateField_management(joinFN, "address", '!street_address!') - arcpy.CalculateField_management(joinFN, "city", '!mailing_city_name!') - ##arcpy.CalculateField_management(joinFN, "zip", '!b_zip!') ##not sure how to convert text to long data type - arcpy.CalculateField_management(joinFN, "county", '!b_county!') - arcpy.CalculateField_management(joinFN, "x", '!p_x!') - arcpy.CalculateField_management(joinFN, "y", '!p_y!') - arcpy.CalculateField_management(joinFN, "geom_id", '!p_geom_id!') - arcpy.CalculateField_management(joinFN, "year_built", '!b_year_built!') - arcpy.CalculateField_management(joinFN, "building_type", '!building_type_det!') - arcpy.CalculateField_management(joinFN, "building_sqft", '!building_sqft!') # how often null for res - arcpy.CalculateField_management(joinFN, "residential_units", '!p_residential_units!') - arcpy.CalculateField_management(joinFN, "tenure", "'Rent'") ##what is tenure - arcpy.CalculateField_management(joinFN, "stories", '!b_stories!') - arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0) - arcpy.CalculateField_management(joinFN, "source", "'basis'") - arcpy.CalculateField_management(joinFN, "edit_date", 
20200429) - #arcpy.CalculateField_management(joinFN, "version", ) - - #remove row where incl != 1 - with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: - for row in cursor: - if row[0] != 1: - cursor.deleteRow() - - #remove Vallco project pointed out by Mark: parcel_id 1445028 - with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: - for row in cursor: - if row[0] == 1445028: - cursor.deleteRow() - - #check all incl = 1 records are included - countTwo = countRow(joinFN) - if countTwo == countOne - 1: #deleting one project - logger.info("All records with incl = 1 in feature class {} are included in the temp file".format(basis_pipeline)) - else: - logger.fatal("Something is wrong in the code, please check") - raise - - ### 3 DELETE OTHER FIELDS - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - - gidnull = 'gidnull' - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - - ###4 REMOVE DUPLICATES - #check again existing geomList and remove duplicates - with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: - for row in cursor: - if row[0] in geoList: - cursor.deleteRow() - #then add the geoms in the geomList - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - - ### for basis_pb - countOne = countRow(basis_pb_new) - logger.info("Feature Class {} has {} records with incl = 1".format(basis_pb_new, countOne)) - joinFN = 'ttt_basis_pb_new_p10__pba50' - dev_projects_temp_layers.append(joinFN) - - logger.info("Creating layer {} by spatial joining basis pba pipeline data ({}) and parcels ({})".format(joinFN, basis_pb_new, p10_pba50)) - arcpy.DeleteField_management(basis_pb_new, "geom_id") #this column is causing trouble - arcpy.SpatialJoin_analysis(basis_pb_new, p10_pba50, joinFN) - - #remove records on parcels where there are no increase in residential units -- in comparsion to b10 table - #first count existing rows - cnt1 = arcpy.GetCount_management(joinFN) - #examine building - b10_smelt = os.path.join(SMELT_GDB, "b10") - arcpy.TableToTable_conversion(b10_smelt, arcpy.env.workspace,'b10') - b10 = 'b10' - arcpy.analysis.Statistics(b10, 'b10_unitSUM',"residential_units SUM", "parcel_id") - nonZero = arcpy.SelectLayerByAttribute_management('b10_unitSUM', "NEW_SELECTION", '"SUM_residential_units" > 0')#choose only parcels with residential units already - arcpy.CopyRows_management(nonZero, 'nonZeroParcel') - 
arcpy.MakeFeatureLayer_management(joinFN, 'basis_join','', arcpy.env.workspace) - arcpy.AddJoin_management('basis_join', "PARCEL_ID", "nonZeroParcel", "parcel_id","KEEP_COMMON") - #arcpy.SelectLayerByAttribute_management(joinFN, "NEW_SELECTION", "ttt_basis_pb_new_p10__pba50.urbansim_parcels_v3_geo_county_id = 85", None) - #find parcels to remove - parcelRemoveList = [] - with arcpy.da.SearchCursor('basis_join',['ttt_basis_pb_new_p10__pba50.parcel_id', - "ttt_basis_pb_new_p10__pba50.residential_units", - "nonZeroParcel.SUM_residential_units"]) as cursor: - for row in cursor: - if row[1] is not None: - if row[1] - row[2] == 0: - parcelRemoveList.append(row[0]) - logger.info("There are {} records in basis_pb_new that do not see increase in residential unit counts on the parcel".format(len(parcelRemoveList))) - #remove join - #arcpy.RemoveJoin_management(joinFN, "nonZeroParcel") - #arcpy.SelectLayerByAttribute_management(joinFN, "CLEAR_SELECTION") - - arcpy.AlterField_management(joinFN, "year_built", "n_year_built") - arcpy.AlterField_management(joinFN, "building_sqft", "n_building_sqft") - arcpy.AlterField_management(joinFN, "residential_units", "n_residential_units") - arcpy.AlterField_management(joinFN, "X", "n_x") - arcpy.AlterField_management(joinFN, "Y", "n_y") - arcpy.AlterField_management(joinFN, "GEOM_ID", "n_geom_id") - arcpy.AlterField_management(joinFN, "parcel_id", "n_parcel_id") - - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "raw_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "site_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - arcpy.AddField_management(joinFN, "action", "TEXT","","",10) - arcpy.AddField_management(joinFN, "scen0", "SHORT") - arcpy.AddField_management(joinFN, "scen1", "SHORT") - arcpy.AddField_management(joinFN, "scen2", "SHORT") - arcpy.AddField_management(joinFN, "scen3", "SHORT") - arcpy.AddField_management(joinFN, "scen4", "SHORT") - arcpy.AddField_management(joinFN, "scen5", "SHORT") - arcpy.AddField_management(joinFN, "scen6", "SHORT") - arcpy.AddField_management(joinFN, "scen7", "SHORT") - arcpy.AddField_management(joinFN, "scen10", "SHORT") - arcpy.AddField_management(joinFN, "scen11", "SHORT") - arcpy.AddField_management(joinFN, "scen12", "SHORT") - arcpy.AddField_management(joinFN, "scen15", "SHORT") - arcpy.AddField_management(joinFN, "scen20", "SHORT") - arcpy.AddField_management(joinFN, "scen21", "SHORT") - arcpy.AddField_management(joinFN, "scen22", "SHORT") - arcpy.AddField_management(joinFN, "scen23", "SHORT") - arcpy.AddField_management(joinFN, "scen24", "SHORT") - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "address", "TEXT","","",200) - arcpy.AddField_management(joinFN, "city", "TEXT","","",50) - arcpy.AddField_management(joinFN, "zip", "TEXT","","",50) ## this is changed from LONG to TEXT because cs1115 file has some text formatted zipcode with "-" - arcpy.AddField_management(joinFN, "county", "TEXT","","",50) - arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "year_built", 
"SHORT") - arcpy.AddField_management(joinFN, "duration", "SHORT") - arcpy.AddField_management(joinFN, "building_type_id", "LONG") - arcpy.AddField_management(joinFN, "building_sqft", "LONG") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "tenure", "TEXT","","",5) - arcpy.AddField_management(joinFN, "rent_type", "TEXT","","",25) - arcpy.AddField_management(joinFN, "stories", "SHORT") - arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") - arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") - arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") - ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work - arcpy.AddField_management(joinFN, "last_sale_year", "DATE") - arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") - arcpy.AddField_management(joinFN, "source", "TEXT","","",15) - arcpy.AddField_management(joinFN, "edit_date", "LONG") - arcpy.AddField_management(joinFN, "editor", "TEXT","","",50) - arcpy.AddField_management(joinFN, "version", "SHORT") - - arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!n_parcel_id!') - arcpy.CalculateField_management(joinFN, "scen0", 1) - arcpy.CalculateField_management(joinFN, "scen1", 1) - arcpy.CalculateField_management(joinFN, "scen2", 1) - arcpy.CalculateField_management(joinFN, "scen3", 1) - arcpy.CalculateField_management(joinFN, "scen4", 1) - arcpy.CalculateField_management(joinFN, "scen5", 1) - arcpy.CalculateField_management(joinFN, "scen6", 1) - arcpy.CalculateField_management(joinFN, "scen7", 1) - arcpy.CalculateField_management(joinFN, "scen10", 1) - arcpy.CalculateField_management(joinFN, "scen11", 1) - arcpy.CalculateField_management(joinFN, "scen12", 1) - arcpy.CalculateField_management(joinFN, "scen15", 1) - arcpy.CalculateField_management(joinFN, "scen21", 1) - arcpy.CalculateField_management(joinFN, "scen22", 1) - arcpy.CalculateField_management(joinFN, "scen23", 1) - #instead of deleting remodel projects records, do not include them in the scenarios - with arcpy.da.UpdateCursor(joinFN, ["PARCEL_ID", "scen20","scen24", 'scen25']) as cursor: - for row in cursor: - if row[0] in parcelRemoveList: - row[1] = 0 - row[2] = 0 - row[3] = 0 - else: - row[1] = 1 - row[2] = 1 - row[3] = 1 - cursor.updateRow(row) - arcpy.CalculateField_management(joinFN, "scen26", "!scen25!") - arcpy.CalculateField_management(joinFN, "scen27", "!scen25!") - arcpy.CalculateField_management(joinFN, "scen28", "!scen25!") - arcpy.CalculateField_management(joinFN, "scen29", "!scen25!") - arcpy.CalculateField_management(joinFN, "action", "'build'") - arcpy.CalculateField_management(joinFN, "city", '!urbansim_parcels_v3_geo_city!') - with arcpy.da.UpdateCursor(joinFN, ["urbansim_parcels_v3_geo_county", "county"]) as cursor: - for row in cursor: - if row[0] == 1: - row[1] = 'Alameda' - elif row[0] == 13: - row[1] = 'Contra Costa' - elif row[0] == 41: - row[1] = 'Marin' - elif row[0] == 55: - row[1] = 'Napa' - elif row[0] == 75: - row[1] = 'San Francisco' - elif row[0] == 81: - row[1] = 'San Mateo' - elif row[0] == 85: - row[1] = 'Santa Clara' - elif row[0] == 95: - row[1] = 'Solano' - elif row[0] == 97: - row[1] = 'Sonoma' - cursor.updateRow(row) - arcpy.CalculateField_management(joinFN, "x", '!n_x!') - arcpy.CalculateField_management(joinFN, "y", '!n_y!') - 
arcpy.CalculateField_management(joinFN, "geom_id", '!n_geom_id!') - arcpy.CalculateField_management(joinFN, "year_built", '!n_year_built!') - arcpy.CalculateField_management(joinFN, "building_sqft", '!n_building_sqft!') - arcpy.CalculateField_management(joinFN, "residential_units", '!n_residential_units!') - - with arcpy.da.UpdateCursor(joinFN, ["building_sqft", "residential_units", "unit_ave_sqft"]) as cursor: - for row in cursor: - if row[1] is int: - row[2] = row[0] / row[1] - cursor.updateRow(row) - - arcpy.CalculateField_management(joinFN, "last_sale_year", '!last_sale_date!') #need to make into year - arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0) - arcpy.CalculateField_management(joinFN, "source", "'bas_bp_new'") - arcpy.CalculateField_management(joinFN, "edit_date", 20200429) - arcpy.CalculateField_management(joinFN, "editor", "'MKR'") - - with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: - for row in cursor: - if row[0] != 1: - cursor.deleteRow() - - #check to make sure that the number of remaining records in the temp file (which should still have var incl) is the same as the raw file - #countTwo = countRow(joinFN) - #if countTwo == countOne: - # logger.info("All records with incl = 1 in feature class {} are included in the temp file".format(basis_pb_new)) - #else: - # logger.fatal("Something is wrong in the code, please check") - # raise - ### 3 DELETE OTHER FIELDS - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - - gidnull = 'gidnull' - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - ###4 REMOVE DUPLICATES - #check again existing geomList and remove duplicates - with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: - for row in cursor: - if row[0] in geoList: - cursor.deleteRow() - #then add the geoms in the geomList - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - ### for redfin data - ### create a list of feature class - rf = [rfsfr1619, rfsfr1115, rfmu1619, rfcondo1115, rfother1115] - for fc in rf: - countOne = countRow(fc) - logger.info("Feature Class {} has {} records with incl = 1".format(fc, countOne)) - joinFN = 'ttt_' + arcpy.Describe(fc).name + '__p10_pba50' - dev_projects_temp_layers.append(joinFN) - - ### 1 SPATIAL JOINS - logger.info("Creating layer {} by spatial joining redfin ({}) and parcels ({})".format(joinFN, fc, p10_pba50)) - 
arcpy.SpatialJoin_analysis(fc, os.path.join(SMELT_GDB, p10_pba50), joinFN) - ### 2 VARIABLE CLEANING - - # rename any conflicting field names - arcpy.AlterField_management(joinFN, "CITY", "rf_city") - arcpy.AlterField_management(joinFN, "COUNTY", "rf_county") - arcpy.AlterField_management(joinFN, "YEAR_BUILT", "rf_year_built") - arcpy.AlterField_management(joinFN, "ADDRESS", "rf_address") - arcpy.AlterField_management(joinFN, "x", "p_x") # this is from the parcel centroid - arcpy.AlterField_management(joinFN, "y", "p_y") # this is from the parcel centroid - arcpy.AlterField_management(joinFN, "geom_id", "p_geom_id") # this is from the parcel - arcpy.AlterField_management(joinFN, "parcel_id", "rf_parcel_id") - - # add fields and calc values - # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, - # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, - # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, - # last_sale_year,last_sale_price,source,edit_date,editor,version - # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain}) - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "raw_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "site_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - arcpy.AddField_management(joinFN, "action", "TEXT","","",10) - arcpy.AddField_management(joinFN, "scen0", "SHORT") - arcpy.AddField_management(joinFN, "scen1", "SHORT") - arcpy.AddField_management(joinFN, "scen2", "SHORT") - arcpy.AddField_management(joinFN, "scen3", "SHORT") - arcpy.AddField_management(joinFN, "scen4", "SHORT") - arcpy.AddField_management(joinFN, "scen5", "SHORT") - arcpy.AddField_management(joinFN, "scen6", "SHORT") - arcpy.AddField_management(joinFN, "scen7", "SHORT") - arcpy.AddField_management(joinFN, "scen10", "SHORT") - arcpy.AddField_management(joinFN, "scen11", "SHORT") - arcpy.AddField_management(joinFN, "scen12", "SHORT") - arcpy.AddField_management(joinFN, "scen15", "SHORT") - arcpy.AddField_management(joinFN, "scen20", "SHORT") - arcpy.AddField_management(joinFN, "scen21", "SHORT") - arcpy.AddField_management(joinFN, "scen22", "SHORT") - arcpy.AddField_management(joinFN, "scen23", "SHORT") - arcpy.AddField_management(joinFN, "scen24", "SHORT") - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "address", "TEXT","","",200) - arcpy.AddField_management(joinFN, "city", "TEXT","","",50) - arcpy.AddField_management(joinFN, "zip", "TEXT","","",50) - arcpy.AddField_management(joinFN, "county", "TEXT","","",50) - arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "year_built", "SHORT") - arcpy.AddField_management(joinFN, "duration", "SHORT") - arcpy.AddField_management(joinFN, "building_type_id", "LONG") - arcpy.AddField_management(joinFN, "building_type", 
"TEXT","","",4) - arcpy.AddField_management(joinFN, "building_sqft", "LONG") - arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "tenure", "TEXT","","",5) - arcpy.AddField_management(joinFN, "rent_type", "TEXT","","",25) - arcpy.AddField_management(joinFN, "stories", "SHORT") - arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") - arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") - arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") - arcpy.AddField_management(joinFN, "last_sale_year", "DATE") - arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") - arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") - arcpy.AddField_management(joinFN, "source", "TEXT","","",15) - arcpy.AddField_management(joinFN, "edit_date", "LONG") - arcpy.AddField_management(joinFN, "editor", "TEXT","","",50) - arcpy.AddField_management(joinFN, "version", "SHORT") - if not arcpy.ListFields(joinFN, "incl"): - arcpy.AddField_management(joinFN, "incl", "SHORT") - - arcpy.CalculateField_management(joinFN, "raw_id", '!redfinid!') - arcpy.CalculateField_management(joinFN, "action", "'build'") - arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!rf_parcel_id!') - arcpy.CalculateField_management(joinFN, "scen0", 1) - arcpy.CalculateField_management(joinFN, "scen1", 1) - arcpy.CalculateField_management(joinFN, "scen2", 1) - arcpy.CalculateField_management(joinFN, "scen3", 1) - arcpy.CalculateField_management(joinFN, "scen4", 1) - arcpy.CalculateField_management(joinFN, "scen5", 1) - arcpy.CalculateField_management(joinFN, "scen6", 1) - arcpy.CalculateField_management(joinFN, "scen7", 1) - arcpy.CalculateField_management(joinFN, "scen10", 1) - arcpy.CalculateField_management(joinFN, "scen11", 1) - arcpy.CalculateField_management(joinFN, "scen12", 1) - arcpy.CalculateField_management(joinFN, "scen15", 1) - arcpy.CalculateField_management(joinFN, "scen20", 1) - arcpy.CalculateField_management(joinFN, "scen21", 1) - arcpy.CalculateField_management(joinFN, "scen22", 1) - arcpy.CalculateField_management(joinFN, "scen23", 1) - arcpy.CalculateField_management(joinFN, "scen24", 1) - arcpy.CalculateField_management(joinFN, "scen25", 1) - arcpy.CalculateField_management(joinFN, "scen26", 1) - arcpy.CalculateField_management(joinFN, "scen27", 1) - arcpy.CalculateField_management(joinFN, "scen28", 1) - arcpy.CalculateField_management(joinFN, "scen29", 1) # these are committed so 1 for all scens - arcpy.CalculateField_management(joinFN, "address", '!rf_address!') - arcpy.CalculateField_management(joinFN, "city", '!rf_city!') - arcpy.CalculateField_management(joinFN, "county", '!rf_county!') - arcpy.CalculateField_management(joinFN, "x", '!p_x!') - arcpy.CalculateField_management(joinFN, "y", '!p_y!') - arcpy.CalculateField_management(joinFN, "geom_id", '!p_geom_id!') - arcpy.CalculateField_management(joinFN, "year_built", '!rf_year_built!') - if 'sfr' in arcpy.Describe(fc).name: - arcpy.CalculateField_management(joinFN, "building_type", "'HS'") - else: - arcpy.CalculateField_management(joinFN, "building_type", "'HM'") - arcpy.CalculateField_management(joinFN, "building_sqft", '!SQFT!') # how often null for res - arcpy.CalculateField_management(joinFN, "non_residential_sqft", 0) # seems redfin data are all residential - 
arcpy.CalculateField_management(joinFN, "residential_units", '!UNITS!') - ###ideally, everything could be done using cursor since it is much faster to run - with arcpy.da.UpdateCursor(joinFN, ["SQFT", "UNITS", "unit_ave_sqft"]) as cursor: - for row in cursor: - row[2] = row[0] / row[1] - cursor.updateRow(row) - arcpy.CalculateField_management(joinFN, "tenure", "'Sale'") #is redfin data rental? - arcpy.CalculateField_management(joinFN, "last_sale_year", '!SOLD_DATE!') #need to make into year - arcpy.CalculateField_management(joinFN, "last_sale_price", '!PRICE!') - arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0) - arcpy.CalculateField_management(joinFN, "source", "'rf'") - arcpy.CalculateField_management(joinFN, "edit_date", 20200429) - arcpy.CalculateField_management(joinFN, "editor", "'MKR'") - - #remove row where incl != 1 - with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: - for row in cursor: - if row[0] != 1: - cursor.deleteRow() - - countTwo = countRow(joinFN) - if countTwo == countOne: - logger.info("All records with incl = 1 in feature class {} are included in the temp file".format(fc)) - else: - logger.fatal("Something is wrong in the code, please check") - raise - - ### 3 DELETE OTHER FIELDS AND TEMP FILES - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - - gidnull = 'gidnull' - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - ###4 REMOVE DUPLICATES - #check again existing geomList and remove duplicates - with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: - for row in cursor: - if row[0] in geoList: - cursor.deleteRow() - #then add the geoms in the geomList - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - - ### 5 MERGE ALL INCL=1 POINTS INTO A SINGLE SHP FILE CALLED PIPELINE - #now let's get to the real full pipeline file - pipeline_fc = "pipeline" - logger.info("Merging feature classes {} into {}".format(dev_projects_temp_layers, pipeline_fc)) - - #merge - arcpy.Merge_management(dev_projects_temp_layers, pipeline_fc) - count = arcpy.GetCount_management(pipeline_fc) - logger.info(" Results in {} rows in {}".format(int(count[0]), pipeline_fc)) - - - ### 6 MERGE OPPSITES SHP WITH PIPELINE TO GET DEVELOPMENT PROJECTS - #public sites - joinFN = 'ttt_pubsites_p10_pba50' - dev_projects_temp_layers.append(joinFN) - - try: - count = arcpy.GetCount_management(joinFN) - if 
int(count[0]) > 100: - logger.info("Found layer {} with {} rows -- skipping creation".format(joinFN, int(count[0]))) - except: - # go ahead and create it - logger.info("Creating layer {} by spatial joining pub sites data ({}) and parcels ({})".format(joinFN, pub_sites, p10_pba50)) - arcpy.SpatialJoin_analysis(pub_sites, p10_pba50, joinFN) - - arcpy.AlterField_management(joinFN, "PARCEL_ID", "pb_parcel_id") - arcpy.AlterField_management(joinFN, "X", "p_x") - arcpy.AlterField_management(joinFN, "Y", "p_y") - arcpy.AlterField_management(joinFN, "GEOM_ID", "pb_geom_id") - arcpy.AlterField_management(joinFN, "scen20", "p_scen20") - arcpy.AlterField_management(joinFN, "scen25", "p_scen25") - - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "scen20", "SHORT") - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "building_type", "TEXT","","",4) - arcpy.AddField_management(joinFN, "building_sqft", "LONG") - arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") - arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") - arcpy.AddField_management(joinFN, "last_sale_year", "DATE") - arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") - arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") - - # NOTE THAT OPPSITES HAS SCEN SET IN GIS FILE - arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!pb_parcel_id!') - arcpy.CalculateField_management(joinFN, "development_projects_id", '!developmen!') - arcpy.CalculateField_management(joinFN, "building_name", '!building_n!') - arcpy.CalculateField_management(joinFN, "scen20", 0) - arcpy.CalculateField_management(joinFN, "scen25", 0) - - # create a list of parcel id of public land projects that are in GGtra - # because just using select and update doesn't work somehow - pub_GGtra = arcpy.SelectLayerByLocation_management(joinFN, "WITHIN", GGtra, None, "NEW_SELECTION", "NOT_INVERT") - arcpy.CopyFeatures_management(pub_GGtra, 'pub_GGtra') - arcpy.SelectLayerByAttribute_management(joinFN, "CLEAR_SELECTION") - pubTraList = [row[0] for row in arcpy.da.SearchCursor(pub_GGtra, 'PARCEL_ID')] - with arcpy.da.UpdateCursor(joinFN, ["PARCEL_ID", "scen26"]) as cursor: - for row in cursor: - if row[0] in pubTraList: - row[1] = 1 - else: - row[1] = 0 - cursor.updateRow(row) - - arcpy.CalculateField_management(joinFN, "scen27", 0) - arcpy.CalculateField_management(joinFN, "scen28", 0) - arcpy.CalculateField_management(joinFN, "scen29", 0) - arcpy.CalculateField_management(joinFN, "x", '!p_x!') - arcpy.CalculateField_management(joinFN, "y", '!p_y!') - arcpy.CalculateField_management(joinFN, "geom_id", '!pb_geom_id!') - arcpy.CalculateField_management(joinFN, "building_type", "'MR'") - arcpy.CalculateField_management(joinFN, "building_sqft", 
"!building_s!") - arcpy.CalculateField_management(joinFN, "non_residential_sqft", "!non_reside!") - arcpy.CalculateField_management(joinFN, "residential_units", "!residentia!") - arcpy.CalculateField_management(joinFN, "unit_ave_sqft", "!unit_ave_s!") - arcpy.CalculateField_management(joinFN, "deed_restricted_units", "!deed_restr!") - - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - - gidnull = 'gidnull' - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - - - scen26count = 'scen26count' - arcpy.MakeTableView_management(joinFN,scen26count,"scen26 = 1") - onecount = arcpy.GetCount_management(scen26count) - logger.info("{} list has {} records with scen26 is 1".format(joinFN, onecount)) - arcpy.Delete_management(scen26count) - ###4 REMOVE DUPLICATES - #check again existing geomList and remove duplicates - with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: - for row in cursor: - if row[0] in geoList: - cursor.deleteRow() - #then add the geoms in the geomList - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - joinFN = 'ttt_mallsites_p10_pba50' - dev_projects_temp_layers.append(joinFN) - - try: - count = arcpy.GetCount_management(joinFN) - if int(count[0]) > 100: - logger.info("Found layer {} with {} rows -- skipping creation".format(joinFN, int(count[0]))) - except: - # go ahead and create it - logger.info("Creating layer {} by spatial joining mall office sites data ({}) and parcels ({})".format(joinFN, mall_sites, p10_pba50)) - arcpy.SpatialJoin_analysis(mall_sites, p10_pba50, joinFN) - - arcpy.AlterField_management(joinFN, "PARCEL_ID", "m_parcel_id") - arcpy.AlterField_management(joinFN, "X", "p_x") - arcpy.AlterField_management(joinFN, "Y", "p_y") - arcpy.AlterField_management(joinFN, "GEOM_ID", "m_geom_id") - arcpy.AlterField_management(joinFN, "scen20", "m_scen20") - arcpy.AlterField_management(joinFN, "scen25", "m_scen25") - - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - 
arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "building_type", "TEXT","","",4) - arcpy.AddField_management(joinFN, "building_sqft", "LONG") - arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") - arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") - arcpy.AddField_management(joinFN, "last_sale_year", "DATE") - arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") - arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") - - # NOTE THAT OPPSITES HAS SCEN SET IN GIS FILE - arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!m_parcel_id!') - arcpy.CalculateField_management(joinFN, "development_projects_id", '!developmen!') - arcpy.CalculateField_management(joinFN, "building_name", '!building_n!') - arcpy.CalculateField_management(joinFN, "scen20", 0) - arcpy.CalculateField_management(joinFN, "scen25", 0) - - # create a list of parcel id of public land projects that are in GGtra - # because just using select and update doesn't work somehow - pub_GGtra = arcpy.SelectLayerByLocation_management(joinFN, "WITHIN", GGtra, None, "NEW_SELECTION", "NOT_INVERT") - arcpy.CopyFeatures_management(pub_GGtra, 'pub_GGtra') - arcpy.SelectLayerByAttribute_management(joinFN, "CLEAR_SELECTION") - pubTraList = [row[0] for row in arcpy.da.SearchCursor(pub_GGtra, 'PARCEL_ID')] - with arcpy.da.UpdateCursor(joinFN, ["PARCEL_ID", "scen26"]) as cursor: - for row in cursor: - if row[0] in pubTraList: - row[1] = 1 - else: - row[1] = 0 - cursor.updateRow(row) - - arcpy.CalculateField_management(joinFN, "scen27", 0) - arcpy.CalculateField_management(joinFN, "scen28", 0) - arcpy.CalculateField_management(joinFN, "scen29", 0) - arcpy.CalculateField_management(joinFN, "x", '!p_x!') - arcpy.CalculateField_management(joinFN, "y", '!p_y!') - arcpy.CalculateField_management(joinFN, "geom_id", '!m_geom_id!') - arcpy.CalculateField_management(joinFN, "building_type", "'MR'") - arcpy.CalculateField_management(joinFN, "building_sqft", "!building_s!") - arcpy.CalculateField_management(joinFN, "non_residential_sqft", "!non_reside!") - arcpy.CalculateField_management(joinFN, "residential_units", "!residentia!") - arcpy.CalculateField_management(joinFN, "unit_ave_sqft", "!unit_ave_s!") - arcpy.CalculateField_management(joinFN, "deed_restricted_units", "!deed_restr!") - - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = 
list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - - gidnull = 'gidnull' - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - - scen26count = 'scen26count' - arcpy.MakeTableView_management(joinFN,scen26count,"scen26 = 1") - onecount = arcpy.GetCount_management(scen26count) - logger.info("{} list has {} records with scen26 is 1".format(joinFN, onecount)) - arcpy.Delete_management(scen26count) - ###4 REMOVE DUPLICATES - #check again existing geomList and remove duplicates - #for malls, changing those into add to make sure every mall project is in, per Mark's comment - with arcpy.da.UpdateCursor(joinFN, ["PARCEL_ID","action"]) as cursor: - for row in cursor: - if row[0] in geoList: - if row[1] == 'build': - row[1] == 'add' - cursor.updateRow(row) - #then add the geoms in the geomList - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - #opportunity sites - joinFN = 'ttt_opp_p10_pba50' - dev_projects_temp_layers.append(joinFN) - - try: - count = arcpy.GetCount_management(joinFN) - if int(count[0]) > 100: - logger.info("Found layer {} with {} rows -- skipping creation".format(joinFN, int(count[0]))) - except: - # go ahead and create it - logger.info("Creating layer {} by spatial joining opp sites data ({}) and parcels ({})".format(joinFN, opp_sites, p10_pba50)) - arcpy.SpatialJoin_analysis(opp_sites, p10_pba50, joinFN) - - arcpy.AlterField_management(joinFN, "year_built", "o_year_built") - arcpy.AlterField_management(joinFN, "last_sale_price", "o_last_sale_price") - arcpy.AlterField_management(joinFN, "last_sale_year", "o_sale_date") - arcpy.AlterField_management(joinFN, "stories", "o_stories") - arcpy.AlterField_management(joinFN, "building_name", "o_building_name") - arcpy.AlterField_management(joinFN, "site_name", "o_site_name") - arcpy.AlterField_management(joinFN, "PARCEL_ID", "o_parcel_id") - arcpy.AlterField_management(joinFN, "scen0", "o_scen0") - arcpy.AlterField_management(joinFN, "scen1", "o_scen1") - arcpy.AlterField_management(joinFN, "scen2", "o_scen2") - arcpy.AlterField_management(joinFN, "scen3", "o_scen3") - arcpy.AlterField_management(joinFN, "scen4", "o_scen4") - arcpy.AlterField_management(joinFN, "scen5", "o_scen5") - arcpy.AlterField_management(joinFN, "scen6", "o_scen6") - arcpy.AlterField_management(joinFN, "scen7", "o_scen7") - arcpy.AlterField_management(joinFN, "scen10", "o_scen10") - arcpy.AlterField_management(joinFN, "scen11", "o_scen11") - arcpy.AlterField_management(joinFN, "scen12", "o_scen12") - arcpy.AlterField_management(joinFN, "scen15", "o_scen15") - arcpy.AlterField_management(joinFN, "scen20", "o_scen20") - arcpy.AlterField_management(joinFN, "scen21", "o_scen21") - arcpy.AlterField_management(joinFN, "scen22", "o_scen22") - arcpy.AlterField_management(joinFN, "scen23", "o_scen23") - arcpy.AlterField_management(joinFN, "scen24", "o_scen24") - arcpy.AlterField_management(joinFN, "scen25", "o_scen25") - arcpy.AlterField_management(joinFN, "duration", "o_duration") - arcpy.AlterField_management(joinFN, "parking_spaces", "o_parking_spaces") - arcpy.AlterField_management(joinFN, "non_residential_sqft", "o_non_residential_sqft") - arcpy.AlterField_management(joinFN, "building_sqft", "o_building_sqft") - arcpy.AlterField_management(joinFN, "residential_units", 
"o_residential_units") - arcpy.AlterField_management(joinFN, "unit_ave_sqft", "o_unit_ave_sqft") - arcpy.AlterField_management(joinFN, "rent_ave_sqft", "o_rent_ave_sqft") - arcpy.AlterField_management(joinFN, "zip", "o_zips") - arcpy.AlterField_management(joinFN, "Average_Weighted_Rent", "average_weighted_rent") - arcpy.AlterField_management(joinFN, "x", "o_x") - arcpy.AlterField_management(joinFN, "y", "o_y") - arcpy.AlterField_management(joinFN, "geom_id", "o_geom_id") - arcpy.AlterField_management(joinFN, "geom_id_s", "o_geom_id2") - arcpy.AlterField_management(joinFN, "source", "o_source") - - arcpy.AddField_management(joinFN, "development_projects_id", "LONG") - arcpy.AddField_management(joinFN, "building_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "site_name", "TEXT","","",200) - arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") - arcpy.AddField_management(joinFN, "raw_id", "LONG") - arcpy.AddField_management(joinFN, "scen0", "SHORT") - arcpy.AddField_management(joinFN, "scen1", "SHORT") - arcpy.AddField_management(joinFN, "scen2", "SHORT") - arcpy.AddField_management(joinFN, "scen3", "SHORT") - arcpy.AddField_management(joinFN, "scen4", "SHORT") - arcpy.AddField_management(joinFN, "scen5", "SHORT") - arcpy.AddField_management(joinFN, "scen6", "SHORT") - arcpy.AddField_management(joinFN, "scen7", "SHORT") - arcpy.AddField_management(joinFN, "scen10", "SHORT") - arcpy.AddField_management(joinFN, "scen11", "SHORT") - arcpy.AddField_management(joinFN, "scen12", "SHORT") - arcpy.AddField_management(joinFN, "scen15", "SHORT") - arcpy.AddField_management(joinFN, "scen20", "SHORT") - arcpy.AddField_management(joinFN, "scen21", "SHORT") - arcpy.AddField_management(joinFN, "scen22", "SHORT") - arcpy.AddField_management(joinFN, "scen23", "SHORT") - arcpy.AddField_management(joinFN, "scen24", "SHORT") - arcpy.AddField_management(joinFN, "scen25", "SHORT") - arcpy.AddField_management(joinFN, "scen26", "SHORT") - arcpy.AddField_management(joinFN, "scen27", "SHORT") - arcpy.AddField_management(joinFN, "scen28", "SHORT") - arcpy.AddField_management(joinFN, "scen29", "SHORT") - arcpy.AddField_management(joinFN, "zip", "TEXT","","",50) - arcpy.AddField_management(joinFN, "x", "FLOAT") - arcpy.AddField_management(joinFN, "y", "FLOAT") - arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") - arcpy.AddField_management(joinFN, "year_built", "SHORT") - arcpy.AddField_management(joinFN, "duration", "SHORT") - arcpy.AddField_management(joinFN, "building_sqft", "LONG") - arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") - arcpy.AddField_management(joinFN, "residential_units", "SHORT") - arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") - arcpy.AddField_management(joinFN, "stories", "SHORT") - arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") - arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") - ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work - arcpy.AddField_management(joinFN, "edit_date", "LONG") - arcpy.AddField_management(joinFN, "editor", "TEXT","","",50) - arcpy.AddField_management(joinFN, "version", "SHORT") - arcpy.AddField_management(joinFN, "source", "TEXT","","",15) - # NOTE THAT OPPSITES HAS SCEN SET IN GIS FILE - arcpy.CalculateField_management(joinFN, "raw_id", "!opp_id!") - arcpy.CalculateField_management(joinFN, "building_name", "!o_building_name!") - arcpy.CalculateField_management(joinFN, "site_name", "!o_site_name!") - 
arcpy.CalculateField_management(joinFN, "PARCEL_ID", '!o_parcel_id!') - arcpy.CalculateField_management(joinFN, "scen0", "!o_scen0!") - arcpy.CalculateField_management(joinFN, "scen0", "!o_scen0!") - arcpy.CalculateField_management(joinFN, "scen0", "!o_scen0!") - arcpy.CalculateField_management(joinFN, "scen1", "!o_scen1!") - arcpy.CalculateField_management(joinFN, "scen2", "!o_scen2!") - arcpy.CalculateField_management(joinFN, "scen3", "!o_scen3!") - arcpy.CalculateField_management(joinFN, "scen4", "!o_scen4!") - arcpy.CalculateField_management(joinFN, "scen5", "!o_scen5!") - arcpy.CalculateField_management(joinFN, "scen6", "!o_scen6!") - arcpy.CalculateField_management(joinFN, "scen7", "!o_scen7!") - arcpy.CalculateField_management(joinFN, "scen10", "!o_scen10!") - arcpy.CalculateField_management(joinFN, "scen11", "!o_scen11!") - arcpy.CalculateField_management(joinFN, "scen12", "!o_scen12!") - arcpy.CalculateField_management(joinFN, "scen15", "!o_scen15!") - arcpy.CalculateField_management(joinFN, "scen20", 0) - arcpy.CalculateField_management(joinFN, "scen21", "!o_scen21!") - arcpy.CalculateField_management(joinFN, "scen22", "!o_scen22!") - arcpy.CalculateField_management(joinFN, "scen23", "!o_scen23!") - arcpy.CalculateField_management(joinFN, "scen24", "!o_scen23!") - arcpy.CalculateField_management(joinFN, "scen25", 0) - arcpy.CalculateField_management(joinFN, "scen26", "!o_scen23!") - arcpy.CalculateField_management(joinFN, "scen27", 0) - arcpy.CalculateField_management(joinFN, "scen28", "!o_scen23!") - arcpy.CalculateField_management(joinFN, "scen29", 0) - arcpy.CalculateField_management(joinFN, "x", '!X_1!') - arcpy.CalculateField_management(joinFN, "y", '!Y_1!') - arcpy.CalculateField_management(joinFN, "geom_id", '!o_geom_id2!') - arcpy.CalculateField_management(joinFN, "year_built", '!o_year_built!') - arcpy.CalculateField_management(joinFN, "building_sqft", '!o_building_sqft!') - arcpy.CalculateField_management(joinFN, "non_residential_sqft", '!o_non_residential_sqft!') - arcpy.CalculateField_management(joinFN, "residential_units", '!o_residential_units!') - arcpy.CalculateField_management(joinFN, "unit_ave_sqft", '!o_unit_ave_sqft!') - #remove mall_office in opp layer by removing pb50_opp - with arcpy.da.UpdateCursor(joinFN, "type") as cursor: - for row in cursor: - if row[0] == 'pb50_opp': - cursor.deleteRow() - with arcpy.da.UpdateCursor(joinFN, ["source","building_name","o_source"]) as cursor: - for row in cursor: - if row[1] == 'incubator': - row[0] = row[1] - elif row[2] == 'ppa': - row[0] = row[2] - else: - row[0] = 'opp' - cursor.updateRow(row) - arcpy.CalculateField_management(joinFN, "edit_date", 20200611) - arcpy.CalculateField_management(joinFN, "editor", "'MKR'") - - FCfields = [f.name for f in arcpy.ListFields(joinFN)] - #add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed - DontDeleteFields = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", 
"ZONE_ID", "edit_date", "editor", "Shape_Length", "Shape_Area"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(joinFN, fields2Delete) - - gidnull = 'gidnull' - arcpy.MakeTableView_management(joinFN,gidnull,"geom_id is NULL") - nullcount = arcpy.GetCount_management(gidnull) - logger.info("{} list has {} records with geom_id info missing".format(joinFN, nullcount)) - arcpy.Delete_management(gidnull) - ###4 REMOVE DUPLICATES - #check again existing geomList and remove duplicates - with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: - for row in cursor: - if row[0] in geoList: - cursor.deleteRow() - #then add the geoms in the geomList - gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, 'PARCEL_ID')] - for geo in gList: - geoList.append(geo) - - #not going to check duplicates, since opp sites should not duplicate - - #all non opp sites should be in the list dev_projects_temp_layers already - devproj_fc = "development_project" - logger.info("Merging feature classes {} into {}".format(dev_projects_temp_layers, devproj_fc)) - - arcpy.Merge_management(dev_projects_temp_layers, devproj_fc) - count = arcpy.GetCount_management(devproj_fc) - logger.info(" Results in {} rows in {}".format(int(count[0]), devproj_fc)) - - #assign unique incremental development_id - i = 1 - with arcpy.da.UpdateCursor(devproj_fc, "development_projects_id") as cursor: - for row in cursor: - if i <= int(count[0]) : - row[0] = i - i = i + 1 - cursor.updateRow(row) - - # it's no longer necessary to delete temporary spatial join layers since they're in the temporary WORKSPACE_GDB - - #update mapping of building types from detailed to simplified in both pipeline - arcpy.AlterField_management(pipeline_fc, "building_type", "building_type_det","building_type_det") - arcpy.AddField_management(pipeline_fc, "building_type", "TEXT","","","800") - arcpy.AddField_management(pipeline_fc, "building_type_id", "LONG") - arcpy.AddField_management(pipeline_fc, "development_type_id", "LONG") - - with arcpy.da.UpdateCursor(pipeline_fc, ['building_type_det', "building_type","building_type_id", 'development_type_id']) as cursor: - for row in cursor: - if row[0] == 'HS': - row[1] = 'HS' - row[2] = 1 - row[3] = 1 - elif row[0] == 'HT': - row[1] = 'HT' - row[2] = 2 - row[3] = 2 - elif row[0] == 'HM': - row[1] = 'HM' - row[2] = 3 - row[3] = 2 - elif row[0] == 'MH': - row[1] = 'HM' - row[2] = 3 - row[3] = 4 - elif row[0] == 'SR': - row[1] = 'HM' - row[2] = 3 - row[3] = 2 - elif row[0] == 'AL': - row[1] = 'GQ' - row[2] = 3 - row[3] = 6 - elif row[0] == 'DM': - row[1] = 'GQ' - row[2] = 3 - row[3] = 6 - elif row[0] == 'CM': - row[1] = 'HM' - row[2] = 3 - row[3] = 2 - elif row[0] == 'OF': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'GV': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'HP': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'HO': - row[1] = 'HO' - row[2] = 5 - row[3] = 9 - elif row[0] == 'SC': - row[1] = 'SC' - row[2] = 6 - row[3] = 17 - elif row[0] == 'UN': - row[1] = 'SC' - row[2] = 6 - row[3] = 18 - elif row[0] == 'IL': - row[1] = 'IL' - row[2] = 7 - row[2] = 14 - elif row[0] == 'FP': - row[1] = 'IL' - row[2] = 7 - row[2] = 14 - elif row[0] == 'IW': - row[1] = 'IW' - row[2] = 8 - row[3] = 13 - elif row[0] == 'IH': - row[1] = 'IH' - row[2] = 9 - row[3] = 15 - elif row[0] == 'RS': - row[1] = 'RS' - row[2] = 10 - row[3] = 7 - elif row[0] == 'RB': - row[1] = 'RB' - row[2] = 11 - row[3] = 8 - elif row[0] == 'MR': - row[1] = 'MR' - row[2] = 12 - 
row[3] = 5 - elif row[0] == 'MT': - row[1] = 'MT' - row[2] = 12 - elif row[0] == 'ME': - row[1] = 'ME' - row[2] = 14 - row[3] = 11 - elif row[0] == 'PA': - row[1] = 'VA' - row[2] = 15 - row[3] = 23 - elif row[0] == 'PG': - row[1] = 'PG' - row[2] = 16 - row[3] = 22 - elif row[0] == 'VA': - row[1] = 'VA' - row[2] = 0 - row[3] = 21 - elif row[0] == 'LR': - row[1] = 'RS' - row[2] = 10 - row[3] = 7 - elif row[0] == 'VP': - row[1] = 'VP' - row[2] = 0 - row[3] = 20 - elif row[0] == 'OT': - row[1] = 'OT' - row[2] = 0 - elif row[0] == 'IN': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'RF': - row[1] = 'RS' - row[2] = 10 - row[3] = 7 - elif row[0] == 'GQ': - row[1] = 'GQ' - row[2] = 3 - row[3] = 6 - cursor.updateRow(row) - - ## count missing value - btnull = 'btnull' ##stands for building type null - arcpy.MakeTableView_management(pipeline_fc,btnull,"building_type is NULL") - nullcount = arcpy.GetCount_management(btnull) - logger.info("Pipeline list has {} records with building type info missing".format(nullcount)) - arcpy.Delete_management(btnull) - - arcpy.AlterField_management(pipeline_fc, 'building_sqft','temp_building_sqft') - arcpy.AddField_management(pipeline_fc, 'building_sqft',"LONG") - arcpy.SelectLayerByAttribute_management(pipeline_fc, "NEW_SELECTION",'"residential_units">0') - arcpy.SelectLayerByAttribute_management(pipeline_fc, "SUBSET_SELECTION",'"non_residential_sqft" = "temp_building_sqft"') - arcpy.SelectLayerByAttribute_management(pipeline_fc, "SUBSET_SELECTION",'"building_type_id" = 3') #HM - arcpy.CalculateField_management(pipeline_fc, "building_sqft","!residential_units! * 1400 + !temp_building_sqft! ", "PYTHON") - - arcpy.SelectLayerByAttribute_management(pipeline_fc, "NEW_SELECTION",'"residential_units">0') - arcpy.SelectLayerByAttribute_management(pipeline_fc, "SUBSET_SELECTION",'"non_residential_sqft" = "temp_building_sqft"') - arcpy.SelectLayerByAttribute_management(pipeline_fc, "SUBSET_SELECTION",'"building_type_id" = 12') #MR - arcpy.CalculateField_management(pipeline_fc, "building_sqft","!residential_units! * 1400 + !temp_building_sqft! 
", "PYTHON") - - arcpy.SelectLayerByAttribute_management(pipeline_fc, "NEW_SELECTION",'"building_sqft" is NULL ') - arcpy.CalculateField_management(pipeline_fc, "building_sqft","!temp_building_sqft!", "PYTHON") - arcpy.SelectLayerByAttribute_management(pipeline_fc, "CLEAR_SELECTION") - - arcpy.DeleteField_management(pipeline_fc, 'temp_building_sqft') - - #same process for development project list - arcpy.AlterField_management(devproj_fc, "building_type", "building_type_det","building_type_det") - arcpy.AddField_management(devproj_fc, "building_type", "TEXT","","","800") - arcpy.AddField_management(devproj_fc, "building_type_id", "LONG") - arcpy.AddField_management(devproj_fc, "development_type_id", "LONG") - - with arcpy.da.UpdateCursor(devproj_fc, ['building_type_det', "building_type","building_type_id", 'development_type_id']) as cursor: - for row in cursor: - if row[0] == 'HS': - row[1] = 'HS' - row[2] = 1 - row[3] = 1 - elif row[0] == 'HT': - row[1] = 'HT' - row[2] = 2 - row[3] = 2 - elif row[0] == 'HM': - row[1] = 'HM' - row[2] = 3 - row[3] = 2 - elif row[0] == 'MH': - row[1] = 'HM' - row[2] = 3 - row[3] = 4 - elif row[0] == 'SR': - row[1] = 'HM' - row[2] = 3 - row[3] = 2 - elif row[0] == 'AL': - row[1] = 'GQ' - row[2] = 3 - row[3] = 6 - elif row[0] == 'DM': - row[1] = 'GQ' - row[2] = 3 - row[3] = 6 - elif row[0] == 'CM': - row[1] = 'HM' - row[2] = 3 - row[3] = 2 - elif row[0] == 'OF': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'GV': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'HP': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'HO': - row[1] = 'HO' - row[2] = 5 - row[3] = 9 - elif row[0] == 'SC': - row[1] = 'SC' - row[2] = 6 - row[3] = 17 - elif row[0] == 'UN': - row[1] = 'SC' - row[2] = 6 - row[3] = 18 - elif row[0] == 'IL': - row[1] = 'IL' - row[2] = 7 - row[2] = 14 - elif row[0] == 'FP': - row[1] = 'IL' - row[2] = 7 - row[2] = 14 - elif row[0] == 'IW': - row[1] = 'IW' - row[2] = 8 - row[3] = 13 - elif row[0] == 'IH': - row[1] = 'IH' - row[2] = 9 - row[3] = 15 - elif row[0] == 'RS': - row[1] = 'RS' - row[2] = 10 - row[3] = 7 - elif row[0] == 'RB': - row[1] = 'RB' - row[2] = 11 - row[3] = 8 - elif row[0] == 'MR': - row[1] = 'MR' - row[2] = 12 - row[3] = 5 - elif row[0] == 'MT': - row[1] = 'MT' - row[2] = 12 - elif row[0] == 'ME': - row[1] = 'ME' - row[2] = 14 - row[3] = 11 - elif row[0] == 'PA': - row[1] = 'VA' - row[2] = 15 - row[3] = 23 - elif row[0] == 'PG': - row[1] = 'PG' - row[2] = 16 - row[3] = 22 - elif row[0] == 'VA': - row[1] = 'VA' - row[2] = 0 - row[3] = 21 - elif row[0] == 'LR': - row[1] = 'RS' - row[2] = 10 - row[3] = 7 - elif row[0] == 'VP': - row[1] = 'VP' - row[2] = 0 - row[3] = 20 - elif row[0] == 'OT': - row[1] = 'OT' - row[2] = 0 - elif row[0] == 'IN': - row[1] = 'OF' - row[2] = 4 - row[3] = 10 - elif row[0] == 'RF': - row[1] = 'RS' - row[2] = 10 - row[3] = 7 - elif row[0] == 'GQ': - row[1] = 'GQ' - row[2] = 3 - row[3] = 6 - cursor.updateRow(row) - ## count missing value - btnull = 'btnull' ##stands for building type null - arcpy.MakeTableView_management(devproj_fc,btnull,"building_type is NULL") - nullcount = arcpy.GetCount_management(btnull) - logger.info("Development Project list has {} records with building type info missing".format(nullcount)) - arcpy.Delete_management(btnull) - - - arcpy.AlterField_management(devproj_fc, 'building_sqft','temp_building_sqft') - arcpy.AddField_management(devproj_fc, 'building_sqft',"LONG") - arcpy.SelectLayerByAttribute_management(devproj_fc, 
"NEW_SELECTION",'"residential_units">0') - arcpy.SelectLayerByAttribute_management(devproj_fc, "SUBSET_SELECTION",'"non_residential_sqft" = "temp_building_sqft"') - arcpy.SelectLayerByAttribute_management(devproj_fc, "SUBSET_SELECTION",'"building_type_id" = 3') #HM - arcpy.CalculateField_management(devproj_fc, "building_sqft","!residential_units! * 1400 + !temp_building_sqft! ", "PYTHON") - - arcpy.SelectLayerByAttribute_management(devproj_fc, "NEW_SELECTION",'"residential_units">0') - arcpy.SelectLayerByAttribute_management(devproj_fc, "SUBSET_SELECTION",'"non_residential_sqft" = "temp_building_sqft"') - arcpy.SelectLayerByAttribute_management(devproj_fc, "SUBSET_SELECTION",'"building_type_id" = 12') #MR - arcpy.CalculateField_management(devproj_fc, "building_sqft","!residential_units! * 1400 + !temp_building_sqft! ", "PYTHON") - - arcpy.SelectLayerByAttribute_management(devproj_fc, "NEW_SELECTION",'"building_sqft" is NULL ') - arcpy.CalculateField_management(devproj_fc, "building_sqft","!temp_building_sqft!", "PYTHON") - arcpy.SelectLayerByAttribute_management(devproj_fc, "CLEAR_SELECTION") - - arcpy.DeleteField_management(devproj_fc, 'temp_building_sqft') - - # 6 DIAGNOSTICS - #number of units total by year - arcpy.Statistics_analysis(devproj_fc, 'res_stats_y', [["residential_units", "SUM"]], "year_built") - #then calculate the total - arcpy.Statistics_analysis(devproj_fc, 'res_stats_a', [["residential_units", "SUM"]]) - #get the total result and write into log - cursor = arcpy.SearchCursor('res_stats_a','','', 'SUM_residential_units') - row = cursor.next() - sum_value = row.getValue('SUM_residential_units') - logger.info("Total number of residential units in {} file: {:,} units".format(devproj_fc, int(sum_value))) - - #number of nonres sqft by year - arcpy.Statistics_analysis(devproj_fc, 'nonres_stats_y', [["non_residential_sqft", "SUM"]], "year_built") - #then calculate the total - arcpy.Statistics_analysis(devproj_fc, 'nonres_stats_a', [["non_residential_sqft", "SUM"]]) - #get the total result and write into log - cursor = arcpy.SearchCursor('nonres_stats_a','','', 'SUM_non_residential_sqft') - row = cursor.next() - sum_value = row.getValue('SUM_non_residential_sqft') - logger.info("Total number of non residential square footage in {}: {:,} square feet".format(devproj_fc, int(sum_value))) - - #count parcels with more than one points on them - pipeline - #first, there is no development projects id for them, so set value for that - count = arcpy.GetCount_management(pipeline_fc) - i = 1 - with arcpy.da.UpdateCursor(pipeline_fc, "development_projects_id") as cursor: - for row in cursor: - if i <= int(count[0]) : - row[0] = i - i = i + 1 - cursor.updateRow(row) - - p_pipeline = "p_pipeline" - arcpy.Statistics_analysis(pipeline_fc, p_pipeline, [["development_projects_id", "COUNT"]], "geom_id") - #there are projects with geom_id null, so in order to count, delete those first - with arcpy.da.UpdateCursor(p_pipeline, "geom_id") as cursor: - for row in cursor: - if row[0] is None: - cursor.deleteRow() - - ppCount = "ppCount" - arcpy.MakeTableView_management(p_pipeline,ppCount,"COUNT_development_projects_id > 1") - countParcelP = arcpy.GetCount_management(ppCount) - logger.info("There are {} of parcels with multiple project points (more than 1) on them in the pipeline file".format(countParcelP)) - - #count parcels with more than one points on them - development projects - p_dev = "p_dev" - arcpy.Statistics_analysis(devproj_fc, p_dev, [["development_projects_id", "COUNT"]], "geom_id") - #there 
are projects with geom_id null, so in order to count, delete those first - with arcpy.da.UpdateCursor(p_dev, "geom_id") as cursor: - for row in cursor: - if row[0] is None: - cursor.deleteRow() - - pdCount = "pdCount" - arcpy.MakeTableView_management(p_dev,pdCount,"COUNT_development_projects_id > 1") - countParcelD = arcpy.GetCount_management(pdCount) - logger.info("There are {} of parcels with multiple project points (more than 1) on them".format(countParcelD)) - - # 7 BUILDINGS TO ADD INSTEAD OF BUILD - # change a short list of activity to add - # first doing it for the pipeline file - pList_pipeline= [row[0] for row in arcpy.da.SearchCursor(ppCount, 'geom_id')] - if "8016918253805" not in pList_pipeline: - pList_pipeline.append('8016918253805') - if "9551692992638" not in pList_pipeline: - pList_pipeline.append('9551692992638') - with arcpy.da.UpdateCursor(pipeline_fc, ["geom_id","action"]) as cursor: - for row in cursor: - if row[0] in pList_pipeline: - row[1] = 'add' - cursor.updateRow(row) - # second doing it for the development project file - pList_dev= [row[0] for row in arcpy.da.SearchCursor(pdCount, 'geom_id')] - if "8016918253805" not in pList_pipeline: - pList_dev.append('8016918253805') - if "9551692992638" not in pList_pipeline: - pList_dev.append('9551692992638') - with arcpy.da.UpdateCursor(devproj_fc, ["geom_id","action"]) as cursor: - for row in cursor: - if row[0] in pList_dev: - row[1] = 'add' - cursor.updateRow(row) - - # change NaNs in non_residential_sqft to 0 - with arcpy.da.UpdateCursor(pipeline_fc, "non_residential_sqft") as cursor: - for row in cursor: - if row[0] is None: - row[0] = 0 - cursor.updateRow(row) - - with arcpy.da.UpdateCursor(devproj_fc, "non_residential_sqft") as cursor: - for row in cursor: - if row[0] is None: - row[0] = 0 - cursor.updateRow(row) - - #reordering before making the output - new_field_order = ["OBJECTID","Shape","development_projects_id", "raw_id", "building_name", "site_name", "action", - "scen0", "scen1", "scen2", "scen3", "scen4", "scen5", "scen6", "scen7", "scen10", "scen11", "scen12", "scen15", "scen20", "scen21", "scen22", "scen23", "scen24", "scen25","scen26", "scen27", "scen28", - "scen29","address", "city", "zip", "county", "x", "y", "geom_id", "year_built","building_type_det","building_type", "building_type_id","development_type_id", "building_sqft", "non_residential_sqft", "residential_units", "unit_ave_sqft", - "tenure", "rent_type", "stories", "parking_spaces", "average_weighted_rent", "last_sale_year", "last_sale_price", "deed_restricted_units","source", "PARCEL_ID", "ZONE_ID", "edit_date", "editor"] - pipeline_fc_reordered = 'pipeline_reordered' - devproj_fc_reordered = 'devproj_reordered' - reorder_fields(pipeline_fc, pipeline_fc_reordered, new_field_order) - reorder_fields(devproj_fc, devproj_fc_reordered, new_field_order) - - # append the alt2 mallpub projects into the devproj list - arcpy.CalculateField_management(mallpub_alt2, "edit_date", 20210308) - arcpy.CalculateField_management(mallpub_alt2, "editor", "'BL'") - arcpy.DeleteField_management(mallpub_alt2, 'globalid') - arcpy.Append_management(mallpub_alt2, devproj_fc_reordered, 'NO_TEST') - - count = arcpy.GetCount_management(devproj_fc_reordered) - i = 1 - with arcpy.da.UpdateCursor(devproj_fc_reordered, "development_projects_id") as cursor: - for row in cursor: - if i <= int(count[0]) : - row[0] = i - i = i + 1 - cursor.updateRow(row) - - #we are only keeping one set of data. 
move this blolock of code to the end - #export csv to folder -- remember to change fold path when run on other machines - pipeline_output = "{}_pipeline.csv".format(NOW) - arcpy.TableToTable_conversion(pipeline_fc_reordered, WORKING_DIR, pipeline_output) - logger.info("Wrote {}".format(os.path.join(WORKING_DIR,pipeline_output))) - - development_project_output = "{}_development_projects.csv".format(NOW) - arcpy.TableToTable_conversion(devproj_fc_reordered, WORKING_DIR, development_project_output) - logger.info("Wrote {}".format(os.path.join(WORKING_DIR,development_project_output))) - - #long_cols that were cutoff are 'development_proj', 'non_residential_', 'development_type' , 'deed_restricted_' - pipeline_df = pd.read_csv(os.path.join(WORKING_DIR, pipeline_output)) - pipeline_df = pipeline_df.rename(columns = {'development_proj' : 'development_projects_id', - 'non_residential_' : 'non_residential_sqft', - 'development_type' : 'development_type_id', - 'average_weighted' : 'average_weighted_rent', - 'building_type_de' : 'building_type_det', - 'residential_unit' : 'residential_units', - - 'deed_restricted_' : 'deed_restricted_units'}) - development_project_df = pd.read_csv(os.path.join(WORKING_DIR, development_project_output)) - development_project_df = development_project_df.rename(columns = {'development_proj' : 'development_projects_id', - 'non_residential_' : 'non_residential_sqft', - 'development_type' : 'development_type_id', - 'residential_unit' : 'residential_units', - 'average_weighted' : 'average_weighted_rent', - 'building_type_de' : 'building_type_det', - 'deed_restricted_' : 'deed_restricted_units'}) - #fix int column problem in csv - field_types = {"OBJECTID" : "int", - "development_projects_id":"int", - "raw_id": "int", - "scen0": "int", - "scen1": "int", - "scen2": "int", - "scen3": "int", - "scen4": "int", - "scen5": "int", - "scen6": "int", - "scen7": "int", - "scen10": "int", - "scen11": "int", - "scen12": "int", - "scen15": "int", - "scen20": "int", - "scen21": "int", - "scen22": "int", - "scen23": "int", - "scen24": "int", - "scen25": "int", - "scen26": "int", - "scen27": "int", - "scen28": "int", - "scen29": "int", - "geom_id": "int64", - "year_built": "int", - "building_type_id": "int", - "development_type_id":"int", - "building_sqft": "int", - "non_residential_sqft":"int", - "residential_units":"int", - "stories":"int", - "deed_restricted_units":"int", - "PARCEL_ID":"int", - "ZONE_ID": "int"} - for key, value in field_types.items(): - pipeline_df[key] = pipeline_df[key].fillna(0) - development_project_df[key] = development_project_df[key].fillna(0) - if key == 'geom_id' or key == 'PARCEL_ID': - pipeline_df[key] = pipeline_df[key].round(0).astype(value) - development_project_df[key] = development_project_df[key].round(0).astype(value) - else: - pipeline_df[key] = pipeline_df[key].astype(value) - development_project_df[key] = development_project_df[key].astype(value) - - res_type = ['HS','HT','HM','GQ','MR'] - nonres_type = ['MT','ME','VP','OF','HO','SC','IL','IW','IH','RS','RB','VA','PG','OT'] - - pipeline_df.loc[(pipeline_df['residential_units'] < 0) & (pipeline_df.building_type.isin(res_type)),'residential_units'] = 0 - pipeline_df.loc[(pipeline_df['residential_units'] != 0) & (pipeline_df.building_type.isin(nonres_type)),'residential_units'] = 0 - - development_project_df.loc[(development_project_df['residential_units'] < 0) & (development_project_df.building_type.isin(res_type)),'residential_units'] = 0 - 
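The field_types loop above repairs integer columns that come back from TableToTable_conversion as floats with NaNs. A minimal stand-alone sketch of that repair, assuming the same rule as above (geom_id and PARCEL_ID are rounded before casting); the function name fix_int_columns is an illustrative assumption.

import pandas as pd

def fix_int_columns(df, field_types):
    # field_types maps column name -> target dtype string (e.g. "int", "int64").
    for column, dtype in field_types.items():
        df[column] = df[column].fillna(0)
        if column in ("geom_id", "PARCEL_ID"):
            # these arrive as large floats, so round before casting
            df[column] = df[column].round(0).astype(dtype)
        else:
            df[column] = df[column].astype(dtype)
    return df

# usage sketch with toy data (the geom_id value is one cited above):
# demo = pd.DataFrame({"year_built": [2015.0, None], "geom_id": [8016918253805.0, None]})
# demo = fix_int_columns(demo, {"year_built": "int", "geom_id": "int64"})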
development_project_df.loc[(development_project_df['residential_units'] != 0) & (development_project_df.building_type.isin(nonres_type)),'residential_units'] = 0 - - pipeline_df.to_csv(os.path.join(WORKING_DIR, pipeline_output), index = False) - development_project_df.to_csv(os.path.join(WORKING_DIR, development_project_output), index = False) - - #adding the two map files into a new gdb - #first create that new gdb -- right now save and locally and upload manually - out_name = "{}_devproj.gdb".format(NOW) - arcpy.CreateFileGDB_management(WORKING_DIR, out_name) - logger.info("Created {}".format(out_name)) - - #second, move file to the new gdb - fcs = [pipeline_fc_reordered, devproj_fc_reordered] - for fc in fcs: - arcpy.FeatureClassToFeatureClass_conversion(fc, os.path.join(WORKING_DIR, out_name), - arcpy.Describe(fc).name) - - # 8 adding 2011-2015 projects to buildings - pipeline = 'pipeline_reordered' - arcpy.FeatureClassToFeatureClass_conversion(pipeline, arcpy.env.workspace, - 'p1115', "year_built >= 2011 AND year_built <= 2015") - p1115 = 'p1115' - arcpy.AlterField_management(p1115, "PARCEL_ID", "b_PARCEL_ID") - arcpy.AlterField_management(p1115, "residential_units", "b_residential_units") - arcpy.AlterField_management(p1115, "unit_ave_sqft", "b_unit_ave_sqft") - arcpy.AlterField_management(p1115, "building_sqft", "b_building_sqft") - arcpy.AlterField_management(p1115, "year_built", "b_year_built") - arcpy.AlterField_management(p1115, "stories", "b_stories") - - arcpy.AddField_management(p1115, "building_id", "LONG") - arcpy.AddField_management(p1115, "parcel_id", "LONG") - arcpy.AddField_management(p1115, "improvement_value", "DOUBLE") - arcpy.AddField_management(p1115, "residential_units", "LONG") - arcpy.AddField_management(p1115, "residential_sqft", "LONG") - arcpy.AddField_management(p1115, "sqft_per_unit", "DOUBLE") - arcpy.AddField_management(p1115, "non_residential_sqft", "LONG") - arcpy.AddField_management(p1115, "building_sqft", "DOUBLE") - arcpy.AddField_management(p1115, "nonres_rent_per_sqft", "DOUBLE") - arcpy.AddField_management(p1115, "res_price_per_sqft", "DOUBLE") - arcpy.AddField_management(p1115, "stories", "LONG") - arcpy.AddField_management(p1115, "year_built", "LONG") - arcpy.AddField_management(p1115, "redfin_sale_price", "DOUBLE") - arcpy.AddField_management(p1115, "redfin_sale_year", "DOUBLE") - arcpy.AddField_management(p1115, "redfin_home_type", "TEXT","","","800") - arcpy.AddField_management(p1115, "costar_property_type", "TEXT","","","800") - arcpy.AddField_management(p1115, "costar_rent", "TEXT","","","800") - - #arcpy.CalculateField_management(p1115, "building_id", ) - arcpy.CalculateField_management(p1115, "parcel_id", "!b_PARCEL_ID!") - #arcpy.CalculateField_management(p1115, "development_type_id",) - #arcpy.CalculateField_management(p1115, "improvement_value",) - arcpy.CalculateField_management(p1115, "residential_units", "!b_residential_units!") - #arcpy.CalculateField_management(p1115, "residential_sqft", ) - arcpy.CalculateField_management(p1115, "sqft_per_unit", "!b_unit_ave_sqft!") - #arcpy.CalculateField_management(p1115, "non_residential_sqft",) - arcpy.CalculateField_management(p1115, "building_sqft", "!b_building_sqft!") - #arcpy.CalculateField_management(p1115, "nonres_rent_per_sqft", ) - #arcpy.CalculateField_management(p1115, "res_price_per_sqft", ) - arcpy.CalculateField_management(p1115, "stories", "!b_stories!") - arcpy.CalculateField_management(p1115, "year_built", "!b_year_built!") - arcpy.CalculateField_management(p1115, 
"redfin_sale_price", "!last_sale_price!") - #arcpy.CalculateField_management(p1115, "redfin_sale_year", "!last_sale_year!") - #arcpy.CalculateField_management(p1115, "redfin_home_type", ) - #arcpy.CalculateField_management(p1115, "costar_property_type", ) - arcpy.CalculateField_management(p1115, "costar_rent", "!average_weighted_rent!") - - arcpy.FeatureClassToFeatureClass_conversion(p1115, arcpy.env.workspace,'p1115_add', "action = 'add'") - arcpy.FeatureClassToFeatureClass_conversion(p1115, arcpy.env.workspace,'p1115_build', "action = 'build'") - - p1115_add = 'p1115_add' - p1115_build = 'p1115_build' - - FCfields = [f.name for f in arcpy.ListFields(p1115_add)] - DontDeleteFields = ["OBJECTID","Shape","building_id","parcel_id","development_type_id", "improvement_value", "residential_units", "residential_sqft", "sqft_per_unit", - "non_residential_sqft","building_sqft","nonres_rent_per_sqft","res_price_per_sqft","stories","year_built", "redfin_sale_price","redfin_sale_year", - "redfin_home_type","costar_property_type","costar_rent","building_type","building_type_id","development_type_id"] - fields2Delete = list(set(FCfields) - set(DontDeleteFields)) - arcpy.DeleteField_management(p1115_add, fields2Delete) - arcpy.DeleteField_management(p1115_build, fields2Delete) #because the two dataset should have the same structure - - b10_smelt = os.path.join(SMELT_GDB, "b10") - logger.info("Reading 2010 building file {}".format(b10_smelt)) - arcpy.TableToTable_conversion(b10_smelt, arcpy.env.workspace,'b10') - b10 = 'b10' - arcpy.AddField_management(b10, "building_type", "TEXT","","","800") - arcpy.AddField_management(b10, "building_type_id", "LONG") - - with arcpy.da.UpdateCursor(b10, ["development_type_id","building_type","building_type_id"]) as cursor: - for row in cursor: - if row[0] == 1: - row[1] = "HS" - row[2] = 1 - elif row[0] == 2: - row[1] = 'HM' - row[2] = 3 - elif row[0] == 3: - row[1] = 'HM' - row[2] = 3 - elif row[0] == 4: - row[1] = 'HM' - row[2] = 3 - elif row[0] == 5: - row[1] = 'MR' - row[2] = 12 - elif row[0] == 6: - row[1] = 'GQ' - row[2] = 3 - elif row[0] == 7: - row[1] = 'RS' - row[2] = 10 - elif row[0] == 8: - row[1] = 'RB' - row[2] = 11 - elif row[0] == 9: - row[1] = 'HO' - row[2] = 5 - elif row[0] == 10: - row[1] = 'OF' - row[2] = 4 - elif row[0] == 11: - row[1] = 'ME' - row[2] = 14 - elif row[0] == 12: - row[1] = 'OF' - row[2] = 4 - elif row[0] == 13: - row[1] = 'IW' - row[2] = 8 - elif row[0] == 14: - row[1] = 'IL' - row[2] = 7 - elif row[0] == 15: - row[1] = 'IH' - row[2] = 9 - elif row[0] == 16: - row[1] == 'IL' - row[2] = 7 - elif row[0] == 17: - row[1] = 'SC' - row[2] = 6 - elif row[0] == 18: - row[1] = 'SC' - row[2] = 6 - elif row[0] == 19: - row[1] = 'OF' - row[2] = 4 - elif row[0] == 20: - row[1] = 'VP' - row[2] = 0 - elif row[0] == 21: - row[1] = 'VA' - row[2] = 0 - elif row[0] == 22: - row[1] = 'PG' - row[2] = 16 - elif row[0] == 23: - row[1] = 'PA' - row[2] = 15 - elif row[0] == 24: - row[1] = 'VP' - row[2] = 0 - elif row[0] == 25: - row[1] = 'VA' - row[2] = 0 - cursor.updateRow(row) - - arcpy.DeleteField_management(b10, 'id') - - #the approach is: - #1. simply merge the projects with action == add - #2. 
find out the parcel ids where projects would be built in p1115_build, then remove those parcels in b10, the merge the build file - #need to build some diagnostic stuff to compare what was there that gets removed, and what's added - - #part 1: add the projects - b10_p1115_part1 = 'b10_p1115_part1' - mergeList = [b10,p1115_add] - arcpy.Merge_management(mergeList, b10_p1115_part1) - - #create a copy of the merged file for diagnostics - arcpy.TableToTable_conversion(b10_p1115_part1, arcpy.env.workspace,'b10_p1115_part1_copy') - - #part 2: remove and merge - parcelBuildList = [row[0] for row in arcpy.da.SearchCursor(p1115_build, 'parcel_id')] - with arcpy.da.UpdateCursor(b10_p1115_part1, "parcel_id") as cursor: - for row in cursor: - if row[0] in parcelBuildList: - cursor.deleteRow() - - rawp10_b15_pba50 = 'rawp10_b15_pba50_{}'.format(NOW)[0:26] #delete ".time" part, because that dot breaks it. - mergeList2 = [b10_p1115_part1,p1115_build] - arcpy.Merge_management(mergeList2, rawp10_b15_pba50) - - btnull = 'btnull' ##stands for building type null - arcpy.MakeTableView_management(rawp10_b15_pba50,btnull,"building_type is NULL") - nullcount = arcpy.GetCount_management(btnull) - logger.info("Building file list has {} records with building type info missing".format(nullcount)) - arcpy.Delete_management(btnull) - - #diagnotics using the copy - b10_p1115_part1_copy = 'b10_p1115_part1_copy' - with arcpy.da.UpdateCursor(b10_p1115_part1_copy, "parcel_id") as cursor: - for row in cursor: - if row[0] not in parcelBuildList: - cursor.deleteRow() - - del cursor, row - - arcpy.Statistics_analysis(b10_p1115_part1_copy, 'removed_units', [["residential_units", "SUM"]]) - cursor = arcpy.SearchCursor('removed_units','','', 'SUM_residential_units') - row = cursor.next() - sum_value1 = row.getValue('SUM_residential_units') - - arcpy.Statistics_analysis(b10_p1115_part1_copy, 'removed_nonres', [["non_residential_sqft", "SUM"]]) - cursor = arcpy.SearchCursor('removed_nonres','','', 'SUM_non_residential_sqft') - row = cursor.next() - sum_value2 = row.getValue('SUM_non_residential_sqft') - - arcpy.Statistics_analysis(p1115_build, 'built_units', [["residential_units", "SUM"]]) - cursor = arcpy.SearchCursor('built_units','','', 'SUM_residential_units') - row = cursor.next() - sum_value3 = row.getValue('SUM_residential_units') - - arcpy.Statistics_analysis(p1115_build, 'built_nonres', [["non_residential_sqft", "SUM"]]) - cursor = arcpy.SearchCursor('built_nonres','','', 'SUM_non_residential_sqft') - row = cursor.next() - sum_value4 = row.getValue('SUM_non_residential_sqft') - - if sum_value1 >= sum_value3: - logger.info("There is a net decrease of {} units from {} units to {} units after incorporating the 'built' projects".format(sum_value1 - sum_value3, sum_value1, sum_value3)) - else: - logger.info("There is a net increase of {} units from {} units to {} units after incorporating the 'built' projects".format(sum_value3 - sum_value1,sum_value1, sum_value3)) - if sum_value2 >= sum_value4: - logger.info("There is a net decrease of {} square feet of nonresidential from {} sqft to {} sqft after incorporating the 'built' projects".format(sum_value2 - sum_value4, sum_value2, sum_value4)) - else: - logger.info("There is a net increase of {} square feet of nonresidential from {} sqft to {} sqft after incorporating the 'built' projects".format(sum_value4 - sum_value2, sum_value2, sum_value4)) - - building_output = "{}_buildings.csv".format(NOW) - arcpy.TableToTable_conversion(rawp10_b15_pba50, WORKING_DIR, building_output) - 
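The diagnostics above repeat the same pattern several times: Statistics_analysis to sum a single field, then a cursor to read the one summary row back. A minimal sketch of that pattern as a helper; the function name field_sum is an illustrative assumption, and arcpy.da.SearchCursor is used here in place of the older SearchCursor form.

import arcpy

def field_sum(fc, field_name, out_table):
    # Sum field_name over fc into out_table, then return the single SUM_<field> value.
    arcpy.Statistics_analysis(fc, out_table, [[field_name, "SUM"]])
    with arcpy.da.SearchCursor(out_table, ["SUM_" + field_name]) as cursor:
        for (total,) in cursor:
            return total

# usage sketch:
# sum_value3 = field_sum(p1115_build, "residential_units", "built_units")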
building_df = pd.read_csv(os.path.join(WORKING_DIR, building_output)) - building_df = building_df.rename(columns = {'development_type' : 'development_type_id', - 'improvement_valu' : 'improvement_value', - 'residential_unit' : 'residential_units', - 'non_residential_' : 'non_residential_sqft', - 'nonres_rent_per_' : 'nonres_rent_per_sqft', - 'res_price_per_sq' : 'res_price_per_sqft', - 'redfin_sale_pric' : 'redfin_sale_price', - 'costar_property_' : 'costar_property_type'}) - - #fix int column problem in csv - field_types_building = {"OBJECTID" : "int", - "building_id": "int", - "parcel_id": "int", - "stories":"int", - "year_built": "int", - "building_type_id": "int", - "development_type_id":"int", - "building_sqft": "int", - "non_residential_sqft":"int", - "residential_units":"int", - "residential_sqft":"int"} - - for key, value in field_types_building.items(): - building_df[key] = building_df[key].fillna(0) - building_df[key] = building_df[key].astype(value) - building_df.to_csv(os.path.join(WORKING_DIR, building_output), index = False) - logger.info("Transform {} to building table".format(rawp10_b15_pba50)) - - - - +if __name__ == "__main__": + + # create logger + logger = logging.getLogger(__name__) + logger.setLevel("DEBUG") + + # console handler + ch = logging.StreamHandler() + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) + logger.addHandler(ch) + # file handler + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) + logger.addHandler(fh) + + logger.info("WORKING_DIR = {}".format(WORKING_DIR)) + logger.info("WORKSPACE_GDB = {}".format(WORKSPACE_GDB)) + logger.info("SMELT_GDB = {}".format(SMELT_GDB)) + + # list info about SMELT_GDB + arcpy.env.workspace = SMELT_GDB + logger.info("workspace: {}".format(arcpy.env.workspace)) + for dataset in arcpy.ListDatasets(): + logger.info(" dataset: {}".format(dataset)) + logger.info( + " feature classes: {} ".format( + arcpy.ListFeatureClasses(feature_dataset=dataset) + ) + ) + + logger.info(" feature classes: {} ".format(arcpy.ListFeatureClasses())) + logger.info(" tables: {} ".format(arcpy.ListTables())) + + arcpy.CreateFileGDB_management(WORKING_DIR, WORKSPACE_GDB) + arcpy.env.workspace = os.path.join(WORKING_DIR, WORKSPACE_GDB) + + # get an empty list to add feature class to so that they can be merged in the end all together + dev_projects_temp_layers = [] + + # create another empty geom_id list to use for checking and removing duplicates, the idea is that, once a dataset has been cleaned + # before it gets added to the development projects temp layers, it needs to check against the geom_ids that are already in this list + # not sure how cumbersome this approach would be + geoList = [] + + # count geom_id is null + gidnull = "gidnull" + + countOne = countRow(manual_dp) + logger.info( + "Feature Class {} has {} records with incl = 1".format(manual_dp, countOne) + ) + joinFN = "ttt_" + arcpy.Describe(manual_dp).name + "_p10_pba50" + dev_projects_temp_layers.append(joinFN) + # based on Mike's ranking, start with manual dp list + try: + count = arcpy.GetCount_management(joinFN) + if int(count[0]) > 100: + logger.info( + "Found layer {} with {} rows -- skipping creation".format( + joinFN, int(count[0]) + ) + ) + + except: + # go ahead and create it + ### 1 SPATIAL JOINS + logger.info( + "Creating layer {} by 
spatial joining manual pipeline data ({}) and parcels ({})".format( + joinFN, manual_dp, p10_pba50 + ) + ) + arcpy.SpatialJoin_analysis(manual_dp, p10_pba50, joinFN) + # rename any conflicting field names + + arcpy.AlterField_management(joinFN, "building_name", "m_building_name") + arcpy.AlterField_management(joinFN, "site_name", "m_site_name") + arcpy.AlterField_management(joinFN, "year_built", "m_year_built") + arcpy.AlterField_management(joinFN, "parcel_id", "m_parcel_id") + arcpy.AlterField_management(joinFN, "last_sale_price", "m_last_sale_price") + arcpy.AlterField_management(joinFN, "last_sale_year", "m_sale_date") + arcpy.AlterField_management(joinFN, "stories", "m_stories") + arcpy.AlterField_management(joinFN, "residential_units", "m_residential_units") + arcpy.AlterField_management(joinFN, "unit_ave_sqft", "m_unit_ave_sqft") + arcpy.AlterField_management(joinFN, "zip", "m_zips") + arcpy.AlterField_management( + joinFN, "Average_Weighted_Rent", "m_average_weighted_rent" + ) + arcpy.AlterField_management(joinFN, "x", "p_x") + arcpy.AlterField_management(joinFN, "y", "p_y") + arcpy.AlterField_management( + joinFN, "geom_id", "p_geom_id" + ) # this is from the parcel file + # add fields and calc values + # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, + # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, + # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, + # last_sale_year,last_sale_price,source,edit_date,editor,version + # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain}) + arcpy.AddField_management(joinFN, "development_projects_id", "LONG") + arcpy.AddField_management(joinFN, "raw_id", "LONG") + arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "site_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "scen0", "SHORT") + arcpy.AddField_management(joinFN, "scen1", "SHORT") + arcpy.AddField_management(joinFN, "scen2", "SHORT") + arcpy.AddField_management(joinFN, "scen3", "SHORT") + arcpy.AddField_management(joinFN, "scen4", "SHORT") + arcpy.AddField_management(joinFN, "scen5", "SHORT") + arcpy.AddField_management(joinFN, "scen6", "SHORT") + arcpy.AddField_management(joinFN, "scen7", "SHORT") + arcpy.AddField_management(joinFN, "scen10", "SHORT") + arcpy.AddField_management(joinFN, "scen11", "SHORT") + arcpy.AddField_management(joinFN, "scen12", "SHORT") + arcpy.AddField_management(joinFN, "scen15", "SHORT") + arcpy.AddField_management(joinFN, "scen20", "SHORT") + arcpy.AddField_management(joinFN, "scen21", "SHORT") + arcpy.AddField_management(joinFN, "scen22", "SHORT") + arcpy.AddField_management(joinFN, "scen23", "SHORT") + arcpy.AddField_management(joinFN, "scen24", "SHORT") + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "zip", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", 
"DOUBLE") + arcpy.AddField_management(joinFN, "year_built", "SHORT") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "stories", "SHORT") + arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") + arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") + ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work + arcpy.AddField_management(joinFN, "last_sale_year", "DATE") + arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") + arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") + arcpy.AddField_management(joinFN, "source", "TEXT", "", "", 15) + arcpy.AddField_management(joinFN, "edit_date", "LONG") + arcpy.AddField_management(joinFN, "editor", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "version", "SHORT") + if not arcpy.ListFields(joinFN, "incl"): + arcpy.AddField_management(joinFN, "incl", "SHORT") + + arcpy.CalculateField_management(joinFN, "raw_id", "!manual_dp_id!") + arcpy.CalculateField_management(joinFN, "building_name", "!m_building_name!") + arcpy.CalculateField_management(joinFN, "site_name", "!m_site_name!") + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!m_parcel_id!") + arcpy.CalculateField_management(joinFN, "scen0", 1) + arcpy.CalculateField_management(joinFN, "scen1", 1) + arcpy.CalculateField_management(joinFN, "scen2", 1) + arcpy.CalculateField_management(joinFN, "scen3", 1) + arcpy.CalculateField_management(joinFN, "scen4", 1) + arcpy.CalculateField_management(joinFN, "scen5", 1) + arcpy.CalculateField_management(joinFN, "scen6", 1) + arcpy.CalculateField_management(joinFN, "scen7", 1) + arcpy.CalculateField_management(joinFN, "scen10", 1) + arcpy.CalculateField_management(joinFN, "scen11", 1) + arcpy.CalculateField_management(joinFN, "scen12", 1) + arcpy.CalculateField_management(joinFN, "scen15", 1) + arcpy.CalculateField_management(joinFN, "scen20", 1) + arcpy.CalculateField_management(joinFN, "scen21", 1) + arcpy.CalculateField_management(joinFN, "scen22", 1) + arcpy.CalculateField_management(joinFN, "scen23", 1) + arcpy.CalculateField_management(joinFN, "scen24", 1) + arcpy.CalculateField_management(joinFN, "scen25", 1) + arcpy.CalculateField_management(joinFN, "scen26", 1) + arcpy.CalculateField_management(joinFN, "scen27", 1) + arcpy.CalculateField_management(joinFN, "scen28", 1) + arcpy.CalculateField_management( + joinFN, "scen29", 1 + ) # these are committed so 1 for all scens + # not sure how to change zip field type + # arcpy.CalculateField_management(joinFN, "zip", '!m_zip!') + arcpy.CalculateField_management( + joinFN, "x", "!X_1!" + ) # use spatial info from parcel file + arcpy.CalculateField_management( + joinFN, "y", "!Y_1!" + ) # use spatial info from parcel file + arcpy.CalculateField_management( + joinFN, "geom_id", "!p_geom_id!" + ) # use spatial info from parcel file + arcpy.CalculateField_management(joinFN, "year_built", "!m_year_built!") + # arcpy.CalculateField_management(joinFN, "duration", ) + arcpy.CalculateField_management( + joinFN, "residential_units", "!m_residential_units!" + ) + arcpy.CalculateField_management(joinFN, "unit_ave_sqft", "!m_unit_ave_sqft!") + arcpy.CalculateField_management(joinFN, "stories", "!m_stories!") + arcpy.CalculateField_management( + joinFN, "average_weighted_rent", "!m_average_weighted_rent!" 
+ ) + # arcpy.CalculateField_management(joinFN, "rent_ave_sqft", ) + # arcpy.CalculateField_management(joinFN, "rent_ave_unit", ) + arcpy.CalculateField_management( + joinFN, "last_sale_year", "!m_sale_date!" + ) # need to make into year + arcpy.CalculateField_management( + joinFN, "last_sale_price", "!m_last_sale_price!" + ) + arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0) + arcpy.CalculateField_management(joinFN, "source", "'manual'") + arcpy.CalculateField_management(joinFN, "edit_date", 20200429) + arcpy.CalculateField_management(joinFN, "editor", "'MKR'") + # arcpy.CalculateField_management(joinFN, "version", ) + # remove row where incl != 1 + with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: + for row in cursor: + if row[0] != 1: + cursor.deleteRow() + # check to make sure that the number of remaining records in the temp file (which should still have var incl) is the same as the raw file + countTwo = countRow(joinFN) + if countTwo == countOne: + logger.info( + "All records with incl = 1 in feature class {} are included in the temp file".format( + manual_dp + ) + ) + else: + logger.fatal("Something is wrong in the code, please check") + raise + ### 3 DELETE OTHER FIELDS + FCfields = [f.name for f in arcpy.ListFields(joinFN)] + # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed + DontDeleteFields = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + "Shape_Length", + "Shape_Area", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(joinFN, fields2Delete) + arcpy.MakeTableView_management(joinFN, gidnull, "geom_id is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + + gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")] + for geo in gList: + geoList.append(geo) + + ### for costar data + ### create a list of feature class + cs = [cs1115, cs1620] + for fc in cs: + countOne = countRow(fc) + logger.info( + "Feature Class {} has {} records with incl = 1".format(fc, countOne) + ) + joinFN = "ttt_" + arcpy.Describe(fc).name + "__p10_pba50" + dev_projects_temp_layers.append(joinFN) + + ### 1 SPATIAL JOINS + logger.info( + "Creating layer {} by spatial joining costar ({}) and parcels ({})".format( + joinFN, fc, p10_pba50 + ) + ) + arcpy.SpatialJoin_analysis(fc, p10_pba50, joinFN) + ### 2 VARIABLE CLEANING + + # rename any conflicting field names + arcpy.AlterField_management(joinFN, "building_name", "cs_building_name") + arcpy.AlterField_management(joinFN, "parcel_id", "cs_parcel_id") + arcpy.AlterField_management(joinFN, "city", "cs_city") + arcpy.AlterField_management(joinFN, "Zip", "cs_zip") + 
arcpy.AlterField_management(joinFN, "rent_type", "cs_rent_type") + arcpy.AlterField_management(joinFN, "year_built", "cs_year_built") + arcpy.AlterField_management(joinFN, "last_sale_price", "cs_last_sale_price") + arcpy.AlterField_management(joinFN, "last_sale_date", "cs_last_sale_date") + arcpy.AlterField_management( + joinFN, "Average_Weighted_Rent", "cs_average_weighted_rent" + ) + arcpy.AlterField_management(joinFN, "x", "p_x") + arcpy.AlterField_management(joinFN, "y", "p_y") + arcpy.AlterField_management( + joinFN, "geom_id", "p_geom_id" + ) # this is from the parcel + + # add fields and calc values + # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, + # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, + # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, + # last_sale_year,last_sale_price,source,edit_date,editor,version + # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain}) + arcpy.AddField_management(joinFN, "development_projects_id", "LONG") + arcpy.AddField_management(joinFN, "raw_id", "LONG") + arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "site_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "action", "TEXT", "", "", 10) + arcpy.AddField_management(joinFN, "scen0", "SHORT") + arcpy.AddField_management(joinFN, "scen1", "SHORT") + arcpy.AddField_management(joinFN, "scen2", "SHORT") + arcpy.AddField_management(joinFN, "scen3", "SHORT") + arcpy.AddField_management(joinFN, "scen4", "SHORT") + arcpy.AddField_management(joinFN, "scen5", "SHORT") + arcpy.AddField_management(joinFN, "scen6", "SHORT") + arcpy.AddField_management(joinFN, "scen7", "SHORT") + arcpy.AddField_management(joinFN, "scen10", "SHORT") + arcpy.AddField_management(joinFN, "scen11", "SHORT") + arcpy.AddField_management(joinFN, "scen12", "SHORT") + arcpy.AddField_management(joinFN, "scen15", "SHORT") + arcpy.AddField_management(joinFN, "scen20", "SHORT") + arcpy.AddField_management(joinFN, "scen21", "SHORT") + arcpy.AddField_management(joinFN, "scen22", "SHORT") + arcpy.AddField_management(joinFN, "scen23", "SHORT") + arcpy.AddField_management(joinFN, "scen24", "SHORT") + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "address", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "city", "TEXT", "", "", 50) + arcpy.AddField_management( + joinFN, "zip", "TEXT", "", "", 50 + ) ## this is changed from LONG to TEXT because cs1115 file has some text formatted zipcode with "-" + arcpy.AddField_management(joinFN, "county", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") + arcpy.AddField_management(joinFN, "year_built", "SHORT") + arcpy.AddField_management(joinFN, "duration", "SHORT") + arcpy.AddField_management(joinFN, "building_type_id", "LONG") + arcpy.AddField_management(joinFN, "building_type", "TEXT", "", 
"", 4) + arcpy.AddField_management(joinFN, "building_sqft", "LONG") + arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "tenure", "TEXT", "", "", 5) + arcpy.AddField_management(joinFN, "rent_type", "TEXT", "", "", 25) + arcpy.AddField_management(joinFN, "stories", "SHORT") + arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") + arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") + arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") + ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work + arcpy.AddField_management(joinFN, "last_sale_year", "DATE") + arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") + arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") + arcpy.AddField_management(joinFN, "source", "TEXT", "", "", 15) + arcpy.AddField_management(joinFN, "edit_date", "LONG") + arcpy.AddField_management(joinFN, "editor", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "version", "SHORT") + if not arcpy.ListFields(joinFN, "incl"): + arcpy.AddField_management(joinFN, "incl", "SHORT") + + arcpy.CalculateField_management(joinFN, "raw_id", "!PropertyID!") + arcpy.CalculateField_management(joinFN, "building_name", "!cs_building_name!") + arcpy.CalculateField_management(joinFN, "site_name", "!Building_Park!") + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!cs_parcel_id!") + arcpy.CalculateField_management( + joinFN, "action", "'build'" + ) # need to quote marks here + arcpy.CalculateField_management(joinFN, "scen0", 1) + arcpy.CalculateField_management(joinFN, "scen1", 1) + arcpy.CalculateField_management(joinFN, "scen2", 1) + arcpy.CalculateField_management(joinFN, "scen3", 1) + arcpy.CalculateField_management(joinFN, "scen4", 1) + arcpy.CalculateField_management(joinFN, "scen5", 1) + arcpy.CalculateField_management(joinFN, "scen6", 1) + arcpy.CalculateField_management(joinFN, "scen7", 1) + arcpy.CalculateField_management(joinFN, "scen10", 1) + arcpy.CalculateField_management(joinFN, "scen11", 1) + arcpy.CalculateField_management(joinFN, "scen12", 1) + arcpy.CalculateField_management(joinFN, "scen15", 1) + arcpy.CalculateField_management(joinFN, "scen20", 1) + arcpy.CalculateField_management(joinFN, "scen21", 1) + arcpy.CalculateField_management(joinFN, "scen22", 1) + arcpy.CalculateField_management(joinFN, "scen23", 1) + arcpy.CalculateField_management(joinFN, "scen24", 1) + arcpy.CalculateField_management(joinFN, "scen25", 1) + arcpy.CalculateField_management(joinFN, "scen26", 1) + arcpy.CalculateField_management(joinFN, "scen27", 1) + arcpy.CalculateField_management(joinFN, "scen28", 1) + arcpy.CalculateField_management( + joinFN, "scen29", 1 + ) # these are committed so 1 for all scens + arcpy.CalculateField_management(joinFN, "address", "!Building_Address!") + arcpy.CalculateField_management(joinFN, "city", "!cs_city!") + arcpy.CalculateField_management(joinFN, "zip", "!cs_zip!") + arcpy.CalculateField_management(joinFN, "county", "!County_Name!") + arcpy.CalculateField_management( + joinFN, "x", "!p_x!" + ) # use spatial info from parcel file + arcpy.CalculateField_management( + joinFN, "y", "!p_y!" 
+        )  # use spatial info from parcel file
+        arcpy.CalculateField_management(
+            joinFN, "geom_id", "!p_geom_id!"
+        )  # use spatial info from parcel file
+        arcpy.CalculateField_management(joinFN, "year_built", "!cs_year_built!")
+        # arcpy.CalculateField_management(joinFN, "duration", )
+        # arcpy.CalculateField_management(joinFN, "building_type_id", )
+        arcpy.CalculateField_management(joinFN, "building_type", "!det_bldg_type!")
+        arcpy.CalculateField_management(
+            joinFN, "building_sqft", "!Rentable_Building_Area!"
+        )  # how often null for res
+        arcpy.CalculateField_management(
+            joinFN, "non_residential_sqft", "!Rentable_Building_Area!"
+        )  # need to zero out for res
+        arcpy.CalculateField_management(
+            joinFN, "residential_units", "!Number_Of_Units!"
+        )
+        arcpy.CalculateField_management(joinFN, "unit_ave_sqft", "!Avg_Unit_SF!")
+        arcpy.CalculateField_management(joinFN, "tenure", "'Rent'")
+        arcpy.CalculateField_management(
+            joinFN, "rent_type", "!cs_rent_type!"
+        )  # need to clean
+        arcpy.CalculateField_management(joinFN, "stories", "!Number_Of_Stories!")
+        # there is a wrong parking space value in one of the tables, so adding this as a workaround
+        with arcpy.da.UpdateCursor(
+            joinFN, ["Number_Of_Parking_Spaces", "parking_spaces"]
+        ) as cursor:
+            for row in cursor:
+                if len(str((row[0]))) <= 5:  ## short integer only holds values up to 32,767
+                    row[1] = row[0]
+                cursor.updateRow(row)
+        # arcpy.CalculateField_management(joinFN, "parking_spaces", '!Number_Of_Parking_Spaces!')
+        arcpy.CalculateField_management(
+            joinFN, "average_weighted_rent", "!cs_average_weighted_rent!"
+        )
+        # arcpy.CalculateField_management(joinFN, "rent_ave_sqft", )
+        # arcpy.CalculateField_management(joinFN, "rent_ave_unit", )
+        arcpy.CalculateField_management(
+            joinFN, "last_sale_year", "!cs_last_sale_date!"
+        )  # need to make into year
+        arcpy.CalculateField_management(
+            joinFN, "last_sale_price", "!cs_last_sale_price!"
+ ) + + with arcpy.da.UpdateCursor( + joinFN, ["cs_rent_type", "residential_units", "deed_restricted_units"] + ) as cursor: + for row in cursor: + if row[0] == "Affordable": + row[2] = row[1] + elif row[0] == "Market/Affordable": + row[2] = int(row[1] // 5) + else: + row[2] = 0 + cursor.updateRow(row) + + arcpy.CalculateField_management(joinFN, "source", "'cs'") + arcpy.CalculateField_management(joinFN, "edit_date", 20200429) + arcpy.CalculateField_management(joinFN, "editor", "'MKR'") + # arcpy.CalculateField_management(joinFN, "version", ) + + # remove row where incl != 1 + with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: + for row in cursor: + if row[0] != 1: + cursor.deleteRow() + + # check all incl = 1 records are included + countTwo = countRow(joinFN) + if countTwo == countOne: + logger.info( + "All records with incl = 1 in feature class {} is included in the temp file".format( + fc + ) + ) + else: + logger.fatal("Something is wrong in the code, please check") + raise + + ### 3 DELETE OTHER FIELDS AND TEMP FILES + FCfields = [f.name for f in arcpy.ListFields(joinFN)] + # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed + DontDeleteFields = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + "Shape_Length", + "Shape_Area", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(joinFN, fields2Delete) + + # zero out non res sqft for residential types (HS, HM, HT) + with arcpy.da.UpdateCursor( + joinFN, ["building_type", "non_residential_sqft"] + ) as cursor: + for row in cursor: + if row[0] == "HT": + row[1] = 0 + elif row[0] == "HS": + row[1] = 0 + elif row[0] == "HM": + row[1] = 0 + cursor.updateRow(row) + + gidnull = "gidnull" + arcpy.MakeTableView_management(joinFN, gidnull, "geom_id is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + + ###4 REMOVE DUPLICATES + # check again existing geomList and remove duplicates + with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: + for row in cursor: + if row[0] in geoList: + cursor.deleteRow() + + # then add the geoms in the geomList + gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")] + for geo in gList: + geoList.append(geo) + + ### for BASIS pipeline data + countOne = countRow(basis_pipeline) + logger.info( + "Feature Class {} has {} records with incl = 1".format(basis_pipeline, countOne) + ) + joinFN = "ttt_basispp_p10_pba50" + dev_projects_temp_layers.append(joinFN) + + ### 1 SPATIAL JOINS + logger.info( + "Creating layer {} by spatial joining BASIS pipeline data ({}) and parcels ({})".format( + joinFN, basis_pipeline, p10_pba50 + ) + ) + 
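+    # the spatial join below drops each BASIS pipeline point onto the p10 parcel it falls on,
+    # so the record can take its x/y and geom_id from that parcel later in this block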
arcpy.SpatialJoin_analysis(basis_pipeline, p10_pba50, joinFN) + + ### 2 VARIABLE CLEANING + # rename any conflicting field names + arcpy.AlterField_management(joinFN, "county", "b_county") + arcpy.AlterField_management(joinFN, "parcel_id", "b_parcel_id") + arcpy.AlterField_management(joinFN, "raw_id", "b_id") + arcpy.AlterField_management(joinFN, "year_built", "b_year_built") + arcpy.AlterField_management(joinFN, "zip", "b_zip") + arcpy.AlterField_management(joinFN, "stories", "b_stories") + arcpy.AlterField_management(joinFN, "x", "p_x") # this is from the parcel centroid + arcpy.AlterField_management(joinFN, "y", "p_y") # this is from the parcel centroid + arcpy.AlterField_management( + joinFN, "geom_id", "p_geom_id" + ) # this is from the parcel + arcpy.AlterField_management(joinFN, "residential_units", "p_residential_units") + arcpy.AlterField_management(joinFN, "edit_date", "p_edit_date") + # add fields and calc values + # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, + # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, + # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, + # last_sale_year,last_sale_price,source,edit_date,editor,version + # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain}) + arcpy.AddField_management(joinFN, "development_projects_id", "LONG") + arcpy.AddField_management(joinFN, "raw_id", "LONG") + arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "site_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "action", "TEXT", "", "", 10) + arcpy.AddField_management(joinFN, "scen0", "SHORT") + arcpy.AddField_management(joinFN, "scen1", "SHORT") + arcpy.AddField_management(joinFN, "scen2", "SHORT") + arcpy.AddField_management(joinFN, "scen3", "SHORT") + arcpy.AddField_management(joinFN, "scen4", "SHORT") + arcpy.AddField_management(joinFN, "scen5", "SHORT") + arcpy.AddField_management(joinFN, "scen6", "SHORT") + arcpy.AddField_management(joinFN, "scen7", "SHORT") + arcpy.AddField_management(joinFN, "scen10", "SHORT") + arcpy.AddField_management(joinFN, "scen11", "SHORT") + arcpy.AddField_management(joinFN, "scen12", "SHORT") + arcpy.AddField_management(joinFN, "scen15", "SHORT") + arcpy.AddField_management(joinFN, "scen20", "SHORT") + arcpy.AddField_management(joinFN, "scen21", "SHORT") + arcpy.AddField_management(joinFN, "scen22", "SHORT") + arcpy.AddField_management(joinFN, "scen23", "SHORT") + arcpy.AddField_management(joinFN, "scen24", "SHORT") + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "address", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "city", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "zip", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "county", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") + 
arcpy.AddField_management(joinFN, "year_built", "SHORT") + arcpy.AddField_management(joinFN, "duration", "SHORT") + arcpy.AddField_management(joinFN, "building_type_id", "LONG") + arcpy.AddField_management(joinFN, "building_type", "TEXT", "", "", 4) + arcpy.AddField_management(joinFN, "building_sqft", "LONG") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "tenure", "TEXT", "", "", 5) + arcpy.AddField_management(joinFN, "rent_type", "TEXT", "", "", 25) + arcpy.AddField_management(joinFN, "stories", "SHORT") + arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") + arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") + arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") + ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work + arcpy.AddField_management(joinFN, "last_sale_year", "DATE") + arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") + arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") + arcpy.AddField_management(joinFN, "source", "TEXT", "", "", 15) + arcpy.AddField_management(joinFN, "edit_date", "LONG") + if not arcpy.ListFields(joinFN, "incl"): + arcpy.AddField_management(joinFN, "incl", "SHORT") + + arcpy.CalculateField_management(joinFN, "building_name", "!project_name!") + arcpy.CalculateField_management( + joinFN, "action", "'build'" + ) # need to quote marks here + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!b_parcel_id!") + arcpy.CalculateField_management(joinFN, "scen0", 1) + arcpy.CalculateField_management(joinFN, "scen1", 1) + arcpy.CalculateField_management(joinFN, "scen2", 1) + arcpy.CalculateField_management(joinFN, "scen3", 1) + arcpy.CalculateField_management(joinFN, "scen4", 1) + arcpy.CalculateField_management(joinFN, "scen5", 1) + arcpy.CalculateField_management(joinFN, "scen6", 1) + arcpy.CalculateField_management(joinFN, "scen7", 1) + arcpy.CalculateField_management(joinFN, "scen10", 1) + arcpy.CalculateField_management(joinFN, "scen11", 1) + arcpy.CalculateField_management(joinFN, "scen12", 1) + arcpy.CalculateField_management(joinFN, "scen15", 1) + arcpy.CalculateField_management(joinFN, "scen20", 1) + arcpy.CalculateField_management(joinFN, "scen21", 1) + arcpy.CalculateField_management(joinFN, "scen22", 1) + arcpy.CalculateField_management(joinFN, "scen23", 1) + arcpy.CalculateField_management(joinFN, "scen24", 1) + arcpy.CalculateField_management(joinFN, "scen25", 1) + arcpy.CalculateField_management(joinFN, "scen26", 1) + arcpy.CalculateField_management(joinFN, "scen27", 1) + arcpy.CalculateField_management(joinFN, "scen28", 1) + arcpy.CalculateField_management( + joinFN, "scen29", 1 + ) # these are committed so 1 for all scens + arcpy.CalculateField_management(joinFN, "address", "!street_address!") + arcpy.CalculateField_management(joinFN, "city", "!mailing_city_name!") + ##arcpy.CalculateField_management(joinFN, "zip", '!b_zip!') ##not sure how to convert text to long data type + arcpy.CalculateField_management(joinFN, "county", "!b_county!") + arcpy.CalculateField_management(joinFN, "x", "!p_x!") + arcpy.CalculateField_management(joinFN, "y", "!p_y!") + arcpy.CalculateField_management(joinFN, "geom_id", "!p_geom_id!") + arcpy.CalculateField_management(joinFN, "year_built", "!b_year_built!") + 
arcpy.CalculateField_management(joinFN, "building_type", "!building_type_det!") + arcpy.CalculateField_management( + joinFN, "building_sqft", "!building_sqft!" + ) # how often null for res + arcpy.CalculateField_management( + joinFN, "residential_units", "!p_residential_units!" + ) + arcpy.CalculateField_management(joinFN, "tenure", "'Rent'") ##what is tenure + arcpy.CalculateField_management(joinFN, "stories", "!b_stories!") + arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0) + arcpy.CalculateField_management(joinFN, "source", "'basis'") + arcpy.CalculateField_management(joinFN, "edit_date", 20200429) + # arcpy.CalculateField_management(joinFN, "version", ) + + # remove row where incl != 1 + with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: + for row in cursor: + if row[0] != 1: + cursor.deleteRow() + + # remove Vallco project pointed out by Mark: parcel_id 1445028 + with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: + for row in cursor: + if row[0] == 1445028: + cursor.deleteRow() + + # check all incl = 1 records are included + countTwo = countRow(joinFN) + if countTwo == countOne - 1: # deleting one project + logger.info( + "All records with incl = 1 in feature class {} are included in the temp file".format( + basis_pipeline + ) + ) + else: + logger.fatal("Something is wrong in the code, please check") + raise + + ### 3 DELETE OTHER FIELDS + FCfields = [f.name for f in arcpy.ListFields(joinFN)] + # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed + DontDeleteFields = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + "Shape_Length", + "Shape_Area", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(joinFN, fields2Delete) + + gidnull = "gidnull" + arcpy.MakeTableView_management(joinFN, gidnull, "geom_id is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + + ###4 REMOVE DUPLICATES + # check again existing geomList and remove duplicates + with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: + for row in cursor: + if row[0] in geoList: + cursor.deleteRow() + # then add the geoms in the geomList + gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")] + for geo in gList: + geoList.append(geo) + + ### for basis_pb + countOne = countRow(basis_pb_new) + logger.info( + "Feature Class {} has {} records with incl = 1".format(basis_pb_new, countOne) + ) + joinFN = "ttt_basis_pb_new_p10__pba50" + dev_projects_temp_layers.append(joinFN) + + logger.info( + "Creating layer {} by spatial joining basis pba pipeline data ({}) and parcels ({})".format( + joinFN, basis_pb_new, p10_pba50 + ) + ) + 
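+    # the raw basis_pb_new geom_id is dropped before the join below, presumably so it does not
+    # collide with the parcel layer's GEOM_ID, which is what gets carried through (as n_geom_id)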
arcpy.DeleteField_management( + basis_pb_new, "geom_id" + ) # this column is causing trouble + arcpy.SpatialJoin_analysis(basis_pb_new, p10_pba50, joinFN) + + # remove records on parcels where there are no increase in residential units -- in comparsion to b10 table + # first count existing rows + cnt1 = arcpy.GetCount_management(joinFN) + # examine building + b10_smelt = os.path.join(SMELT_GDB, "b10") + arcpy.TableToTable_conversion(b10_smelt, arcpy.env.workspace, "b10") + b10 = "b10" + arcpy.analysis.Statistics(b10, "b10_unitSUM", "residential_units SUM", "parcel_id") + nonZero = arcpy.SelectLayerByAttribute_management( + "b10_unitSUM", "NEW_SELECTION", '"SUM_residential_units" > 0' + ) # choose only parcels with residential units already + arcpy.CopyRows_management(nonZero, "nonZeroParcel") + arcpy.MakeFeatureLayer_management(joinFN, "basis_join", "", arcpy.env.workspace) + arcpy.AddJoin_management( + "basis_join", "PARCEL_ID", "nonZeroParcel", "parcel_id", "KEEP_COMMON" + ) + # arcpy.SelectLayerByAttribute_management(joinFN, "NEW_SELECTION", "ttt_basis_pb_new_p10__pba50.urbansim_parcels_v3_geo_county_id = 85", None) + # find parcels to remove + parcelRemoveList = [] + with arcpy.da.SearchCursor( + "basis_join", + [ + "ttt_basis_pb_new_p10__pba50.parcel_id", + "ttt_basis_pb_new_p10__pba50.residential_units", + "nonZeroParcel.SUM_residential_units", + ], + ) as cursor: + for row in cursor: + if row[1] is not None: + if row[1] - row[2] == 0: + parcelRemoveList.append(row[0]) + logger.info( + "There are {} records in basis_pb_new that do not see increase in residential unit counts on the parcel".format( + len(parcelRemoveList) + ) + ) + # remove join + # arcpy.RemoveJoin_management(joinFN, "nonZeroParcel") + # arcpy.SelectLayerByAttribute_management(joinFN, "CLEAR_SELECTION") + + arcpy.AlterField_management(joinFN, "year_built", "n_year_built") + arcpy.AlterField_management(joinFN, "building_sqft", "n_building_sqft") + arcpy.AlterField_management(joinFN, "residential_units", "n_residential_units") + arcpy.AlterField_management(joinFN, "X", "n_x") + arcpy.AlterField_management(joinFN, "Y", "n_y") + arcpy.AlterField_management(joinFN, "GEOM_ID", "n_geom_id") + arcpy.AlterField_management(joinFN, "parcel_id", "n_parcel_id") + + arcpy.AddField_management(joinFN, "development_projects_id", "LONG") + arcpy.AddField_management(joinFN, "raw_id", "LONG") + arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "site_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "action", "TEXT", "", "", 10) + arcpy.AddField_management(joinFN, "scen0", "SHORT") + arcpy.AddField_management(joinFN, "scen1", "SHORT") + arcpy.AddField_management(joinFN, "scen2", "SHORT") + arcpy.AddField_management(joinFN, "scen3", "SHORT") + arcpy.AddField_management(joinFN, "scen4", "SHORT") + arcpy.AddField_management(joinFN, "scen5", "SHORT") + arcpy.AddField_management(joinFN, "scen6", "SHORT") + arcpy.AddField_management(joinFN, "scen7", "SHORT") + arcpy.AddField_management(joinFN, "scen10", "SHORT") + arcpy.AddField_management(joinFN, "scen11", "SHORT") + arcpy.AddField_management(joinFN, "scen12", "SHORT") + arcpy.AddField_management(joinFN, "scen15", "SHORT") + arcpy.AddField_management(joinFN, "scen20", "SHORT") + arcpy.AddField_management(joinFN, "scen21", "SHORT") + arcpy.AddField_management(joinFN, "scen22", "SHORT") + arcpy.AddField_management(joinFN, "scen23", "SHORT") + 
arcpy.AddField_management(joinFN, "scen24", "SHORT") + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "address", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "city", "TEXT", "", "", 50) + arcpy.AddField_management( + joinFN, "zip", "TEXT", "", "", 50 + ) ## this is changed from LONG to TEXT because cs1115 file has some text formatted zipcode with "-" + arcpy.AddField_management(joinFN, "county", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") + arcpy.AddField_management(joinFN, "year_built", "SHORT") + arcpy.AddField_management(joinFN, "duration", "SHORT") + arcpy.AddField_management(joinFN, "building_type_id", "LONG") + arcpy.AddField_management(joinFN, "building_sqft", "LONG") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "tenure", "TEXT", "", "", 5) + arcpy.AddField_management(joinFN, "rent_type", "TEXT", "", "", 25) + arcpy.AddField_management(joinFN, "stories", "SHORT") + arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") + arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") + arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") + ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work + arcpy.AddField_management(joinFN, "last_sale_year", "DATE") + arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") + arcpy.AddField_management(joinFN, "source", "TEXT", "", "", 15) + arcpy.AddField_management(joinFN, "edit_date", "LONG") + arcpy.AddField_management(joinFN, "editor", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "version", "SHORT") + + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!n_parcel_id!") + arcpy.CalculateField_management(joinFN, "scen0", 1) + arcpy.CalculateField_management(joinFN, "scen1", 1) + arcpy.CalculateField_management(joinFN, "scen2", 1) + arcpy.CalculateField_management(joinFN, "scen3", 1) + arcpy.CalculateField_management(joinFN, "scen4", 1) + arcpy.CalculateField_management(joinFN, "scen5", 1) + arcpy.CalculateField_management(joinFN, "scen6", 1) + arcpy.CalculateField_management(joinFN, "scen7", 1) + arcpy.CalculateField_management(joinFN, "scen10", 1) + arcpy.CalculateField_management(joinFN, "scen11", 1) + arcpy.CalculateField_management(joinFN, "scen12", 1) + arcpy.CalculateField_management(joinFN, "scen15", 1) + arcpy.CalculateField_management(joinFN, "scen21", 1) + arcpy.CalculateField_management(joinFN, "scen22", 1) + arcpy.CalculateField_management(joinFN, "scen23", 1) + # instead of deleting remodel projects records, do not include them in the scenarios + with arcpy.da.UpdateCursor( + joinFN, ["PARCEL_ID", "scen20", "scen24", "scen25"] + ) as cursor: + for row in cursor: + if row[0] in parcelRemoveList: + row[1] = 0 + row[2] = 0 + row[3] = 0 + else: + row[1] = 1 + row[2] = 1 + row[3] = 1 + cursor.updateRow(row) + arcpy.CalculateField_management(joinFN, "scen26", "!scen25!") + arcpy.CalculateField_management(joinFN, "scen27", "!scen25!") + 
arcpy.CalculateField_management(joinFN, "scen28", "!scen25!")
+    arcpy.CalculateField_management(joinFN, "scen29", "!scen25!")
+    arcpy.CalculateField_management(joinFN, "action", "'build'")
+    arcpy.CalculateField_management(joinFN, "city", "!urbansim_parcels_v3_geo_city!")
+    # map numeric county FIPS codes to county names
+    with arcpy.da.UpdateCursor(
+        joinFN, ["urbansim_parcels_v3_geo_county", "county"]
+    ) as cursor:
+        for row in cursor:
+            if row[0] == 1:
+                row[1] = "Alameda"
+            elif row[0] == 13:
+                row[1] = "Contra Costa"
+            elif row[0] == 41:
+                row[1] = "Marin"
+            elif row[0] == 55:
+                row[1] = "Napa"
+            elif row[0] == 75:
+                row[1] = "San Francisco"
+            elif row[0] == 81:
+                row[1] = "San Mateo"
+            elif row[0] == 85:
+                row[1] = "Santa Clara"
+            elif row[0] == 95:
+                row[1] = "Solano"
+            elif row[0] == 97:
+                row[1] = "Sonoma"
+            cursor.updateRow(row)
+    arcpy.CalculateField_management(joinFN, "x", "!n_x!")
+    arcpy.CalculateField_management(joinFN, "y", "!n_y!")
+    arcpy.CalculateField_management(joinFN, "geom_id", "!n_geom_id!")
+    arcpy.CalculateField_management(joinFN, "year_built", "!n_year_built!")
+    arcpy.CalculateField_management(joinFN, "building_sqft", "!n_building_sqft!")
+    arcpy.CalculateField_management(
+        joinFN, "residential_units", "!n_residential_units!"
+    )
+
+    # only compute average unit size when both inputs are present and units > 0
+    with arcpy.da.UpdateCursor(
+        joinFN, ["building_sqft", "residential_units", "unit_ave_sqft"]
+    ) as cursor:
+        for row in cursor:
+            if row[0] is not None and row[1] is not None and row[1] > 0:
+                row[2] = row[0] / row[1]
+            cursor.updateRow(row)
+
+    arcpy.CalculateField_management(
+        joinFN, "last_sale_year", "!last_sale_date!"
+    )  # need to make into year
+    arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0)
+    arcpy.CalculateField_management(joinFN, "source", "'bas_bp_new'")
+    arcpy.CalculateField_management(joinFN, "edit_date", 20200429)
+    arcpy.CalculateField_management(joinFN, "editor", "'MKR'")
+
+    with arcpy.da.UpdateCursor(joinFN, "incl") as cursor:
+        for row in cursor:
+            if row[0] != 1:
+                cursor.deleteRow()
+
+    # check to make sure that the number of remaining records in the temp file (which should still have var incl) is the same as the raw file
+    # countTwo = countRow(joinFN)
+    # if countTwo == countOne:
+    #     logger.info("All records with incl = 1 in feature class {} are included in the temp file".format(basis_pb_new))
+    # else:
+    #     logger.fatal("Something is wrong in the code, please check")
+    #     raise
+    ### 3 DELETE OTHER FIELDS
+    FCfields = [f.name for f in arcpy.ListFields(joinFN)]
+    # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed
+    DontDeleteFields = [
+        "OBJECTID",
+        "Shape",
+        "development_projects_id",
+        "raw_id",
+        "building_name",
+        "site_name",
+        "action",
+        "scen0",
+        "scen1",
+        "scen2",
+        "scen3",
+        "scen4",
+        "scen5",
+        "scen6",
+        "scen7",
+        "scen10",
+        "scen11",
+        "scen12",
+        "scen15",
+        "scen20",
+        "scen21",
+        "scen22",
+        "scen23",
+        "scen24",
+        "scen25",
+        "scen26",
+        "scen27",
+        "scen28",
+        "scen29",
+        "address",
+        "city",
+        "zip",
+        "county",
+        "x",
+        "y",
+        "geom_id",
+        "year_built",
+        "building_type",
+        "building_sqft",
+        "non_residential_sqft",
+        "residential_units",
+        "unit_ave_sqft",
+        "tenure",
+        "rent_type",
+        "stories",
+        "parking_spaces",
+        "average_weighted_rent",
+        "last_sale_year",
+        "last_sale_price",
+        "deed_restricted_units",
+        "source",
+        "PARCEL_ID",
+        "ZONE_ID",
+        "edit_date",
+        "editor",
+        "Shape_Length",
+        "Shape_Area",
+    ]
+    fields2Delete = list(set(FCfields) - set(DontDeleteFields))
+    arcpy.DeleteField_management(joinFN, fields2Delete)
+
+    gidnull = "gidnull"
+    arcpy.MakeTableView_management(joinFN, gidnull, "geom_id
is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + ###4 REMOVE DUPLICATES + # check again existing geomList and remove duplicates + with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: + for row in cursor: + if row[0] in geoList: + cursor.deleteRow() + # then add the geoms in the geomList + gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")] + for geo in gList: + geoList.append(geo) + + ### for redfin data + ### create a list of feature class + rf = [rfsfr1619, rfsfr1115, rfmu1619, rfcondo1115, rfother1115] + for fc in rf: + countOne = countRow(fc) + logger.info( + "Feature Class {} has {} records with incl = 1".format(fc, countOne) + ) + joinFN = "ttt_" + arcpy.Describe(fc).name + "__p10_pba50" + dev_projects_temp_layers.append(joinFN) + + ### 1 SPATIAL JOINS + logger.info( + "Creating layer {} by spatial joining redfin ({}) and parcels ({})".format( + joinFN, fc, p10_pba50 + ) + ) + arcpy.SpatialJoin_analysis(fc, os.path.join(SMELT_GDB, p10_pba50), joinFN) + ### 2 VARIABLE CLEANING + + # rename any conflicting field names + arcpy.AlterField_management(joinFN, "CITY", "rf_city") + arcpy.AlterField_management(joinFN, "COUNTY", "rf_county") + arcpy.AlterField_management(joinFN, "YEAR_BUILT", "rf_year_built") + arcpy.AlterField_management(joinFN, "ADDRESS", "rf_address") + arcpy.AlterField_management( + joinFN, "x", "p_x" + ) # this is from the parcel centroid + arcpy.AlterField_management( + joinFN, "y", "p_y" + ) # this is from the parcel centroid + arcpy.AlterField_management( + joinFN, "geom_id", "p_geom_id" + ) # this is from the parcel + arcpy.AlterField_management(joinFN, "parcel_id", "rf_parcel_id") + + # add fields and calc values + # full list development_projects_id,raw_id,building_name,site_name,action,scen0,scen1, + # address,city,zip,county,x,y,geom_id,year_built,duration,building_type_id,building_type,building_sqft,non_residential_sqft, + # residential_units,unit_ave_sqft,tenure,rent_type,stories,parking_spaces,Average Weighted Rent,rent_ave_sqft,rent_ave_unit, + # last_sale_year,last_sale_price,source,edit_date,editor,version + # AddField(in_table, field_name, field_type, {field_precision}, {field_scale}, {field_length}, {field_alias}, {field_is_nullable}, {field_is_required}, {field_domain}) + arcpy.AddField_management(joinFN, "development_projects_id", "LONG") + arcpy.AddField_management(joinFN, "raw_id", "LONG") + arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "site_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "action", "TEXT", "", "", 10) + arcpy.AddField_management(joinFN, "scen0", "SHORT") + arcpy.AddField_management(joinFN, "scen1", "SHORT") + arcpy.AddField_management(joinFN, "scen2", "SHORT") + arcpy.AddField_management(joinFN, "scen3", "SHORT") + arcpy.AddField_management(joinFN, "scen4", "SHORT") + arcpy.AddField_management(joinFN, "scen5", "SHORT") + arcpy.AddField_management(joinFN, "scen6", "SHORT") + arcpy.AddField_management(joinFN, "scen7", "SHORT") + arcpy.AddField_management(joinFN, "scen10", "SHORT") + arcpy.AddField_management(joinFN, "scen11", "SHORT") + arcpy.AddField_management(joinFN, "scen12", "SHORT") + arcpy.AddField_management(joinFN, "scen15", "SHORT") + arcpy.AddField_management(joinFN, "scen20", "SHORT") + arcpy.AddField_management(joinFN, 
"scen21", "SHORT") + arcpy.AddField_management(joinFN, "scen22", "SHORT") + arcpy.AddField_management(joinFN, "scen23", "SHORT") + arcpy.AddField_management(joinFN, "scen24", "SHORT") + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "address", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "city", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "zip", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "county", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") + arcpy.AddField_management(joinFN, "year_built", "SHORT") + arcpy.AddField_management(joinFN, "duration", "SHORT") + arcpy.AddField_management(joinFN, "building_type_id", "LONG") + arcpy.AddField_management(joinFN, "building_type", "TEXT", "", "", 4) + arcpy.AddField_management(joinFN, "building_sqft", "LONG") + arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "tenure", "TEXT", "", "", 5) + arcpy.AddField_management(joinFN, "rent_type", "TEXT", "", "", 25) + arcpy.AddField_management(joinFN, "stories", "SHORT") + arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") + arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") + arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "rent_ave_unit", "SHORT") + arcpy.AddField_management(joinFN, "last_sale_year", "DATE") + arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") + arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") + arcpy.AddField_management(joinFN, "source", "TEXT", "", "", 15) + arcpy.AddField_management(joinFN, "edit_date", "LONG") + arcpy.AddField_management(joinFN, "editor", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "version", "SHORT") + if not arcpy.ListFields(joinFN, "incl"): + arcpy.AddField_management(joinFN, "incl", "SHORT") + + arcpy.CalculateField_management(joinFN, "raw_id", "!redfinid!") + arcpy.CalculateField_management(joinFN, "action", "'build'") + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!rf_parcel_id!") + arcpy.CalculateField_management(joinFN, "scen0", 1) + arcpy.CalculateField_management(joinFN, "scen1", 1) + arcpy.CalculateField_management(joinFN, "scen2", 1) + arcpy.CalculateField_management(joinFN, "scen3", 1) + arcpy.CalculateField_management(joinFN, "scen4", 1) + arcpy.CalculateField_management(joinFN, "scen5", 1) + arcpy.CalculateField_management(joinFN, "scen6", 1) + arcpy.CalculateField_management(joinFN, "scen7", 1) + arcpy.CalculateField_management(joinFN, "scen10", 1) + arcpy.CalculateField_management(joinFN, "scen11", 1) + arcpy.CalculateField_management(joinFN, "scen12", 1) + arcpy.CalculateField_management(joinFN, "scen15", 1) + arcpy.CalculateField_management(joinFN, "scen20", 1) + arcpy.CalculateField_management(joinFN, "scen21", 1) + arcpy.CalculateField_management(joinFN, "scen22", 1) + arcpy.CalculateField_management(joinFN, "scen23", 1) + arcpy.CalculateField_management(joinFN, "scen24", 1) + arcpy.CalculateField_management(joinFN, "scen25", 
1) + arcpy.CalculateField_management(joinFN, "scen26", 1) + arcpy.CalculateField_management(joinFN, "scen27", 1) + arcpy.CalculateField_management(joinFN, "scen28", 1) + arcpy.CalculateField_management( + joinFN, "scen29", 1 + ) # these are committed so 1 for all scens + arcpy.CalculateField_management(joinFN, "address", "!rf_address!") + arcpy.CalculateField_management(joinFN, "city", "!rf_city!") + arcpy.CalculateField_management(joinFN, "county", "!rf_county!") + arcpy.CalculateField_management(joinFN, "x", "!p_x!") + arcpy.CalculateField_management(joinFN, "y", "!p_y!") + arcpy.CalculateField_management(joinFN, "geom_id", "!p_geom_id!") + arcpy.CalculateField_management(joinFN, "year_built", "!rf_year_built!") + if "sfr" in arcpy.Describe(fc).name: + arcpy.CalculateField_management(joinFN, "building_type", "'HS'") + else: + arcpy.CalculateField_management(joinFN, "building_type", "'HM'") + arcpy.CalculateField_management( + joinFN, "building_sqft", "!SQFT!" + ) # how often null for res + arcpy.CalculateField_management( + joinFN, "non_residential_sqft", 0 + ) # seems redfin data are all residential + arcpy.CalculateField_management(joinFN, "residential_units", "!UNITS!") + ###ideally, everything could be done using cursor since it is much faster to run + with arcpy.da.UpdateCursor( + joinFN, ["SQFT", "UNITS", "unit_ave_sqft"] + ) as cursor: + for row in cursor: + row[2] = row[0] / row[1] + cursor.updateRow(row) + arcpy.CalculateField_management( + joinFN, "tenure", "'Sale'" + ) # is redfin data rental? + arcpy.CalculateField_management( + joinFN, "last_sale_year", "!SOLD_DATE!" + ) # need to make into year + arcpy.CalculateField_management(joinFN, "last_sale_price", "!PRICE!") + arcpy.CalculateField_management(joinFN, "deed_restricted_units", 0) + arcpy.CalculateField_management(joinFN, "source", "'rf'") + arcpy.CalculateField_management(joinFN, "edit_date", 20200429) + arcpy.CalculateField_management(joinFN, "editor", "'MKR'") + + # remove row where incl != 1 + with arcpy.da.UpdateCursor(joinFN, "incl") as cursor: + for row in cursor: + if row[0] != 1: + cursor.deleteRow() + + countTwo = countRow(joinFN) + if countTwo == countOne: + logger.info( + "All records with incl = 1 in feature class {} are included in the temp file".format( + fc + ) + ) + else: + logger.fatal("Something is wrong in the code, please check") + raise + + ### 3 DELETE OTHER FIELDS AND TEMP FILES + FCfields = [f.name for f in arcpy.ListFields(joinFN)] + # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed + DontDeleteFields = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + "Shape_Length", + "Shape_Area", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(joinFN, fields2Delete) + + gidnull = "gidnull" + 
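+        # count redfin records that did not pick up a geom_id from the parcel join;
+        # this is only logged, the records themselves are kept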
arcpy.MakeTableView_management(joinFN, gidnull, "geom_id is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + ###4 REMOVE DUPLICATES + # check again existing geomList and remove duplicates + with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: + for row in cursor: + if row[0] in geoList: + cursor.deleteRow() + # then add the geoms in the geomList + gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")] + for geo in gList: + geoList.append(geo) + + ### 5 MERGE ALL INCL=1 POINTS INTO A SINGLE SHP FILE CALLED PIPELINE + # now let's get to the real full pipeline file + pipeline_fc = "pipeline" + logger.info( + "Merging feature classes {} into {}".format( + dev_projects_temp_layers, pipeline_fc + ) + ) + + # merge + arcpy.Merge_management(dev_projects_temp_layers, pipeline_fc) + count = arcpy.GetCount_management(pipeline_fc) + logger.info(" Results in {} rows in {}".format(int(count[0]), pipeline_fc)) + + ### 6 MERGE OPPSITES SHP WITH PIPELINE TO GET DEVELOPMENT PROJECTS + # public sites + joinFN = "ttt_pubsites_p10_pba50" + dev_projects_temp_layers.append(joinFN) + + try: + count = arcpy.GetCount_management(joinFN) + if int(count[0]) > 100: + logger.info( + "Found layer {} with {} rows -- skipping creation".format( + joinFN, int(count[0]) + ) + ) + except: + # go ahead and create it + logger.info( + "Creating layer {} by spatial joining pub sites data ({}) and parcels ({})".format( + joinFN, pub_sites, p10_pba50 + ) + ) + arcpy.SpatialJoin_analysis(pub_sites, p10_pba50, joinFN) + + arcpy.AlterField_management(joinFN, "PARCEL_ID", "pb_parcel_id") + arcpy.AlterField_management(joinFN, "X", "p_x") + arcpy.AlterField_management(joinFN, "Y", "p_y") + arcpy.AlterField_management(joinFN, "GEOM_ID", "pb_geom_id") + arcpy.AlterField_management(joinFN, "scen20", "p_scen20") + arcpy.AlterField_management(joinFN, "scen25", "p_scen25") + + arcpy.AddField_management(joinFN, "development_projects_id", "LONG") + arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "scen20", "SHORT") + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") + arcpy.AddField_management(joinFN, "building_type", "TEXT", "", "", 4) + arcpy.AddField_management(joinFN, "building_sqft", "LONG") + arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") + arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") + arcpy.AddField_management(joinFN, "last_sale_year", "DATE") + arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") + arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") + + # NOTE THAT OPPSITES HAS SCEN SET IN GIS FILE + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!pb_parcel_id!") + arcpy.CalculateField_management( + joinFN, 
"development_projects_id", "!developmen!" + ) + arcpy.CalculateField_management(joinFN, "building_name", "!building_n!") + arcpy.CalculateField_management(joinFN, "scen20", 0) + arcpy.CalculateField_management(joinFN, "scen25", 0) + + # create a list of parcel id of public land projects that are in GGtra + # because just using select and update doesn't work somehow + pub_GGtra = arcpy.SelectLayerByLocation_management( + joinFN, "WITHIN", GGtra, None, "NEW_SELECTION", "NOT_INVERT" + ) + arcpy.CopyFeatures_management(pub_GGtra, "pub_GGtra") + arcpy.SelectLayerByAttribute_management(joinFN, "CLEAR_SELECTION") + pubTraList = [row[0] for row in arcpy.da.SearchCursor(pub_GGtra, "PARCEL_ID")] + with arcpy.da.UpdateCursor(joinFN, ["PARCEL_ID", "scen26"]) as cursor: + for row in cursor: + if row[0] in pubTraList: + row[1] = 1 + else: + row[1] = 0 + cursor.updateRow(row) + + arcpy.CalculateField_management(joinFN, "scen27", 0) + arcpy.CalculateField_management(joinFN, "scen28", 0) + arcpy.CalculateField_management(joinFN, "scen29", 0) + arcpy.CalculateField_management(joinFN, "x", "!p_x!") + arcpy.CalculateField_management(joinFN, "y", "!p_y!") + arcpy.CalculateField_management(joinFN, "geom_id", "!pb_geom_id!") + arcpy.CalculateField_management(joinFN, "building_type", "'MR'") + arcpy.CalculateField_management(joinFN, "building_sqft", "!building_s!") + arcpy.CalculateField_management(joinFN, "non_residential_sqft", "!non_reside!") + arcpy.CalculateField_management(joinFN, "residential_units", "!residentia!") + arcpy.CalculateField_management(joinFN, "unit_ave_sqft", "!unit_ave_s!") + arcpy.CalculateField_management(joinFN, "deed_restricted_units", "!deed_restr!") + + FCfields = [f.name for f in arcpy.ListFields(joinFN)] + # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed + DontDeleteFields = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + "Shape_Length", + "Shape_Area", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(joinFN, fields2Delete) + + gidnull = "gidnull" + arcpy.MakeTableView_management(joinFN, gidnull, "geom_id is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + + scen26count = "scen26count" + arcpy.MakeTableView_management(joinFN, scen26count, "scen26 = 1") + onecount = arcpy.GetCount_management(scen26count) + logger.info("{} list has {} records with scen26 is 1".format(joinFN, onecount)) + arcpy.Delete_management(scen26count) + ###4 REMOVE DUPLICATES + # check again existing geomList and remove duplicates + with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: + for row in cursor: + if row[0] in geoList: + cursor.deleteRow() + # 
then add the geoms in the geomList + gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")] + for geo in gList: + geoList.append(geo) + + joinFN = "ttt_mallsites_p10_pba50" + dev_projects_temp_layers.append(joinFN) + + try: + count = arcpy.GetCount_management(joinFN) + if int(count[0]) > 100: + logger.info( + "Found layer {} with {} rows -- skipping creation".format( + joinFN, int(count[0]) + ) + ) + except: + # go ahead and create it + logger.info( + "Creating layer {} by spatial joining mall office sites data ({}) and parcels ({})".format( + joinFN, mall_sites, p10_pba50 + ) + ) + arcpy.SpatialJoin_analysis(mall_sites, p10_pba50, joinFN) + + arcpy.AlterField_management(joinFN, "PARCEL_ID", "m_parcel_id") + arcpy.AlterField_management(joinFN, "X", "p_x") + arcpy.AlterField_management(joinFN, "Y", "p_y") + arcpy.AlterField_management(joinFN, "GEOM_ID", "m_geom_id") + arcpy.AlterField_management(joinFN, "scen20", "m_scen20") + arcpy.AlterField_management(joinFN, "scen25", "m_scen25") + + arcpy.AddField_management(joinFN, "development_projects_id", "LONG") + arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200) + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") + arcpy.AddField_management(joinFN, "building_type", "TEXT", "", "", 4) + arcpy.AddField_management(joinFN, "building_sqft", "LONG") + arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") + arcpy.AddField_management(joinFN, "average_weighted_rent", "TEXT") + arcpy.AddField_management(joinFN, "last_sale_year", "DATE") + arcpy.AddField_management(joinFN, "last_sale_price", "DOUBLE") + arcpy.AddField_management(joinFN, "deed_restricted_units", "SHORT") + + # NOTE THAT OPPSITES HAS SCEN SET IN GIS FILE + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!m_parcel_id!") + arcpy.CalculateField_management( + joinFN, "development_projects_id", "!developmen!" 
+ ) + arcpy.CalculateField_management(joinFN, "building_name", "!building_n!") + arcpy.CalculateField_management(joinFN, "scen20", 0) + arcpy.CalculateField_management(joinFN, "scen25", 0) + + # create a list of parcel id of public land projects that are in GGtra + # because just using select and update doesn't work somehow + pub_GGtra = arcpy.SelectLayerByLocation_management( + joinFN, "WITHIN", GGtra, None, "NEW_SELECTION", "NOT_INVERT" + ) + arcpy.CopyFeatures_management(pub_GGtra, "pub_GGtra") + arcpy.SelectLayerByAttribute_management(joinFN, "CLEAR_SELECTION") + pubTraList = [row[0] for row in arcpy.da.SearchCursor(pub_GGtra, "PARCEL_ID")] + with arcpy.da.UpdateCursor(joinFN, ["PARCEL_ID", "scen26"]) as cursor: + for row in cursor: + if row[0] in pubTraList: + row[1] = 1 + else: + row[1] = 0 + cursor.updateRow(row) + + arcpy.CalculateField_management(joinFN, "scen27", 0) + arcpy.CalculateField_management(joinFN, "scen28", 0) + arcpy.CalculateField_management(joinFN, "scen29", 0) + arcpy.CalculateField_management(joinFN, "x", "!p_x!") + arcpy.CalculateField_management(joinFN, "y", "!p_y!") + arcpy.CalculateField_management(joinFN, "geom_id", "!m_geom_id!") + arcpy.CalculateField_management(joinFN, "building_type", "'MR'") + arcpy.CalculateField_management(joinFN, "building_sqft", "!building_s!") + arcpy.CalculateField_management(joinFN, "non_residential_sqft", "!non_reside!") + arcpy.CalculateField_management(joinFN, "residential_units", "!residentia!") + arcpy.CalculateField_management(joinFN, "unit_ave_sqft", "!unit_ave_s!") + arcpy.CalculateField_management(joinFN, "deed_restricted_units", "!deed_restr!") + + FCfields = [f.name for f in arcpy.ListFields(joinFN)] + # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed + DontDeleteFields = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + "Shape_Length", + "Shape_Area", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(joinFN, fields2Delete) + + gidnull = "gidnull" + arcpy.MakeTableView_management(joinFN, gidnull, "geom_id is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + + scen26count = "scen26count" + arcpy.MakeTableView_management(joinFN, scen26count, "scen26 = 1") + onecount = arcpy.GetCount_management(scen26count) + logger.info("{} list has {} records with scen26 is 1".format(joinFN, onecount)) + arcpy.Delete_management(scen26count) + ###4 REMOVE DUPLICATES + # check again existing geomList and remove duplicates + # for malls, changing those into add to make sure every mall project is in, per Mark's comment + with arcpy.da.UpdateCursor(joinFN, ["PARCEL_ID", "action"]) as cursor: + 
for row in cursor:
+ if row[0] in geoList:
+ if row[1] == "build":
+ row[1] = "add"
+ cursor.updateRow(row)
+ # then add the geoms in the geomList
+ gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")]
+ for geo in gList:
+ geoList.append(geo)
+
+ # opportunity sites
+ joinFN = "ttt_opp_p10_pba50"
+ dev_projects_temp_layers.append(joinFN)
+
+ try:
+ count = arcpy.GetCount_management(joinFN)
+ if int(count[0]) > 100:
+ logger.info(
+ "Found layer {} with {} rows -- skipping creation".format(
+ joinFN, int(count[0])
+ )
+ )
+ except:
+ # go ahead and create it
+ logger.info(
+ "Creating layer {} by spatial joining opp sites data ({}) and parcels ({})".format(
+ joinFN, opp_sites, p10_pba50
+ )
+ )
+ arcpy.SpatialJoin_analysis(opp_sites, p10_pba50, joinFN)
+
+ arcpy.AlterField_management(joinFN, "year_built", "o_year_built")
+ arcpy.AlterField_management(joinFN, "last_sale_price", "o_last_sale_price")
+ arcpy.AlterField_management(joinFN, "last_sale_year", "o_sale_date")
+ arcpy.AlterField_management(joinFN, "stories", "o_stories")
+ arcpy.AlterField_management(joinFN, "building_name", "o_building_name")
+ arcpy.AlterField_management(joinFN, "site_name", "o_site_name")
+ arcpy.AlterField_management(joinFN, "PARCEL_ID", "o_parcel_id")
+ arcpy.AlterField_management(joinFN, "scen0", "o_scen0")
+ arcpy.AlterField_management(joinFN, "scen1", "o_scen1")
+ arcpy.AlterField_management(joinFN, "scen2", "o_scen2")
+ arcpy.AlterField_management(joinFN, "scen3", "o_scen3")
+ arcpy.AlterField_management(joinFN, "scen4", "o_scen4")
+ arcpy.AlterField_management(joinFN, "scen5", "o_scen5")
+ arcpy.AlterField_management(joinFN, "scen6", "o_scen6")
+ arcpy.AlterField_management(joinFN, "scen7", "o_scen7")
+ arcpy.AlterField_management(joinFN, "scen10", "o_scen10")
+ arcpy.AlterField_management(joinFN, "scen11", "o_scen11")
+ arcpy.AlterField_management(joinFN, "scen12", "o_scen12")
+ arcpy.AlterField_management(joinFN, "scen15", "o_scen15")
+ arcpy.AlterField_management(joinFN, "scen20", "o_scen20")
+ arcpy.AlterField_management(joinFN, "scen21", "o_scen21")
+ arcpy.AlterField_management(joinFN, "scen22", "o_scen22")
+ arcpy.AlterField_management(joinFN, "scen23", "o_scen23")
+ arcpy.AlterField_management(joinFN, "scen24", "o_scen24")
+ arcpy.AlterField_management(joinFN, "scen25", "o_scen25")
+ arcpy.AlterField_management(joinFN, "duration", "o_duration")
+ arcpy.AlterField_management(joinFN, "parking_spaces", "o_parking_spaces")
+ arcpy.AlterField_management(
+ joinFN, "non_residential_sqft", "o_non_residential_sqft"
+ )
+ arcpy.AlterField_management(joinFN, "building_sqft", "o_building_sqft")
+ arcpy.AlterField_management(joinFN, "residential_units", "o_residential_units")
+ arcpy.AlterField_management(joinFN, "unit_ave_sqft", "o_unit_ave_sqft")
+ arcpy.AlterField_management(joinFN, "rent_ave_sqft", "o_rent_ave_sqft")
+ arcpy.AlterField_management(joinFN, "zip", "o_zips")
+ arcpy.AlterField_management(
+ joinFN, "Average_Weighted_Rent", "average_weighted_rent"
+ )
+ arcpy.AlterField_management(joinFN, "x", "o_x")
+ arcpy.AlterField_management(joinFN, "y", "o_y")
+ arcpy.AlterField_management(joinFN, "geom_id", "o_geom_id")
+ arcpy.AlterField_management(joinFN, "geom_id_s", "o_geom_id2")
+ arcpy.AlterField_management(joinFN, "source", "o_source")
+
+ arcpy.AddField_management(joinFN, "development_projects_id", "LONG")
+ arcpy.AddField_management(joinFN, "building_name", "TEXT", "", "", 200)
+ arcpy.AddField_management(joinFN, "site_name", "TEXT", "", "", 200)
+
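# Editorial note (not part of the original diff): the long runs of AddField_management
# calls in this opportunity-sites block repeat the same field names and types used for
# the Redfin, public-sites and mall-sites layers above. A schema-driven loop is one way
# to keep the layers aligned; the dict below is illustrative only and copies a few of
# the fields declared in this section.
#
#     COMMON_FIELDS = {
#         "PARCEL_ID": ("LONG",),
#         "x": ("FLOAT",),
#         "y": ("FLOAT",),
#         "geom_id": ("DOUBLE",),
#         "building_sqft": ("LONG",),
#         "residential_units": ("SHORT",),
#     }
#     for field_name, field_args in COMMON_FIELDS.items():
#         arcpy.AddField_management(joinFN, field_name, *field_args)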
arcpy.AddField_management(joinFN, "PARCEL_ID", "LONG") + arcpy.AddField_management(joinFN, "raw_id", "LONG") + arcpy.AddField_management(joinFN, "scen0", "SHORT") + arcpy.AddField_management(joinFN, "scen1", "SHORT") + arcpy.AddField_management(joinFN, "scen2", "SHORT") + arcpy.AddField_management(joinFN, "scen3", "SHORT") + arcpy.AddField_management(joinFN, "scen4", "SHORT") + arcpy.AddField_management(joinFN, "scen5", "SHORT") + arcpy.AddField_management(joinFN, "scen6", "SHORT") + arcpy.AddField_management(joinFN, "scen7", "SHORT") + arcpy.AddField_management(joinFN, "scen10", "SHORT") + arcpy.AddField_management(joinFN, "scen11", "SHORT") + arcpy.AddField_management(joinFN, "scen12", "SHORT") + arcpy.AddField_management(joinFN, "scen15", "SHORT") + arcpy.AddField_management(joinFN, "scen20", "SHORT") + arcpy.AddField_management(joinFN, "scen21", "SHORT") + arcpy.AddField_management(joinFN, "scen22", "SHORT") + arcpy.AddField_management(joinFN, "scen23", "SHORT") + arcpy.AddField_management(joinFN, "scen24", "SHORT") + arcpy.AddField_management(joinFN, "scen25", "SHORT") + arcpy.AddField_management(joinFN, "scen26", "SHORT") + arcpy.AddField_management(joinFN, "scen27", "SHORT") + arcpy.AddField_management(joinFN, "scen28", "SHORT") + arcpy.AddField_management(joinFN, "scen29", "SHORT") + arcpy.AddField_management(joinFN, "zip", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "x", "FLOAT") + arcpy.AddField_management(joinFN, "y", "FLOAT") + arcpy.AddField_management(joinFN, "geom_id", "DOUBLE") + arcpy.AddField_management(joinFN, "year_built", "SHORT") + arcpy.AddField_management(joinFN, "duration", "SHORT") + arcpy.AddField_management(joinFN, "building_sqft", "LONG") + arcpy.AddField_management(joinFN, "non_residential_sqft", "LONG") + arcpy.AddField_management(joinFN, "residential_units", "SHORT") + arcpy.AddField_management(joinFN, "unit_ave_sqft", "FLOAT") + arcpy.AddField_management(joinFN, "stories", "SHORT") + arcpy.AddField_management(joinFN, "parking_spaces", "SHORT") + arcpy.AddField_management(joinFN, "rent_ave_sqft", "FLOAT") + ###using date for now, as I tried to use datetime.datetime.strptime('cs_sale_date','%m/%d/%Y %I:%M:%S %p').strftime('%Y')) it didn't work + arcpy.AddField_management(joinFN, "edit_date", "LONG") + arcpy.AddField_management(joinFN, "editor", "TEXT", "", "", 50) + arcpy.AddField_management(joinFN, "version", "SHORT") + arcpy.AddField_management(joinFN, "source", "TEXT", "", "", 15) + # NOTE THAT OPPSITES HAS SCEN SET IN GIS FILE + arcpy.CalculateField_management(joinFN, "raw_id", "!opp_id!") + arcpy.CalculateField_management(joinFN, "building_name", "!o_building_name!") + arcpy.CalculateField_management(joinFN, "site_name", "!o_site_name!") + arcpy.CalculateField_management(joinFN, "PARCEL_ID", "!o_parcel_id!") + arcpy.CalculateField_management(joinFN, "scen0", "!o_scen0!") + arcpy.CalculateField_management(joinFN, "scen0", "!o_scen0!") + arcpy.CalculateField_management(joinFN, "scen0", "!o_scen0!") + arcpy.CalculateField_management(joinFN, "scen1", "!o_scen1!") + arcpy.CalculateField_management(joinFN, "scen2", "!o_scen2!") + arcpy.CalculateField_management(joinFN, "scen3", "!o_scen3!") + arcpy.CalculateField_management(joinFN, "scen4", "!o_scen4!") + arcpy.CalculateField_management(joinFN, "scen5", "!o_scen5!") + arcpy.CalculateField_management(joinFN, "scen6", "!o_scen6!") + arcpy.CalculateField_management(joinFN, "scen7", "!o_scen7!") + arcpy.CalculateField_management(joinFN, "scen10", "!o_scen10!") + arcpy.CalculateField_management(joinFN, 
"scen11", "!o_scen11!") + arcpy.CalculateField_management(joinFN, "scen12", "!o_scen12!") + arcpy.CalculateField_management(joinFN, "scen15", "!o_scen15!") + arcpy.CalculateField_management(joinFN, "scen20", 0) + arcpy.CalculateField_management(joinFN, "scen21", "!o_scen21!") + arcpy.CalculateField_management(joinFN, "scen22", "!o_scen22!") + arcpy.CalculateField_management(joinFN, "scen23", "!o_scen23!") + arcpy.CalculateField_management(joinFN, "scen24", "!o_scen23!") + arcpy.CalculateField_management(joinFN, "scen25", 0) + arcpy.CalculateField_management(joinFN, "scen26", "!o_scen23!") + arcpy.CalculateField_management(joinFN, "scen27", 0) + arcpy.CalculateField_management(joinFN, "scen28", "!o_scen23!") + arcpy.CalculateField_management(joinFN, "scen29", 0) + arcpy.CalculateField_management(joinFN, "x", "!X_1!") + arcpy.CalculateField_management(joinFN, "y", "!Y_1!") + arcpy.CalculateField_management(joinFN, "geom_id", "!o_geom_id2!") + arcpy.CalculateField_management(joinFN, "year_built", "!o_year_built!") + arcpy.CalculateField_management(joinFN, "building_sqft", "!o_building_sqft!") + arcpy.CalculateField_management( + joinFN, "non_residential_sqft", "!o_non_residential_sqft!" + ) + arcpy.CalculateField_management( + joinFN, "residential_units", "!o_residential_units!" + ) + arcpy.CalculateField_management(joinFN, "unit_ave_sqft", "!o_unit_ave_sqft!") + # remove mall_office in opp layer by removing pb50_opp + with arcpy.da.UpdateCursor(joinFN, "type") as cursor: + for row in cursor: + if row[0] == "pb50_opp": + cursor.deleteRow() + with arcpy.da.UpdateCursor( + joinFN, ["source", "building_name", "o_source"] + ) as cursor: + for row in cursor: + if row[1] == "incubator": + row[0] = row[1] + elif row[2] == "ppa": + row[0] = row[2] + else: + row[0] = "opp" + cursor.updateRow(row) + arcpy.CalculateField_management(joinFN, "edit_date", 20200611) + arcpy.CalculateField_management(joinFN, "editor", "'MKR'") + + FCfields = [f.name for f in arcpy.ListFields(joinFN)] + # add "rent_ave_sqft", "rent_ave_unit","version", "duration", "building_type_id" if needed + DontDeleteFields = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + "Shape_Length", + "Shape_Area", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(joinFN, fields2Delete) + + gidnull = "gidnull" + arcpy.MakeTableView_management(joinFN, gidnull, "geom_id is NULL") + nullcount = arcpy.GetCount_management(gidnull) + logger.info( + "{} list has {} records with geom_id info missing".format(joinFN, nullcount) + ) + arcpy.Delete_management(gidnull) + ###4 REMOVE DUPLICATES + # check again existing geomList and remove duplicates + with arcpy.da.UpdateCursor(joinFN, "PARCEL_ID") as cursor: + for row in cursor: + if row[0] in geoList: + cursor.deleteRow() + # then add the 
geoms in the geomList + gList = [row[0] for row in arcpy.da.SearchCursor(joinFN, "PARCEL_ID")] + for geo in gList: + geoList.append(geo) + + # not going to check duplicates, since opp sites should not duplicate + + # all non opp sites should be in the list dev_projects_temp_layers already + devproj_fc = "development_project" + logger.info( + "Merging feature classes {} into {}".format( + dev_projects_temp_layers, devproj_fc + ) + ) + + arcpy.Merge_management(dev_projects_temp_layers, devproj_fc) + count = arcpy.GetCount_management(devproj_fc) + logger.info(" Results in {} rows in {}".format(int(count[0]), devproj_fc)) + + # assign unique incremental development_id + i = 1 + with arcpy.da.UpdateCursor(devproj_fc, "development_projects_id") as cursor: + for row in cursor: + if i <= int(count[0]): + row[0] = i + i = i + 1 + cursor.updateRow(row) + + # it's no longer necessary to delete temporary spatial join layers since they're in the temporary WORKSPACE_GDB + + # update mapping of building types from detailed to simplified in both pipeline + arcpy.AlterField_management( + pipeline_fc, "building_type", "building_type_det", "building_type_det" + ) + arcpy.AddField_management(pipeline_fc, "building_type", "TEXT", "", "", "800") + arcpy.AddField_management(pipeline_fc, "building_type_id", "LONG") + arcpy.AddField_management(pipeline_fc, "development_type_id", "LONG") + + with arcpy.da.UpdateCursor( + pipeline_fc, + [ + "building_type_det", + "building_type", + "building_type_id", + "development_type_id", + ], + ) as cursor: + for row in cursor: + if row[0] == "HS": + row[1] = "HS" + row[2] = 1 + row[3] = 1 + elif row[0] == "HT": + row[1] = "HT" + row[2] = 2 + row[3] = 2 + elif row[0] == "HM": + row[1] = "HM" + row[2] = 3 + row[3] = 2 + elif row[0] == "MH": + row[1] = "HM" + row[2] = 3 + row[3] = 4 + elif row[0] == "SR": + row[1] = "HM" + row[2] = 3 + row[3] = 2 + elif row[0] == "AL": + row[1] = "GQ" + row[2] = 3 + row[3] = 6 + elif row[0] == "DM": + row[1] = "GQ" + row[2] = 3 + row[3] = 6 + elif row[0] == "CM": + row[1] = "HM" + row[2] = 3 + row[3] = 2 + elif row[0] == "OF": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "GV": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "HP": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "HO": + row[1] = "HO" + row[2] = 5 + row[3] = 9 + elif row[0] == "SC": + row[1] = "SC" + row[2] = 6 + row[3] = 17 + elif row[0] == "UN": + row[1] = "SC" + row[2] = 6 + row[3] = 18 + elif row[0] == "IL": + row[1] = "IL" + row[2] = 7 + row[2] = 14 + elif row[0] == "FP": + row[1] = "IL" + row[2] = 7 + row[2] = 14 + elif row[0] == "IW": + row[1] = "IW" + row[2] = 8 + row[3] = 13 + elif row[0] == "IH": + row[1] = "IH" + row[2] = 9 + row[3] = 15 + elif row[0] == "RS": + row[1] = "RS" + row[2] = 10 + row[3] = 7 + elif row[0] == "RB": + row[1] = "RB" + row[2] = 11 + row[3] = 8 + elif row[0] == "MR": + row[1] = "MR" + row[2] = 12 + row[3] = 5 + elif row[0] == "MT": + row[1] = "MT" + row[2] = 12 + elif row[0] == "ME": + row[1] = "ME" + row[2] = 14 + row[3] = 11 + elif row[0] == "PA": + row[1] = "VA" + row[2] = 15 + row[3] = 23 + elif row[0] == "PG": + row[1] = "PG" + row[2] = 16 + row[3] = 22 + elif row[0] == "VA": + row[1] = "VA" + row[2] = 0 + row[3] = 21 + elif row[0] == "LR": + row[1] = "RS" + row[2] = 10 + row[3] = 7 + elif row[0] == "VP": + row[1] = "VP" + row[2] = 0 + row[3] = 20 + elif row[0] == "OT": + row[1] = "OT" + row[2] = 0 + elif row[0] == "IN": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "RF": + row[1] = "RS" + 
row[2] = 10 + row[3] = 7 + elif row[0] == "GQ": + row[1] = "GQ" + row[2] = 3 + row[3] = 6 + cursor.updateRow(row) + + ## count missing value + btnull = "btnull" ##stands for building type null + arcpy.MakeTableView_management(pipeline_fc, btnull, "building_type is NULL") + nullcount = arcpy.GetCount_management(btnull) + logger.info( + "Pipeline list has {} records with building type info missing".format(nullcount) + ) + arcpy.Delete_management(btnull) + + arcpy.AlterField_management(pipeline_fc, "building_sqft", "temp_building_sqft") + arcpy.AddField_management(pipeline_fc, "building_sqft", "LONG") + arcpy.SelectLayerByAttribute_management( + pipeline_fc, "NEW_SELECTION", '"residential_units">0' + ) + arcpy.SelectLayerByAttribute_management( + pipeline_fc, "SUBSET_SELECTION", '"non_residential_sqft" = "temp_building_sqft"' + ) + arcpy.SelectLayerByAttribute_management( + pipeline_fc, "SUBSET_SELECTION", '"building_type_id" = 3' + ) # HM + arcpy.CalculateField_management( + pipeline_fc, + "building_sqft", + "!residential_units! * 1400 + !temp_building_sqft! ", + "PYTHON", + ) + + arcpy.SelectLayerByAttribute_management( + pipeline_fc, "NEW_SELECTION", '"residential_units">0' + ) + arcpy.SelectLayerByAttribute_management( + pipeline_fc, "SUBSET_SELECTION", '"non_residential_sqft" = "temp_building_sqft"' + ) + arcpy.SelectLayerByAttribute_management( + pipeline_fc, "SUBSET_SELECTION", '"building_type_id" = 12' + ) # MR + arcpy.CalculateField_management( + pipeline_fc, + "building_sqft", + "!residential_units! * 1400 + !temp_building_sqft! ", + "PYTHON", + ) + + arcpy.SelectLayerByAttribute_management( + pipeline_fc, "NEW_SELECTION", '"building_sqft" is NULL ' + ) + arcpy.CalculateField_management( + pipeline_fc, "building_sqft", "!temp_building_sqft!", "PYTHON" + ) + arcpy.SelectLayerByAttribute_management(pipeline_fc, "CLEAR_SELECTION") + + arcpy.DeleteField_management(pipeline_fc, "temp_building_sqft") + + # same process for development project list + arcpy.AlterField_management( + devproj_fc, "building_type", "building_type_det", "building_type_det" + ) + arcpy.AddField_management(devproj_fc, "building_type", "TEXT", "", "", "800") + arcpy.AddField_management(devproj_fc, "building_type_id", "LONG") + arcpy.AddField_management(devproj_fc, "development_type_id", "LONG") + + with arcpy.da.UpdateCursor( + devproj_fc, + [ + "building_type_det", + "building_type", + "building_type_id", + "development_type_id", + ], + ) as cursor: + for row in cursor: + if row[0] == "HS": + row[1] = "HS" + row[2] = 1 + row[3] = 1 + elif row[0] == "HT": + row[1] = "HT" + row[2] = 2 + row[3] = 2 + elif row[0] == "HM": + row[1] = "HM" + row[2] = 3 + row[3] = 2 + elif row[0] == "MH": + row[1] = "HM" + row[2] = 3 + row[3] = 4 + elif row[0] == "SR": + row[1] = "HM" + row[2] = 3 + row[3] = 2 + elif row[0] == "AL": + row[1] = "GQ" + row[2] = 3 + row[3] = 6 + elif row[0] == "DM": + row[1] = "GQ" + row[2] = 3 + row[3] = 6 + elif row[0] == "CM": + row[1] = "HM" + row[2] = 3 + row[3] = 2 + elif row[0] == "OF": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "GV": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "HP": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "HO": + row[1] = "HO" + row[2] = 5 + row[3] = 9 + elif row[0] == "SC": + row[1] = "SC" + row[2] = 6 + row[3] = 17 + elif row[0] == "UN": + row[1] = "SC" + row[2] = 6 + row[3] = 18 + elif row[0] == "IL": + row[1] = "IL" + row[2] = 7 + row[2] = 14 + elif row[0] == "FP": + row[1] = "IL" + row[2] = 7 + row[2] = 14 + elif row[0] == 
"IW": + row[1] = "IW" + row[2] = 8 + row[3] = 13 + elif row[0] == "IH": + row[1] = "IH" + row[2] = 9 + row[3] = 15 + elif row[0] == "RS": + row[1] = "RS" + row[2] = 10 + row[3] = 7 + elif row[0] == "RB": + row[1] = "RB" + row[2] = 11 + row[3] = 8 + elif row[0] == "MR": + row[1] = "MR" + row[2] = 12 + row[3] = 5 + elif row[0] == "MT": + row[1] = "MT" + row[2] = 12 + elif row[0] == "ME": + row[1] = "ME" + row[2] = 14 + row[3] = 11 + elif row[0] == "PA": + row[1] = "VA" + row[2] = 15 + row[3] = 23 + elif row[0] == "PG": + row[1] = "PG" + row[2] = 16 + row[3] = 22 + elif row[0] == "VA": + row[1] = "VA" + row[2] = 0 + row[3] = 21 + elif row[0] == "LR": + row[1] = "RS" + row[2] = 10 + row[3] = 7 + elif row[0] == "VP": + row[1] = "VP" + row[2] = 0 + row[3] = 20 + elif row[0] == "OT": + row[1] = "OT" + row[2] = 0 + elif row[0] == "IN": + row[1] = "OF" + row[2] = 4 + row[3] = 10 + elif row[0] == "RF": + row[1] = "RS" + row[2] = 10 + row[3] = 7 + elif row[0] == "GQ": + row[1] = "GQ" + row[2] = 3 + row[3] = 6 + cursor.updateRow(row) + ## count missing value + btnull = "btnull" ##stands for building type null + arcpy.MakeTableView_management(devproj_fc, btnull, "building_type is NULL") + nullcount = arcpy.GetCount_management(btnull) + logger.info( + "Development Project list has {} records with building type info missing".format( + nullcount + ) + ) + arcpy.Delete_management(btnull) + + arcpy.AlterField_management(devproj_fc, "building_sqft", "temp_building_sqft") + arcpy.AddField_management(devproj_fc, "building_sqft", "LONG") + arcpy.SelectLayerByAttribute_management( + devproj_fc, "NEW_SELECTION", '"residential_units">0' + ) + arcpy.SelectLayerByAttribute_management( + devproj_fc, "SUBSET_SELECTION", '"non_residential_sqft" = "temp_building_sqft"' + ) + arcpy.SelectLayerByAttribute_management( + devproj_fc, "SUBSET_SELECTION", '"building_type_id" = 3' + ) # HM + arcpy.CalculateField_management( + devproj_fc, + "building_sqft", + "!residential_units! * 1400 + !temp_building_sqft! ", + "PYTHON", + ) + + arcpy.SelectLayerByAttribute_management( + devproj_fc, "NEW_SELECTION", '"residential_units">0' + ) + arcpy.SelectLayerByAttribute_management( + devproj_fc, "SUBSET_SELECTION", '"non_residential_sqft" = "temp_building_sqft"' + ) + arcpy.SelectLayerByAttribute_management( + devproj_fc, "SUBSET_SELECTION", '"building_type_id" = 12' + ) # MR + arcpy.CalculateField_management( + devproj_fc, + "building_sqft", + "!residential_units! * 1400 + !temp_building_sqft! 
", + "PYTHON", + ) + + arcpy.SelectLayerByAttribute_management( + devproj_fc, "NEW_SELECTION", '"building_sqft" is NULL ' + ) + arcpy.CalculateField_management( + devproj_fc, "building_sqft", "!temp_building_sqft!", "PYTHON" + ) + arcpy.SelectLayerByAttribute_management(devproj_fc, "CLEAR_SELECTION") + + arcpy.DeleteField_management(devproj_fc, "temp_building_sqft") + + # 6 DIAGNOSTICS + # number of units total by year + arcpy.Statistics_analysis( + devproj_fc, "res_stats_y", [["residential_units", "SUM"]], "year_built" + ) + # then calculate the total + arcpy.Statistics_analysis(devproj_fc, "res_stats_a", [["residential_units", "SUM"]]) + # get the total result and write into log + cursor = arcpy.SearchCursor("res_stats_a", "", "", "SUM_residential_units") + row = cursor.next() + sum_value = row.getValue("SUM_residential_units") + logger.info( + "Total number of residential units in {} file: {:,} units".format( + devproj_fc, int(sum_value) + ) + ) + + # number of nonres sqft by year + arcpy.Statistics_analysis( + devproj_fc, "nonres_stats_y", [["non_residential_sqft", "SUM"]], "year_built" + ) + # then calculate the total + arcpy.Statistics_analysis( + devproj_fc, "nonres_stats_a", [["non_residential_sqft", "SUM"]] + ) + # get the total result and write into log + cursor = arcpy.SearchCursor("nonres_stats_a", "", "", "SUM_non_residential_sqft") + row = cursor.next() + sum_value = row.getValue("SUM_non_residential_sqft") + logger.info( + "Total number of non residential square footage in {}: {:,} square feet".format( + devproj_fc, int(sum_value) + ) + ) + + # count parcels with more than one points on them - pipeline + # first, there is no development projects id for them, so set value for that + count = arcpy.GetCount_management(pipeline_fc) + i = 1 + with arcpy.da.UpdateCursor(pipeline_fc, "development_projects_id") as cursor: + for row in cursor: + if i <= int(count[0]): + row[0] = i + i = i + 1 + cursor.updateRow(row) + + p_pipeline = "p_pipeline" + arcpy.Statistics_analysis( + pipeline_fc, p_pipeline, [["development_projects_id", "COUNT"]], "geom_id" + ) + # there are projects with geom_id null, so in order to count, delete those first + with arcpy.da.UpdateCursor(p_pipeline, "geom_id") as cursor: + for row in cursor: + if row[0] is None: + cursor.deleteRow() + + ppCount = "ppCount" + arcpy.MakeTableView_management( + p_pipeline, ppCount, "COUNT_development_projects_id > 1" + ) + countParcelP = arcpy.GetCount_management(ppCount) + logger.info( + "There are {} of parcels with multiple project points (more than 1) on them in the pipeline file".format( + countParcelP + ) + ) + + # count parcels with more than one points on them - development projects + p_dev = "p_dev" + arcpy.Statistics_analysis( + devproj_fc, p_dev, [["development_projects_id", "COUNT"]], "geom_id" + ) + # there are projects with geom_id null, so in order to count, delete those first + with arcpy.da.UpdateCursor(p_dev, "geom_id") as cursor: + for row in cursor: + if row[0] is None: + cursor.deleteRow() + + pdCount = "pdCount" + arcpy.MakeTableView_management(p_dev, pdCount, "COUNT_development_projects_id > 1") + countParcelD = arcpy.GetCount_management(pdCount) + logger.info( + "There are {} of parcels with multiple project points (more than 1) on them".format( + countParcelD + ) + ) + + # 7 BUILDINGS TO ADD INSTEAD OF BUILD + # change a short list of activity to add + # first doing it for the pipeline file + pList_pipeline = [row[0] for row in arcpy.da.SearchCursor(ppCount, "geom_id")] + if "8016918253805" not in 
pList_pipeline: + pList_pipeline.append("8016918253805") + if "9551692992638" not in pList_pipeline: + pList_pipeline.append("9551692992638") + with arcpy.da.UpdateCursor(pipeline_fc, ["geom_id", "action"]) as cursor: + for row in cursor: + if row[0] in pList_pipeline: + row[1] = "add" + cursor.updateRow(row) + # second doing it for the development project file + pList_dev = [row[0] for row in arcpy.da.SearchCursor(pdCount, "geom_id")] + if "8016918253805" not in pList_pipeline: + pList_dev.append("8016918253805") + if "9551692992638" not in pList_pipeline: + pList_dev.append("9551692992638") + with arcpy.da.UpdateCursor(devproj_fc, ["geom_id", "action"]) as cursor: + for row in cursor: + if row[0] in pList_dev: + row[1] = "add" + cursor.updateRow(row) + + # change NaNs in non_residential_sqft to 0 + with arcpy.da.UpdateCursor(pipeline_fc, "non_residential_sqft") as cursor: + for row in cursor: + if row[0] is None: + row[0] = 0 + cursor.updateRow(row) + + with arcpy.da.UpdateCursor(devproj_fc, "non_residential_sqft") as cursor: + for row in cursor: + if row[0] is None: + row[0] = 0 + cursor.updateRow(row) + + # reordering before making the output + new_field_order = [ + "OBJECTID", + "Shape", + "development_projects_id", + "raw_id", + "building_name", + "site_name", + "action", + "scen0", + "scen1", + "scen2", + "scen3", + "scen4", + "scen5", + "scen6", + "scen7", + "scen10", + "scen11", + "scen12", + "scen15", + "scen20", + "scen21", + "scen22", + "scen23", + "scen24", + "scen25", + "scen26", + "scen27", + "scen28", + "scen29", + "address", + "city", + "zip", + "county", + "x", + "y", + "geom_id", + "year_built", + "building_type_det", + "building_type", + "building_type_id", + "development_type_id", + "building_sqft", + "non_residential_sqft", + "residential_units", + "unit_ave_sqft", + "tenure", + "rent_type", + "stories", + "parking_spaces", + "average_weighted_rent", + "last_sale_year", + "last_sale_price", + "deed_restricted_units", + "source", + "PARCEL_ID", + "ZONE_ID", + "edit_date", + "editor", + ] + pipeline_fc_reordered = "pipeline_reordered" + devproj_fc_reordered = "devproj_reordered" + reorder_fields(pipeline_fc, pipeline_fc_reordered, new_field_order) + reorder_fields(devproj_fc, devproj_fc_reordered, new_field_order) + + # append the alt2 mallpub projects into the devproj list + arcpy.CalculateField_management(mallpub_alt2, "edit_date", 20210308) + arcpy.CalculateField_management(mallpub_alt2, "editor", "'BL'") + arcpy.DeleteField_management(mallpub_alt2, "globalid") + arcpy.Append_management(mallpub_alt2, devproj_fc_reordered, "NO_TEST") + + count = arcpy.GetCount_management(devproj_fc_reordered) + i = 1 + with arcpy.da.UpdateCursor( + devproj_fc_reordered, "development_projects_id" + ) as cursor: + for row in cursor: + if i <= int(count[0]): + row[0] = i + i = i + 1 + cursor.updateRow(row) + + # we are only keeping one set of data. 
move this blolock of code to the end + # export csv to folder -- remember to change fold path when run on other machines + pipeline_output = "{}_pipeline.csv".format(NOW) + arcpy.TableToTable_conversion(pipeline_fc_reordered, WORKING_DIR, pipeline_output) + logger.info("Wrote {}".format(os.path.join(WORKING_DIR, pipeline_output))) + + development_project_output = "{}_development_projects.csv".format(NOW) + arcpy.TableToTable_conversion( + devproj_fc_reordered, WORKING_DIR, development_project_output + ) + logger.info( + "Wrote {}".format(os.path.join(WORKING_DIR, development_project_output)) + ) + + # long_cols that were cutoff are 'development_proj', 'non_residential_', 'development_type' , 'deed_restricted_' + pipeline_df = pd.read_csv(os.path.join(WORKING_DIR, pipeline_output)) + pipeline_df = pipeline_df.rename( + columns={ + "development_proj": "development_projects_id", + "non_residential_": "non_residential_sqft", + "development_type": "development_type_id", + "average_weighted": "average_weighted_rent", + "building_type_de": "building_type_det", + "residential_unit": "residential_units", + "deed_restricted_": "deed_restricted_units", + } + ) + development_project_df = pd.read_csv( + os.path.join(WORKING_DIR, development_project_output) + ) + development_project_df = development_project_df.rename( + columns={ + "development_proj": "development_projects_id", + "non_residential_": "non_residential_sqft", + "development_type": "development_type_id", + "residential_unit": "residential_units", + "average_weighted": "average_weighted_rent", + "building_type_de": "building_type_det", + "deed_restricted_": "deed_restricted_units", + } + ) + # fix int column problem in csv + field_types = { + "OBJECTID": "int", + "development_projects_id": "int", + "raw_id": "int", + "scen0": "int", + "scen1": "int", + "scen2": "int", + "scen3": "int", + "scen4": "int", + "scen5": "int", + "scen6": "int", + "scen7": "int", + "scen10": "int", + "scen11": "int", + "scen12": "int", + "scen15": "int", + "scen20": "int", + "scen21": "int", + "scen22": "int", + "scen23": "int", + "scen24": "int", + "scen25": "int", + "scen26": "int", + "scen27": "int", + "scen28": "int", + "scen29": "int", + "geom_id": "int64", + "year_built": "int", + "building_type_id": "int", + "development_type_id": "int", + "building_sqft": "int", + "non_residential_sqft": "int", + "residential_units": "int", + "stories": "int", + "deed_restricted_units": "int", + "PARCEL_ID": "int", + "ZONE_ID": "int", + } + for key, value in field_types.items(): + pipeline_df[key] = pipeline_df[key].fillna(0) + development_project_df[key] = development_project_df[key].fillna(0) + if key == "geom_id" or key == "PARCEL_ID": + pipeline_df[key] = pipeline_df[key].round(0).astype(value) + development_project_df[key] = ( + development_project_df[key].round(0).astype(value) + ) + else: + pipeline_df[key] = pipeline_df[key].astype(value) + development_project_df[key] = development_project_df[key].astype(value) + + res_type = ["HS", "HT", "HM", "GQ", "MR"] + nonres_type = [ + "MT", + "ME", + "VP", + "OF", + "HO", + "SC", + "IL", + "IW", + "IH", + "RS", + "RB", + "VA", + "PG", + "OT", + ] + + pipeline_df.loc[ + (pipeline_df["residential_units"] < 0) + & (pipeline_df.building_type.isin(res_type)), + "residential_units", + ] = 0 + pipeline_df.loc[ + (pipeline_df["residential_units"] != 0) + & (pipeline_df.building_type.isin(nonres_type)), + "residential_units", + ] = 0 + + development_project_df.loc[ + (development_project_df["residential_units"] < 0) + & 
(development_project_df.building_type.isin(res_type)), + "residential_units", + ] = 0 + development_project_df.loc[ + (development_project_df["residential_units"] != 0) + & (development_project_df.building_type.isin(nonres_type)), + "residential_units", + ] = 0 + + pipeline_df.to_csv(os.path.join(WORKING_DIR, pipeline_output), index=False) + development_project_df.to_csv( + os.path.join(WORKING_DIR, development_project_output), index=False + ) + + # adding the two map files into a new gdb + # first create that new gdb -- right now save and locally and upload manually + out_name = "{}_devproj.gdb".format(NOW) + arcpy.CreateFileGDB_management(WORKING_DIR, out_name) + logger.info("Created {}".format(out_name)) + + # second, move file to the new gdb + fcs = [pipeline_fc_reordered, devproj_fc_reordered] + for fc in fcs: + arcpy.FeatureClassToFeatureClass_conversion( + fc, os.path.join(WORKING_DIR, out_name), arcpy.Describe(fc).name + ) + + # 8 adding 2011-2015 projects to buildings + pipeline = "pipeline_reordered" + arcpy.FeatureClassToFeatureClass_conversion( + pipeline, + arcpy.env.workspace, + "p1115", + "year_built >= 2011 AND year_built <= 2015", + ) + p1115 = "p1115" + arcpy.AlterField_management(p1115, "PARCEL_ID", "b_PARCEL_ID") + arcpy.AlterField_management(p1115, "residential_units", "b_residential_units") + arcpy.AlterField_management(p1115, "unit_ave_sqft", "b_unit_ave_sqft") + arcpy.AlterField_management(p1115, "building_sqft", "b_building_sqft") + arcpy.AlterField_management(p1115, "year_built", "b_year_built") + arcpy.AlterField_management(p1115, "stories", "b_stories") + + arcpy.AddField_management(p1115, "building_id", "LONG") + arcpy.AddField_management(p1115, "parcel_id", "LONG") + arcpy.AddField_management(p1115, "improvement_value", "DOUBLE") + arcpy.AddField_management(p1115, "residential_units", "LONG") + arcpy.AddField_management(p1115, "residential_sqft", "LONG") + arcpy.AddField_management(p1115, "sqft_per_unit", "DOUBLE") + arcpy.AddField_management(p1115, "non_residential_sqft", "LONG") + arcpy.AddField_management(p1115, "building_sqft", "DOUBLE") + arcpy.AddField_management(p1115, "nonres_rent_per_sqft", "DOUBLE") + arcpy.AddField_management(p1115, "res_price_per_sqft", "DOUBLE") + arcpy.AddField_management(p1115, "stories", "LONG") + arcpy.AddField_management(p1115, "year_built", "LONG") + arcpy.AddField_management(p1115, "redfin_sale_price", "DOUBLE") + arcpy.AddField_management(p1115, "redfin_sale_year", "DOUBLE") + arcpy.AddField_management(p1115, "redfin_home_type", "TEXT", "", "", "800") + arcpy.AddField_management(p1115, "costar_property_type", "TEXT", "", "", "800") + arcpy.AddField_management(p1115, "costar_rent", "TEXT", "", "", "800") + + # arcpy.CalculateField_management(p1115, "building_id", ) + arcpy.CalculateField_management(p1115, "parcel_id", "!b_PARCEL_ID!") + # arcpy.CalculateField_management(p1115, "development_type_id",) + # arcpy.CalculateField_management(p1115, "improvement_value",) + arcpy.CalculateField_management(p1115, "residential_units", "!b_residential_units!") + # arcpy.CalculateField_management(p1115, "residential_sqft", ) + arcpy.CalculateField_management(p1115, "sqft_per_unit", "!b_unit_ave_sqft!") + # arcpy.CalculateField_management(p1115, "non_residential_sqft",) + arcpy.CalculateField_management(p1115, "building_sqft", "!b_building_sqft!") + # arcpy.CalculateField_management(p1115, "nonres_rent_per_sqft", ) + # arcpy.CalculateField_management(p1115, "res_price_per_sqft", ) + arcpy.CalculateField_management(p1115, "stories", 
"!b_stories!") + arcpy.CalculateField_management(p1115, "year_built", "!b_year_built!") + arcpy.CalculateField_management(p1115, "redfin_sale_price", "!last_sale_price!") + # arcpy.CalculateField_management(p1115, "redfin_sale_year", "!last_sale_year!") + # arcpy.CalculateField_management(p1115, "redfin_home_type", ) + # arcpy.CalculateField_management(p1115, "costar_property_type", ) + arcpy.CalculateField_management(p1115, "costar_rent", "!average_weighted_rent!") + + arcpy.FeatureClassToFeatureClass_conversion( + p1115, arcpy.env.workspace, "p1115_add", "action = 'add'" + ) + arcpy.FeatureClassToFeatureClass_conversion( + p1115, arcpy.env.workspace, "p1115_build", "action = 'build'" + ) + + p1115_add = "p1115_add" + p1115_build = "p1115_build" + + FCfields = [f.name for f in arcpy.ListFields(p1115_add)] + DontDeleteFields = [ + "OBJECTID", + "Shape", + "building_id", + "parcel_id", + "development_type_id", + "improvement_value", + "residential_units", + "residential_sqft", + "sqft_per_unit", + "non_residential_sqft", + "building_sqft", + "nonres_rent_per_sqft", + "res_price_per_sqft", + "stories", + "year_built", + "redfin_sale_price", + "redfin_sale_year", + "redfin_home_type", + "costar_property_type", + "costar_rent", + "building_type", + "building_type_id", + "development_type_id", + ] + fields2Delete = list(set(FCfields) - set(DontDeleteFields)) + arcpy.DeleteField_management(p1115_add, fields2Delete) + arcpy.DeleteField_management( + p1115_build, fields2Delete + ) # because the two dataset should have the same structure + + b10_smelt = os.path.join(SMELT_GDB, "b10") + logger.info("Reading 2010 building file {}".format(b10_smelt)) + arcpy.TableToTable_conversion(b10_smelt, arcpy.env.workspace, "b10") + b10 = "b10" + arcpy.AddField_management(b10, "building_type", "TEXT", "", "", "800") + arcpy.AddField_management(b10, "building_type_id", "LONG") + + with arcpy.da.UpdateCursor( + b10, ["development_type_id", "building_type", "building_type_id"] + ) as cursor: + for row in cursor: + if row[0] == 1: + row[1] = "HS" + row[2] = 1 + elif row[0] == 2: + row[1] = "HM" + row[2] = 3 + elif row[0] == 3: + row[1] = "HM" + row[2] = 3 + elif row[0] == 4: + row[1] = "HM" + row[2] = 3 + elif row[0] == 5: + row[1] = "MR" + row[2] = 12 + elif row[0] == 6: + row[1] = "GQ" + row[2] = 3 + elif row[0] == 7: + row[1] = "RS" + row[2] = 10 + elif row[0] == 8: + row[1] = "RB" + row[2] = 11 + elif row[0] == 9: + row[1] = "HO" + row[2] = 5 + elif row[0] == 10: + row[1] = "OF" + row[2] = 4 + elif row[0] == 11: + row[1] = "ME" + row[2] = 14 + elif row[0] == 12: + row[1] = "OF" + row[2] = 4 + elif row[0] == 13: + row[1] = "IW" + row[2] = 8 + elif row[0] == 14: + row[1] = "IL" + row[2] = 7 + elif row[0] == 15: + row[1] = "IH" + row[2] = 9 + elif row[0] == 16: + row[1] == "IL" + row[2] = 7 + elif row[0] == 17: + row[1] = "SC" + row[2] = 6 + elif row[0] == 18: + row[1] = "SC" + row[2] = 6 + elif row[0] == 19: + row[1] = "OF" + row[2] = 4 + elif row[0] == 20: + row[1] = "VP" + row[2] = 0 + elif row[0] == 21: + row[1] = "VA" + row[2] = 0 + elif row[0] == 22: + row[1] = "PG" + row[2] = 16 + elif row[0] == 23: + row[1] = "PA" + row[2] = 15 + elif row[0] == 24: + row[1] = "VP" + row[2] = 0 + elif row[0] == 25: + row[1] = "VA" + row[2] = 0 + cursor.updateRow(row) + + arcpy.DeleteField_management(b10, "id") + + # the approach is: + # 1. simply merge the projects with action == add + # 2. 
find out the parcel ids where projects would be built in p1115_build, then remove those parcels in b10, the merge the build file + # need to build some diagnostic stuff to compare what was there that gets removed, and what's added + + # part 1: add the projects + b10_p1115_part1 = "b10_p1115_part1" + mergeList = [b10, p1115_add] + arcpy.Merge_management(mergeList, b10_p1115_part1) + + # create a copy of the merged file for diagnostics + arcpy.TableToTable_conversion( + b10_p1115_part1, arcpy.env.workspace, "b10_p1115_part1_copy" + ) + + # part 2: remove and merge + parcelBuildList = [ + row[0] for row in arcpy.da.SearchCursor(p1115_build, "parcel_id") + ] + with arcpy.da.UpdateCursor(b10_p1115_part1, "parcel_id") as cursor: + for row in cursor: + if row[0] in parcelBuildList: + cursor.deleteRow() + + rawp10_b15_pba50 = "rawp10_b15_pba50_{}".format(NOW)[ + 0:26 + ] # delete ".time" part, because that dot breaks it. + mergeList2 = [b10_p1115_part1, p1115_build] + arcpy.Merge_management(mergeList2, rawp10_b15_pba50) + + btnull = "btnull" ##stands for building type null + arcpy.MakeTableView_management(rawp10_b15_pba50, btnull, "building_type is NULL") + nullcount = arcpy.GetCount_management(btnull) + logger.info( + "Building file list has {} records with building type info missing".format( + nullcount + ) + ) + arcpy.Delete_management(btnull) + + # diagnotics using the copy + b10_p1115_part1_copy = "b10_p1115_part1_copy" + with arcpy.da.UpdateCursor(b10_p1115_part1_copy, "parcel_id") as cursor: + for row in cursor: + if row[0] not in parcelBuildList: + cursor.deleteRow() + + del cursor, row + + arcpy.Statistics_analysis( + b10_p1115_part1_copy, "removed_units", [["residential_units", "SUM"]] + ) + cursor = arcpy.SearchCursor("removed_units", "", "", "SUM_residential_units") + row = cursor.next() + sum_value1 = row.getValue("SUM_residential_units") + + arcpy.Statistics_analysis( + b10_p1115_part1_copy, "removed_nonres", [["non_residential_sqft", "SUM"]] + ) + cursor = arcpy.SearchCursor("removed_nonres", "", "", "SUM_non_residential_sqft") + row = cursor.next() + sum_value2 = row.getValue("SUM_non_residential_sqft") + + arcpy.Statistics_analysis( + p1115_build, "built_units", [["residential_units", "SUM"]] + ) + cursor = arcpy.SearchCursor("built_units", "", "", "SUM_residential_units") + row = cursor.next() + sum_value3 = row.getValue("SUM_residential_units") + + arcpy.Statistics_analysis( + p1115_build, "built_nonres", [["non_residential_sqft", "SUM"]] + ) + cursor = arcpy.SearchCursor("built_nonres", "", "", "SUM_non_residential_sqft") + row = cursor.next() + sum_value4 = row.getValue("SUM_non_residential_sqft") + + if sum_value1 >= sum_value3: + logger.info( + "There is a net decrease of {} units from {} units to {} units after incorporating the 'built' projects".format( + sum_value1 - sum_value3, sum_value1, sum_value3 + ) + ) + else: + logger.info( + "There is a net increase of {} units from {} units to {} units after incorporating the 'built' projects".format( + sum_value3 - sum_value1, sum_value1, sum_value3 + ) + ) + if sum_value2 >= sum_value4: + logger.info( + "There is a net decrease of {} square feet of nonresidential from {} sqft to {} sqft after incorporating the 'built' projects".format( + sum_value2 - sum_value4, sum_value2, sum_value4 + ) + ) + else: + logger.info( + "There is a net increase of {} square feet of nonresidential from {} sqft to {} sqft after incorporating the 'built' projects".format( + sum_value4 - sum_value2, sum_value2, sum_value4 + ) + ) + + building_output 
= "{}_buildings.csv".format(NOW) + arcpy.TableToTable_conversion(rawp10_b15_pba50, WORKING_DIR, building_output) + building_df = pd.read_csv(os.path.join(WORKING_DIR, building_output)) + building_df = building_df.rename( + columns={ + "development_type": "development_type_id", + "improvement_valu": "improvement_value", + "residential_unit": "residential_units", + "non_residential_": "non_residential_sqft", + "nonres_rent_per_": "nonres_rent_per_sqft", + "res_price_per_sq": "res_price_per_sqft", + "redfin_sale_pric": "redfin_sale_price", + "costar_property_": "costar_property_type", + } + ) + + # fix int column problem in csv + field_types_building = { + "OBJECTID": "int", + "building_id": "int", + "parcel_id": "int", + "stories": "int", + "year_built": "int", + "building_type_id": "int", + "development_type_id": "int", + "building_sqft": "int", + "non_residential_sqft": "int", + "residential_units": "int", + "residential_sqft": "int", + } + + for key, value in field_types_building.items(): + building_df[key] = building_df[key].fillna(0) + building_df[key] = building_df[key].astype(value) + building_df.to_csv(os.path.join(WORKING_DIR, building_output), index=False) + logger.info("Transform {} to building table".format(rawp10_b15_pba50)) diff --git a/policies/plu/1_PLU_BOC_data_combine.py b/policies/plu/1_PLU_BOC_data_combine.py index 56cf084..bf4a352 100644 --- a/policies/plu/1_PLU_BOC_data_combine.py +++ b/policies/plu/1_PLU_BOC_data_combine.py @@ -5,45 +5,61 @@ import dev_capacity_calculation_module NOW = time.strftime("%Y_%m%d_%H%M") -today = time.strftime('%Y_%m_%d') +today = time.strftime("%Y_%m_%d") ## set up the directories -if os.getenv('USERNAME') =='ywang': - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - M_URBANSIM_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50' - M_SMELT_DIR = 'M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12' - GITHUB_PETRALE_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\petrale'.format(os.getenv('USERNAME')) - GITHUB_URBANSIM_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim\\data'.format(os.getenv('USERNAME')) - -elif os.getenv('USERNAME') =='lzorn': - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - M_URBANSIM_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50' - M_SMELT_DIR = 'M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12' - GITHUB_PETRALE_DIR = 'X:\\petrale' - GITHUB_URBANSIM_DIR = 'X:\\bayarea_urbansim\\data' +if os.getenv("USERNAME") == "ywang": + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + M_URBANSIM_DIR = "M:\\Data\\Urban\\BAUS\\PBA50" + M_SMELT_DIR = "M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12" + GITHUB_PETRALE_DIR = "C:\\Users\\{}\\Documents\\GitHub\\petrale".format( + os.getenv("USERNAME") + ) + GITHUB_URBANSIM_DIR = ( + "C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim\\data".format( + os.getenv("USERNAME") + ) + ) + +elif os.getenv("USERNAME") == "lzorn": + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + M_URBANSIM_DIR = "M:\\Data\\Urban\\BAUS\\PBA50" + M_SMELT_DIR = "M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12" + GITHUB_PETRALE_DIR = "X:\\petrale" + GITHUB_URBANSIM_DIR = "X:\\bayarea_urbansim\\data" # input file locations -PBA40_ZONING_BOX_DIR = os.path.join(M_URBANSIM_DIR, 'Horizon', 'Large General Input Data') 
-PBA50_ZONINGMOD_DIR = os.path.join(M_URBANSIM_DIR, 'Final_Blueprint', 'Zoning Modifications') -OTHER_INPUTS_DIR = os.path.join(M_URBANSIM_DIR, 'Final_Blueprint', 'Base zoning', 'input') - +PBA40_ZONING_BOX_DIR = os.path.join( + M_URBANSIM_DIR, "Horizon", "Large General Input Data" +) +PBA50_ZONINGMOD_DIR = os.path.join( + M_URBANSIM_DIR, "Final_Blueprint", "Zoning Modifications" +) +OTHER_INPUTS_DIR = os.path.join( + M_URBANSIM_DIR, "Final_Blueprint", "Base zoning", "input" +) + # output file location -DATA_OUTPUT_DIR = os.path.join(BOX_DIR, 'Policies\\Base zoning\\outputs') -QA_QC_DIR = os.path.join(BOX_DIR, 'Policies\\Base zoning\\outputs\\QAQC') -LOG_FILE = os.path.join(DATA_OUTPUT_DIR,'{}_plu_boc_combine.log'.format(today)) +DATA_OUTPUT_DIR = os.path.join(BOX_DIR, "Policies\\Base zoning\\outputs") +QA_QC_DIR = os.path.join(BOX_DIR, "Policies\\Base zoning\\outputs\\QAQC") +LOG_FILE = os.path.join(DATA_OUTPUT_DIR, "{}_plu_boc_combine.log".format(today)) ## Three steps of data clearing - combine PBA40 plu data and BASIS BOC data using p10 parcel geography - ## - assign allow residential and/or non-residential development to each parcel; - ## - impute max_dua and max_far for parcels missing the info +## - assign allow residential and/or non-residential development to each parcel; +## - impute max_dua and max_far for parcels missing the info -def impute_max_dua(df_original,boc_source): +def impute_max_dua(df_original, boc_source): """ Impute max_dua from max_far or max_height - Returns dataframe with PARCEL_ID, max_dua, source_dua_[boc_source] + Returns dataframe with PARCEL_ID, max_dua, source_dua_[boc_source] source_dua is one of: [boc_source]: if it's already set so no imputation is necessary imputed from max_far imputed from max_height @@ -54,86 +70,141 @@ def impute_max_dua(df_original,boc_source): # don't modify passed df df = df_original.copy() - logger.info("impute_max_dua_{}: Before imputation, number of parcels with missing max_dua_{}: {:,}".format( - boc_source, boc_source, sum(df['max_dua_'+boc_source].isnull()))) - - # we can only fill in missing if either max_far or max_height is not null - df['max_dua_from_far'] = \ - df['max_far_' +boc_source] * dev_capacity_calculation_module.SQUARE_FEET_PER_ACRE / dev_capacity_calculation_module.SQUARE_FEET_PER_DU - df['max_far_from_height'] = \ - df['max_height_' +boc_source] / dev_capacity_calculation_module.FEET_PER_STORY * dev_capacity_calculation_module.PARCEL_USE_EFFICIENCY - df['max_dua_from_height'] = \ - df['max_far_from_height'] * dev_capacity_calculation_module.SQUARE_FEET_PER_ACRE / dev_capacity_calculation_module.SQUARE_FEET_PER_DU - + logger.info( + "impute_max_dua_{}: Before imputation, number of parcels with missing max_dua_{}: {:,}".format( + boc_source, boc_source, sum(df["max_dua_" + boc_source].isnull()) + ) + ) + + # we can only fill in missing if either max_far or max_height is not null + df["max_dua_from_far"] = ( + df["max_far_" + boc_source] + * dev_capacity_calculation_module.SQUARE_FEET_PER_ACRE + / dev_capacity_calculation_module.SQUARE_FEET_PER_DU + ) + df["max_far_from_height"] = ( + df["max_height_" + boc_source] + / dev_capacity_calculation_module.FEET_PER_STORY + * dev_capacity_calculation_module.PARCEL_USE_EFFICIENCY + ) + df["max_dua_from_height"] = ( + df["max_far_from_height"] + * dev_capacity_calculation_module.SQUARE_FEET_PER_ACRE + / dev_capacity_calculation_module.SQUARE_FEET_PER_DU + ) + # default to missing - df['source_dua_'+boc_source] = 'missing' - + df["source_dua_" + boc_source] = "missing" 
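The conversions computed a few lines above turn max_far and max_height into candidate dwelling-unit densities before an imputation source is chosen. A quick worked example (editorial, not part of the diff) may make the units clearer; the constant values below are illustrative stand-ins (43,560 sqft per acre is standard, the others are assumptions), while the script itself takes them from dev_capacity_calculation_module.

SQUARE_FEET_PER_ACRE = 43560.0    # standard acre size
SQUARE_FEET_PER_DU = 1000.0       # assumed average unit size, for illustration only
FEET_PER_STORY = 12.0             # assumed story height, for illustration only
PARCEL_USE_EFFICIENCY = 0.8       # assumed buildable share, for illustration only

max_far = 2.0
max_height = 36.0

max_dua_from_far = max_far * SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU
# 2.0 * 43560 / 1000 = 87.12 units per acre

max_far_from_height = max_height / FEET_PER_STORY * PARCEL_USE_EFFICIENCY
# 36 / 12 * 0.8 = 2.4 FAR

max_dua_from_height = max_far_from_height * SQUARE_FEET_PER_ACRE / SQUARE_FEET_PER_DU
# 2.4 * 43560 / 1000 = 104.54 units per acre; the masks that follow keep the smaller
# nonzero candidate when both are available.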
+ # this is set already -- nothing to do - df.loc[(df['max_dua_'+boc_source].notnull()) & - (df['max_dua_'+boc_source] > 0), 'source_dua_'+boc_source] = boc_source + df.loc[ + (df["max_dua_" + boc_source].notnull()) & (df["max_dua_" + boc_source] > 0), + "source_dua_" + boc_source, + ] = boc_source # decide on imputation source # for missing values, fill from max_far or max_height -- if both are available, use the min unless the min is 0 - df.loc[(df['source_dua_'+boc_source]=='missing') & - df.max_dua_from_height.notnull() & - df.max_dua_from_far.notnull() & - (df.max_dua_from_height > df.max_dua_from_far) & - (df.max_dua_from_far > 0), "source_dua_"+boc_source] = 'imputed from max_far (as min)' - - df.loc[(df['source_dua_'+boc_source]=='missing') & - df.max_dua_from_height.notnull() & - df.max_dua_from_far.notnull() & - (df.max_dua_from_height > df.max_dua_from_far) & - (df.max_dua_from_far == 0), "source_dua_"+boc_source] = 'imputed from max_height' - - df.loc[(df['source_dua_'+boc_source]=='missing') & - df.max_dua_from_height.notnull() & - df.max_dua_from_far.notnull() & - (df.max_dua_from_height < df.max_dua_from_far) & - (df.max_dua_from_height > 0), 'source_dua_'+boc_source] = 'imputed from max_height (as min)' - - df.loc[(df['source_dua_'+boc_source]=='missing') & - df.max_dua_from_height.notnull() & - df.max_dua_from_far.notnull() & - (df.max_dua_from_height < df.max_dua_from_far) & - (df.max_dua_from_height == 0), 'source_dua_'+boc_source] = 'imputed from max_far' - - df.loc[(df['source_dua_'+boc_source]=='missing') & - (df.max_dua_from_height == 0) & - (df.max_dua_from_far == 0), 'source_dua_'+boc_source] = 'imputed from max_far' + df.loc[ + (df["source_dua_" + boc_source] == "missing") + & df.max_dua_from_height.notnull() + & df.max_dua_from_far.notnull() + & (df.max_dua_from_height > df.max_dua_from_far) + & (df.max_dua_from_far > 0), + "source_dua_" + boc_source, + ] = "imputed from max_far (as min)" + + df.loc[ + (df["source_dua_" + boc_source] == "missing") + & df.max_dua_from_height.notnull() + & df.max_dua_from_far.notnull() + & (df.max_dua_from_height > df.max_dua_from_far) + & (df.max_dua_from_far == 0), + "source_dua_" + boc_source, + ] = "imputed from max_height" + + df.loc[ + (df["source_dua_" + boc_source] == "missing") + & df.max_dua_from_height.notnull() + & df.max_dua_from_far.notnull() + & (df.max_dua_from_height < df.max_dua_from_far) + & (df.max_dua_from_height > 0), + "source_dua_" + boc_source, + ] = "imputed from max_height (as min)" + + df.loc[ + (df["source_dua_" + boc_source] == "missing") + & df.max_dua_from_height.notnull() + & df.max_dua_from_far.notnull() + & (df.max_dua_from_height < df.max_dua_from_far) + & (df.max_dua_from_height == 0), + "source_dua_" + boc_source, + ] = "imputed from max_far" + + df.loc[ + (df["source_dua_" + boc_source] == "missing") + & (df.max_dua_from_height == 0) + & (df.max_dua_from_far == 0), + "source_dua_" + boc_source, + ] = "imputed from max_far" # if only one available use that - df.loc[(df['source_dua_'+boc_source]=="missing") & - df.max_dua_from_height.isnull() & - df.max_dua_from_far.notnull(), 'source_dua_'+boc_source] = 'imputed from max_far' - - df.loc[(df['source_dua_'+boc_source]=='missing') & - df.max_dua_from_height.notnull() & - df.max_dua_from_far.isnull(), 'source_dua_'+boc_source] = 'imputed from max_height' + df.loc[ + (df["source_dua_" + boc_source] == "missing") + & df.max_dua_from_height.isnull() + & df.max_dua_from_far.notnull(), + "source_dua_" + boc_source, + ] = "imputed from max_far" + + 
df.loc[ + (df["source_dua_" + boc_source] == "missing") + & df.max_dua_from_height.notnull() + & df.max_dua_from_far.isnull(), + "source_dua_" + boc_source, + ] = "imputed from max_height" # imputation is decided -- set it - df.loc[ df['source_dua_'+boc_source]=='imputed from max_height (as min)', 'max_dua_'+boc_source] = \ - df.loc[df['source_dua_'+boc_source]=='imputed from max_height (as min)', 'max_dua_from_height'] - - df.loc[ df['source_dua_'+boc_source]=='imputed from max_height', 'max_dua_'+boc_source] = \ - df.loc[df['source_dua_'+boc_source]=='imputed from max_height', 'max_dua_from_height'] - - df.loc[ df['source_dua_'+boc_source]=='imputed from max_far (as min)', 'max_dua_'+boc_source] = \ - df.loc[df['source_dua_'+boc_source]=='imputed from max_far (as min)', 'max_dua_from_far'] - - df.loc[ df['source_dua_'+boc_source]=='imputed from max_far', 'max_dua_'+boc_source] = \ - df.loc[df['source_dua_'+boc_source]=='imputed from max_far', 'max_dua_from_far'] - - logger.info("After imputation: \n{}".format(df['source_dua_'+boc_source].value_counts())) - - return df[['PARCEL_ID','max_dua_'+boc_source,'source_dua_'+boc_source]] - - - -def impute_max_far(df_original,boc_source): + df.loc[ + df["source_dua_" + boc_source] == "imputed from max_height (as min)", + "max_dua_" + boc_source, + ] = df.loc[ + df["source_dua_" + boc_source] == "imputed from max_height (as min)", + "max_dua_from_height", + ] + + df.loc[ + df["source_dua_" + boc_source] == "imputed from max_height", + "max_dua_" + boc_source, + ] = df.loc[ + df["source_dua_" + boc_source] == "imputed from max_height", + "max_dua_from_height", + ] + + df.loc[ + df["source_dua_" + boc_source] == "imputed from max_far (as min)", + "max_dua_" + boc_source, + ] = df.loc[ + df["source_dua_" + boc_source] == "imputed from max_far (as min)", + "max_dua_from_far", + ] + + df.loc[ + df["source_dua_" + boc_source] == "imputed from max_far", + "max_dua_" + boc_source, + ] = df.loc[ + df["source_dua_" + boc_source] == "imputed from max_far", "max_dua_from_far" + ] + + logger.info( + "After imputation: \n{}".format(df["source_dua_" + boc_source].value_counts()) + ) + + return df[["PARCEL_ID", "max_dua_" + boc_source, "source_dua_" + boc_source]] + + +def impute_max_far(df_original, boc_source): """ Impute max_far from max_height - Returns dataframe with PARCEL_ID, max_far, source_far_[boc_source] + Returns dataframe with PARCEL_ID, max_far, source_far_[boc_source] source_far is one of: [boc_source]: if it's already set so no imputation is necessary imputed from max_height missing: if it can't be imputed because max_far and max_height are missing too @@ -144,38 +215,56 @@ def impute_max_far(df_original,boc_source): # don't modify passed df df = df_original.copy() - logger.info("impute_max_far_{}: Before imputation, number of parcels with missing max_far_{}: {:,}".format( - boc_source, boc_source, sum(df['max_far_'+boc_source].isnull()))) - + logger.info( + "impute_max_far_{}: Before imputation, number of parcels with missing max_far_{}: {:,}".format( + boc_source, boc_source, sum(df["max_far_" + boc_source].isnull()) + ) + ) + # we can only fill in missing if max_height is not null - df['max_far_from_height'] = \ - df['max_height_' +boc_source] / dev_capacity_calculation_module.FEET_PER_STORY * dev_capacity_calculation_module.PARCEL_USE_EFFICIENCY - + df["max_far_from_height"] = ( + df["max_height_" + boc_source] + / dev_capacity_calculation_module.FEET_PER_STORY + * dev_capacity_calculation_module.PARCEL_USE_EFFICIENCY + ) + # default to missing 
- df['source_far_'+boc_source] = 'missing' - + df["source_far_" + boc_source] = "missing" + # this is set already -- nothing to do - df.loc[(df['max_far_'+boc_source].notnull()) & - (df['max_far_'+boc_source] > 0), 'source_far_'+boc_source] = boc_source + df.loc[ + (df["max_far_" + boc_source].notnull()) & (df["max_far_" + boc_source] > 0), + "source_far_" + boc_source, + ] = boc_source # decide on imputation source # for missing values, fill from max_height - df.loc[(df['source_far_'+boc_source]=='missing') & df.max_far_from_height.notnull(), - 'source_far_'+boc_source] = 'imputed from max_height' + df.loc[ + (df["source_far_" + boc_source] == "missing") + & df.max_far_from_height.notnull(), + "source_far_" + boc_source, + ] = "imputed from max_height" # imputation is decided -- set it - df.loc[ df['source_far_'+boc_source]=='imputed from max_height', 'max_far_'+boc_source] = \ - df.loc[df['source_far_'+boc_source]=='imputed from max_height', 'max_far_from_height'] + df.loc[ + df["source_far_" + boc_source] == "imputed from max_height", + "max_far_" + boc_source, + ] = df.loc[ + df["source_far_" + boc_source] == "imputed from max_height", + "max_far_from_height", + ] - logger.info("After imputation: \n{}".format(df['source_far_'+boc_source].value_counts())) + logger.info( + "After imputation: \n{}".format(df["source_far_" + boc_source].value_counts()) + ) - return df[['PARCEL_ID','max_far_'+boc_source,'source_far_'+boc_source]] + return df[["PARCEL_ID", "max_far_" + boc_source, "source_far_" + boc_source]] def impute_basis_devtypes_from_pba40(df): """ Where basis allowed development type is missing, impute value from pba40. - Note this in source_[btype]_basis, which will be set to one of + Note this in source_[btype]_basis, which will be set to one of ['basis', 'missing', 'imputed from pba40'] Returns df with [btype]_basis and source_[btype]_basis columns updated @@ -183,212 +272,323 @@ def impute_basis_devtypes_from_pba40(df): logger.info("impute_basis_devtypes_from_pba40():") for btype in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - df['source_'+btype+'_basis'] = 'basis' # default - df.loc[ df[btype+'_basis'].isnull(), 'source_'+btype+'_basis'] = 'missing' # or missing if null - - logger.info("Before imputation of {}_basis:\n{}".format(btype, df['source_'+btype+'_basis'].value_counts())) + df["source_" + btype + "_basis"] = "basis" # default + df.loc[ + df[btype + "_basis"].isnull(), "source_" + btype + "_basis" + ] = "missing" # or missing if null + + logger.info( + "Before imputation of {}_basis:\n{}".format( + btype, df["source_" + btype + "_basis"].value_counts() + ) + ) # if basis value is missing # and we care about it (nodev_zod == 0) # and the pba40 value is present # => impute - impute_idx = ((df[btype+'_basis'].isnull()) & \ - (df[btype+'_pba40'].notnull())) + impute_idx = (df[btype + "_basis"].isnull()) & (df[btype + "_pba40"].notnull()) # impute and note source - df.loc[impute_idx, btype+'_basis' ] = df.loc[impute_idx, btype + '_pba40'] - df.loc[impute_idx, 'source_'+btype+'_basis' ] = 'imputed from pba40' + df.loc[impute_idx, btype + "_basis"] = df.loc[impute_idx, btype + "_pba40"] + df.loc[impute_idx, "source_" + btype + "_basis"] = "imputed from pba40" - logger.info("After imputation of {}_basis:\n{}".format(btype, df['source_'+btype+'_basis'].value_counts())) + logger.info( + "After imputation of {}_basis:\n{}".format( + btype, df["source_" + btype + "_basis"].value_counts() + ) + ) logger.info("") return df + def impute_basis_max_height_from_pba40(df): 
""" Where max_height_basis is missing, impute value from pba40. - Note this in source_height_basis, which will be set to one of + Note this in source_height_basis, which will be set to one of ['basis', 'missing', 'imputed from pba40'] Returns df with max_height_basis and source_height_basis columns updated """ logger.info("impute_basis_max_height_from_pba40():") - df['source_height_basis'] = 'basis' # default - df.loc[ df['max_height_basis'].isnull(), 'source_height_basis'] = 'missing' # or missing if null + df["source_height_basis"] = "basis" # default + df.loc[ + df["max_height_basis"].isnull(), "source_height_basis" + ] = "missing" # or missing if null - logger.info("Before imputation:\n{}".format(df['source_height_basis'].value_counts())) + logger.info( + "Before imputation:\n{}".format(df["source_height_basis"].value_counts()) + ) # if basis value is missing # and the pba40 value is present # => impute - impute_idx = ((df['max_height_basis'].isnull()) & \ - (df['max_height_pba40'].notnull())) + impute_idx = (df["max_height_basis"].isnull()) & (df["max_height_pba40"].notnull()) # impute and note source - df.loc[impute_idx, 'max_height_basis' ] = df.loc[impute_idx, 'max_height_pba40'] - df.loc[impute_idx, 'source_height_basis' ] = 'imputed from pba40' + df.loc[impute_idx, "max_height_basis"] = df.loc[impute_idx, "max_height_pba40"] + df.loc[impute_idx, "source_height_basis"] = "imputed from pba40" - logger.info("After imputation:\n{}".format(df['source_height_basis'].value_counts())) + logger.info( + "After imputation:\n{}".format(df["source_height_basis"].value_counts()) + ) return df -if __name__ == '__main__': + +if __name__ == "__main__": # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("BOX_DIR = {}".format(BOX_DIR)) logger.info("M_URBANSIM_DIR = {}".format(M_URBANSIM_DIR)) - logger.info("M_SMELT_DIR = {}".format(M_SMELT_DIR)) + logger.info("M_SMELT_DIR = {}".format(M_SMELT_DIR)) logger.info("DATA_OUTPUT_DIR = {}".format(DATA_OUTPUT_DIR)) ## Basemap parcels - basemap_p10_file = os.path.join(M_SMELT_DIR, 'p10.csv') - basemap_p10 = pd.read_csv(basemap_p10_file, - usecols =['PARCEL_ID','geom_id_s','ACRES','LAND_VALUE'], - dtype ={'PARCEL_ID':np.float64, 'geom_id_s':str, 'ACRES':np.float64, 'LAND_VALUE':np.float64}) + basemap_p10_file = os.path.join(M_SMELT_DIR, "p10.csv") + basemap_p10 = pd.read_csv( + basemap_p10_file, + usecols=["PARCEL_ID", "geom_id_s", "ACRES", "LAND_VALUE"], + dtype={ + "PARCEL_ID": np.float64, + "geom_id_s": str, + "ACRES": np.float64, + "LAND_VALUE": np.float64, + }, + ) # conver PARCEL_ID to integer: - basemap_p10['PARCEL_ID'] = basemap_p10['PARCEL_ID'].apply(lambda x: int(round(x))) + basemap_p10["PARCEL_ID"] = 
basemap_p10["PARCEL_ID"].apply(lambda x: int(round(x))) logger.info("Read {:,} rows from {}".format(len(basemap_p10), basemap_p10_file)) logger.info("\n{}".format(basemap_p10.head())) - logger.info('Number of unique PARCEL_ID: {}'.format(len(basemap_p10.PARCEL_ID.unique()))) - + logger.info( + "Number of unique PARCEL_ID: {}".format(len(basemap_p10.PARCEL_ID.unique())) + ) ## p10 pacel to pba40 zoning code mapping - pba40_pz_file = os.path.join(PBA40_ZONING_BOX_DIR, '2015_12_21_zoning_parcels.csv') + pba40_pz_file = os.path.join(PBA40_ZONING_BOX_DIR, "2015_12_21_zoning_parcels.csv") pba40_pz = pd.read_csv( pba40_pz_file, - usecols = ['geom_id','zoning_id','nodev'], - dtype = {'geom_id':str, 'zoning_id':np.float64, 'nodev_pba40':np.int}) + usecols=["geom_id", "zoning_id", "nodev"], + dtype={"geom_id": str, "zoning_id": np.float64, "nodev_pba40": np.int}, + ) logger.info("Read {:,} rows from {}".format(len(pba40_pz), pba40_pz_file)) logger.info("\n{}".format(pba40_pz.head())) ## add zoning_id, nodev_pba40 columns to p10 - p10_pba40_pz = pd.merge(left=basemap_p10, right=pba40_pz, left_on='geom_id_s', right_on = 'geom_id', how='left') - p10_pba40_pz.rename(columns={'nodev' :'nodev_pba40', - 'zoning_id':'zoning_id_pba40'}, inplace=True) - #display(p10_pba40_pz.head()) + p10_pba40_pz = pd.merge( + left=basemap_p10, + right=pba40_pz, + left_on="geom_id_s", + right_on="geom_id", + how="left", + ) + p10_pba40_pz.rename( + columns={"nodev": "nodev_pba40", "zoning_id": "zoning_id_pba40"}, inplace=True + ) + # display(p10_pba40_pz.head()) ## Check Number of parcels missing zoning designation - p10_pba40_pz_missing = p10_pba40_pz.loc[p10_pba40_pz['zoning_id_pba40'].isnull()] - logger.info("Out of {0:,} p10 parcels, {1:,} or {2:.1f}% are missing 'zoning_id' values".format( - len(p10_pba40_pz), len(p10_pba40_pz_missing), 100.0*len(p10_pba40_pz_missing)/len(p10_pba40_pz))) - + p10_pba40_pz_missing = p10_pba40_pz.loc[p10_pba40_pz["zoning_id_pba40"].isnull()] + logger.info( + "Out of {0:,} p10 parcels, {1:,} or {2:.1f}% are missing 'zoning_id' values".format( + len(p10_pba40_pz), + len(p10_pba40_pz_missing), + 100.0 * len(p10_pba40_pz_missing) / len(p10_pba40_pz), + ) + ) ## P10 parcels with PBA40 zoning code PLU - pba40_plu_file = os.path.join(GITHUB_URBANSIM_DIR, 'zoning_lookup.csv') - pba40_plu = pd.read_csv(pba40_plu_file, dtype={'id':float}) + pba40_plu_file = os.path.join(GITHUB_URBANSIM_DIR, "zoning_lookup.csv") + pba40_plu = pd.read_csv(pba40_plu_file, dtype={"id": float}) logger.info("Read {:,} rows from {}".format(len(pba40_plu), pba40_plu_file)) # coerce this column to float -- it's a string for some reason - pba40_plu['SC'] = pd.to_numeric(pba40_plu['SC'], errors='coerce') + pba40_plu["SC"] = pd.to_numeric(pba40_plu["SC"], errors="coerce") # append _pba40 to column names - rename_cols = dict((col,col+"_pba40") for col in pba40_plu.columns.values) + rename_cols = dict((col, col + "_pba40") for col in pba40_plu.columns.values) pba40_plu.rename(columns=rename_cols, inplace=True) logger.info("\n{}".format(pba40_plu.head())) # check duplicates in zoning id - pba40_plu['jz_o'] = pba40_plu['city_pba40'].str.cat(pba40_plu['name_pba40'],sep=" ") - logger.info("Out of {:,} rows in pba40_plu, {:,} have unique values of 'id', {:,} have unique values of 'jz_o'".format( - len(pba40_plu), len(pba40_plu.id_pba40.unique()), len(pba40_plu.jz_o.unique()))) + pba40_plu["jz_o"] = pba40_plu["city_pba40"].str.cat( + pba40_plu["name_pba40"], sep=" " + ) + logger.info( + "Out of {:,} rows in pba40_plu, {:,} have unique 
values of 'id', {:,} have unique values of 'jz_o'".format( + len(pba40_plu), + len(pba40_plu.id_pba40.unique()), + len(pba40_plu.jz_o.unique()), + ) + ) # using the zoning_id, get the pba40 zoning data (intensities, allowed building types) - p10_pba40_plu = pd.merge(left=p10_pba40_pz, - right=pba40_plu, - left_on='zoning_id_pba40', - right_on='id_pba40', - how='left') + p10_pba40_plu = pd.merge( + left=p10_pba40_pz, + right=pba40_plu, + left_on="zoning_id_pba40", + right_on="id_pba40", + how="left", + ) # Check number of p10 records failed to find a matching PLU - p10_pba40_plu_missing = p10_pba40_plu.loc[p10_pba40_plu['jz_o'].isnull()] - logger.info("Out of {0:,} rows in p10_pba40_plu, {1:,} or {2:.1f}% are missing 'jz_o' values".format( - len(p10_pba40_plu), len(p10_pba40_plu_missing), 100.0*len(p10_pba40_plu_missing)/len(p10_pba40_plu))) + p10_pba40_plu_missing = p10_pba40_plu.loc[p10_pba40_plu["jz_o"].isnull()] + logger.info( + "Out of {0:,} rows in p10_pba40_plu, {1:,} or {2:.1f}% are missing 'jz_o' values".format( + len(p10_pba40_plu), + len(p10_pba40_plu_missing), + 100.0 * len(p10_pba40_plu_missing) / len(p10_pba40_plu), + ) + ) logger.info("\n{}".format(p10_pba40_plu.head())) - ## P10 with BASIS BOC ## Read BASIS Parcel-plu_id data - basis_parcel_plu_id_file = os.path.join(OTHER_INPUTS_DIR,'p10_urbansim_boc_opt_b_v2.csv') - - basis_parcel_plu_id = pd.read_csv(basis_parcel_plu_id_file, - usecols = ['parcel_id', 'plu_id'], - dtype = {'parcel_id': float}) - logger.info("Read {:,} rows from {}, with header: \n{}".format(len(basis_parcel_plu_id), - basis_parcel_plu_id_file, - basis_parcel_plu_id.head())) + basis_parcel_plu_id_file = os.path.join( + OTHER_INPUTS_DIR, "p10_urbansim_boc_opt_b_v2.csv" + ) + + basis_parcel_plu_id = pd.read_csv( + basis_parcel_plu_id_file, + usecols=["parcel_id", "plu_id"], + dtype={"parcel_id": float}, + ) + logger.info( + "Read {:,} rows from {}, with header: \n{}".format( + len(basis_parcel_plu_id), + basis_parcel_plu_id_file, + basis_parcel_plu_id.head(), + ) + ) # drop records with no parcel_id - basis_parcel_plu_id = basis_parcel_plu_id.loc[basis_parcel_plu_id.parcel_id.notnull()] + basis_parcel_plu_id = basis_parcel_plu_id.loc[ + basis_parcel_plu_id.parcel_id.notnull() + ] # convert parcel_id to integer - basis_parcel_plu_id['parcel_id'] = basis_parcel_plu_id['parcel_id'].apply(lambda x: int(round(x))) + basis_parcel_plu_id["parcel_id"] = basis_parcel_plu_id["parcel_id"].apply( + lambda x: int(round(x)) + ) - logger.info('After dropping nan parcel_id, BASIS parcel-plu_id has {} parcels, {} unique parcel_id'.format( - len(basis_parcel_plu_id),len(basis_parcel_plu_id.parcel_id.unique()))) + logger.info( + "After dropping nan parcel_id, BASIS parcel-plu_id has {} parcels, {} unique parcel_id".format( + len(basis_parcel_plu_id), len(basis_parcel_plu_id.parcel_id.unique()) + ) + ) # drop duplicates of parcel_id; this should be unique basis_parcel_plu_id_rows = len(basis_parcel_plu_id) - basis_parcel_plu_id.drop_duplicates(subset='parcel_id', inplace=True) + basis_parcel_plu_id.drop_duplicates(subset="parcel_id", inplace=True) if len(basis_parcel_plu_id) == basis_parcel_plu_id_rows: - logger.info("No duplicate parcel_ids found in {}".format(basis_parcel_plu_id_file)) + logger.info( + "No duplicate parcel_ids found in {}".format(basis_parcel_plu_id_file) + ) else: - logger.warning("Dropped duplicate parcel_id rows from {}".format(basis_parcel_plu_id_file)) - logger.warning("Went from {:,} to {:,} rows; dropped {:,} duplicates".format( - 
basis_parcel_plu_id_rows, - len(basis_parcel_plu_id), - basis_parcel_plu_id_rows-len(basis_parcel_plu_id))) + logger.warning( + "Dropped duplicate parcel_id rows from {}".format(basis_parcel_plu_id_file) + ) + logger.warning( + "Went from {:,} to {:,} rows; dropped {:,} duplicates".format( + basis_parcel_plu_id_rows, + len(basis_parcel_plu_id), + basis_parcel_plu_id_rows - len(basis_parcel_plu_id), + ) + ) # Read BASIS BOC Lookup data - basis_boc_lookup_file = os.path.join(OTHER_INPUTS_DIR,'boc_lookup_2020_rev_10_13_20_final.csv') + basis_boc_lookup_file = os.path.join( + OTHER_INPUTS_DIR, "boc_lookup_2020_rev_10_13_20_final.csv" + ) basis_boc_lookup_columns = [ - 'zoning_id', 'jurisdiction', 'zn_code', 'zn_description', 'zn_area_overlay', - 'max_far', 'max_dua', 'building_height', - 'hs', 'ht', 'hm', 'of_', 'ho', 'sc', 'il', 'iw', 'ih', 'rs', 'rb', 'mr', 'mt', 'me'] - basis_boc_lookup = pd.read_csv(basis_boc_lookup_file, - usecols = basis_boc_lookup_columns) - logger.info("Read {:,} rows from {} with {} unique zoning_id, with header: \n{}".format( - len(basis_boc_lookup), - basis_boc_lookup_file, - len(basis_boc_lookup.zoning_id.unique()), - basis_boc_lookup.head())) + "zoning_id", + "jurisdiction", + "zn_code", + "zn_description", + "zn_area_overlay", + "max_far", + "max_dua", + "building_height", + "hs", + "ht", + "hm", + "of_", + "ho", + "sc", + "il", + "iw", + "ih", + "rs", + "rb", + "mr", + "mt", + "me", + ] + basis_boc_lookup = pd.read_csv( + basis_boc_lookup_file, usecols=basis_boc_lookup_columns + ) + logger.info( + "Read {:,} rows from {} with {} unique zoning_id, with header: \n{}".format( + len(basis_boc_lookup), + basis_boc_lookup_file, + len(basis_boc_lookup.zoning_id.unique()), + basis_boc_lookup.head(), + ) + ) # drop duplicates basis_boc_lookup.drop_duplicates(inplace=True) - logger.info("After dropping duplicates, {:,} rows left, with {} unique zoning_id".format( - len(basis_boc_lookup), - len(basis_boc_lookup.zoning_id.unique()))) + logger.info( + "After dropping duplicates, {:,} rows left, with {} unique zoning_id".format( + len(basis_boc_lookup), len(basis_boc_lookup.zoning_id.unique()) + ) + ) # rename some columns to be consistent with the data's previous versions - basis_boc_lookup.rename(columns = {'jurisdiction': 'plu_jurisdiction', - 'zn_description': 'plu_description', - 'zn_code': 'plu_code', - 'building_height': 'max_height', - 'of_': 'of'}, inplace=True) + basis_boc_lookup.rename( + columns={ + "jurisdiction": "plu_jurisdiction", + "zn_description": "plu_description", + "zn_code": "plu_code", + "building_height": "max_height", + "of_": "of", + }, + inplace=True, + ) # merge lookup to parcel-plu_id to get parcel-level BOC data - basis_boc = basis_parcel_plu_id.merge(basis_boc_lookup, - left_on='plu_id', - right_on='zoning_id', - how='left') + basis_boc = basis_parcel_plu_id.merge( + basis_boc_lookup, left_on="plu_id", right_on="zoning_id", how="left" + ) # drop columns w/ duplicated info - basis_boc.drop(columns=['zoning_id'], inplace=True) + basis_boc.drop(columns=["zoning_id"], inplace=True) - # append _basis to column names to differentiate between basis PLU and pba40 PLU between + # append _basis to column names to differentiate between basis PLU and pba40 PLU between rename_cols = {} for col in basis_boc.columns.values: # rename the ht, hm, etc to HT_basis, HM_basis, ... 
- if len(col)==2: + if len(col) == 2: rename_cols[col] = col.upper() + "_basis" else: rename_cols[col] = col + "_basis" @@ -396,73 +596,114 @@ def impute_basis_max_height_from_pba40(df): # report on missing allowed building types for btype in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - null_btype_count = len(basis_boc.loc[basis_boc["{}_basis".format(btype)].isnull()]) - logger.info('Number of parcels missing allowable type for {}: {:,} or {:.1f}%'.format(btype, - null_btype_count, 100.0*null_btype_count/len(basis_boc))) + null_btype_count = len( + basis_boc.loc[basis_boc["{}_basis".format(btype)].isnull()] + ) + logger.info( + "Number of parcels missing allowable type for {}: {:,} or {:.1f}%".format( + btype, null_btype_count, 100.0 * null_btype_count / len(basis_boc) + ) + ) # merge basis plu to p10 + pba40 plu - p10_basis_pba40_boc = pd.merge(left=p10_pba40_plu, right=basis_boc, left_on='PARCEL_ID', right_on='parcel_id_basis', how='left') - - p10_basis_pba40_boc.drop(columns = ['id_pba40','plandate_pba40','jz_o'],inplace = True) - logger.info('Create p10_basis_pba40_boc:') + p10_basis_pba40_boc = pd.merge( + left=p10_pba40_plu, + right=basis_boc, + left_on="PARCEL_ID", + right_on="parcel_id_basis", + how="left", + ) + + p10_basis_pba40_boc.drop( + columns=["id_pba40", "plandate_pba40", "jz_o"], inplace=True + ) + logger.info("Create p10_basis_pba40_boc:") logger.info(p10_basis_pba40_boc.dtypes) - ## Bring in zoning scenarios data - zmod_file = os.path.join(PBA50_ZONINGMOD_DIR,'p10_pba50_attr_20200915.csv') - zmod = pd.read_csv(zmod_file, - usecols = ['PARCEL_ID','juris','fbpzoningm','nodev']) - zmod['PARCEL_ID'] = zmod['PARCEL_ID'].apply(lambda x: int(round(x))) + zmod_file = os.path.join(PBA50_ZONINGMOD_DIR, "p10_pba50_attr_20200915.csv") + zmod = pd.read_csv(zmod_file, usecols=["PARCEL_ID", "juris", "fbpzoningm", "nodev"]) + zmod["PARCEL_ID"] = zmod["PARCEL_ID"].apply(lambda x: int(round(x))) logger.info("Read {:,} rows from {}".format(len(zmod), zmod_file)) logger.info("\n{}".format(zmod.head())) - logger.info('Number of unique parcel_id: {}'.format(len(zmod.PARCEL_ID.unique()))) + logger.info("Number of unique parcel_id: {}".format(len(zmod.PARCEL_ID.unique()))) # append _zmod to column names to clarify source of these columns - rename_cols = dict((col, col+"_zmod") for col in zmod.columns.values) + rename_cols = dict((col, col + "_zmod") for col in zmod.columns.values) zmod.rename(columns=rename_cols, inplace=True) # merge parcel data with zoning mods - p10_basis_pba40_boc_zmod = pd.merge(left=p10_basis_pba40_boc, right=zmod, - left_on='PARCEL_ID', right_on='PARCEL_ID_zmod', how = 'left') + p10_basis_pba40_boc_zmod = pd.merge( + left=p10_basis_pba40_boc, + right=zmod, + left_on="PARCEL_ID", + right_on="PARCEL_ID_zmod", + how="left", + ) logger.info("Created p10_b10_basis_pba40_boc_zmod:") logger.info(p10_basis_pba40_boc_zmod.dtypes) - ## Bring in jurisdiction_county lookup data - juris_county_lookup_file = os.path.join(GITHUB_PETRALE_DIR,'zones\\jurisdictions\\juris_county_id.csv') + juris_county_lookup_file = os.path.join( + GITHUB_PETRALE_DIR, "zones\\jurisdictions\\juris_county_id.csv" + ) juris_county_lookup = pd.read_csv( juris_county_lookup_file, - usecols = ['juris_name_full','juris_id','county_name', 'county_id','jurisdiction_id']) - - p10_basis_pba40_boc_zmod_withJuris = p10_basis_pba40_boc_zmod.merge(juris_county_lookup, - left_on = 'juris_zmod',right_on='juris_name_full',how='left') - - p10_basis_pba40_boc_zmod_withJuris.drop(columns = 
['juris_name_full'],inplace = True) - - logger.info('Add jurisdiction names and IDs: ') + usecols=[ + "juris_name_full", + "juris_id", + "county_name", + "county_id", + "jurisdiction_id", + ], + ) + + p10_basis_pba40_boc_zmod_withJuris = p10_basis_pba40_boc_zmod.merge( + juris_county_lookup, + left_on="juris_zmod", + right_on="juris_name_full", + how="left", + ) + + p10_basis_pba40_boc_zmod_withJuris.drop(columns=["juris_name_full"], inplace=True) + + logger.info("Add jurisdiction names and IDs: ") logger.info("\n{}".format(p10_basis_pba40_boc_zmod_withJuris.head())) - ## Add basis and pba40 allowed_res_ and allowed_nonres_ - allowed_basis = dev_capacity_calculation_module.set_allow_dev_type(p10_basis_pba40_boc_zmod_withJuris, "basis") - allowed_pba40 = dev_capacity_calculation_module.set_allow_dev_type(p10_basis_pba40_boc_zmod_withJuris, "pba40") - - p10_basis_pba40_boc_zmod_withJuris = pd.merge(left=p10_basis_pba40_boc_zmod_withJuris, - right=allowed_basis, - how="left", on="PARCEL_ID") - p10_basis_pba40_boc_zmod_withJuris = pd.merge(left=p10_basis_pba40_boc_zmod_withJuris, - right=allowed_pba40, - how="left", on="PARCEL_ID") - #p10_basis_pba40_boc_zmod_withJuris - - logger.info('Add basis and pba40 allowed_res_ and allowed_nonres_:') + allowed_basis = dev_capacity_calculation_module.set_allow_dev_type( + p10_basis_pba40_boc_zmod_withJuris, "basis" + ) + allowed_pba40 = dev_capacity_calculation_module.set_allow_dev_type( + p10_basis_pba40_boc_zmod_withJuris, "pba40" + ) + + p10_basis_pba40_boc_zmod_withJuris = pd.merge( + left=p10_basis_pba40_boc_zmod_withJuris, + right=allowed_basis, + how="left", + on="PARCEL_ID", + ) + p10_basis_pba40_boc_zmod_withJuris = pd.merge( + left=p10_basis_pba40_boc_zmod_withJuris, + right=allowed_pba40, + how="left", + on="PARCEL_ID", + ) + # p10_basis_pba40_boc_zmod_withJuris + + logger.info("Add basis and pba40 allowed_res_ and allowed_nonres_:") logger.info(p10_basis_pba40_boc_zmod_withJuris.head()) # as the function suggests -- impute basis allowed development types from pba40 - p10_basis_pba40_boc_zmod_withJuris = impute_basis_devtypes_from_pba40(p10_basis_pba40_boc_zmod_withJuris) + p10_basis_pba40_boc_zmod_withJuris = impute_basis_devtypes_from_pba40( + p10_basis_pba40_boc_zmod_withJuris + ) # and impute basis max height from pba40 - do this before max_dua/max_far since it's used for that - p10_basis_pba40_boc_zmod_withJuris = impute_basis_max_height_from_pba40(p10_basis_pba40_boc_zmod_withJuris) + p10_basis_pba40_boc_zmod_withJuris = impute_basis_max_height_from_pba40( + p10_basis_pba40_boc_zmod_withJuris + ) ## Impute max_dua and max_far dua_basis = impute_max_dua(p10_basis_pba40_boc_zmod_withJuris, "basis") @@ -472,46 +713,79 @@ def impute_basis_max_height_from_pba40(df): far_pba40 = impute_max_far(p10_basis_pba40_boc_zmod_withJuris, "pba40") ## replace the columns with those with imputations - logger.info('Parcels count after all imputations: {:,}'.format(len(p10_basis_pba40_boc_zmod_withJuris))) - p10_basis_pba40_boc_zmod_withJuris.drop(columns=['max_dua_basis','max_dua_pba40','max_far_basis','max_far_pba40'], inplace=True) - - p10_basis_pba40_boc_zmod_withJuris = pd.merge(left=p10_basis_pba40_boc_zmod_withJuris, - right=dua_basis, - how="left", on="PARCEL_ID") - p10_basis_pba40_boc_zmod_withJuris = pd.merge(left=p10_basis_pba40_boc_zmod_withJuris, - right=dua_pba40, - how="left", on="PARCEL_ID") - p10_basis_pba40_boc_zmod_withJuris = pd.merge(left=p10_basis_pba40_boc_zmod_withJuris, - right=far_basis, - how="left", on="PARCEL_ID") - 
p10_basis_pba40_boc_zmod_withJuris = pd.merge(left=p10_basis_pba40_boc_zmod_withJuris, - right=far_pba40, - how="left", on="PARCEL_ID") + logger.info( + "Parcels count after all imputations: {:,}".format( + len(p10_basis_pba40_boc_zmod_withJuris) + ) + ) + p10_basis_pba40_boc_zmod_withJuris.drop( + columns=["max_dua_basis", "max_dua_pba40", "max_far_basis", "max_far_pba40"], + inplace=True, + ) + + p10_basis_pba40_boc_zmod_withJuris = pd.merge( + left=p10_basis_pba40_boc_zmod_withJuris, + right=dua_basis, + how="left", + on="PARCEL_ID", + ) + p10_basis_pba40_boc_zmod_withJuris = pd.merge( + left=p10_basis_pba40_boc_zmod_withJuris, + right=dua_pba40, + how="left", + on="PARCEL_ID", + ) + p10_basis_pba40_boc_zmod_withJuris = pd.merge( + left=p10_basis_pba40_boc_zmod_withJuris, + right=far_basis, + how="left", + on="PARCEL_ID", + ) + p10_basis_pba40_boc_zmod_withJuris = pd.merge( + left=p10_basis_pba40_boc_zmod_withJuris, + right=far_pba40, + how="left", + on="PARCEL_ID", + ) ## Export PLU BOC data to csv output_columns = [ - 'PARCEL_ID', 'geom_id','county_id', 'county_name', 'juris_zmod', 'jurisdiction_id', - - 'ACRES', 'zoning_id_pba40', 'name_pba40', 'fbpzoningm_zmod', - + "PARCEL_ID", + "geom_id", + "county_id", + "county_name", + "juris_zmod", + "jurisdiction_id", + "ACRES", + "zoning_id_pba40", + "name_pba40", + "fbpzoningm_zmod", # intensity - 'max_far_basis', 'max_far_pba40', - 'source_far_basis', 'source_far_pba40', - 'max_dua_basis', 'max_dua_pba40', - 'source_dua_basis', 'source_dua_pba40', - 'max_height_basis', 'max_height_pba40', - 'source_height_basis', - - 'nodev_zmod', 'nodev_pba40', - + "max_far_basis", + "max_far_pba40", + "source_far_basis", + "source_far_pba40", + "max_dua_basis", + "max_dua_pba40", + "source_dua_basis", + "source_dua_pba40", + "max_height_basis", + "max_height_pba40", + "source_height_basis", + "nodev_zmod", + "nodev_pba40", # allow building types sum - 'allow_res_basis', 'allow_res_pba40', - 'allow_nonres_basis', 'allow_nonres_pba40', - + "allow_res_basis", + "allow_res_pba40", + "allow_nonres_basis", + "allow_nonres_pba40", # BASIS metadata - 'plu_id_basis','plu_code_basis','plu_jurisdiction_basis','plu_description_basis' - ] + "plu_id_basis", + "plu_code_basis", + "plu_jurisdiction_basis", + "plu_description_basis", + ] # allowed building types for devType in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: @@ -523,53 +797,105 @@ def impute_basis_max_height_from_pba40(df): plu_boc_output = p10_basis_pba40_boc_zmod_withJuris[output_columns] for devType in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - for boc_source in ['basis', 'pba40']: - miss_idx = plu_boc_output[devType + '_' + boc_source].isnull().sum() - logger.info('Missing {} {} parcel count: {}'.format(boc_source, - devType, - miss_idx)) - - logger.info('Export pba40/BASIS combined base zoning data: {} records of the following fields: {}'.format(len(plu_boc_output), plu_boc_output.dtypes)) + for boc_source in ["basis", "pba40"]: + miss_idx = plu_boc_output[devType + "_" + boc_source].isnull().sum() + logger.info( + "Missing {} {} parcel count: {}".format(boc_source, devType, miss_idx) + ) + + logger.info( + "Export pba40/BASIS combined base zoning data: {} records of the following fields: {}".format( + len(plu_boc_output), plu_boc_output.dtypes + ) + ) + + plu_boc_output.to_csv( + os.path.join(DATA_OUTPUT_DIR, today + "_p10_plu_boc_allAttrs.csv"), index=False + ) - plu_boc_output.to_csv(os.path.join(DATA_OUTPUT_DIR, today+'_p10_plu_boc_allAttrs.csv'), index = False) 
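    # Illustrative spot-check (not part of the original script): re-read the export
    # just written above and tabulate the imputation sources to confirm coverage.
    check_df = pd.read_csv(
        os.path.join(DATA_OUTPUT_DIR, today + "_p10_plu_boc_allAttrs.csv")
    )
    logger.info(
        "source_dua_basis counts:\n{}".format(
            check_df["source_dua_basis"].value_counts(dropna=False)
        )
    )
    logger.info(
        "source_far_basis counts:\n{}".format(
            check_df["source_far_basis"].value_counts(dropna=False)
        )
    )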
- - ## Evaluate development type for QA/QC plu_boc = plu_boc_output.copy() for devType in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - plu_boc[devType+'_comp'] = np.nan - - plu_boc.loc[(plu_boc[devType + '_pba40'] == 1) & - (plu_boc[devType + '_basis'] == 0),devType+'_comp'] = 'only PBA40 allow' - plu_boc.loc[(plu_boc[devType + '_pba40'] == 0) & - (plu_boc[devType + '_basis'] == 1),devType+'_comp'] = 'only BASIS allow' - plu_boc.loc[(plu_boc[devType + '_pba40'] == 1) & - (plu_boc[devType + '_basis'] == 1),devType+'_comp'] = 'both allow' - plu_boc.loc[(plu_boc[devType + '_pba40'] == 0) & - (plu_boc[devType + '_basis'] == 0),devType+'_comp'] = 'both not allow' - plu_boc.loc[(plu_boc[devType + '_basis'].isnull()) & - (plu_boc[devType + '_pba40'].notnull()),devType+'_comp'] = 'missing BASIS data' - plu_boc.loc[plu_boc[devType + '_pba40' ].isnull(),devType+'_comp'] = 'missing PBA40 data' - plu_boc.loc[plu_boc['nodev_zmod' ] == 1,devType+'_comp'] = 'not developable' - - devType_comp = plu_boc[['PARCEL_ID','county_id','county_name','juris_zmod', 'ACRES', - 'nodev_zmod','nodev_pba40'] + - [devType+'_comp' for devType in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES]] - - devType_comp.to_csv(os.path.join(QA_QC_DIR, today+'_devType_comparison.csv'),index = False) - - - ## Check PBA40 zoning_id / BASIS plu_id completeness - logger.info('Export parcels that have a zoning_id_pba40 but no plu_id_basis') - missing_plu_id_basis = plu_boc_output.loc[(plu_boc_output['plu_id_basis'].isnull()) & - (plu_boc_output['zoning_id_pba40'].notnull())][['PARCEL_ID','juris_zmod','zoning_id_pba40', - 'plu_id_basis', 'plu_description_basis']] - missing_plu_id_basis.to_csv(os.path.join(QA_QC_DIR, today+'_missing_plu_id_basis.csv'),index = False) - - - logger.info('Export parcels that have a plu_id_basis but no zoning_id_pba40') - missing_zoning_id_pba40 = plu_boc_output.loc[(plu_boc_output['zoning_id_pba40'].isnull()) & - (plu_boc_output['plu_id_basis'].notnull())][['PARCEL_ID','juris_zmod','zoning_id_pba40', - 'plu_id_basis', 'plu_description_basis']] - missing_zoning_id_pba40.to_csv(os.path.join(QA_QC_DIR, today+'_missing_zoning_id_pba40.csv'),index = False) + plu_boc[devType + "_comp"] = np.nan + + plu_boc.loc[ + (plu_boc[devType + "_pba40"] == 1) & (plu_boc[devType + "_basis"] == 0), + devType + "_comp", + ] = "only PBA40 allow" + plu_boc.loc[ + (plu_boc[devType + "_pba40"] == 0) & (plu_boc[devType + "_basis"] == 1), + devType + "_comp", + ] = "only BASIS allow" + plu_boc.loc[ + (plu_boc[devType + "_pba40"] == 1) & (plu_boc[devType + "_basis"] == 1), + devType + "_comp", + ] = "both allow" + plu_boc.loc[ + (plu_boc[devType + "_pba40"] == 0) & (plu_boc[devType + "_basis"] == 0), + devType + "_comp", + ] = "both not allow" + plu_boc.loc[ + (plu_boc[devType + "_basis"].isnull()) + & (plu_boc[devType + "_pba40"].notnull()), + devType + "_comp", + ] = "missing BASIS data" + plu_boc.loc[ + plu_boc[devType + "_pba40"].isnull(), devType + "_comp" + ] = "missing PBA40 data" + plu_boc.loc[plu_boc["nodev_zmod"] == 1, devType + "_comp"] = "not developable" + + devType_comp = plu_boc[ + [ + "PARCEL_ID", + "county_id", + "county_name", + "juris_zmod", + "ACRES", + "nodev_zmod", + "nodev_pba40", + ] + + [ + devType + "_comp" + for devType in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + ] + ] + + devType_comp.to_csv( + os.path.join(QA_QC_DIR, today + "_devType_comparison.csv"), index=False + ) + + ## Check PBA40 zoning_id / BASIS plu_id completeness + logger.info("Export parcels that 
have a zoning_id_pba40 but no plu_id_basis") + missing_plu_id_basis = plu_boc_output.loc[ + (plu_boc_output["plu_id_basis"].isnull()) + & (plu_boc_output["zoning_id_pba40"].notnull()) + ][ + [ + "PARCEL_ID", + "juris_zmod", + "zoning_id_pba40", + "plu_id_basis", + "plu_description_basis", + ] + ] + missing_plu_id_basis.to_csv( + os.path.join(QA_QC_DIR, today + "_missing_plu_id_basis.csv"), index=False + ) + + logger.info("Export parcels that have a plu_id_basis but no zoning_id_pba40") + missing_zoning_id_pba40 = plu_boc_output.loc[ + (plu_boc_output["zoning_id_pba40"].isnull()) + & (plu_boc_output["plu_id_basis"].notnull()) + ][ + [ + "PARCEL_ID", + "juris_zmod", + "zoning_id_pba40", + "plu_id_basis", + "plu_description_basis", + ] + ] + missing_zoning_id_pba40.to_csv( + os.path.join(QA_QC_DIR, today + "_missing_zoning_id_pba40.csv"), index=False + ) diff --git a/policies/plu/2_calculate_juris_basis_pba40_capacity_metrics.py b/policies/plu/2_calculate_juris_basis_pba40_capacity_metrics.py index 5734ca7..6465402 100644 --- a/policies/plu/2_calculate_juris_basis_pba40_capacity_metrics.py +++ b/policies/plu/2_calculate_juris_basis_pba40_capacity_metrics.py @@ -1,4 +1,4 @@ -USAGE=""" +USAGE = """ Given a set of p10 combined with pba40 and basis PLU/BOC data, creates a series of test hybrid configurations where all variables are set to use PBA40 values and one variable is set to use BASIS. @@ -22,65 +22,88 @@ import dev_capacity_calculation_module NOW = time.strftime("%Y_%m%d_%H%M") -today = time.strftime('%Y_%m_%d') +today = time.strftime("%Y_%m_%d") + + +if os.getenv("USERNAME") == "ywang": + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + M_URBANSIM_DIR = "M:\\Data\\Urban\\BAUS\\PBA50" + M_SMELT_DIR = "M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12" + GITHUB_PETRALE_DIR = "C:\\Users\\{}\\Documents\\GitHub\\petrale\\".format( + os.getenv("USERNAME") + ) +elif os.getenv("USERNAME") == "lzorn": + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + M_URBANSIM_DIR = "M:\\Data\\Urban\\BAUS\\PBA50" + M_SMELT_DIR = "M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12" + GITHUB_PETRALE_DIR = "X:\\petrale" -if os.getenv('USERNAME') =='ywang': - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - M_URBANSIM_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50' - M_SMELT_DIR = 'M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12' - GITHUB_PETRALE_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\petrale\\'.format(os.getenv('USERNAME')) -elif os.getenv('USERNAME') =='lzorn': - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - M_URBANSIM_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50' - M_SMELT_DIR = 'M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12' - GITHUB_PETRALE_DIR = 'X:\\petrale' - - # input file locations -PLU_BOC_M_DIR = os.path.join(M_URBANSIM_DIR, 'Draft_Blueprint', 'Base zoning', 'output') -PLU_BOC_FILE = os.path.join(PLU_BOC_M_DIR, '2020_06_03_p10_plu_boc_allAttrs.csv') +PLU_BOC_M_DIR = os.path.join(M_URBANSIM_DIR, "Draft_Blueprint", "Base zoning", "output") +PLU_BOC_FILE = os.path.join(PLU_BOC_M_DIR, "2020_06_03_p10_plu_boc_allAttrs.csv") # output file locations -PLU_BOC_BOX_DIR = os.path.join(BOX_DIR, 'Policies', 'Base zoning', 'outputs') +PLU_BOC_BOX_DIR = os.path.join(BOX_DIR, "Policies", "Base zoning", "outputs") # 
output file # In test mode (specified by --test), outputs to cwd and without date prefix; otherwise, outputs to PLU_BOC_DIR with date prefix -JURIS_CAPACITY_FILE = "juris_basis_pba40_capacity_metrics.csv" -LOG_FILE = "juris_basis_pba40_capacity_metrics.log" +JURIS_CAPACITY_FILE = "juris_basis_pba40_capacity_metrics.csv" +LOG_FILE = "juris_basis_pba40_capacity_metrics.log" + def countMissing(df, attr): null_attr_count = len(df.loc[df["{}_basis".format(attr)].isnull()]) - logger.info('Number of parcels missing {}_basis info: {:,} or {:.1f}%'.format(attr, - null_attr_count, 100.0*null_attr_count/len(df))) + logger.info( + "Number of parcels missing {}_basis info: {:,} or {:.1f}%".format( + attr, null_attr_count, 100.0 * null_attr_count / len(df) + ) + ) -if __name__ == '__main__': +if __name__ == "__main__": - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) parser.add_argument("--test", action="store_true", help="Test mode") args = parser.parse_args() if args.test == False: - LOG_FILE = os.path.join(PLU_BOC_BOX_DIR, "{}_{}".format(today, LOG_FILE)) - JURIS_CAPACITY_FILE = os.path.join(PLU_BOC_BOX_DIR, "{}_{}".format(today, JURIS_CAPACITY_FILE)) + LOG_FILE = os.path.join(PLU_BOC_BOX_DIR, "{}_{}".format(today, LOG_FILE)) + JURIS_CAPACITY_FILE = os.path.join( + PLU_BOC_BOX_DIR, "{}_{}".format(today, JURIS_CAPACITY_FILE) + ) - pd.set_option('max_columns', 200) - pd.set_option('display.width', 200) + pd.set_option("max_columns", 200) + pd.set_option("display.width", 200) # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("BOX_DIR = {}".format(BOX_DIR)) @@ -92,29 +115,36 @@ def countMissing(df, attr): logger.info("\n{}".format(plu_boc.head())) ## B10 buildings with p10 parcels data - basemap_b10_file = os.path.join(M_SMELT_DIR, 'b10.csv') + basemap_b10_file = os.path.join(M_SMELT_DIR, "b10.csv") basemap_b10 = pd.read_csv(basemap_b10_file) # conver PARCEL_ID to integer: - basemap_b10['parcel_id'] = basemap_b10['parcel_id'].apply(lambda x: int(round(x))) + basemap_b10["parcel_id"] = basemap_b10["parcel_id"].apply(lambda x: int(round(x))) logger.info("Read {:,} rows from {}".format(len(basemap_b10), basemap_b10_file)) logger.info("\n{}".format(basemap_b10.head())) - logger.info('Number of unique PARCEL_ID: {}'.format(len(basemap_b10.parcel_id.unique()))) - - basemap_p10_file = os.path.join(M_SMELT_DIR, 'p10.csv') - basemap_p10 = pd.read_csv(basemap_p10_file, - usecols =['PARCEL_ID','geom_id_s','ACRES','LAND_VALUE']) + logger.info( + "Number of unique PARCEL_ID: 
{}".format(len(basemap_b10.parcel_id.unique())) + ) + + basemap_p10_file = os.path.join(M_SMELT_DIR, "p10.csv") + basemap_p10 = pd.read_csv( + basemap_p10_file, usecols=["PARCEL_ID", "geom_id_s", "ACRES", "LAND_VALUE"] + ) # conver PARCEL_ID to integer: - basemap_p10['PARCEL_ID'] = basemap_p10['PARCEL_ID'].apply(lambda x: int(round(x))) + basemap_p10["PARCEL_ID"] = basemap_p10["PARCEL_ID"].apply(lambda x: int(round(x))) logger.info("Read {:,} rows from {}".format(len(basemap_p10), basemap_p10_file)) logger.info("\n{}".format(basemap_p10.head())) - logger.info('Number of unique PARCEL_ID: {}'.format(len(basemap_p10.PARCEL_ID.unique()))) + logger.info( + "Number of unique PARCEL_ID: {}".format(len(basemap_p10.PARCEL_ID.unique())) + ) # join parcels to buildings which is used to determine current built-out condition when calculating net capacity - building_parcel = pd.merge(left=basemap_b10, - right=basemap_p10[['PARCEL_ID','LAND_VALUE']], - left_on='parcel_id', - right_on='PARCEL_ID', - how='outer') + building_parcel = pd.merge( + left=basemap_b10, + right=basemap_p10[["PARCEL_ID", "LAND_VALUE"]], + left_on="parcel_id", + right_on="PARCEL_ID", + how="outer", + ) ## Create test hybrid indices on the fly, representing: # What if we used PBA40 data for all fields and BASIS data for this one field @@ -123,13 +153,17 @@ def countMissing(df, attr): # create all PBA40 hybrid idx to start pba40_juris_idx = juris_df.copy() - pba40_juris_idx.set_index('juris_name',inplace = True) + pba40_juris_idx.set_index("juris_name", inplace=True) for dev_type in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: # use PBA40 allowed dev type - pba40_juris_idx["{}_idx".format(dev_type)] = dev_capacity_calculation_module.USE_PBA40 + pba40_juris_idx[ + "{}_idx".format(dev_type) + ] = dev_capacity_calculation_module.USE_PBA40 for intensity in dev_capacity_calculation_module.INTENSITY_CODES: # use PBA40 max intensity - pba40_juris_idx["max_{}_idx".format(intensity)] = dev_capacity_calculation_module.USE_PBA40 + pba40_juris_idx[ + "max_{}_idx".format(intensity) + ] = dev_capacity_calculation_module.USE_PBA40 # don't adjust pba40_juris_idx["proportion_adj_{}".format(intensity)] = 1.0 @@ -137,187 +171,354 @@ def countMissing(df, attr): # for each attribute # construct hybrid index for testing -- e.g. 
use PBA40 idx for all columns, BASIS idx for this one - for test_attr in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + \ - dev_capacity_calculation_module.INTENSITY_CODES: + for test_attr in ( + dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + + dev_capacity_calculation_module.INTENSITY_CODES + ): logger.info("======== Testing BASIS attribute {}".format(test_attr)) # start with all PBA40 but use BASIS just for this test_hybrid_juris_idx = pba40_juris_idx.copy() if test_attr in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - test_hybrid_juris_idx["{}_idx".format(test_attr) ] = dev_capacity_calculation_module.USE_BASIS + test_hybrid_juris_idx[ + "{}_idx".format(test_attr) + ] = dev_capacity_calculation_module.USE_BASIS else: - test_hybrid_juris_idx["max_{}_idx".format(test_attr)] = dev_capacity_calculation_module.USE_BASIS - + test_hybrid_juris_idx[ + "max_{}_idx".format(test_attr) + ] = dev_capacity_calculation_module.USE_BASIS # apply the hybrid jurisdiction index to the parcel data - test_hybrid_parcel_idx = dev_capacity_calculation_module.create_hybrid_parcel_data_from_juris_idx(logger, plu_boc, test_hybrid_juris_idx) + test_hybrid_parcel_idx = ( + dev_capacity_calculation_module.create_hybrid_parcel_data_from_juris_idx( + logger, plu_boc, test_hybrid_juris_idx + ) + ) # compute allowed development type - residential vs non-residential for each parcel - test_hybrid_allow_dev_type = dev_capacity_calculation_module.set_allow_dev_type(test_hybrid_parcel_idx, boc_source="urbansim") + test_hybrid_allow_dev_type = dev_capacity_calculation_module.set_allow_dev_type( + test_hybrid_parcel_idx, boc_source="urbansim" + ) # put them together - test_hybrid_parcel_idx = pd.merge(left=test_hybrid_parcel_idx, right=test_hybrid_allow_dev_type, how="left", on="PARCEL_ID") + test_hybrid_parcel_idx = pd.merge( + left=test_hybrid_parcel_idx, + right=test_hybrid_allow_dev_type, + how="left", + on="PARCEL_ID", + ) # and join alongside the parcel data - test_hybrid_parcel_idx = pd.merge(left=plu_boc, right=test_hybrid_parcel_idx, how="left", on=["PARCEL_ID", "juris_zmod"]) - - logger.debug("test_hybrid_parcel_idx head(30):\n{}".format(test_hybrid_parcel_idx.head(30))) + test_hybrid_parcel_idx = pd.merge( + left=plu_boc, + right=test_hybrid_parcel_idx, + how="left", + on=["PARCEL_ID", "juris_zmod"], + ) + + logger.debug( + "test_hybrid_parcel_idx head(30):\n{}".format( + test_hybrid_parcel_idx.head(30) + ) + ) # calculate capacity for PBA40 and BASIS test, where the BASIS test uses the "urbansim" index, # which is really a test of BASIS for this attribute only - capacity_pba40 = dev_capacity_calculation_module.calculate_capacity(test_hybrid_parcel_idx,"pba40","zmod", - pass_thru_cols=["juris_zmod"]) - capacity_basisTest = dev_capacity_calculation_module.calculate_capacity(test_hybrid_parcel_idx,"basis","zmod", - pass_thru_cols=["juris_zmod"]) + capacity_pba40 = dev_capacity_calculation_module.calculate_capacity( + test_hybrid_parcel_idx, "pba40", "zmod", pass_thru_cols=["juris_zmod"] + ) + capacity_basisTest = dev_capacity_calculation_module.calculate_capacity( + test_hybrid_parcel_idx, "basis", "zmod", pass_thru_cols=["juris_zmod"] + ) logger.debug("capacity_pba40.head():\n{}".format(capacity_pba40.head())) logger.debug("capacity_basisTest.head():\n{}".format(capacity_basisTest.head())) # should we keep capacity cols based on test_attr? 
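        # Illustrative note (not part of the original diff): because the hybrid index
        # flips only test_attr to BASIS (e.g. when test_attr == "HS", HS comes from
        # BASIS while every other field stays PBA40), any gap between the
        # jurisdiction-level pba40 and basisTest totals computed below is attributable
        # to that single BASIS attribute.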
- capacity_juris_pba40 = capacity_pba40.groupby(["juris_zmod"])[["zoned_du_pba40", - "zoned_Ksqft_pba40", - "job_spaces_pba40"]].sum().reset_index() - capacity_juris_basisTest = capacity_basisTest.groupby(["juris_zmod"])[["zoned_du_basis", - "zoned_Ksqft_basis", - "job_spaces_basis"]].sum().reset_index() - - logger.debug("capacity_juris_pba40.head():\n{}".format(capacity_juris_pba40.head())) - logger.debug("capacity_juris_basisTest.head():\n{}".format(capacity_juris_basisTest.head())) + capacity_juris_pba40 = ( + capacity_pba40.groupby(["juris_zmod"])[ + ["zoned_du_pba40", "zoned_Ksqft_pba40", "job_spaces_pba40"] + ] + .sum() + .reset_index() + ) + capacity_juris_basisTest = ( + capacity_basisTest.groupby(["juris_zmod"])[ + ["zoned_du_basis", "zoned_Ksqft_basis", "job_spaces_basis"] + ] + .sum() + .reset_index() + ) + + logger.debug( + "capacity_juris_pba40.head():\n{}".format(capacity_juris_pba40.head()) + ) + logger.debug( + "capacity_juris_basisTest.head():\n{}".format( + capacity_juris_basisTest.head() + ) + ) # calculate net capacity - net_capacity_pba40 = dev_capacity_calculation_module.calculate_net_capacity(logger, test_hybrid_parcel_idx,"pba40","zmod", - building_parcel, net_pass_thru_cols=["juris_zmod"]) - net_capacity_basisTest = dev_capacity_calculation_module.calculate_net_capacity(logger, test_hybrid_parcel_idx,"basis","zmod", - building_parcel, net_pass_thru_cols=["juris_zmod"]) + net_capacity_pba40 = dev_capacity_calculation_module.calculate_net_capacity( + logger, + test_hybrid_parcel_idx, + "pba40", + "zmod", + building_parcel, + net_pass_thru_cols=["juris_zmod"], + ) + net_capacity_basisTest = dev_capacity_calculation_module.calculate_net_capacity( + logger, + test_hybrid_parcel_idx, + "basis", + "zmod", + building_parcel, + net_pass_thru_cols=["juris_zmod"], + ) logger.debug("net_capacity_pba40.head():\n{}".format(net_capacity_pba40.head())) - logger.debug("net_capacity_basisTest.head():\n{}".format(net_capacity_basisTest.head())) - - net_capacity_juris_pba40 = net_capacity_pba40.groupby(["juris_zmod"])[["zoned_du_vacant_pba40", - "zoned_Ksqft_vacant_pba40", - "job_spaces_vacant_pba40", - "zoned_du_underbuild_pba40", - "zoned_Ksqft_underbuild_pba40", - "job_spaces_underbuild_pba40", - "zoned_du_underbuild_noProt_pba40", - "zoned_Ksqft_underbuild_noProt_pba40", - "job_spaces_underbuild_noProt_pba40"]].sum().reset_index() - net_capacity_juris_basisTest = net_capacity_basisTest.groupby(["juris_zmod"])[["zoned_du_vacant_basis", - "zoned_Ksqft_vacant_basis", - "job_spaces_vacant_basis", - "zoned_du_underbuild_basis", - "zoned_Ksqft_underbuild_basis", - "job_spaces_underbuild_basis", - "zoned_du_underbuild_noProt_basis", - "zoned_Ksqft_underbuild_noProt_basis", - "job_spaces_underbuild_noProt_basis"]].sum().reset_index() - - logger.debug("net_capacity_juris_pba40.head():\n{}".format(net_capacity_juris_pba40.head())) - logger.debug("net_capacity_juris_basisTest.head():\n{}".format(net_capacity_juris_basisTest.head())) + logger.debug( + "net_capacity_basisTest.head():\n{}".format(net_capacity_basisTest.head()) + ) + + net_capacity_juris_pba40 = ( + net_capacity_pba40.groupby(["juris_zmod"])[ + [ + "zoned_du_vacant_pba40", + "zoned_Ksqft_vacant_pba40", + "job_spaces_vacant_pba40", + "zoned_du_underbuild_pba40", + "zoned_Ksqft_underbuild_pba40", + "job_spaces_underbuild_pba40", + "zoned_du_underbuild_noProt_pba40", + "zoned_Ksqft_underbuild_noProt_pba40", + "job_spaces_underbuild_noProt_pba40", + ] + ] + .sum() + .reset_index() + ) + net_capacity_juris_basisTest = ( + 
net_capacity_basisTest.groupby(["juris_zmod"])[ + [ + "zoned_du_vacant_basis", + "zoned_Ksqft_vacant_basis", + "job_spaces_vacant_basis", + "zoned_du_underbuild_basis", + "zoned_Ksqft_underbuild_basis", + "job_spaces_underbuild_basis", + "zoned_du_underbuild_noProt_basis", + "zoned_Ksqft_underbuild_noProt_basis", + "job_spaces_underbuild_noProt_basis", + ] + ] + .sum() + .reset_index() + ) + + logger.debug( + "net_capacity_juris_pba40.head():\n{}".format( + net_capacity_juris_pba40.head() + ) + ) + logger.debug( + "net_capacity_juris_basisTest.head():\n{}".format( + net_capacity_juris_basisTest.head() + ) + ) # put them together and add variable name - capacity_juris_pba40_basisTest = pd.merge(left=capacity_juris_pba40, - right=capacity_juris_basisTest, - on="juris_zmod") - net_capacity_juris_pba40_basisTest = pd.merge(left=net_capacity_juris_pba40, - right=net_capacity_juris_basisTest, - on="juris_zmod") - capacities_juris_pba40_basisTest = pd.merge(left=capacity_juris_pba40_basisTest, - right=net_capacity_juris_pba40_basisTest, - on="juris_zmod") + capacity_juris_pba40_basisTest = pd.merge( + left=capacity_juris_pba40, right=capacity_juris_basisTest, on="juris_zmod" + ) + net_capacity_juris_pba40_basisTest = pd.merge( + left=net_capacity_juris_pba40, + right=net_capacity_juris_basisTest, + on="juris_zmod", + ) + capacities_juris_pba40_basisTest = pd.merge( + left=capacity_juris_pba40_basisTest, + right=net_capacity_juris_pba40_basisTest, + on="juris_zmod", + ) capacities_juris_pba40_basisTest["variable"] = test_attr - logger.debug("capacities_juris_pba40_basisTest.head():\n{}".format(capacities_juris_pba40_basisTest.head())) + logger.debug( + "capacities_juris_pba40_basisTest.head():\n{}".format( + capacities_juris_pba40_basisTest.head() + ) + ) # special metrics for allowed building development type: # count where attribute changes and allow_res/allow_nonres changes, in terms of parcels and acreage if test_attr in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - dev_type_metric = test_hybrid_parcel_idx[["PARCEL_ID", "juris_zmod", "ACRES", - test_attr+"_pba40", test_attr+"_basis", - "allow_res_pba40", "allow_res_urbansim", - "allow_nonres_pba40", "allow_nonres_urbansim"]].copy() + dev_type_metric = test_hybrid_parcel_idx[ + [ + "PARCEL_ID", + "juris_zmod", + "ACRES", + test_attr + "_pba40", + test_attr + "_basis", + "allow_res_pba40", + "allow_res_urbansim", + "allow_nonres_pba40", + "allow_nonres_urbansim", + ] + ].copy() dev_type_metric["num_parcels"] = 1 # convert to simple 1 character codes: 0, 1 or M for missing # TODO: 11 shouldn't be a value but apparently OF_basis=11 for two parcels?! 
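            # Illustrative note (not part of the original diff): the replace() below
            # recodes the raw 0/1/NaN flags to single characters ("0", "1", "M"), with
            # the stray OF_basis value of 11 coerced to "1". The concatenated
            # pba40_basis code then reads, e.g., "1 0" = allowed in PBA40 only,
            # "0 1" = allowed in BASIS only, "M 1" = missing in PBA40 but allowed in
            # BASIS.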
- dev_type_metric.replace(to_replace={test_attr+"_pba40":{0:"0", 1:"1", np.nan:"M"}, - test_attr+"_basis":{0:"0", 1:"1", np.nan:"M", 11:"1"}}, - inplace=True) - logger.debug("dev_type_metric[{}_pba40].value_counts():\n{}".format(test_attr, dev_type_metric[test_attr+"_pba40"].value_counts())) - logger.debug("dev_type_metric[{}_basis].value_counts():\n{}".format(test_attr, dev_type_metric[test_attr+"_basis"].value_counts())) - - dev_type_metric["pba40_basis"] = dev_type_metric[test_attr+"_pba40"] + " " + dev_type_metric[test_attr+"_basis"] + dev_type_metric.replace( + to_replace={ + test_attr + "_pba40": {0: "0", 1: "1", np.nan: "M"}, + test_attr + "_basis": {0: "0", 1: "1", np.nan: "M", 11: "1"}, + }, + inplace=True, + ) + logger.debug( + "dev_type_metric[{}_pba40].value_counts():\n{}".format( + test_attr, dev_type_metric[test_attr + "_pba40"].value_counts() + ) + ) + logger.debug( + "dev_type_metric[{}_basis].value_counts():\n{}".format( + test_attr, dev_type_metric[test_attr + "_basis"].value_counts() + ) + ) + + dev_type_metric["pba40_basis"] = ( + dev_type_metric[test_attr + "_pba40"] + + " " + + dev_type_metric[test_attr + "_basis"] + ) # aggregate to jurisdiction, attribute (pba40 and basis) - dev_type_metric_juris = dev_type_metric.groupby(["juris_zmod", "pba40_basis"]).agg({"num_parcels":"sum", "ACRES":"sum"}).reset_index() + dev_type_metric_juris = ( + dev_type_metric.groupby(["juris_zmod", "pba40_basis"]) + .agg({"num_parcels": "sum", "ACRES": "sum"}) + .reset_index() + ) # pivot so one row per jurisdiction - dev_type_metric_juris = dev_type_metric_juris.pivot(index="juris_zmod", columns="pba40_basis", values=["num_parcels", "ACRES"]) + dev_type_metric_juris = dev_type_metric_juris.pivot( + index="juris_zmod", + columns="pba40_basis", + values=["num_parcels", "ACRES"], + ) # rename columns so they're not tuples; they'll look like "num_parcels 1_0" # NOTE: for 1_0 etc, the convention is pba40_basis - dev_type_metric_juris.columns = [col[0] + " " + col[1] for col in dev_type_metric_juris.columns.values] + dev_type_metric_juris.columns = [ + col[0] + " " + col[1] for col in dev_type_metric_juris.columns.values + ] dev_type_metric_juris.reset_index(inplace=True) dev_type_metric_juris.fillna(value=0, inplace=True) dev_type_metric_juris["variable"] = test_attr - logger.debug("dev_type_metric_juris.head(20):\n{}".format(dev_type_metric_juris.head(20))) + logger.debug( + "dev_type_metric_juris.head(20):\n{}".format( + dev_type_metric_juris.head(20) + ) + ) # add to capacities_juris_pba40_basisTest - capacities_juris_pba40_basisTest = pd.merge(left=capacities_juris_pba40_basisTest, - right=dev_type_metric_juris, - how="left", - on=["juris_zmod","variable"]) + capacities_juris_pba40_basisTest = pd.merge( + left=capacities_juris_pba40_basisTest, + right=dev_type_metric_juris, + how="left", + on=["juris_zmod", "variable"], + ) # for development building type codes, also look at how this affects allowed res or nonres - for devtype in ["res","nonres"]: - if devtype == "res" and test_attr not in dev_capacity_calculation_module.RES_BUILDING_TYPE_CODES: - continue # not relevant - - if devtype == "nonres" and test_attr not in dev_capacity_calculation_module.NONRES_BUILDING_TYPE_CODES: - continue # not relevant + for devtype in ["res", "nonres"]: + if ( + devtype == "res" + and test_attr + not in dev_capacity_calculation_module.RES_BUILDING_TYPE_CODES + ): + continue # not relevant + + if ( + devtype == "nonres" + and test_attr + not in dev_capacity_calculation_module.NONRES_BUILDING_TYPE_CODES + 
): + continue # not relevant # convert to "T"/"F" - dev_type_metric["allow_pba40" ] = dev_type_metric["allow_"+devtype+"_pba40" ] > 0 - dev_type_metric["allow_urbansim"] = dev_type_metric["allow_"+devtype+"_urbansim"] > 0 - dev_type_metric.replace(to_replace={"allow_pba40" :{True:"T", False:"F"}, - "allow_urbansim":{True:"T", False:"F"}}, inplace=True) + dev_type_metric["allow_pba40"] = ( + dev_type_metric["allow_" + devtype + "_pba40"] > 0 + ) + dev_type_metric["allow_urbansim"] = ( + dev_type_metric["allow_" + devtype + "_urbansim"] > 0 + ) + dev_type_metric.replace( + to_replace={ + "allow_pba40": {True: "T", False: "F"}, + "allow_urbansim": {True: "T", False: "F"}, + }, + inplace=True, + ) # combine into one column - dev_type_metric["allow_pba40_basis"] = \ - dev_type_metric["allow_pba40" ].astype(str) + "_" + \ - dev_type_metric["allow_urbansim"].astype(str) + dev_type_metric["allow_pba40_basis"] = ( + dev_type_metric["allow_pba40"].astype(str) + + "_" + + dev_type_metric["allow_urbansim"].astype(str) + ) # aggregate to jurisdiction - allow_juris = dev_type_metric.groupby(['juris_zmod',"allow_pba40_basis"]).agg({'num_parcels':'sum', 'ACRES':'sum'}).reset_index() + allow_juris = ( + dev_type_metric.groupby(["juris_zmod", "allow_pba40_basis"]) + .agg({"num_parcels": "sum", "ACRES": "sum"}) + .reset_index() + ) logger.debug("allow_juris.head(20):\n{}".format(allow_juris.head(20))) # pivot so one row per jurisdiction - allow_juris = allow_juris.pivot(index="juris_zmod", columns="allow_pba40_basis", values=["num_parcels", "ACRES"]) + allow_juris = allow_juris.pivot( + index="juris_zmod", + columns="allow_pba40_basis", + values=["num_parcels", "ACRES"], + ) # rename columns so they're not tuples; they'll look like "allow_res num_parcels T_F" # NOTE: for T_F etc, the convention is pba40_basis - allow_juris.columns = ["allow_"+devtype+" " + col[0] + " " + col[1] for col in allow_juris.columns.values] + allow_juris.columns = [ + "allow_" + devtype + " " + col[0] + " " + col[1] + for col in allow_juris.columns.values + ] allow_juris.reset_index(inplace=True) allow_juris.fillna(value=0, inplace=True) allow_juris["variable"] = test_attr logger.debug("allow_juris.head():\n{}".format(allow_juris.head())) # add to capacities_juris_pba40_basisTest - capacities_juris_pba40_basisTest = pd.merge(left=capacities_juris_pba40_basisTest, - right=allow_juris, - how="left", - on=["juris_zmod","variable"]) - - logger.debug("capacities_juris_pba40_basisTest.head():\n{}".format(capacities_juris_pba40_basisTest.head())) + capacities_juris_pba40_basisTest = pd.merge( + left=capacities_juris_pba40_basisTest, + right=allow_juris, + how="left", + on=["juris_zmod", "variable"], + ) + + logger.debug( + "capacities_juris_pba40_basisTest.head():\n{}".format( + capacities_juris_pba40_basisTest.head() + ) + ) # add to the full set - capacity_metrics = pd.concat([capacity_metrics, capacities_juris_pba40_basisTest], axis="index", sort=True) - + capacity_metrics = pd.concat( + [capacity_metrics, capacities_juris_pba40_basisTest], + axis="index", + sort=True, + ) # bring juris_zmod and variable to the left to be more intuitive reorder_cols = list(capacity_metrics.columns.values) reorder_cols.remove("juris_zmod") reorder_cols.remove("variable") - reorder_cols = ["juris_zmod","variable"] + reorder_cols + reorder_cols = ["juris_zmod", "variable"] + reorder_cols capacity_metrics = capacity_metrics[reorder_cols] # write those capacity metrics out - capacity_metrics.to_csv(JURIS_CAPACITY_FILE,index = False) + 
capacity_metrics.to_csv(JURIS_CAPACITY_FILE, index=False) logger.info("Wrote {}".format(JURIS_CAPACITY_FILE)) diff --git a/policies/plu/3_create_heuristic_hybrid_index.py b/policies/plu/3_create_heuristic_hybrid_index.py index dcef984..e10069f 100644 --- a/policies/plu/3_create_heuristic_hybrid_index.py +++ b/policies/plu/3_create_heuristic_hybrid_index.py @@ -1,4 +1,4 @@ -USAGE=""" +USAGE = """ Creates the heuristic hybrid index given a threshold argument. @@ -11,26 +11,45 @@ import dev_capacity_calculation_module -if os.getenv('USERNAME') =='ywang': - M_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint\\Base zoning' - GITHUB_PETRALE_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\petrale\\'.format(os.getenv('USERNAME')) -elif os.getenv('USERNAME') =='lzorn': - M_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint\\Base zoning' - GITHUB_PETRALE_DIR = 'X:\\petrale\\'.format(os.getenv('USERNAME')) +if os.getenv("USERNAME") == "ywang": + M_DIR = "M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint\\Base zoning" + GITHUB_PETRALE_DIR = "C:\\Users\\{}\\Documents\\GitHub\\petrale\\".format( + os.getenv("USERNAME") + ) +elif os.getenv("USERNAME") == "lzorn": + M_DIR = "M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint\\Base zoning" + GITHUB_PETRALE_DIR = "X:\\petrale\\".format(os.getenv("USERNAME")) # input file locations -PLU_BOC_M_DIR = os.path.join(M_DIR, 'outputs') -JURIS_CAPACITY_FILE = os.path.join(PLU_BOC_M_DIR, '2020_06_03_juris_basis_pba40_capacity_metrics.csv') +PLU_BOC_M_DIR = os.path.join(M_DIR, "outputs") +JURIS_CAPACITY_FILE = os.path.join( + PLU_BOC_M_DIR, "2020_06_03_juris_basis_pba40_capacity_metrics.csv" +) # output file -OUTPUT_FILE = os.path.join(GITHUB_PETRALE_DIR, 'policies\\plu\\base_zoning\\hybrid_index', 'idx_urbansim_heuristic.csv') -LOG_FILE = os.path.join(GITHUB_PETRALE_DIR, 'policies\\plu\\base_zoning\\hybrid_index', 'idx_urbansim_heuristic.log') - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) - parser.add_argument("threshold", type=float, help="Threshold for capacity metric percentage change used to accept BASIS for a jurisdiction; should be between 0.0 and 1.0") +OUTPUT_FILE = os.path.join( + GITHUB_PETRALE_DIR, + "policies\\plu\\base_zoning\\hybrid_index", + "idx_urbansim_heuristic.csv", +) +LOG_FILE = os.path.join( + GITHUB_PETRALE_DIR, + "policies\\plu\\base_zoning\\hybrid_index", + "idx_urbansim_heuristic.log", +) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "threshold", + type=float, + help="Threshold for capacity metric percentage change used to accept BASIS for a jurisdiction; should be between 0.0 and 1.0", + ) args = parser.parse_args() if args.threshold <= 0 or args.threshold >= 1.0: @@ -39,17 +58,25 @@ # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S 
%p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("JURIS_CAPACITY_FILE = {}".format(JURIS_CAPACITY_FILE)) @@ -57,31 +84,51 @@ # Read jurisdiction capacity metrics capacity_juris_pba40_basis = pd.read_csv(JURIS_CAPACITY_FILE) - logger.info("Read {} lines from {}; head:\n{}".format(len(capacity_juris_pba40_basis), JURIS_CAPACITY_FILE, capacity_juris_pba40_basis.head())) + logger.info( + "Read {} lines from {}; head:\n{}".format( + len(capacity_juris_pba40_basis), + JURIS_CAPACITY_FILE, + capacity_juris_pba40_basis.head(), + ) + ) logger.debug("dtypes:\n{}".format(capacity_juris_pba40_basis.dtypes)) # pull jurisdictions to start the index dataframe we're building index_df = capacity_juris_pba40_basis[["juris_zmod"]].drop_duplicates() logger.debug("Have {} unique jurisdictions".format(len(index_df))) - # intensity variables first - for variable in dev_capacity_calculation_module.INTENSITY_CODES + dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: + for variable in ( + dev_capacity_calculation_module.INTENSITY_CODES + + dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + ): # does it affect residential? is_res = False - if variable in ["dua","height"]+dev_capacity_calculation_module.RES_BUILDING_TYPE_CODES: + if ( + variable + in ["dua", "height"] + + dev_capacity_calculation_module.RES_BUILDING_TYPE_CODES + ): is_res = True # does it affect non-residential? - # Note: it can be both res and non-res. + # Note: it can be both res and non-res. # Also, strictly speaking, height doesn't really affect either since it affects # the imputation of dua and far, so this will effectively turn on BASIS for height is_nonres = False - if variable in ["far","height"]+dev_capacity_calculation_module.NONRES_BUILDING_TYPE_CODES: + if ( + variable + in ["far", "height"] + + dev_capacity_calculation_module.NONRES_BUILDING_TYPE_CODES + ): is_nonres = True - logger.info("Setting hybrid index for variable {:10} res? {:5} nonres? {:5}".format(variable, is_res, is_nonres)) + logger.info( + "Setting hybrid index for variable {:10} res? {:5} nonres? 
{:5}".format( + variable, is_res, is_nonres + ) + ) # variable index name - for allowed development types, it just has a suffix "_idx" variable_idx = "{}_idx".format(variable) @@ -93,37 +140,75 @@ index_df["proportion_adj_{}".format(variable)] = 1.0 # pull the select rows from capacity_juris_pba40_basis relevant for this variable - capacity_juris_var = capacity_juris_pba40_basis.loc[ capacity_juris_pba40_basis['variable'] == variable, ].copy() + capacity_juris_var = capacity_juris_pba40_basis.loc[ + capacity_juris_pba40_basis["variable"] == variable, + ].copy() # default to PBA40 capacity_juris_var[variable_idx] = dev_capacity_calculation_module.USE_PBA40 # for variables that are res and nonres, require units AND sqft to be within threshold if is_res and is_nonres: - capacity_juris_var.loc[ ((abs(capacity_juris_var.units_basis - capacity_juris_var.units_pba40) / capacity_juris_var.units_pba40) <= args.threshold) & - ((abs(capacity_juris_var.Ksqft_basis - capacity_juris_var.Ksqft_pba40) / capacity_juris_var.Ksqft_pba40) <= args.threshold), - variable_idx ] = dev_capacity_calculation_module.USE_BASIS + capacity_juris_var.loc[ + ( + ( + abs( + capacity_juris_var.units_basis + - capacity_juris_var.units_pba40 + ) + / capacity_juris_var.units_pba40 + ) + <= args.threshold + ) + & ( + ( + abs( + capacity_juris_var.Ksqft_basis + - capacity_juris_var.Ksqft_pba40 + ) + / capacity_juris_var.Ksqft_pba40 + ) + <= args.threshold + ), + variable_idx, + ] = dev_capacity_calculation_module.USE_BASIS # for res variables, require units to be within threshold elif is_res: - capacity_juris_var.loc[ (abs(capacity_juris_var.units_basis - capacity_juris_var.units_pba40) / capacity_juris_var.units_pba40) <= args.threshold, - variable_idx ] = dev_capacity_calculation_module.USE_BASIS + capacity_juris_var.loc[ + ( + abs(capacity_juris_var.units_basis - capacity_juris_var.units_pba40) + / capacity_juris_var.units_pba40 + ) + <= args.threshold, + variable_idx, + ] = dev_capacity_calculation_module.USE_BASIS # for nonres variables, require sqft to be within threshold elif is_nonres: - capacity_juris_var.loc[ (abs(capacity_juris_var.Ksqft_basis - capacity_juris_var.Ksqft_pba40) / capacity_juris_var.Ksqft_pba40) <= args.threshold, - variable_idx ] = dev_capacity_calculation_module.USE_BASIS + capacity_juris_var.loc[ + ( + abs(capacity_juris_var.Ksqft_basis - capacity_juris_var.Ksqft_pba40) + / capacity_juris_var.Ksqft_pba40 + ) + <= args.threshold, + variable_idx, + ] = dev_capacity_calculation_module.USE_BASIS # bring into index_df - index_df = pd.merge(left=index_df, right=capacity_juris_var[["juris_zmod",variable_idx]]) + index_df = pd.merge( + left=index_df, right=capacity_juris_var[["juris_zmod", variable_idx]] + ) # report out number of BASIS jurisdictions for each variable # these should match the tableau - logger.info("Number of jurisdictions using BASIS variable:\n{}".format(index_df.sum())) + logger.info( + "Number of jurisdictions using BASIS variable:\n{}".format(index_df.sum()) + ) # rename jurisdiction - index_df.rename(columns = {'juris_zmod': 'juris_name'}, inplace = True) + index_df.rename(columns={"juris_zmod": "juris_name"}, inplace=True) # save it - index_df.to_csv(OUTPUT_FILE, index = False) - logger.info('Wrote {}'.format(OUTPUT_FILE)) + index_df.to_csv(OUTPUT_FILE, index=False) + logger.info("Wrote {}".format(OUTPUT_FILE)) diff --git a/policies/plu/4_create_hybrid_urbansim_input.py b/policies/plu/4_create_hybrid_urbansim_input.py index 2ec6f54..64de499 100644 --- 
a/policies/plu/4_create_hybrid_urbansim_input.py +++ b/policies/plu/4_create_hybrid_urbansim_input.py @@ -1,4 +1,4 @@ -USAGE=""" +USAGE = """ Given a hybrid configuration index, generates UrbanSim inpput files. @@ -14,65 +14,99 @@ import argparse, os, logging, sys, time import dev_capacity_calculation_module -today = time.strftime('%Y_%m_%d') +today = time.strftime("%Y_%m_%d") -if os.getenv('USERNAME') =='ywang': - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - M_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50' - GITHUB_PETRALE_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\petrale\\'.format(os.getenv('USERNAME')) -elif os.getenv('USERNAME') =='lzorn': - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - M_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50' - GITHUB_PETRALE_DIR = 'X:\\petrale' +if os.getenv("USERNAME") == "ywang": + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + M_DIR = "M:\\Data\\Urban\\BAUS\\PBA50" + GITHUB_PETRALE_DIR = "C:\\Users\\{}\\Documents\\GitHub\\petrale\\".format( + os.getenv("USERNAME") + ) +elif os.getenv("USERNAME") == "lzorn": + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + M_DIR = "M:\\Data\\Urban\\BAUS\\PBA50" + GITHUB_PETRALE_DIR = "X:\\petrale" # input file locations -PREV_ZONING_PARCELS_FILE = os.path.join(M_DIR, 'Horizon', 'Large General Input Data', '2015_12_21_zoning_parcels.csv') -PLU_BOC_M_DIR = os.path.join(M_DIR, 'Final_Blueprint', 'Base zoning', 'output') -PLU_BOC_FILE = os.path.join(PLU_BOC_M_DIR, '2020_10_20_p10_plu_boc_allAttrs.csv') -HYBRID_INDEX_DIR = os.path.join(GITHUB_PETRALE_DIR, 'policies\\plu\\hybrid_index') -PARCEL_UGB_TYPE_FILE = os.path.join(M_DIR, 'Final_Blueprint', 'Large General Input Data', '2020_11_10_parcels_geography.csv') +PREV_ZONING_PARCELS_FILE = os.path.join( + M_DIR, "Horizon", "Large General Input Data", "2015_12_21_zoning_parcels.csv" +) +PLU_BOC_M_DIR = os.path.join(M_DIR, "Final_Blueprint", "Base zoning", "output") +PLU_BOC_FILE = os.path.join(PLU_BOC_M_DIR, "2020_10_20_p10_plu_boc_allAttrs.csv") +HYBRID_INDEX_DIR = os.path.join(GITHUB_PETRALE_DIR, "policies\\plu\\hybrid_index") +PARCEL_UGB_TYPE_FILE = os.path.join( + M_DIR, + "Final_Blueprint", + "Large General Input Data", + "2020_11_10_parcels_geography.csv", +) # TODO: change to idx_urbansim.csv when we have one -HYBRID_INDEX_FILE = os.path.join(HYBRID_INDEX_DIR, "idx_urbansim_fb_11052020.csv") +HYBRID_INDEX_FILE = os.path.join(HYBRID_INDEX_DIR, "idx_urbansim_fb_11052020.csv") # output file locations -HYBRID_ZONING_OUTPUT_DIR = os.path.join(BOX_DIR, 'Policies\\Base zoning\\outputs\\hybrid_base_zoning') +HYBRID_ZONING_OUTPUT_DIR = os.path.join( + BOX_DIR, "Policies\\Base zoning\\outputs\\hybrid_base_zoning" +) # In test mode (specified by --test), outputs to cwd and without date prefix; otherwise, outputs to HYBRID_ZONING_OUTPUT_DIR with date prefix -HYBRID_PARCELS_FILE = 'p10_plu_boc_hybrid.csv' -LOG_FILE = "create_hybrid_urbansim_input.log" +HYBRID_PARCELS_FILE = "p10_plu_boc_hybrid.csv" +LOG_FILE = "create_hybrid_urbansim_input.log" # UrbanSim Inputs -ZONING_PARCELS_FILE = 'zoning_parcels_hybrid_pba50.csv' -ZONING_LOOKUP_FILE = 'zoning_lookup_hybrid_pba50.csv' +ZONING_PARCELS_FILE = "zoning_parcels_hybrid_pba50.csv" +ZONING_LOOKUP_FILE = 
"zoning_lookup_hybrid_pba50.csv" -if __name__ == '__main__': +if __name__ == "__main__": - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) parser.add_argument("--test", action="store_true", help="Test mode") args = parser.parse_args() if args.test == False: - LOG_FILE = os.path.join(HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, LOG_FILE)) - HYBRID_PARCELS_FILE = os.path.join(HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, HYBRID_PARCELS_FILE)) - ZONING_PARCELS_FILE = os.path.join(HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, ZONING_PARCELS_FILE)) - ZONING_LOOKUP_FILE = os.path.join(HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, ZONING_LOOKUP_FILE)) - - pd.set_option('max_columns', 200) - pd.set_option('display.width', 200) - - # create logger + LOG_FILE = os.path.join( + HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, LOG_FILE) + ) + HYBRID_PARCELS_FILE = os.path.join( + HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, HYBRID_PARCELS_FILE) + ) + ZONING_PARCELS_FILE = os.path.join( + HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, ZONING_PARCELS_FILE) + ) + ZONING_LOOKUP_FILE = os.path.join( + HYBRID_ZONING_OUTPUT_DIR, "{}_{}".format(today, ZONING_LOOKUP_FILE) + ) + + pd.set_option("max_columns", 200) + pd.set_option("display.width", 200) + + # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) plu_boc = pd.read_csv(PLU_BOC_FILE) @@ -84,146 +118,253 @@ logger.debug("head:\n{}".format(hybrid_idx.head())) # create the parcel data set with hybrid index attached - plu_boc_hybrid = dev_capacity_calculation_module.create_hybrid_parcel_data_from_juris_idx(logger, plu_boc, hybrid_idx) - plu_boc_hybrid = pd.merge(left=plu_boc, right=plu_boc_hybrid, how="left", on=['PARCEL_ID', 'juris_zmod']) - + plu_boc_hybrid = ( + dev_capacity_calculation_module.create_hybrid_parcel_data_from_juris_idx( + logger, plu_boc, hybrid_idx + ) + ) + plu_boc_hybrid = pd.merge( + left=plu_boc, right=plu_boc_hybrid, how="left", on=["PARCEL_ID", "juris_zmod"] + ) + # calculate 'allow_res' and 'allow_nonres' based on the allowable development type - allowed_urbansim = dev_capacity_calculation_module.set_allow_dev_type(plu_boc_hybrid,'urbansim') - - plu_boc_hybrid = pd.merge(left = plu_boc_hybrid, - right= allowed_urbansim, - on = 'PARCEL_ID', - how = 'left') + allowed_urbansim = dev_capacity_calculation_module.set_allow_dev_type( + plu_boc_hybrid, "urbansim" + ) + + plu_boc_hybrid = pd.merge( + left=plu_boc_hybrid, right=allowed_urbansim, on="PARCEL_ID", how="left" + ) # force max_dua_urbansim to 0 to avoid over-estimating max_dua during 
imputation - # two options: + # two options: # option 1: for parcels outside of UGB and don't allow residential development, force max_dua_urbansim to 0 # option 2: for parcels that don't allow residential development, force max_dua_urbansim to 0 - parcel_UBG_type = pd.read_csv(PARCEL_UGB_TYPE_FILE, - usecols = ['PARCEL_ID', 'fbp_exp2020_id']) - logger.info('Read {} lines from {}'.format(len(parcel_UBG_type), PARCEL_UGB_TYPE_FILE)) - - parcel_UBG_type['UGB_type'] = 'outside UGB' - parcel_UBG_type.loc[(parcel_UBG_type.fbp_exp2020_id == 'in') | (parcel_UBG_type.fbp_exp2020_id == 'inun'), - 'UGB_type'] = 'inside UGB' + parcel_UBG_type = pd.read_csv( + PARCEL_UGB_TYPE_FILE, usecols=["PARCEL_ID", "fbp_exp2020_id"] + ) + logger.info( + "Read {} lines from {}".format(len(parcel_UBG_type), PARCEL_UGB_TYPE_FILE) + ) + + parcel_UBG_type["UGB_type"] = "outside UGB" + parcel_UBG_type.loc[ + (parcel_UBG_type.fbp_exp2020_id == "in") + | (parcel_UBG_type.fbp_exp2020_id == "inun"), + "UGB_type", + ] = "inside UGB" logger.info(parcel_UBG_type.UGB_type.value_counts()) - plu_boc_hybrid = pd.merge(left = plu_boc_hybrid, - right= parcel_UBG_type, - on = 'PARCEL_ID', - how = 'left') + plu_boc_hybrid = pd.merge( + left=plu_boc_hybrid, right=parcel_UBG_type, on="PARCEL_ID", how="left" + ) # option 1 # plu_boc_hybrid.loc[(plu_boc_hybrid.UGB_type == 'outside UGB') & (plu_boc_hybrid.allow_res_urbansim == 0), # 'max_dua_urbansim'] = 0 # option 2 - plu_boc_hybrid.loc[plu_boc_hybrid.allow_res_urbansim == 0, - 'max_dua_urbansim'] = 0 + plu_boc_hybrid.loc[plu_boc_hybrid.allow_res_urbansim == 0, "max_dua_urbansim"] = 0 - logger.info('Saving hybrid zoning for {} parcels to {}'.format(len(plu_boc_hybrid),HYBRID_PARCELS_FILE)) - logger.debug('plu_boc_hybrid dtypes:\n{}'.format(plu_boc_hybrid.dtypes)) - plu_boc_hybrid.to_csv(HYBRID_PARCELS_FILE,index = False) + logger.info( + "Saving hybrid zoning for {} parcels to {}".format( + len(plu_boc_hybrid), HYBRID_PARCELS_FILE + ) + ) + logger.debug("plu_boc_hybrid dtypes:\n{}".format(plu_boc_hybrid.dtypes)) + plu_boc_hybrid.to_csv(HYBRID_PARCELS_FILE, index=False) - logger.info('Create BAUS base zoning input files:') + logger.info("Create BAUS base zoning input files:") # select hybrid fields - plu_boc_urbansim_cols = ['PARCEL_ID','geom_id','county_id','county_name', 'juris_zmod', 'jurisdiction_id', 'ACRES', - 'fbpzoningm_zmod','nodev_zmod','name_pba40','plu_code_basis'] + \ - ['{}_urbansim'.format(devType) for devType in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES] + \ - ['max_{}_urbansim'.format(intensity) for intensity in dev_capacity_calculation_module.INTENSITY_CODES] + plu_boc_urbansim_cols = ( + [ + "PARCEL_ID", + "geom_id", + "county_id", + "county_name", + "juris_zmod", + "jurisdiction_id", + "ACRES", + "fbpzoningm_zmod", + "nodev_zmod", + "name_pba40", + "plu_code_basis", + ] + + [ + "{}_urbansim".format(devType) + for devType in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + ] + + [ + "max_{}_urbansim".format(intensity) + for intensity in dev_capacity_calculation_module.INTENSITY_CODES + ] + ) plu_boc_urbansim = plu_boc_hybrid[plu_boc_urbansim_cols].copy() # rename the fields to remove '_urbansim' rename_cols = {} for col in plu_boc_urbansim_cols: - if col.endswith('_urbansim'): + if col.endswith("_urbansim"): rename_cols[col] = col[:-9] plu_boc_urbansim.rename(columns=rename_cols, inplace=True) # convert allowed types to integer for attr in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: plu_boc_urbansim[attr] = 
plu_boc_urbansim[attr].fillna(-1).astype(int) - plu_boc_urbansim.replace({-1: None}, inplace = True) + plu_boc_urbansim.replace({-1: None}, inplace=True) # create zoning_lookup table with unique jurisdiction and zoning attributes - zoning_lookup_pba50 = plu_boc_urbansim[['county_name','juris_zmod'] + \ - dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + \ - ['max_dua','max_far','max_height']].drop_duplicates() + zoning_lookup_pba50 = plu_boc_urbansim[ + ["county_name", "juris_zmod"] + + dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + + ["max_dua", "max_far", "max_height"] + ].drop_duplicates() # sort zoning type by county and jurisdiction and assign zoning_id - zoning_lookup_pba50.sort_values(by=['county_name', 'juris_zmod'], inplace = True) - zoning_lookup_pba50['zoning_id_pba50'] = range(1,len(zoning_lookup_pba50) + 1) - logger.info('Zoning lookup table has {} unique types (juris + zoning attributes), header:'.format(len(zoning_lookup_pba50))) + zoning_lookup_pba50.sort_values(by=["county_name", "juris_zmod"], inplace=True) + zoning_lookup_pba50["zoning_id_pba50"] = range(1, len(zoning_lookup_pba50) + 1) + logger.info( + "Zoning lookup table has {} unique types (juris + zoning attributes), header:".format( + len(zoning_lookup_pba50) + ) + ) logger.info(zoning_lookup_pba50.head()) - - # create zoning_parcels file and attach zoning_id - plu_boc_urbansim_ID = plu_boc_urbansim.merge(zoning_lookup_pba50, - on = list(zoning_lookup_pba50)[:-1], - how = 'left') - zoning_parcels_pba50 = plu_boc_urbansim_ID[['PARCEL_ID','geom_id','juris_zmod','jurisdiction_id','zoning_id_pba50','nodev_zmod']] - # bring into other attributes from previous parcel zoning file - zoning_parcels_prev = pd.read_csv(PREV_ZONING_PARCELS_FILE, usecols = ['geom_id','prop']) - zoning_parcels_pba50 = zoning_parcels_pba50.merge(zoning_parcels_prev, on = 'geom_id', how = 'left') + # create zoning_parcels file and attach zoning_id + plu_boc_urbansim_ID = plu_boc_urbansim.merge( + zoning_lookup_pba50, on=list(zoning_lookup_pba50)[:-1], how="left" + ) + zoning_parcels_pba50 = plu_boc_urbansim_ID[ + [ + "PARCEL_ID", + "geom_id", + "juris_zmod", + "jurisdiction_id", + "zoning_id_pba50", + "nodev_zmod", + ] + ] + # bring into other attributes from previous parcel zoning file + zoning_parcels_prev = pd.read_csv( + PREV_ZONING_PARCELS_FILE, usecols=["geom_id", "prop"] + ) + zoning_parcels_pba50 = zoning_parcels_pba50.merge( + zoning_parcels_prev, on="geom_id", how="left" + ) ## assign zoning name to each zoning_id based on the most frequent occurance of zoning name among all the parcels with the same zoning_id - zoning_names = plu_boc_urbansim[['PARCEL_ID','name_pba40','plu_code_basis']] + zoning_names = plu_boc_urbansim[["PARCEL_ID", "name_pba40", "plu_code_basis"]] # merge zoning names of pba40 and BASIS into zoning_parcels - zoning_names['name_pba40'] = zoning_names['name_pba40'].apply(lambda x: str(x)+'_pba40') - zoning_names['plu_code_basis'] = zoning_names['plu_code_basis'].apply(lambda x: str(x)+'_basis') - parcel_zoning_names = zoning_parcels_pba50[['PARCEL_ID','zoning_id_pba50']].merge(zoning_names, - on = 'PARCEL_ID', - how = 'left') + zoning_names["name_pba40"] = zoning_names["name_pba40"].apply( + lambda x: str(x) + "_pba40" + ) + zoning_names["plu_code_basis"] = zoning_names["plu_code_basis"].apply( + lambda x: str(x) + "_basis" + ) + parcel_zoning_names = zoning_parcels_pba50[["PARCEL_ID", "zoning_id_pba50"]].merge( + zoning_names, on="PARCEL_ID", how="left" + ) # use name_pba40 as the default for 
pab50 zoning name, unless it is null, then use basis zoning name - parcel_zoning_names['zoning_name_pba50'] = zoning_names['name_pba40'] + parcel_zoning_names["zoning_name_pba50"] = zoning_names["name_pba40"] name_null_index = parcel_zoning_names.name_pba40.isnull() - parcel_zoning_names.loc[name_null_index,'zoning_name_pba50'] = parcel_zoning_names.loc[name_null_index,'plu_code_basis'] + parcel_zoning_names.loc[ + name_null_index, "zoning_name_pba50" + ] = parcel_zoning_names.loc[name_null_index, "plu_code_basis"] # find the most frenquent zoning name of each zoning_id - name_by_zone = parcel_zoning_names[['zoning_id_pba50','zoning_name_pba50']].groupby(['zoning_id_pba50']).agg(lambda x:x.value_counts().index[0]).reset_index() - zoning_lookup_pba50 = zoning_lookup_pba50.merge(name_by_zone, - on = 'zoning_id_pba50', - how = 'left') + name_by_zone = ( + parcel_zoning_names[["zoning_id_pba50", "zoning_name_pba50"]] + .groupby(["zoning_id_pba50"]) + .agg(lambda x: x.value_counts().index[0]) + .reset_index() + ) + zoning_lookup_pba50 = zoning_lookup_pba50.merge( + name_by_zone, on="zoning_id_pba50", how="left" + ) # attach zoning name to the zoning lookup table - zoning_lookup_pba50 = zoning_lookup_pba50[['zoning_id_pba50','juris_zmod','zoning_name_pba50','max_dua','max_far','max_height'] + \ - dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES] + zoning_lookup_pba50 = zoning_lookup_pba50[ + [ + "zoning_id_pba50", + "juris_zmod", + "zoning_name_pba50", + "max_dua", + "max_far", + "max_height", + ] + + dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + ] # change field names to be consistent with the previous version - zoning_lookup_pba50.rename(columns = {'zoning_id_pba50' :'id', - 'juris_zmod' :'juris', - 'zoning_name_pba50':'name'}, inplace = True) - logger.info('zoning_lookup has {} unique zoning_ids; zoning_lookup table header:'.format(len(zoning_lookup_pba50))) - logger.info(zoning_lookup_pba50.head()) + zoning_lookup_pba50.rename( + columns={ + "zoning_id_pba50": "id", + "juris_zmod": "juris", + "zoning_name_pba50": "name", + }, + inplace=True, + ) + logger.info( + "zoning_lookup has {} unique zoning_ids; zoning_lookup table header:".format( + len(zoning_lookup_pba50) + ) + ) + logger.info(zoning_lookup_pba50.head()) # lastly, append zone name to zoning_parcel - zoning_parcels_pba50 = zoning_parcels_pba50.merge(zoning_lookup_pba50[['id','name']], - left_on = 'zoning_id_pba50', - right_on = 'id', - how = 'left') + zoning_parcels_pba50 = zoning_parcels_pba50.merge( + zoning_lookup_pba50[["id", "name"]], + left_on="zoning_id_pba50", + right_on="id", + how="left", + ) # rename fields to be consistent with the model - zoning_parcels_pba50.rename(columns = {'juris_zmod' : 'juris_id', - 'zoning_id_pba50': 'zoning_id', - 'jurisdiction_id': 'juris', - 'nodev_zmod' : 'nodev', - 'name' : 'zoning'}, inplace = True) + zoning_parcels_pba50.rename( + columns={ + "juris_zmod": "juris_id", + "zoning_id_pba50": "zoning_id", + "jurisdiction_id": "juris", + "nodev_zmod": "nodev", + "name": "zoning", + }, + inplace=True, + ) # remove lines with null geom_id - logger.info('zoning_parcels_pba50 is length {} but has {} rows with null geom_id; removing'.format( - len(zoning_parcels_pba50), - len(zoning_parcels_pba50.loc[pd.isnull(zoning_parcels_pba50['geom_id'])]))) - zoning_parcels_pba50 = zoning_parcels_pba50.loc[pd.notnull(zoning_parcels_pba50['geom_id']),] - logger.info('zoning_parcels_pba50 is length {}'.format(len(zoning_parcels_pba50))) - + logger.info( + "zoning_parcels_pba50 is 
length {} but has {} rows with null geom_id; removing".format( + len(zoning_parcels_pba50), + len(zoning_parcels_pba50.loc[pd.isnull(zoning_parcels_pba50["geom_id"])]), + ) + ) + zoning_parcels_pba50 = zoning_parcels_pba50.loc[ + pd.notnull(zoning_parcels_pba50["geom_id"]), + ] + logger.info("zoning_parcels_pba50 is length {}".format(len(zoning_parcels_pba50))) + # so we can convert geom_id to int - zoning_parcels_pba50['geom_id'] = zoning_parcels_pba50['geom_id'].round(0).astype(np.int64) + zoning_parcels_pba50["geom_id"] = ( + zoning_parcels_pba50["geom_id"].round(0).astype(np.int64) + ) - logger.info('zoning_parcels_pba50 has {} records; table head:\n{}'.format(len(zoning_parcels_pba50), zoning_parcels_pba50.head())) + logger.info( + "zoning_parcels_pba50 has {} records; table head:\n{}".format( + len(zoning_parcels_pba50), zoning_parcels_pba50.head() + ) + ) # export - logger.info('Export zoning_lookup table with the following attributes: {}'.format(zoning_lookup_pba50.dtypes)) - zoning_parcels_pba50.to_csv(ZONING_PARCELS_FILE,index = False) - - logger.info('Export zoning_parcels table with the following attributes: {}'.format(zoning_parcels_pba50.dtypes)) - zoning_lookup_pba50.to_csv(ZONING_LOOKUP_FILE,index = False) + logger.info( + "Export zoning_lookup table with the following attributes: {}".format( + zoning_lookup_pba50.dtypes + ) + ) + zoning_parcels_pba50.to_csv(ZONING_PARCELS_FILE, index=False) + + logger.info( + "Export zoning_parcels table with the following attributes: {}".format( + zoning_parcels_pba50.dtypes + ) + ) + zoning_lookup_pba50.to_csv(ZONING_LOOKUP_FILE, index=False) diff --git a/policies/plu/calculate_upzoning_capacity.py b/policies/plu/calculate_upzoning_capacity.py index 7f0ec47..b7611a1 100644 --- a/policies/plu/calculate_upzoning_capacity.py +++ b/policies/plu/calculate_upzoning_capacity.py @@ -1,4 +1,4 @@ -USAGE=""" +USAGE = """ Given a set of base zoning (parcel-level) and zoning_mods (representing one upzoning scheme) data, calculate the raw and net development capacity under the upzoning scheme. 
@@ -25,420 +25,638 @@ import dev_capacity_calculation_module NOW = time.strftime("%Y_%m%d_%H%M") -today = time.strftime('%Y_%m_%d') +today = time.strftime("%Y_%m_%d") -if os.getenv('USERNAME') =='ywang': - M_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50\\Final_Blueprint' - M_SMELT_DIR = 'M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12' - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) +if os.getenv("USERNAME") == "ywang": + M_DIR = "M:\\Data\\Urban\\BAUS\\PBA50\\Final_Blueprint" + M_SMELT_DIR = "M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12" + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) # GITHUB_URBANSIM_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim\\data'.format(os.getenv('USERNAME')) -elif os.getenv('USERNAME') =='lzorn': - M_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50\\Final_Blueprint' - M_SMELT_DIR = 'M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12' - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) +elif os.getenv("USERNAME") == "lzorn": + M_DIR = "M:\\Data\\Urban\\BAUS\\PBA50\\Final_Blueprint" + M_SMELT_DIR = "M:\\Data\\GIS layers\\UrbanSim smelt\\2020 03 12" + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) # GITHUB_URBANSIM_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim\\data'.format(os.getenv('USERNAME')) # raw and net development capacity metrics -RAW_CAPACITY_CODES = ['zoned_du', 'zoned_Ksqft', 'job_spaces'] -NET_CAPACITY_CODES = ['zoned_du_vacant', 'zoned_Ksqft_vacant', 'job_spaces_vacant', - 'zoned_du_underbuild', 'zoned_Ksqft_underbuild', 'job_spaces_underbuild', - 'zoned_du_underbuild_noProt', 'zoned_Ksqft_underbuild_noProt', 'job_spaces_underbuild_noProt'] -BAUS_CAPACITY_CODES = ['residential_units', - 'job_spaces', - 'non_residential_sqft', - 'zoned_du_underbuild', - 'zoned_du', - 'zoned_du_underbuild_nodev', - 'totemp'] - - -def apply_upzoning_to_parcel_data(logger, parcel_basezoning_original, - upzoning_scenario, upzoning_version): +RAW_CAPACITY_CODES = ["zoned_du", "zoned_Ksqft", "job_spaces"] +NET_CAPACITY_CODES = [ + "zoned_du_vacant", + "zoned_Ksqft_vacant", + "job_spaces_vacant", + "zoned_du_underbuild", + "zoned_Ksqft_underbuild", + "job_spaces_underbuild", + "zoned_du_underbuild_noProt", + "zoned_Ksqft_underbuild_noProt", + "job_spaces_underbuild_noProt", +] +BAUS_CAPACITY_CODES = [ + "residential_units", + "job_spaces", + "non_residential_sqft", + "zoned_du_underbuild", + "zoned_du", + "zoned_du_underbuild_nodev", + "totemp", +] + + +def apply_upzoning_to_parcel_data( + logger, parcel_basezoning_original, upzoning_scenario, upzoning_version +): """ Apply upzoning to parcels by adjusting the allowable development types and intensities. - * upzoning_scenario: version of zoning_mods for upzoning, e.g. 's20', 's21', 's22', + * upzoning_scenario: version of zoning_mods for upzoning, e.g. 's20', 's21', 's22', 's23' for Draft/Final Blueprint - Returns a dataframe with columns PARCEL_ID, juris_zmod, plus XX_upzoning for each allowed + Returns a dataframe with columns PARCEL_ID, juris_zmod, plus XX_upzoning for each allowed development type or intensity attribute. 
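+    The upzoning_version argument is the date string identifying which
+    zoning_mods_{scenario}_{version}.csv file to read.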
""" - - # Make a copy and add '_basezoning' to basezoning attributes parcel_basezoning = parcel_basezoning_original.copy() - # Read zoningmods lookup data and merge with parcels - zmods_upzoning_file = os.path.join(PBA50_ZONINGMODS_DIR, - 'zoning_mods_{}_{}.csv'.format(upzoning_scenario, upzoning_version)) + zmods_upzoning_file = os.path.join( + PBA50_ZONINGMODS_DIR, + "zoning_mods_{}_{}.csv".format(upzoning_scenario, upzoning_version), + ) if not os.path.exists(zmods_upzoning_file): - print('Error: file {} not found'.format(zmods_upzoning_file)) - raise - - use_cols = ['fbpzoningm','add_bldg', 'drop_bldg', - 'dua_up', 'far_up', 'dua_down', 'far_down', 'res_rent_cat'] - zmods_upzoning_df = pd.read_csv(zmods_upzoning_file, usecols = use_cols) + print("Error: file {} not found".format(zmods_upzoning_file)) + raise + + use_cols = [ + "fbpzoningm", + "add_bldg", + "drop_bldg", + "dua_up", + "far_up", + "dua_down", + "far_down", + "res_rent_cat", + ] + zmods_upzoning_df = pd.read_csv(zmods_upzoning_file, usecols=use_cols) # Merge upzoning with basezoning - parcel_basezoning_zoningmods = parcel_basezoning.merge(zmods_upzoning_df, - on = 'fbpzoningm', - how = 'left') + parcel_basezoning_zoningmods = parcel_basezoning.merge( + zmods_upzoning_df, on="fbpzoningm", how="left" + ) keep_cols = list(parcel_basezoning) # create allowed development type and intensity columns for upzoning # and default to base zoning for dev_type in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - parcel_basezoning_zoningmods["{}_{}".format(dev_type, upzoning_scenario)] = \ - parcel_basezoning_zoningmods["{}_basezoning".format(dev_type)] + parcel_basezoning_zoningmods[ + "{}_{}".format(dev_type, upzoning_scenario) + ] = parcel_basezoning_zoningmods["{}_basezoning".format(dev_type)] # keep the new column keep_cols.append("{}_{}".format(dev_type, upzoning_scenario)) for intensity in dev_capacity_calculation_module.INTENSITY_CODES: - parcel_basezoning_zoningmods["max_{}_{}".format(intensity, upzoning_scenario)] = \ - parcel_basezoning_zoningmods["max_{}_basezoning".format(intensity)] + parcel_basezoning_zoningmods[ + "max_{}_{}".format(intensity, upzoning_scenario) + ] = parcel_basezoning_zoningmods["max_{}_basezoning".format(intensity)] # keep the new column keep_cols.append("max_{}_{}".format(intensity, upzoning_scenario)) - # Get a list of development types that have modifications in pba50zoningmod add_bldg_types = list(zmods_upzoning_df.add_bldg.dropna().unique()) - logger.info('Development types enabled by upzoning:\n{}'.format(add_bldg_types)) + logger.info("Development types enabled by upzoning:\n{}".format(add_bldg_types)) drop_bldg_types = list(zmods_upzoning_df.drop_bldg.dropna().unique()) - logger.info('Development types disallowed by upzoning:\n{}'.format(drop_bldg_types)) - + logger.info("Development types disallowed by upzoning:\n{}".format(drop_bldg_types)) # Make a copy and then modify the alowed dev types - #zoning_modify_type = zoning_base_pba50.copy() + # zoning_modify_type = zoning_base_pba50.copy() if len(add_bldg_types) > 0: for devType in add_bldg_types: add_bldg_parcels = parcel_basezoning_zoningmods.add_bldg == devType - parcel_basezoning_zoningmods.loc[add_bldg_parcels, devType+'_'+upzoning_scenario] = 1 + parcel_basezoning_zoningmods.loc[ + add_bldg_parcels, devType + "_" + upzoning_scenario + ] = 1 if len(drop_bldg_types) > 0: for devType in drop_bldg_types: drop_bldg_parcels = parcel_basezoning_zoningmods.drop_bldg == devType - 
parcel_basezoning_zoningmods.loc[drop_bldg_parcels,devType+'_'+upzoning_scenario] = 0 + parcel_basezoning_zoningmods.loc[ + drop_bldg_parcels, devType + "_" + upzoning_scenario + ] = 0 # Compare allowed dev types before and after applying pba50zoningmod for devType in add_bldg_types + drop_bldg_types: - logger.info('Out of {:,} parcels: \n {:,} parcels allow {} before applying fbpzoningm dev type adjustment;\ - \n {:,} parcels allow {} after applying fbpzoningm dev type adjustment.'.format(len(parcel_basezoning_zoningmods), - len(parcel_basezoning_zoningmods.loc[parcel_basezoning_zoningmods[devType+'_basezoning'] == 1]), devType, - len(parcel_basezoning_zoningmods.loc[parcel_basezoning_zoningmods[devType+'_'+upzoning_scenario] == 1]), devType)) - + logger.info( + "Out of {:,} parcels: \n {:,} parcels allow {} before applying fbpzoningm dev type adjustment;\ + \n {:,} parcels allow {} after applying fbpzoningm dev type adjustment.".format( + len(parcel_basezoning_zoningmods), + len( + parcel_basezoning_zoningmods.loc[ + parcel_basezoning_zoningmods[devType + "_basezoning"] == 1 + ] + ), + devType, + len( + parcel_basezoning_zoningmods.loc[ + parcel_basezoning_zoningmods[devType + "_" + upzoning_scenario] + == 1 + ] + ), + devType, + ) + ) # Make a copy and then modify the intensities - #zoning_modify_intensity = zoning_modify_type.copy() + # zoning_modify_intensity = zoning_modify_type.copy() - for intensity in ['dua','far']: + for intensity in ["dua", "far"]: # modify intensity when 'intensity_up' is not null - up_parcels = parcel_basezoning_zoningmods['{}_up'.format(intensity)].notnull() + up_parcels = parcel_basezoning_zoningmods["{}_up".format(intensity)].notnull() # the effective max_dua is the larger of base zoning max_dua and the pba50 max_dua - parcel_basezoning_zoningmods.loc[up_parcels, 'max_{}_{}'.format(intensity, upzoning_scenario)] = \ - parcel_basezoning_zoningmods[['max_{}_{}'.format(intensity, upzoning_scenario),'{}_up'.format(intensity)]].max(axis = 1) + parcel_basezoning_zoningmods.loc[ + up_parcels, "max_{}_{}".format(intensity, upzoning_scenario) + ] = parcel_basezoning_zoningmods[ + [ + "max_{}_{}".format(intensity, upzoning_scenario), + "{}_up".format(intensity), + ] + ].max( + axis=1 + ) # modify intensity when 'intensity_up' is not null - down_parcels = parcel_basezoning_zoningmods['{}_down'.format(intensity)].notnull() + down_parcels = parcel_basezoning_zoningmods[ + "{}_down".format(intensity) + ].notnull() # the effective max_dua is the larger of base zoning max_dua and the pba50 max_dua - parcel_basezoning_zoningmods.loc[down_parcels, 'max_{}_{}'.format(intensity, upzoning_scenario)] = \ - parcel_basezoning_zoningmods[['max_{}_{}'.format(intensity, upzoning_scenario),'{}_down'.format(intensity)]].min(axis = 1) + parcel_basezoning_zoningmods.loc[ + down_parcels, "max_{}_{}".format(intensity, upzoning_scenario) + ] = parcel_basezoning_zoningmods[ + [ + "max_{}_{}".format(intensity, upzoning_scenario), + "{}_down".format(intensity), + ] + ].min( + axis=1 + ) # Compare max_dua and max_far before and after applying pba50zoningmod - logger.info('Before applying fbpzoningm intensity adjustment: \n', - parcel_basezoning_zoningmods[['max_'+intensity+'_basezoning']].describe()) - logger.info('After applying fbpzoningm intensity adjustment: \n', - parcel_basezoning_zoningmods[['max_'+intensity+'_'+upzoning_scenario]].describe()) + logger.info( + "Before applying fbpzoningm intensity adjustment: \n", + parcel_basezoning_zoningmods[ + ["max_" + intensity + "_basezoning"] + 
].describe(), + ) + logger.info( + "After applying fbpzoningm intensity adjustment: \n", + parcel_basezoning_zoningmods[ + ["max_" + intensity + "_" + upzoning_scenario] + ].describe(), + ) parcel_upzoning = parcel_basezoning_zoningmods[keep_cols] - logger.info('Generate parcel-level upzoning table of {:,} records: \n {}'.format(len(parcel_upzoning), parcel_upzoning.head())) + logger.info( + "Generate parcel-level upzoning table of {:,} records: \n {}".format( + len(parcel_upzoning), parcel_upzoning.head() + ) + ) return parcel_upzoning -def summary_capacity(parcel_capacity, groupby_field, capacity_metrics): - return parcel_capacity.groupby([groupby_field])[capacity_metrics].sum().reset_index() +def summary_capacity(parcel_capacity, groupby_field, capacity_metrics): + return ( + parcel_capacity.groupby([groupby_field])[capacity_metrics].sum().reset_index() + ) -if __name__ == '__main__': +if __name__ == "__main__": - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) parser.add_argument("-zoningmods_scenario", help="zoningmods scenario") - parser.add_argument("-zoningmods_version", help="version of zoningmods, represented by the date") - parser.add_argument("-attr_version", help="version of p10_pba50_attr, represented by the date") + parser.add_argument( + "-zoningmods_version", help="version of zoningmods, represented by the date" + ) + parser.add_argument( + "-attr_version", help="version of p10_pba50_attr, represented by the date" + ) parser.add_argument("-test", action="store_true", help="Test mode") args = parser.parse_args() ## inputs - URBANSIM_BASEZONING_DIR = os.path.join(M_DIR, 'Large General Input Data') - PARCEL_ZONING_ID_FILE = os.path.join(URBANSIM_BASEZONING_DIR, '2020_06_22_zoning_parcels_hybrid_pba50.csv') - BASEZONING_LOOKUP_FILE = os.path.join(URBANSIM_BASEZONING_DIR, '2020_06_22_zoning_lookup_hybrid_pba50.csv') + URBANSIM_BASEZONING_DIR = os.path.join(M_DIR, "Large General Input Data") + PARCEL_ZONING_ID_FILE = os.path.join( + URBANSIM_BASEZONING_DIR, "2020_06_22_zoning_parcels_hybrid_pba50.csv" + ) + BASEZONING_LOOKUP_FILE = os.path.join( + URBANSIM_BASEZONING_DIR, "2020_06_22_zoning_lookup_hybrid_pba50.csv" + ) # URBANSIM_PARCEL_FILE = os.path.join(URBANSIM_BASEZONING_DIR, '2020_04_17_parcels_geography.csv') - URBANSIM_PARCEL_TAZ_FILE = os.path.join(URBANSIM_BASEZONING_DIR, '2020_08_17_parcel_to_taz1454sub.csv') + URBANSIM_PARCEL_TAZ_FILE = os.path.join( + URBANSIM_BASEZONING_DIR, "2020_08_17_parcel_to_taz1454sub.csv" + ) - PBA50_ZONINGMODS_DIR = os.path.join(M_DIR, 'Zoning Modifications') - PARCEL_ZONINGMODS_PBA50_FILE = os.path.join(PBA50_ZONINGMODS_DIR, 'p10_pba50_attr_{}.csv'.format(str(args.attr_version))) + PBA50_ZONINGMODS_DIR = os.path.join(M_DIR, "Zoning Modifications") + PARCEL_ZONINGMODS_PBA50_FILE = os.path.join( + PBA50_ZONINGMODS_DIR, "p10_pba50_attr_{}.csv".format(str(args.attr_version)) + ) # output # In test mode (specified by --test), outputs to cwd and without date prefix; otherwise, outputs to URBANSIM_UPZONING_DIR with date prefix - BOX_UPZONING_DIR = os.path.join(BOX_DIR, 'Policies', 'Zoning Modifications', 'capacity') - - COMPARE_JURIS_CAPACITY_FILE = "compare_juris_capacity_{}.csv".format(args.zoningmods_scenario) - COMPARE_TAZ_CAPACITY_FILE = 'compare_taz_capacity_{}.csv'.format(args.zoningmods_scenario) - LOG_FILE = 
"compare_juris_capacity_{}.log".format(args.zoningmods_scenario) + BOX_UPZONING_DIR = os.path.join( + BOX_DIR, "Policies", "Zoning Modifications", "capacity" + ) + + COMPARE_JURIS_CAPACITY_FILE = "compare_juris_capacity_{}.csv".format( + args.zoningmods_scenario + ) + COMPARE_TAZ_CAPACITY_FILE = "compare_taz_capacity_{}.csv".format( + args.zoningmods_scenario + ) + LOG_FILE = "compare_juris_capacity_{}.log".format(args.zoningmods_scenario) # QA/QC files exported in test mode - P10_BASEZONING_FILE = 'p10_basezoning.csv' - P10_UPZONING_PBA50_FILE = 'p10_upzoning_pba50_{}.csv'.format(args.zoningmods_scenario) - PARCEL_CAPACITY_BASEZONING_FILE = 'parcel_capacity_basezoning.csv' - PARCEL_CAPACITY_UPZONING_FILE = 'parcel_capacity_upzoning_{}.csv'.format(args.zoningmods_scenario) - PARCEL_CAPACITY_BAUS_FILE = 'parcel_capacity_baus_{}.csv'.format(args.zoningmods_scenario) - + P10_BASEZONING_FILE = "p10_basezoning.csv" + P10_UPZONING_PBA50_FILE = "p10_upzoning_pba50_{}.csv".format( + args.zoningmods_scenario + ) + PARCEL_CAPACITY_BASEZONING_FILE = "parcel_capacity_basezoning.csv" + PARCEL_CAPACITY_UPZONING_FILE = "parcel_capacity_upzoning_{}.csv".format( + args.zoningmods_scenario + ) + PARCEL_CAPACITY_BAUS_FILE = "parcel_capacity_baus_{}.csv".format( + args.zoningmods_scenario + ) if args.test == False: - LOG_FILE = os.path.join(BOX_UPZONING_DIR, "{}_{}".format(today, LOG_FILE)) - COMPARE_JURIS_CAPACITY_FILE = os.path.join(BOX_UPZONING_DIR, "{}_{}".format(today, COMPARE_JURIS_CAPACITY_FILE)) - COMPARE_TAZ_CAPACITY_FILE = os.path.join(BOX_UPZONING_DIR, "{}_{}".format(today, COMPARE_TAZ_CAPACITY_FILE)) + LOG_FILE = os.path.join(BOX_UPZONING_DIR, "{}_{}".format(today, LOG_FILE)) + COMPARE_JURIS_CAPACITY_FILE = os.path.join( + BOX_UPZONING_DIR, "{}_{}".format(today, COMPARE_JURIS_CAPACITY_FILE) + ) + COMPARE_TAZ_CAPACITY_FILE = os.path.join( + BOX_UPZONING_DIR, "{}_{}".format(today, COMPARE_TAZ_CAPACITY_FILE) + ) - pd.set_option('max_columns', 200) - pd.set_option('display.width', 200) + pd.set_option("max_columns", 200) + pd.set_option("display.width", 200) # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("BOX_UPZONING_DIR = {}".format(BOX_UPZONING_DIR)) - logger.info("COMPARE_JURIS_CAPACITY_FILE = {}".format(COMPARE_JURIS_CAPACITY_FILE)) - logger.info("COMPARE_TAZ_CAPACITY_FILE = {}".format(COMPARE_TAZ_CAPACITY_FILE)) + logger.info( + "COMPARE_JURIS_CAPACITY_FILE = {}".format(COMPARE_JURIS_CAPACITY_FILE) + ) + logger.info( + "COMPARE_TAZ_CAPACITY_FILE = {}".format(COMPARE_TAZ_CAPACITY_FILE) + ) ## Read p10 parcels data - basemap_p10_file = os.path.join(M_SMELT_DIR, 'p10.csv') - basemap_p10 = pd.read_csv(basemap_p10_file, - usecols =['PARCEL_ID', 'ACRES', 'LAND_VALUE']) + 
basemap_p10_file = os.path.join(M_SMELT_DIR, "p10.csv") + basemap_p10 = pd.read_csv( + basemap_p10_file, usecols=["PARCEL_ID", "ACRES", "LAND_VALUE"] + ) # conver PARCEL_ID to integer: - basemap_p10['PARCEL_ID'] = basemap_p10['PARCEL_ID'].apply(lambda x: int(round(x))) + basemap_p10["PARCEL_ID"] = basemap_p10["PARCEL_ID"].apply(lambda x: int(round(x))) logger.info("Read {:,} rows from {}".format(len(basemap_p10), basemap_p10_file)) logger.info("\n{}".format(basemap_p10.head())) - logger.info('Number of unique PARCEL_ID: {}'.format(len(basemap_p10.PARCEL_ID.unique()))) - + logger.info( + "Number of unique PARCEL_ID: {}".format(len(basemap_p10.PARCEL_ID.unique())) + ) ## Read parcel with base zoning data - parcel_use_cols = ['PARCEL_ID', 'zoning_id','nodev'] - parcel_zoning_id = pd.read_csv(PARCEL_ZONING_ID_FILE, - usecols = parcel_use_cols) - parcel_zoning_id.rename(columns = {'PARCEL_ID': 'PARCEL_ID_pz'}, inplace=True) + parcel_use_cols = ["PARCEL_ID", "zoning_id", "nodev"] + parcel_zoning_id = pd.read_csv(PARCEL_ZONING_ID_FILE, usecols=parcel_use_cols) + parcel_zoning_id.rename(columns={"PARCEL_ID": "PARCEL_ID_pz"}, inplace=True) basezoning_lookup = pd.read_csv(BASEZONING_LOOKUP_FILE) - basezoning_lookup.columns = [col+'_basezoning' for col in basezoning_lookup.columns.values] - - parcel_basezoning = parcel_zoning_id.merge(basezoning_lookup, - left_on = 'zoning_id', - right_on = 'id_basezoning', - how = 'left') - parcel_basezoning.rename(columns = {'nodev': 'nodev_basezoning'}, inplace=True) - logger.info("Parcels with base zoning has {} records, with columns:\n{}".format(len(parcel_basezoning), parcel_basezoning.dtypes)) - + basezoning_lookup.columns = [ + col + "_basezoning" for col in basezoning_lookup.columns.values + ] + + parcel_basezoning = parcel_zoning_id.merge( + basezoning_lookup, left_on="zoning_id", right_on="id_basezoning", how="left" + ) + parcel_basezoning.rename(columns={"nodev": "nodev_basezoning"}, inplace=True) + logger.info( + "Parcels with base zoning has {} records, with columns:\n{}".format( + len(parcel_basezoning), parcel_basezoning.dtypes + ) + ) # Read PBA50 zoningmods - parcel_zoningmods = pd.read_csv(PARCEL_ZONINGMODS_PBA50_FILE, - usecols = ['PARCEL_ID', 'fbpzoningm', 'juris']) - parcel_zoningmods.PARCEL_ID = parcel_zoningmods.PARCEL_ID.apply(lambda x: int(round(x))) - parcel_zoningmods.rename(columns = {'PARCEL_ID': 'PARCEL_ID_attr'}, inplace=True) + parcel_zoningmods = pd.read_csv( + PARCEL_ZONINGMODS_PBA50_FILE, usecols=["PARCEL_ID", "fbpzoningm", "juris"] + ) + parcel_zoningmods.PARCEL_ID = parcel_zoningmods.PARCEL_ID.apply( + lambda x: int(round(x)) + ) + parcel_zoningmods.rename(columns={"PARCEL_ID": "PARCEL_ID_attr"}, inplace=True) # Add base zoning and PBA50 zoningmods to p10 parcels - p10_basezoning = basemap_p10.merge(parcel_basezoning, - left_on = 'PARCEL_ID', - right_on = 'PARCEL_ID_pz', - how = 'left').merge(parcel_zoningmods, - left_on = 'PARCEL_ID', - right_on = 'PARCEL_ID_attr', - how = 'left') + p10_basezoning = basemap_p10.merge( + parcel_basezoning, left_on="PARCEL_ID", right_on="PARCEL_ID_pz", how="left" + ).merge( + parcel_zoningmods, left_on="PARCEL_ID", right_on="PARCEL_ID_attr", how="left" + ) # in test mode, export the data for QA/QC if args.test == True: logger.info("Export p10_basezoning") p10_basezoning.to_csv(P10_BASEZONING_FILE, index=False) + logger.info( + "Running step ------ Applying upzoning {}".format( + "zoning_mods_{}_{}.csv".format( + args.zoningmods_scenario, args.zoningmods_version + ) + ) + ) - logger.info("Running 
step ------ Applying upzoning {}".format('zoning_mods_{}_{}.csv'.format(args.zoningmods_scenario, - args.zoningmods_version))) - - p10_upzoning_pba50 = apply_upzoning_to_parcel_data(logger, p10_basezoning, args.zoningmods_scenario, args.zoningmods_version) - - logger.info("Generating p10_upzoning_pba50 with {} records;\n Headers:\n{}".format(len(p10_upzoning_pba50), - p10_upzoning_pba50.head())) + p10_upzoning_pba50 = apply_upzoning_to_parcel_data( + logger, p10_basezoning, args.zoningmods_scenario, args.zoningmods_version + ) + logger.info( + "Generating p10_upzoning_pba50 with {} records;\n Headers:\n{}".format( + len(p10_upzoning_pba50), p10_upzoning_pba50.head() + ) + ) ## B10 buildings with p10 parcels data - basemap_b10_file = os.path.join(M_SMELT_DIR, 'b10.csv') + basemap_b10_file = os.path.join(M_SMELT_DIR, "b10.csv") basemap_b10 = pd.read_csv(basemap_b10_file) # conver PARCEL_ID to integer: - basemap_b10['parcel_id'] = basemap_b10['parcel_id'].apply(lambda x: int(round(x))) + basemap_b10["parcel_id"] = basemap_b10["parcel_id"].apply(lambda x: int(round(x))) logger.info("Read {:,} rows from {}".format(len(basemap_b10), basemap_b10_file)) logger.info("\n{}".format(basemap_b10.head())) - logger.info('b10 building data has {:,} unique PARCEL_ID:'.format(len(basemap_b10.parcel_id.unique()))) + logger.info( + "b10 building data has {:,} unique PARCEL_ID:".format( + len(basemap_b10.parcel_id.unique()) + ) + ) # join parcels to buildings which is used to determine current built-out condition when calculating net capacity - building_parcel = pd.merge(left=basemap_b10, - right=basemap_p10[['PARCEL_ID','LAND_VALUE','ACRES']], - left_on='parcel_id', - right_on='PARCEL_ID', - how='outer') + building_parcel = pd.merge( + left=basemap_b10, + right=basemap_p10[["PARCEL_ID", "LAND_VALUE", "ACRES"]], + left_on="parcel_id", + right_on="PARCEL_ID", + how="outer", + ) # compute allowed development type - residential vs non-residential for each parcel - basezoning_allow_dev_type = \ - dev_capacity_calculation_module.set_allow_dev_type(p10_upzoning_pba50, boc_source="basezoning") - upzoning_allow_dev_type = \ - dev_capacity_calculation_module.set_allow_dev_type(p10_upzoning_pba50, boc_source=args.zoningmods_scenario) + basezoning_allow_dev_type = dev_capacity_calculation_module.set_allow_dev_type( + p10_upzoning_pba50, boc_source="basezoning" + ) + upzoning_allow_dev_type = dev_capacity_calculation_module.set_allow_dev_type( + p10_upzoning_pba50, boc_source=args.zoningmods_scenario + ) # put them together - p10_upzoning_pba50 = p10_upzoning_pba50.merge(basezoning_allow_dev_type, - on = 'PARCEL_ID', - how = 'left').merge(upzoning_allow_dev_type, - on = 'PARCEL_ID', - how = 'left') - + p10_upzoning_pba50 = p10_upzoning_pba50.merge( + basezoning_allow_dev_type, on="PARCEL_ID", how="left" + ).merge(upzoning_allow_dev_type, on="PARCEL_ID", how="left") + # Add TAZ id to parcel data - parcel_taz = pd.read_csv(URBANSIM_PARCEL_TAZ_FILE, - usecols = ['PARCEL_ID', 'ZONE_ID']) + parcel_taz = pd.read_csv(URBANSIM_PARCEL_TAZ_FILE, usecols=["PARCEL_ID", "ZONE_ID"]) parcel_taz.PARCEL_ID = parcel_taz.PARCEL_ID.apply(lambda x: int(round(x))) - parcel_taz.ZONE_ID = parcel_taz.ZONE_ID.apply(lambda x: int(round(x))) - parcel_taz.rename(columns ={'PARCEL_ID': 'PARCEL_ID_taz'}, inplace=True) + parcel_taz.ZONE_ID = parcel_taz.ZONE_ID.apply(lambda x: int(round(x))) + parcel_taz.rename(columns={"PARCEL_ID": "PARCEL_ID_taz"}, inplace=True) - p10_upzoning_pba50 = p10_upzoning_pba50.merge(parcel_taz, - left_on = 'PARCEL_ID', - 
right_on = 'PARCEL_ID_taz', - how = 'left') + p10_upzoning_pba50 = p10_upzoning_pba50.merge( + parcel_taz, left_on="PARCEL_ID", right_on="PARCEL_ID_taz", how="left" + ) - logger.debug("p10_upzoning_pba50 with columns:\n{}".format(p10_upzoning_pba50.dtypes)) + logger.debug( + "p10_upzoning_pba50 with columns:\n{}".format(p10_upzoning_pba50.dtypes) + ) # in test mode, export the data for QA/QC if args.test == True: logger.info("Export p10_upzoning_pba50") - p10_upzoning_pba50.to_csv(P10_UPZONING_PBA50_FILE, index = False) - + p10_upzoning_pba50.to_csv(P10_UPZONING_PBA50_FILE, index=False) ## calculate raw and net capacity for basezoning and upzoning - logger.info("Running step ------ Calculating raw development capacity under basezoning") - - raw_parcel_capacity_basezoning = dev_capacity_calculation_module.calculate_capacity(p10_upzoning_pba50, - "basezoning", - "basezoning", - pass_thru_cols=["juris", 'ZONE_ID']) - logger.debug("raw_parcel_capacity_basezoning.head():\n{}".format(raw_parcel_capacity_basezoning.head())) - - logger.info("Running step ------ Calculating raw development capacity under {}".format('zoning_mods_'+args.zoningmods_scenario)) - raw_parcel_capacity_upzoning = dev_capacity_calculation_module.calculate_capacity(p10_upzoning_pba50, - args.zoningmods_scenario, - "basezoning", - pass_thru_cols=["juris", 'ZONE_ID']) - logger.debug("raw_parcel_capacity_upzoning.head():\n{}".format(raw_parcel_capacity_upzoning.head())) - - logger.info("Running step ------ Calculating net development capacity under basezoning") - - net_parcel_capacity_basezoning = dev_capacity_calculation_module.calculate_net_capacity(logger, - p10_upzoning_pba50, - "basezoning", - "basezoning", - building_parcel, - net_pass_thru_cols=["juris", 'ZONE_ID']) - logger.debug("net_parcel_capacity_basezoning.head():\n{}".format(net_parcel_capacity_basezoning.head())) - - logger.info("Running step ------ Calculating net development capacity under {}".format('zoning_mods_'+args.zoningmods_scenario)) - net_parcel_capacity_upzoning = dev_capacity_calculation_module.calculate_net_capacity(logger, - p10_upzoning_pba50, - args.zoningmods_scenario, - "basezoning", - building_parcel, - net_pass_thru_cols=["juris", 'ZONE_ID']) - logger.debug("net_parcel_capacity_upzoning.head():\n{}".format(net_parcel_capacity_upzoning.head())) + logger.info( + "Running step ------ Calculating raw development capacity under basezoning" + ) + + raw_parcel_capacity_basezoning = dev_capacity_calculation_module.calculate_capacity( + p10_upzoning_pba50, + "basezoning", + "basezoning", + pass_thru_cols=["juris", "ZONE_ID"], + ) + logger.debug( + "raw_parcel_capacity_basezoning.head():\n{}".format( + raw_parcel_capacity_basezoning.head() + ) + ) + + logger.info( + "Running step ------ Calculating raw development capacity under {}".format( + "zoning_mods_" + args.zoningmods_scenario + ) + ) + raw_parcel_capacity_upzoning = dev_capacity_calculation_module.calculate_capacity( + p10_upzoning_pba50, + args.zoningmods_scenario, + "basezoning", + pass_thru_cols=["juris", "ZONE_ID"], + ) + logger.debug( + "raw_parcel_capacity_upzoning.head():\n{}".format( + raw_parcel_capacity_upzoning.head() + ) + ) + + logger.info( + "Running step ------ Calculating net development capacity under basezoning" + ) + + net_parcel_capacity_basezoning = ( + dev_capacity_calculation_module.calculate_net_capacity( + logger, + p10_upzoning_pba50, + "basezoning", + "basezoning", + building_parcel, + net_pass_thru_cols=["juris", "ZONE_ID"], + ) + ) + logger.debug( + 
"net_parcel_capacity_basezoning.head():\n{}".format( + net_parcel_capacity_basezoning.head() + ) + ) + + logger.info( + "Running step ------ Calculating net development capacity under {}".format( + "zoning_mods_" + args.zoningmods_scenario + ) + ) + net_parcel_capacity_upzoning = ( + dev_capacity_calculation_module.calculate_net_capacity( + logger, + p10_upzoning_pba50, + args.zoningmods_scenario, + "basezoning", + building_parcel, + net_pass_thru_cols=["juris", "ZONE_ID"], + ) + ) + logger.debug( + "net_parcel_capacity_upzoning.head():\n{}".format( + net_parcel_capacity_upzoning.head() + ) + ) # in test mode, export the data for QA/QC if args.test == True: - raw_parcel_capacity_basezoning.to_csv('raw_parcel_capacity_basezoning.csv', index = False) - net_parcel_capacity_basezoning.to_csv('net_parcel_capacity_basezoning.csv', index = False) - raw_parcel_capacity_upzoning.to_csv('raw_parcel_capacity_upzoning.csv', index = False) - net_parcel_capacity_upzoning.to_csv('net_parcel_capacity_upzoning.csv', index = False) - + raw_parcel_capacity_basezoning.to_csv( + "raw_parcel_capacity_basezoning.csv", index=False + ) + net_parcel_capacity_basezoning.to_csv( + "net_parcel_capacity_basezoning.csv", index=False + ) + raw_parcel_capacity_upzoning.to_csv( + "raw_parcel_capacity_upzoning.csv", index=False + ) + net_parcel_capacity_upzoning.to_csv( + "net_parcel_capacity_upzoning.csv", index=False + ) ## calculate jurisdiction-level capacity - juris_raw_capacity_basezoning = summary_capacity(raw_parcel_capacity_basezoning, - 'juris', - [raw_metrics + '_basezoning' for raw_metrics in RAW_CAPACITY_CODES]) - - juris_net_capacity_basezoning = summary_capacity(net_parcel_capacity_basezoning, - 'juris', - [net_metrics + '_basezoning' for net_metrics in NET_CAPACITY_CODES]) - - juris_raw_capacity_upzoning = summary_capacity(raw_parcel_capacity_upzoning, - 'juris', - [raw_metrics + '_' + args.zoningmods_scenario for raw_metrics in RAW_CAPACITY_CODES]) - - juris_net_capacity_upzoning = summary_capacity(net_parcel_capacity_upzoning, - 'juris', - [net_metrics + '_' + args.zoningmods_scenario for net_metrics in NET_CAPACITY_CODES]) - + juris_raw_capacity_basezoning = summary_capacity( + raw_parcel_capacity_basezoning, + "juris", + [raw_metrics + "_basezoning" for raw_metrics in RAW_CAPACITY_CODES], + ) + + juris_net_capacity_basezoning = summary_capacity( + net_parcel_capacity_basezoning, + "juris", + [net_metrics + "_basezoning" for net_metrics in NET_CAPACITY_CODES], + ) + + juris_raw_capacity_upzoning = summary_capacity( + raw_parcel_capacity_upzoning, + "juris", + [ + raw_metrics + "_" + args.zoningmods_scenario + for raw_metrics in RAW_CAPACITY_CODES + ], + ) + + juris_net_capacity_upzoning = summary_capacity( + net_parcel_capacity_upzoning, + "juris", + [ + net_metrics + "_" + args.zoningmods_scenario + for net_metrics in NET_CAPACITY_CODES + ], + ) ## calculate taz-level capacity - taz_raw_capacity_basezoning = summary_capacity(raw_parcel_capacity_basezoning, - 'ZONE_ID', - [raw_metrics + '_basezoning' for raw_metrics in RAW_CAPACITY_CODES]) - - taz_net_capacity_basezoning = summary_capacity(net_parcel_capacity_basezoning, - 'ZONE_ID', - [net_metrics + '_basezoning' for net_metrics in NET_CAPACITY_CODES]) - - taz_raw_capacity_upzoning = summary_capacity(raw_parcel_capacity_upzoning, - 'ZONE_ID', - [raw_metrics + '_' + args.zoningmods_scenario for raw_metrics in RAW_CAPACITY_CODES]) - - taz_net_capacity_upzoning = summary_capacity(net_parcel_capacity_upzoning, - 'ZONE_ID', - [net_metrics + '_' + 
args.zoningmods_scenario for net_metrics in NET_CAPACITY_CODES]) - + taz_raw_capacity_basezoning = summary_capacity( + raw_parcel_capacity_basezoning, + "ZONE_ID", + [raw_metrics + "_basezoning" for raw_metrics in RAW_CAPACITY_CODES], + ) + + taz_net_capacity_basezoning = summary_capacity( + net_parcel_capacity_basezoning, + "ZONE_ID", + [net_metrics + "_basezoning" for net_metrics in NET_CAPACITY_CODES], + ) + + taz_raw_capacity_upzoning = summary_capacity( + raw_parcel_capacity_upzoning, + "ZONE_ID", + [ + raw_metrics + "_" + args.zoningmods_scenario + for raw_metrics in RAW_CAPACITY_CODES + ], + ) + + taz_net_capacity_upzoning = summary_capacity( + net_parcel_capacity_upzoning, + "ZONE_ID", + [ + net_metrics + "_" + args.zoningmods_scenario + for net_metrics in NET_CAPACITY_CODES + ], + ) # merge to generate juris-level summary - juris_capacity_compare = juris_raw_capacity_basezoning.merge(juris_net_capacity_basezoning, - on = 'juris').merge(juris_raw_capacity_upzoning, - on = 'juris').merge(juris_net_capacity_upzoning, - on = 'juris') - juris_capacity_compare.rename(columns = {'juris': 'jurisdiciton'}, inplace = True) + juris_capacity_compare = ( + juris_raw_capacity_basezoning.merge(juris_net_capacity_basezoning, on="juris") + .merge(juris_raw_capacity_upzoning, on="juris") + .merge(juris_net_capacity_upzoning, on="juris") + ) + juris_capacity_compare.rename(columns={"juris": "jurisdiciton"}, inplace=True) - logger.debug("juris_capacity_compare.head():\n{}".format(juris_capacity_compare.head())) + logger.debug( + "juris_capacity_compare.head():\n{}".format(juris_capacity_compare.head()) + ) # merge to generate taz-level summary - taz_capacity_compare = taz_raw_capacity_basezoning.merge(taz_net_capacity_basezoning, - on = 'ZONE_ID').merge(taz_raw_capacity_upzoning, - on = 'ZONE_ID').merge(taz_net_capacity_upzoning, - on = 'ZONE_ID') + taz_capacity_compare = ( + taz_raw_capacity_basezoning.merge(taz_net_capacity_basezoning, on="ZONE_ID") + .merge(taz_raw_capacity_upzoning, on="ZONE_ID") + .merge(taz_net_capacity_upzoning, on="ZONE_ID") + ) logger.debug("taz_capacity_compare.head():\n{}".format(taz_capacity_compare.head())) ## Export data # export jurisdiction-level capacity comparison - logger.info("Export development capacity comparison by jurisdiciton: \n{}".format(juris_capacity_compare.dtypes)) - juris_capacity_compare.to_csv(COMPARE_JURIS_CAPACITY_FILE, index = False) + logger.info( + "Export development capacity comparison by jurisdiciton: \n{}".format( + juris_capacity_compare.dtypes + ) + ) + juris_capacity_compare.to_csv(COMPARE_JURIS_CAPACITY_FILE, index=False) # export taz-level capacity comparison - logger.info("Export development capacity comparison by TAZ: \n{}".format(taz_capacity_compare.dtypes)) - taz_capacity_compare.to_csv(COMPARE_TAZ_CAPACITY_FILE, index = False) \ No newline at end of file + logger.info( + "Export development capacity comparison by TAZ: \n{}".format( + taz_capacity_compare.dtypes + ) + ) + taz_capacity_compare.to_csv(COMPARE_TAZ_CAPACITY_FILE, index=False) diff --git a/policies/plu/create_jurisdiction_map.py b/policies/plu/create_jurisdiction_map.py index fc85af6..e759e14 100644 --- a/policies/plu/create_jurisdiction_map.py +++ b/policies/plu/create_jurisdiction_map.py @@ -13,83 +13,135 @@ import dev_capacity_calculation_module -COUNTY_JURISDICTIONS_CSV = "M:\\Data\\GIS layers\\Jurisdictions\\county_jurisdictions.csv" +COUNTY_JURISDICTIONS_CSV = ( + "M:\\Data\\GIS layers\\Jurisdictions\\county_jurisdictions.csv" +) -if 
os.getenv("USERNAME")=="lzorn": +if os.getenv("USERNAME") == "lzorn": # This was created by joining output of 1_PLU_BOC_data_combine.ipynb with p10 # # e.g. using the command # - # python import_filegdb_layers.py "M:\Data\GIS layers\UrbanSim smelt\2020 03 12\smelt.gdb" p10 PARCEL_ID + # python import_filegdb_layers.py "M:\Data\GIS layers\UrbanSim smelt\2020 03 12\smelt.gdb" p10 PARCEL_ID # "C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\Policies\Base zoning\outputs\2020_10_20_p10_plu_boc_allAttrs.csv" # PARCEL_ID "KEEP_ALL" "M:\Data\GIS layers\UrbanSim_BASIS_zoning\UrbanSim_BASIS_zoning_fb.gdb" # - WORKSPACE_DIR = "M:\\Data\\GIS layers\\UrbanSim_BASIS_zoning" - WORKSPACE_GDB = os.path.join(WORKSPACE_DIR,"UrbanSim_BASIS_zoning_fb.gdb") - ARCGIS_PROJECTS = [os.path.join(WORKSPACE_DIR,"UrbanSim_BASIS_zoning_intensity_fb.aprx"), - os.path.join(WORKSPACE_DIR,"UrbanSim_BASIS_zoning_devType_fb.aprx")] + WORKSPACE_DIR = "M:\\Data\\GIS layers\\UrbanSim_BASIS_zoning" + WORKSPACE_GDB = os.path.join(WORKSPACE_DIR, "UrbanSim_BASIS_zoning_fb.gdb") + ARCGIS_PROJECTS = [ + os.path.join(WORKSPACE_DIR, "UrbanSim_BASIS_zoning_intensity_fb.aprx"), + os.path.join(WORKSPACE_DIR, "UrbanSim_BASIS_zoning_devType_fb.aprx"), + ] # location of BASIS_Local_Jurisdiction_Review_Summary.xlsx (https://mtcdrive.box.com/s/s2w68pnboa3gzq5z228mqbxtdehgdcxd) - JURIS_REVIEW = "C:\\Users\\lzorn\\Box\\BASIS Land Use Data Store\\Jurisdiction Review\\BASIS_Local_Jurisdiction_Review_Summary.xlsx" + JURIS_REVIEW = "C:\\Users\\lzorn\\Box\\BASIS Land Use Data Store\\Jurisdiction Review\\BASIS_Local_Jurisdiction_Review_Summary.xlsx" PETRALE_GITHUB_DIR = "X:\\petrale" # location of current hybrid configuration - HYBRID_CONFIG_DIR = os.path.join(PETRALE_GITHUB_DIR, "policies", "plu", "base_zoning", "hybrid_index") + HYBRID_CONFIG_DIR = os.path.join( + PETRALE_GITHUB_DIR, "policies", "plu", "base_zoning", "hybrid_index" + ) -elif os.getenv("USERNAME")=="ywang": +elif os.getenv("USERNAME") == "ywang": # This was created by joining output of 1_PLU_BOC_data_combine.ipynb with p10 # # e.g. 
using the command # - # python import_filegdb_layers.py "M:\Data\GIS layers\UrbanSim smelt\2020 03 12\smelt.gdb" p10 PARCEL_ID + # python import_filegdb_layers.py "M:\Data\GIS layers\UrbanSim smelt\2020 03 12\smelt.gdb" p10 PARCEL_ID # "C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\Policies\Base zoning\outputs\2020_10_20_p10_plu_boc_allAttrs.csv" # PARCEL_ID "KEEP_ALL" "M:\Data\GIS layers\UrbanSim_BASIS_zoning\UrbanSim_BASIS_zoning_fb.gdb" - WORKSPACE_DIR = "M:\\Data\\GIS layers\\UrbanSim_BASIS_zoning" - WORKSPACE_GDB = os.path.join(WORKSPACE_DIR,"UrbanSim_BASIS_zoning_fb.gdb") - ARCGIS_PROJECTS = [os.path.join(WORKSPACE_DIR,"UrbanSim_BASIS_zoning_intensity_fb.aprx"), - os.path.join(WORKSPACE_DIR,"UrbanSim_BASIS_zoning_devType_fb.aprx")] + WORKSPACE_DIR = "M:\\Data\\GIS layers\\UrbanSim_BASIS_zoning" + WORKSPACE_GDB = os.path.join(WORKSPACE_DIR, "UrbanSim_BASIS_zoning_fb.gdb") + ARCGIS_PROJECTS = [ + os.path.join(WORKSPACE_DIR, "UrbanSim_BASIS_zoning_intensity_fb.aprx"), + os.path.join(WORKSPACE_DIR, "UrbanSim_BASIS_zoning_devType_fb.aprx"), + ] # location of BASIS_Local_Jurisdiction_Review_Summary.xlsx (https://mtcdrive.box.com/s/s2w68pnboa3gzq5z228mqbxtdehgdcxd) - JURIS_REVIEW = "C:\\Users\\ywang\\Documents\\Python Scripts\\UrbanSim_BASIS_zoning\\BASIS_Local_Jurisdiction_Review_Summary.xlsx" + JURIS_REVIEW = "C:\\Users\\ywang\\Documents\\Python Scripts\\UrbanSim_BASIS_zoning\\BASIS_Local_Jurisdiction_Review_Summary.xlsx" PETRALE_GITHUB_DIR = "C:\\Users\\ywang\\Documents\\GitHub\\petrale" # location of current hybrid configuration - HYBRID_CONFIG_DIR = os.path.join(PETRALE_GITHUB_DIR, "policies", "plu", "base_zoning", "hybrid_index") + HYBRID_CONFIG_DIR = os.path.join( + PETRALE_GITHUB_DIR, "policies", "plu", "base_zoning", "hybrid_index" + ) -if __name__ == '__main__': +if __name__ == "__main__": pandas.options.display.max_rows = 999 - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) - parser.add_argument("--debug", help="If on, saves a copy of the arcgis project with mods.", action='store_true') + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--debug", + help="If on, saves a copy of the arcgis project with mods.", + action="store_true", + ) parser.add_argument("--restart_juris", help="Jurisdiction to restart from") - parser.add_argument("--jurisdiction", help="Jurisdiction. If none passed, will process all", nargs='+', ) - parser.add_argument("--metric", help="Metrics type(s). If none passed, will process all", nargs='+', - choices=["DUA","FAR","height","HS","HT","HM","OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"]) - parser.add_argument("--hybrid_config", help="Required arg. Hybrid config file in {}".format(HYBRID_CONFIG_DIR), required=True) - parser.add_argument("--output_type", help="Type of map to export", choices=["pdf","png"], default="pdf") + parser.add_argument( + "--jurisdiction", + help="Jurisdiction. If none passed, will process all", + nargs="+", + ) + parser.add_argument( + "--metric", + help="Metrics type(s). If none passed, will process all", + nargs="+", + choices=[ + "DUA", + "FAR", + "height", + "HS", + "HT", + "HM", + "OF", + "HO", + "SC", + "IL", + "IW", + "IH", + "RS", + "RB", + "MR", + "MT", + "ME", + ], + ) + parser.add_argument( + "--hybrid_config", + help="Required arg. 
Hybrid config file in {}".format(HYBRID_CONFIG_DIR), + required=True, + ) + parser.add_argument( + "--output_type", + help="Type of map to export", + choices=["pdf", "png"], + default="pdf", + ) args = parser.parse_args() # read list of jurisdictions JURISDICTION_TO_COUNTY = collections.OrderedDict() - with open(COUNTY_JURISDICTIONS_CSV, mode='r') as infile: + with open(COUNTY_JURISDICTIONS_CSV, mode="r") as infile: reader = csv.DictReader(infile) for row in reader: - JURISDICTION_TO_COUNTY[row['Jurisdiction']] = row['County'] - + JURISDICTION_TO_COUNTY[row["Jurisdiction"]] = row["County"] + # read jurisdiction review status for BASIS juris_review_df = pandas.read_excel(JURIS_REVIEW, sheet_name="Sheet 1", header=1) - juris_review_df = juris_review_df.loc[ pandas.notnull(juris_review_df.Jurisdiction) ] + juris_review_df = juris_review_df.loc[pandas.notnull(juris_review_df.Jurisdiction)] juris_review_df.set_index("Jurisdiction", inplace=True) # print(juris_review_df) juris_review_dict = juris_review_df.to_dict(orient="index") # print(juris_review_dict["Berkeley"]) # e.g. { - # 'County': 'Alameda', - # 'Check Allowable Building Heights': True, + # 'County': 'Alameda', + # 'Check Allowable Building Heights': True, # 'Check Development Pipeline': True, # 'Check Floor Area Ratio': True, # 'Check Residential Densities': True, @@ -104,8 +156,10 @@ # 'Percent Complete': 1 # } - # read hybrid configuration - hybrid_config_df = pandas.read_csv(os.path.join(HYBRID_CONFIG_DIR, args.hybrid_config)) + # read hybrid configuration + hybrid_config_df = pandas.read_csv( + os.path.join(HYBRID_CONFIG_DIR, args.hybrid_config) + ) # print(hybrid_config_df.head()) hybrid_config_df.set_index("juris_name", inplace=True) hybrid_config_dict = hybrid_config_df.to_dict(orient="index") @@ -117,7 +171,7 @@ # 'HO_idx': 0, # 'SC_idx': 0, # 'IL_idx': 0, - # 'IW_idx': 0, + # 'IW_idx': 0, # 'IH_idx': 0, # 'RS_idx': 0, # 'RB_idx': 0, @@ -131,7 +185,7 @@ # 'max_far_idx': 0, # 'max_height_idx': 0, # 'proportion_adj_dua': 1, - # 'proportion_adj_far': 1, + # 'proportion_adj_far': 1, # 'proportion_adj_height': 1 # } @@ -140,7 +194,11 @@ JURISDICTION_TO_COUNTY_arg = {} for juris in args.jurisdiction: if juris not in JURISDICTION_TO_COUNTY: - print("Jurisdiction [{}] not found in {}".format(juris, COUNTY_JURISDICTIONS_CSV)) + print( + "Jurisdiction [{}] not found in {}".format( + juris, COUNTY_JURISDICTIONS_CSV + ) + ) else: JURISDICTION_TO_COUNTY_arg[juris] = JURISDICTION_TO_COUNTY[juris] @@ -150,7 +208,8 @@ print("Restarting at jurisdiction {}".format(args.restart_juris)) juris_list = list(JURISDICTION_TO_COUNTY.keys()) for juris in juris_list: - if juris == args.restart_juris: break + if juris == args.restart_juris: + break del JURISDICTION_TO_COUNTY[juris] @@ -160,34 +219,174 @@ arcpy.env.workspace = WORKSPACE_GDB now_str = datetime.datetime.now().strftime("%Y/%m/%d, %H:%M") - source_str = \ - "Created by " \ - "https://github.com/BayAreaMetro/petrale/blob/master/policies/plu/base_zoning/create_jurisdiction_map.py on {}. " \ + source_str = ( + 'Created by ' + "https://github.com/BayAreaMetro/petrale/blob/master/policies/plu/base_zoning/create_jurisdiction_map.py on {}. 
" "Hybrid config: https://github.com/BayAreaMetro/petrale/blob/master/policies/plu/base_zoning/hybrid_index/{}".format( - now_str, args.hybrid_config) - - METRICS_DEF = collections.OrderedDict([ - # ArcGIS project, detail name, BASIS jurisdiction col, hybrid config col - ('DUA' ,["UrbanSim_BASIS_zoning_intensity.aprx", 'DUA', 'Check Residential Densities', 'max_dua_idx' ]), - ('FAR' ,["UrbanSim_BASIS_zoning_intensity.aprx", 'FAR', 'Check Floor Area Ratio', 'max_far_idx' ]), - ('height' ,["UrbanSim_BASIS_zoning_intensity.aprx", 'height', 'Check Allowable Building Heights','max_height_idx']), - # residential - ('HS' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow HS (Single-family Housing)', None, 'HS_idx' ]), - ('HT' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow HT (Row-House Dwelling)', None, 'HT_idx' ]), - ('HM' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow HM (Multi-family Housing)', None, 'HM_idx' ]), - ('MR' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow MR (Mixed-use Residential)', None, 'MR_idx' ]), - # non residential - ('OF' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow OF (Office)', None, 'OF_idx' ]), - ('HO' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow HO (Hotel)', None, 'HO_idx' ]), - ('SC' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow SC (School)', None, 'SC_idx' ]), - ('IL' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow IL (Light Industrial)', None, 'IL_idx' ]), - ('IW' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow IW (Warehouse+Logistics)', None, 'IW_idx' ]), - ('IH' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow IH (Heavy Industrial)', None, 'IH_idx' ]), - ('RS' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow RS (Retail)', None, 'RS_idx' ]), - ('RB' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow RB (Big Box Retail)', None, 'RB_idx' ]), - ('MT' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow MT (Mixed-use Retail)', None, 'MT_idx' ]), - ('ME' ,["UrbanSim_BASIS_zoning_devType.aprx", 'Allow ME (Mixed-use Office)', None, 'ME_idx' ]), - ]) + now_str, args.hybrid_config + ) + ) + + METRICS_DEF = collections.OrderedDict( + [ + # ArcGIS project, detail name, BASIS jurisdiction col, hybrid config col + ( + "DUA", + [ + "UrbanSim_BASIS_zoning_intensity.aprx", + "DUA", + "Check Residential Densities", + "max_dua_idx", + ], + ), + ( + "FAR", + [ + "UrbanSim_BASIS_zoning_intensity.aprx", + "FAR", + "Check Floor Area Ratio", + "max_far_idx", + ], + ), + ( + "height", + [ + "UrbanSim_BASIS_zoning_intensity.aprx", + "height", + "Check Allowable Building Heights", + "max_height_idx", + ], + ), + # residential + ( + "HS", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow HS (Single-family Housing)", + None, + "HS_idx", + ], + ), + ( + "HT", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow HT (Row-House Dwelling)", + None, + "HT_idx", + ], + ), + ( + "HM", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow HM (Multi-family Housing)", + None, + "HM_idx", + ], + ), + ( + "MR", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow MR (Mixed-use Residential)", + None, + "MR_idx", + ], + ), + # non residential + ( + "OF", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow OF (Office)", + None, + "OF_idx", + ], + ), + ( + "HO", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow HO (Hotel)", + None, + "HO_idx", + ], + ), + ( + "SC", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow SC (School)", + None, + "SC_idx", + ], + ), + ( + "IL", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow IL (Light Industrial)", + None, + "IL_idx", + ], + ), + ( + "IW", + [ + 
"UrbanSim_BASIS_zoning_devType.aprx", + "Allow IW (Warehouse+Logistics)", + None, + "IW_idx", + ], + ), + ( + "IH", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow IH (Heavy Industrial)", + None, + "IH_idx", + ], + ), + ( + "RS", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow RS (Retail)", + None, + "RS_idx", + ], + ), + ( + "RB", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow RB (Big Box Retail)", + None, + "RB_idx", + ], + ), + ( + "MT", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow MT (Mixed-use Retail)", + None, + "MT_idx", + ], + ), + ( + "ME", + [ + "UrbanSim_BASIS_zoning_devType.aprx", + "Allow ME (Mixed-use Office)", + None, + "ME_idx", + ], + ), + ] + ) # these are the metrics we'll process if args.metric: @@ -197,13 +396,13 @@ print("Will process metrics: {}".format(metric_list)) prev_jurisdiction = "Palo Alto" - prev_juris_code = "palo_alto" + prev_juris_code = "palo_alto" prev_allowdevtype_metric = "HM" for jurisdiction in JURISDICTION_TO_COUNTY.keys(): - juris_code = jurisdiction.lower().replace(" ","_").replace(".","") + juris_code = jurisdiction.lower().replace(" ", "_").replace(".", "") print("Creating map for {} ({})".format(jurisdiction, juris_code)) metric_idx = 0 @@ -213,155 +412,231 @@ try: print(" Creating map for metric {}".format(metric)) - arc_project = METRICS_DEF[metric][0] - metric_name = METRICS_DEF[metric][1] - basis_check_col = METRICS_DEF[metric][2] + arc_project = METRICS_DEF[metric][0] + metric_name = METRICS_DEF[metric][1] + basis_check_col = METRICS_DEF[metric][2] basis_hybrid_col = METRICS_DEF[metric][3] - - basis_check_val = False + + basis_check_val = False if basis_check_col: if jurisdiction not in juris_review_dict: - print("Couldn't find jurisdiction {} in BASIS jurisdiction review {}".format(jurisdiction, JURIS_REVIEW)) + print( + "Couldn't find jurisdiction {} in BASIS jurisdiction review {}".format( + jurisdiction, JURIS_REVIEW + ) + ) else: - basis_check_val = juris_review_dict[jurisdiction][basis_check_col] - print(" BASIS check val for {}: {}".format(basis_check_col, basis_check_val)) + basis_check_val = juris_review_dict[jurisdiction][ + basis_check_col + ] + print( + " BASIS check val for {}: {}".format( + basis_check_col, basis_check_val + ) + ) basis_hybrid_val = hybrid_config_dict[juris_code][basis_hybrid_col] - print(" BASIS hybrid config val for {}: {}".format(basis_hybrid_col, basis_hybrid_val)) - + print( + " BASIS hybrid config val for {}: {}".format( + basis_hybrid_col, basis_hybrid_val + ) + ) + # allowed dev type has a generic map so needs subsitution for that as well - is_devtype = False - map_metric = metric + is_devtype = False + map_metric = metric map_metric_name = metric_name - if metric in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES: - is_devtype = True - map_metric = prev_allowdevtype_metric + if ( + metric + in dev_capacity_calculation_module.ALLOWED_BUILDING_TYPE_CODES + ): + is_devtype = True + map_metric = prev_allowdevtype_metric map_metric_name = METRICS_DEF[prev_allowdevtype_metric][1] - print(" map_metric:[{}] map_metric_name:[{}]".format(map_metric, map_metric_name)) - + print( + " map_metric:[{}] map_metric_name:[{}]".format( + map_metric, map_metric_name + ) + ) + # start fresh - aprx = arcpy.mp.ArcGISProject(arc_project) - layouts = aprx.listLayouts("Layout_{}".format(map_metric)) - maps = aprx.listMaps() - juris_lyr = {} # key: "BASIS" or "PBA40" - - assert(len(layouts)==1) - + aprx = arcpy.mp.ArcGISProject(arc_project) + layouts = 
aprx.listLayouts("Layout_{}".format(map_metric)) + maps = aprx.listMaps() + juris_lyr = {} # key: "BASIS" or "PBA40" + + assert len(layouts) == 1 + for my_map in maps: - if my_map.name.endswith(map_metric) or my_map.name.endswith(map_metric_name): + if my_map.name.endswith(map_metric) or my_map.name.endswith( + map_metric_name + ): # process this one print(" Processing map {}".format(my_map.name)) else: print(" Skipping map {}".format(my_map.name)) continue - + for layer in my_map.listLayers(): - if not layer.isFeatureLayer: continue + if not layer.isFeatureLayer: + continue print(" Processing layer {}".format(layer.name)) - print(" Definition query: {}".format(layer.definitionQuery)) + print( + " Definition query: {}".format(layer.definitionQuery) + ) # modify to current jurisdiction - layer.definitionQuery = layer.definitionQuery.replace(prev_jurisdiction, jurisdiction) - layer.definitionQuery = layer.definitionQuery.replace(prev_juris_code, juris_code) + layer.definitionQuery = layer.definitionQuery.replace( + prev_jurisdiction, jurisdiction + ) + layer.definitionQuery = layer.definitionQuery.replace( + prev_juris_code, juris_code + ) # modify to current devtype if is_devtype: - layer.definitionQuery = layer.definitionQuery.replace(prev_allowdevtype_metric, metric) - layer.name = layer.name.replace(prev_allowdevtype_metric, metric) - - print(" => Definition query: {}".format(layer.definitionQuery)) - + layer.definitionQuery = layer.definitionQuery.replace( + prev_allowdevtype_metric, metric + ) + layer.name = layer.name.replace( + prev_allowdevtype_metric, metric + ) + + print( + " => Definition query: {}".format( + layer.definitionQuery + ) + ) + # for devtype, may need to change variable used which means updating symbology if is_devtype: print(" Symbology: {}".format(layer.symbology)) - if hasattr(layer.symbology, 'renderer') and layer.symbology.renderer.type=='UniqueValueRenderer': - - fields = layer.symbology.renderer.fields - new_fields = [field.replace(prev_allowdevtype_metric, metric) for field in fields] - + if ( + hasattr(layer.symbology, "renderer") + and layer.symbology.renderer.type + == "UniqueValueRenderer" + ): + + fields = layer.symbology.renderer.fields + new_fields = [ + field.replace(prev_allowdevtype_metric, metric) + for field in fields + ] + # following example here: https://pro.arcgis.com/en/pro-app/arcpy/mapping/uniquevaluerenderer-class.htm sym = layer.symbology - sym.updateRenderer('UniqueValueRenderer') + sym.updateRenderer("UniqueValueRenderer") sym.renderer.fields = new_fields - print(" Symbology.renderer.fields: {} => {}".format(fields, new_fields)) + print( + " Symbology.renderer.fields: {} => {}".format( + fields, new_fields + ) + ) for grp in sym.renderer.groups: for itm in grp.items: - if itm.values == [['0']]: - itm.label = 'Not Allowed' - itm.symbol.color = {'RGB': [199, 215, 158, 100]} # light green - itm.symbol.size = 0.0 # outline width => no outline - elif itm.values == [['1']]: - itm.label = 'Allowed' - itm.symbol.color = {'RGB': [230, 152, 0, 100]} # orange - itm.symbol.size = 0.0 # outline width => no outline - elif itm.values == [['']]: - itm.label = 'Missing' - itm.symbol.color = {'RGB': [0, 77, 168, 100]} # blue - itm.symbol.size = 0.0 # outline width => no outline + if itm.values == [["0"]]: + itm.label = "Not Allowed" + itm.symbol.color = { + "RGB": [199, 215, 158, 100] + } # light green + itm.symbol.size = ( + 0.0 # outline width => no outline + ) + elif itm.values == [["1"]]: + itm.label = "Allowed" + itm.symbol.color = { + "RGB": [230, 
152, 0, 100] + } # orange + itm.symbol.size = ( + 0.0 # outline width => no outline + ) + elif itm.values == [[""]]: + itm.label = "Missing" + itm.symbol.color = { + "RGB": [0, 77, 168, 100] + } # blue + itm.symbol.size = ( + 0.0 # outline width => no outline + ) else: - print(" Don't recognize itm.values: {}".format(itm.values)) + print( + " Don't recognize itm.values: {}".format( + itm.values + ) + ) layer.symbology = sym - - + # save this for extent if layer.name == "Jurisdictions - primary": juris_lyr[my_map.name] = layer print(" saving juris_lyr[{}]".format(my_map.name)) - + layout = layouts[0] - - + print(" Processing layout {}".format(layout.name)) for element in layout.listElements(): print(" Processing element {}: {}".format(element.name, element)) - + if element.name == "Source": element.text = source_str if element.name == "Jurisdiction": element.text = jurisdiction - + if element.name == "juris_review_false": - element.visible = not basis_check_val # visible if basis_check_val==False + element.visible = ( + not basis_check_val + ) # visible if basis_check_val==False if element.name == "juris_review_true": - element.visible = basis_check_val # visible if basis_check_val==True - + element.visible = ( + basis_check_val # visible if basis_check_val==True + ) + if element.name == "arrow_basis": - element.visible = basis_hybrid_val # visible if basis_hybrid_val==True + element.visible = ( + basis_hybrid_val # visible if basis_hybrid_val==True + ) if element.name == "input_basis": - element.visible = basis_hybrid_val # visible if basis_hybrid_val==True - + element.visible = ( + basis_hybrid_val # visible if basis_hybrid_val==True + ) + if element.name == "arrow_pba40": - element.visible = not basis_hybrid_val # visible if basis_hybrid_val==False + element.visible = ( + not basis_hybrid_val + ) # visible if basis_hybrid_val==False if element.name == "input_pba40": - element.visible = not basis_hybrid_val # visible if basis_hybrid_val==False - + element.visible = ( + not basis_hybrid_val + ) # visible if basis_hybrid_val==False + if is_devtype and element.name == "BASIS Label": element.text = "BASIS {}".format(metric_name) if is_devtype and element.name == "PBA40 Label": element.text = "PBA40 {}".format(metric_name) - + # zoom to the jurisdiction if element.name.find("Map Frame") >= 0: if element.name.endswith("BASIS"): - map_type = "BASIS_"+map_metric + map_type = "BASIS_" + map_metric else: - map_type = "PBA40_"+map_metric - + map_type = "PBA40_" + map_metric + # get the jurisdiction layer extent layer_extent = element.getLayerExtent(juris_lyr[map_type]) # apply extent to mapframe camera element.camera.setExtent(layer_extent) - + if args.output_type == "pdf": juris_pdf = "{}_{}.pdf".format(juris_code, metric_name) layout.exportToPDF(juris_pdf) print(" Wrote {}".format(juris_pdf)) elif args.output_type == "png": juris_png = "{}_{}.png".format(juris_code, metric_name) - layout.exportToPNG(juris_png, resolution=300) + layout.exportToPNG(juris_png, resolution=300) print(" Wrote {}".format(juris_png)) - - + # if instructed, save a copy of the arcgis project if args.debug: - copy_filename = arc_project.replace(".aprx","_{}_{}.aprx".format(juris_code,metric)) + copy_filename = arc_project.replace( + ".aprx", "_{}_{}.aprx".format(juris_code, metric) + ) aprx.saveACopy(copy_filename) print("DEBUG: saved a copy of project to {}".format(copy_filename)) @@ -384,12 +659,10 @@ # File "C:\Program Files\ArcGIS\Pro\Resources\ArcPy\arcpy\arcobjects\_base.py", line 109, in _set # return 
setattr(self._arc_object, attr_name, cval(val)) # RuntimeError: Invalid set of Fileds : ['p10_plu_boc_allAttrs_IW_basis'] - + # note: I tried to retry using the continue line above, but in practice it ended up looping and being # unable to resolve the issue. So just quit sys.exit(2) # done with jurisdiction print("") - - diff --git a/policies/plu/dev_capacity_calculation_module.py b/policies/plu/dev_capacity_calculation_module.py index 0a380a9..38a2788 100644 --- a/policies/plu/dev_capacity_calculation_module.py +++ b/policies/plu/dev_capacity_calculation_module.py @@ -4,12 +4,12 @@ """ # Run the script by passing along the following inputs: - # 1. folder dir of the hybrid base zoning, eg. "C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\ - # PBA50\Policies\Base zoning\outputs\hybrid_base_zoning\2020_05_22_p10_plu_boc_urbansim_heuristic_10.csv" - # 2. hybrid verion, eg. "idx_urbansim_heuristic.csv" - # 3. folder dir for the ouput parcel-level development capacity, eg. "C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\ - # Bay Area UrbanSim 1.5\PBA50\Policies\Base zoning\outputs\capacity" - # 4. upzoning files, eg. 'zoning_mods_21', 'zoning_mods_22', 'zoning_mods_23' for Draft/Final Blueprint +# 1. folder dir of the hybrid base zoning, eg. "C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim 1.5\ +# PBA50\Policies\Base zoning\outputs\hybrid_base_zoning\2020_05_22_p10_plu_boc_urbansim_heuristic_10.csv" +# 2. hybrid verion, eg. "idx_urbansim_heuristic.csv" +# 3. folder dir for the ouput parcel-level development capacity, eg. "C:\Users\ywang\Box\Modeling and Surveys\Urban Modeling\ +# Bay Area UrbanSim 1.5\PBA50\Policies\Base zoning\outputs\capacity" +# 4. upzoning files, eg. 'zoning_mods_21', 'zoning_mods_22', 'zoning_mods_23' for Draft/Final Blueprint import pandas as pd @@ -17,42 +17,76 @@ import os, argparse, time, logging -if os.getenv('USERNAME')=='ywang': - GITHUB_PETRALE_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\petrale\\'.format(os.getenv('USERNAME')) -elif os.getenv('USERNAME')=='lzorn': - GITHUB_PETRALE_DIR = 'X:\\petrale' +if os.getenv("USERNAME") == "ywang": + GITHUB_PETRALE_DIR = "C:\\Users\\{}\\Documents\\GitHub\\petrale\\".format( + os.getenv("USERNAME") + ) +elif os.getenv("USERNAME") == "lzorn": + GITHUB_PETRALE_DIR = "X:\\petrale" -JURIS_COUNTY_FILE = os.path.join(GITHUB_PETRALE_DIR, 'zones', 'jurisdictions', 'juris_county_id.csv') +JURIS_COUNTY_FILE = os.path.join( + GITHUB_PETRALE_DIR, "zones", "jurisdictions", "juris_county_id.csv" +) # See Dataset_Field_Definitions_Phase1.xlsx, Build Out Capacity worksheet # https://mtcdrive.box.com/s/efbpxbz8553e90eljvlnnq20465whyiv -ALLOWED_BUILDING_TYPE_CODES = ["HS","HT","HM","OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"] -RES_BUILDING_TYPE_CODES = ["HS","HT","HM", "MR" ] -NONRES_BUILDING_TYPE_CODES = [ "OF","HO","SC","IL","IW","IH","RS","RB","MR","MT","ME"] - -INTENSITY_CODES = ["far", "dua", "height"] +ALLOWED_BUILDING_TYPE_CODES = [ + "HS", + "HT", + "HM", + "OF", + "HO", + "SC", + "IL", + "IW", + "IH", + "RS", + "RB", + "MR", + "MT", + "ME", +] +RES_BUILDING_TYPE_CODES = ["HS", "HT", "HM", "MR"] +NONRES_BUILDING_TYPE_CODES = [ + "OF", + "HO", + "SC", + "IL", + "IW", + "IH", + "RS", + "RB", + "MR", + "MT", + "ME", +] + +INTENSITY_CODES = ["far", "dua", "height"] # human-readable idx values for hybrid indexing USE_PBA40 = 0 USE_BASIS = 1 # used in calculate_capacity() -SQUARE_FEET_PER_ACRE = 43560.0 -SQUARE_FEET_PER_DU = 1000.0 -FEET_PER_STORY = 12.0 
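# Rough orientation on how these constants are applied in calculate_capacity() below
# (illustrative arithmetic only, not part of the patch hunks):
#   zoned_sqft = ACRES * max_far * SQUARE_FEET_PER_ACRE
#   zoned_du   = ACRES * max_dua
#   job_spaces = zoned_sqft / SQUARE_FEET_PER_EMPLOYEE[dev_type]
# e.g. a 2-acre office ("OF") parcel zoned at FAR 1.0 yields 2 * 43,560 = 87,120 sqft,
# or roughly 87,120 / 355 ≈ 245 job spaces. SQUARE_FEET_PER_DU is used further down to
# count existing non-residential floor area as dwelling-unit equivalents when testing
# whether a parcel is under-built.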
-PARCEL_USE_EFFICIENCY = 0.8 - -SQUARE_FEET_PER_EMPLOYEE = {'OF': 355.0, - 'HO': 1161.0, - 'SC': 470.0, - 'IL': 661.0, - 'IW': 960.0, - 'IH': 825.0, - 'RS': 445.0, - 'RB': 445.0, - 'MR': 383.0, - 'MT': 383.0, - 'ME': 383.0} +SQUARE_FEET_PER_ACRE = 43560.0 +SQUARE_FEET_PER_DU = 1000.0 +FEET_PER_STORY = 12.0 +PARCEL_USE_EFFICIENCY = 0.8 + +SQUARE_FEET_PER_EMPLOYEE = { + "OF": 355.0, + "HO": 1161.0, + "SC": 470.0, + "IL": 661.0, + "IW": 960.0, + "IH": 825.0, + "RS": 445.0, + "RB": 445.0, + "MR": 383.0, + "MT": 383.0, + "ME": 383.0, +} + def get_jurisdiction_county_df(): """ @@ -62,11 +96,14 @@ def get_jurisdiction_county_df(): """ # obtain jurisdiction list - juris_df = pd.read_csv(JURIS_COUNTY_FILE, usecols = ['juris_name_full', 'county_name']) - juris_df.rename(columns = {'juris_name_full': 'juris_name'}, inplace = True) + juris_df = pd.read_csv( + JURIS_COUNTY_FILE, usecols=["juris_name_full", "county_name"] + ) + juris_df.rename(columns={"juris_name_full": "juris_name"}, inplace=True) return juris_df -def set_allow_dev_type(df_original,boc_source): + +def set_allow_dev_type(df_original, boc_source): """ Assign allow residential and/or non-residential by summing the columns for the residential/nonresidential allowed building type codes @@ -78,21 +115,26 @@ def set_allow_dev_type(df_original,boc_source): # note that they can't be null because then they won't sum -- so make a copy and fillna with 0 for dev_type in ALLOWED_BUILDING_TYPE_CODES: - df[dev_type+"_"+boc_source] = df[dev_type+"_"+boc_source].fillna(value=0.0) - + df[dev_type + "_" + boc_source] = df[dev_type + "_" + boc_source].fillna( + value=0.0 + ) + # allow_res is sum of allowed building types that are residential - res_allowed_columns = [btype+'_'+boc_source for btype in RES_BUILDING_TYPE_CODES] - df['allow_res_' +boc_source] = df[res_allowed_columns].sum(axis=1) - + res_allowed_columns = [ + btype + "_" + boc_source for btype in RES_BUILDING_TYPE_CODES + ] + df["allow_res_" + boc_source] = df[res_allowed_columns].sum(axis=1) + # allow_nonres is the sum of allowed building types that are non-residential - nonres_allowed_columns = [btype+'_'+boc_source for btype in NONRES_BUILDING_TYPE_CODES] - df['allow_nonres_'+boc_source] = df[nonres_allowed_columns].sum(axis=1) - - return df[['PARCEL_ID', - "allow_res_" +boc_source, - "allow_nonres_" +boc_source]] - -def create_hybrid_parcel_data_from_juris_idx(logger, df_original,hybrid_idx): + nonres_allowed_columns = [ + btype + "_" + boc_source for btype in NONRES_BUILDING_TYPE_CODES + ] + df["allow_nonres_" + boc_source] = df[nonres_allowed_columns].sum(axis=1) + + return df[["PARCEL_ID", "allow_res_" + boc_source, "allow_nonres_" + boc_source]] + + +def create_hybrid_parcel_data_from_juris_idx(logger, df_original, hybrid_idx): """ Apply hybrid jurisdiction index to plu_boc parcel data * df_original is a parcel dataframe with pba40 and basis attributes @@ -110,23 +152,29 @@ def create_hybrid_parcel_data_from_juris_idx(logger, df_original,hybrid_idx): # don't modify passed df df = df_original.copy() - keep_cols = ['PARCEL_ID', 'juris_zmod'] + keep_cols = ["PARCEL_ID", "juris_zmod"] # join parcel dataframe with jurisdiction hybrid_idx on juris_zmod == juris_name # this brings in XX_idx - urbansim_df = pd.merge(left =df_original.copy(), - right =hybrid_idx, - left_on ='juris_zmod', - right_on='juris_name', - how = 'left') + urbansim_df = pd.merge( + left=df_original.copy(), + right=hybrid_idx, + left_on="juris_zmod", + right_on="juris_name", + how="left", + ) # bring in the allowed 
development type values for dev_type in ALLOWED_BUILDING_TYPE_CODES: # default to BASIS - urbansim_df["{}_urbansim".format(dev_type)] = urbansim_df["{}_basis".format(dev_type)] + urbansim_df["{}_urbansim".format(dev_type)] = urbansim_df[ + "{}_basis".format(dev_type) + ] # but set to PBA40 if the idx says to use PBA40 - urbansim_df.loc[ urbansim_df["{}_idx".format(dev_type)]==USE_PBA40, - "{}_urbansim".format(dev_type) ] = urbansim_df["{}_pba40".format(dev_type)] + urbansim_df.loc[ + urbansim_df["{}_idx".format(dev_type)] == USE_PBA40, + "{}_urbansim".format(dev_type), + ] = urbansim_df["{}_pba40".format(dev_type)] # keep the idx and the new column keep_cols.append("{}_idx".format(dev_type)) keep_cols.append("{}_urbansim".format(dev_type)) @@ -134,10 +182,14 @@ def create_hybrid_parcel_data_from_juris_idx(logger, df_original,hybrid_idx): # bring in the intensity type values for intensity in INTENSITY_CODES: # default to BASIS - urbansim_df["max_{}_urbansim".format(intensity)] = urbansim_df["max_{}_basis".format(intensity)] + urbansim_df["max_{}_urbansim".format(intensity)] = urbansim_df[ + "max_{}_basis".format(intensity) + ] # but set to PBA40 if the idx says to use PBA40 - urbansim_df.loc[ urbansim_df["max_{}_idx".format(intensity)]==USE_PBA40, - "max_{}_urbansim".format(intensity) ] = urbansim_df["max_{}_pba40".format(intensity)] + urbansim_df.loc[ + urbansim_df["max_{}_idx".format(intensity)] == USE_PBA40, + "max_{}_urbansim".format(intensity), + ] = urbansim_df["max_{}_pba40".format(intensity)] # keep the idx and the new column keep_cols.append("max_{}_idx".format(intensity)) @@ -146,10 +198,10 @@ def create_hybrid_parcel_data_from_juris_idx(logger, df_original,hybrid_idx): return urbansim_df[keep_cols] -def calculate_capacity(df_original,boc_source,nodev_source,pass_thru_cols=[]): +def calculate_capacity(df_original, boc_source, nodev_source, pass_thru_cols=[]): """ Calculate the development capacity in res units, non-res sqft, and employee estimates - + Inputs: * df_original: parcel-zoning dataframe, mapping parcel_id to zoning attributes including allowable development types and development intensities @@ -158,7 +210,7 @@ def calculate_capacity(df_original,boc_source,nodev_source,pass_thru_cols=[]): * building_parcel_df_original: parcel data joined to building data, which is used to determine parcel existing characteristics - these characteristics determine if a parcel's net capacity is zero or equals the raw capacity - * calculate_net: if calculate_net = True, calculate both raw and net capacity; if + * calculate_net: if calculate_net = True, calculate both raw and net capacity; if calculate_net = False, only calculate raw capacity. 
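    For example (illustrative sketch of the raw-capacity math implemented below,
    shown for boc_source = "basezoning"):

        zoned_du_basezoning   = ACRES * max_dua_basezoning
        zoned_sqft_basezoning = ACRES * max_far_basezoning * SQUARE_FEET_PER_ACRE
        job_spaces_basezoning = zoned_sqft_basezoning / SQUARE_FEET_PER_EMPLOYEE[dev_type]

    with zoned_du (zoned_sqft) reset to zero on parcels flagged nodev or where the
    allow_res (allow_nonres) flag is zero.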
Returns dataframe with columns: @@ -177,49 +229,65 @@ def calculate_capacity(df_original,boc_source,nodev_source,pass_thru_cols=[]): * is_under_built_[boc_source] * res_zoned_existing_ratio_[boc_source] * nonres_zoned_existing_ratio_[boc_source] - + """ - + df = df_original.copy() - + # DUA calculations apply to parcels 'allowRes' - df['zoned_du_'+boc_source] = df['ACRES'] * df['max_dua_'+boc_source] - + df["zoned_du_" + boc_source] = df["ACRES"] * df["max_dua_" + boc_source] + # zero out units for 'nodev' parcels or parcels that don't allow residential - zero_unit_idx = (df['allow_res_'+boc_source] == 0) | (df['nodev_'+nodev_source] == 1) - df.loc[zero_unit_idx,'zoned_du_' +boc_source] = 0 - + zero_unit_idx = (df["allow_res_" + boc_source] == 0) | ( + df["nodev_" + nodev_source] == 1 + ) + df.loc[zero_unit_idx, "zoned_du_" + boc_source] = 0 + # FAR calculations apply to parcels 'allowNonRes' - df['zoned_sqft_' +boc_source] = df['ACRES'] * df['max_far_'+boc_source] * SQUARE_FEET_PER_ACRE - + df["zoned_sqft_" + boc_source] = ( + df["ACRES"] * df["max_far_" + boc_source] * SQUARE_FEET_PER_ACRE + ) + # zero out sqft for 'nodev' parcels or parcels that don't allow non-residential - zero_sqft_idx = (df['allow_nonres_'+boc_source] == 0) | (df['nodev_'+nodev_source] == 1) - df.loc[zero_sqft_idx,'zoned_sqft_' +boc_source] = 0 - - df['zoned_Ksqft_'+boc_source] = df['zoned_sqft_'+boc_source]*0.001 + zero_sqft_idx = (df["allow_nonres_" + boc_source] == 0) | ( + df["nodev_" + nodev_source] == 1 + ) + df.loc[zero_sqft_idx, "zoned_sqft_" + boc_source] = 0 + + df["zoned_Ksqft_" + boc_source] = df["zoned_sqft_" + boc_source] * 0.001 # calculate job_spaces - df['job_spaces_'+boc_source] = 0 + df["job_spaces_" + boc_source] = 0 for dev_type in NONRES_BUILDING_TYPE_CODES: - df.loc[df[dev_type+'_'+boc_source] == 1, 'job_spaces_'+boc_source] = df['zoned_sqft_'+boc_source] / SQUARE_FEET_PER_EMPLOYEE[dev_type] - - keep_cols = ['PARCEL_ID'] + pass_thru_cols + \ - [ - "zoned_du_" +boc_source, - "zoned_sqft_" +boc_source, - "zoned_Ksqft_" +boc_source, - "job_spaces_" +boc_source - ] + df.loc[df[dev_type + "_" + boc_source] == 1, "job_spaces_" + boc_source] = ( + df["zoned_sqft_" + boc_source] / SQUARE_FEET_PER_EMPLOYEE[dev_type] + ) + + keep_cols = ( + ["PARCEL_ID"] + + pass_thru_cols + + [ + "zoned_du_" + boc_source, + "zoned_sqft_" + boc_source, + "zoned_Ksqft_" + boc_source, + "job_spaces_" + boc_source, + ] + ) return df[keep_cols] -def calculate_net_capacity(logger, df_original,boc_source,nodev_source, - building_parcel_df_original, - net_pass_thru_cols=[]): +def calculate_net_capacity( + logger, + df_original, + boc_source, + nodev_source, + building_parcel_df_original, + net_pass_thru_cols=[], +): """ Calculate the net development capacity in res units, non-res sqft, and employee estimates - + Inputs: * df_original: parcel-zoning dataframe, mapping parcel_id to zoning attributes including allowable development types and development intensities @@ -253,153 +321,226 @@ def calculate_net_capacity(logger, df_original,boc_source,nodev_source, * zoned_sqft_underbuild_noProt_[boc_source] * zoned_Ksqft_underbuild_noProt_[boc_source] * job_spaces_underbuild_noProt_[boc_source] - + """ - capacity_raw = calculate_capacity(df_original,boc_source,nodev_source,pass_thru_cols=net_pass_thru_cols) + capacity_raw = calculate_capacity( + df_original, boc_source, nodev_source, pass_thru_cols=net_pass_thru_cols + ) building_parcel_df = building_parcel_df_original.copy() # label vacant building based on building's 
development_type_id # https://github.com/BayAreaMetro/petrale/blob/master/incoming/dv_buildings_det_type_lu.csv building_parcel_df["building_vacant"] = 0.0 - building_parcel_df.loc[building_parcel_df.development_type_id== 0, "building_vacant"] = 1.0 - building_parcel_df.loc[building_parcel_df.development_type_id== 15, "building_vacant"] = 1.0 - - # parcel_building_df_original is building-level data, therefore need to + building_parcel_df.loc[ + building_parcel_df.development_type_id == 0, "building_vacant" + ] = 1.0 + building_parcel_df.loc[ + building_parcel_df.development_type_id == 15, "building_vacant" + ] = 1.0 + + # parcel_building_df_original is building-level data, therefore need to # aggregate buildings at the parcel level - building_groupby_parcel = building_parcel_df.groupby(['PARCEL_ID']).agg({ - 'ACRES' :'max', - 'LAND_VALUE' :'max', - 'improvement_value' :'sum', - 'residential_units' :'sum', - 'residential_sqft' :'sum', - 'non_residential_sqft':'sum', - 'building_sqft' :'sum', - 'year_built' :'min', - 'building_id' :'min', - 'building_vacant' :'prod'}) # all buildings must be vacant to call this building_vacant + building_groupby_parcel = building_parcel_df.groupby(["PARCEL_ID"]).agg( + { + "ACRES": "max", + "LAND_VALUE": "max", + "improvement_value": "sum", + "residential_units": "sum", + "residential_sqft": "sum", + "non_residential_sqft": "sum", + "building_sqft": "sum", + "year_built": "min", + "building_id": "min", + "building_vacant": "prod", + } + ) # all buildings must be vacant to call this building_vacant # Identify vacant parcels building_groupby_parcel["parcel_vacant"] = False - building_groupby_parcel.loc[ building_groupby_parcel['building_id'].isnull(), "parcel_vacant" ] = True - building_groupby_parcel.loc[ building_groupby_parcel['building_vacant'] == 1.0, "parcel_vacant" ] = True - building_groupby_parcel.loc[(building_groupby_parcel['improvement_value' ] == 0) & - (building_groupby_parcel['residential_units' ] == 0) & - (building_groupby_parcel['residential_sqft' ] == 0) & - (building_groupby_parcel['non_residential_sqft'] == 0) & - (building_groupby_parcel['building_sqft' ] == 0), "parcel_vacant"] = True - logger.info("Vacant parcel statistics: \n {}".format(building_groupby_parcel.parcel_vacant.value_counts())) + building_groupby_parcel.loc[ + building_groupby_parcel["building_id"].isnull(), "parcel_vacant" + ] = True + building_groupby_parcel.loc[ + building_groupby_parcel["building_vacant"] == 1.0, "parcel_vacant" + ] = True + building_groupby_parcel.loc[ + (building_groupby_parcel["improvement_value"] == 0) + & (building_groupby_parcel["residential_units"] == 0) + & (building_groupby_parcel["residential_sqft"] == 0) + & (building_groupby_parcel["non_residential_sqft"] == 0) + & (building_groupby_parcel["building_sqft"] == 0), + "parcel_vacant", + ] = True + logger.info( + "Vacant parcel statistics: \n {}".format( + building_groupby_parcel.parcel_vacant.value_counts() + ) + ) # Identify parcels with old buildings which are protected (if multiple buildings on one parcel, take the oldest) # and not build on before-1940 parcels - building_groupby_parcel['building_age'] = 'missing' - building_groupby_parcel.loc[building_groupby_parcel.year_built >= 2000, 'building_age' ] = 'after 2000' - building_groupby_parcel.loc[building_groupby_parcel.year_built < 2000, 'building_age' ] = '1980-2000' - building_groupby_parcel.loc[building_groupby_parcel.year_built < 1980, 'building_age' ] = '1940-1980' - building_groupby_parcel.loc[building_groupby_parcel.year_built < 
1940, 'building_age' ] = 'before 1940' - - building_groupby_parcel['has_old_building'] = False - building_groupby_parcel.loc[building_groupby_parcel.building_age == 'before 1940','has_old_building'] = True - logger.info('Parcel statistics by the age of the oldest building: \n {}'.format(building_groupby_parcel.building_age.value_counts())) - + building_groupby_parcel["building_age"] = "missing" + building_groupby_parcel.loc[ + building_groupby_parcel.year_built >= 2000, "building_age" + ] = "after 2000" + building_groupby_parcel.loc[ + building_groupby_parcel.year_built < 2000, "building_age" + ] = "1980-2000" + building_groupby_parcel.loc[ + building_groupby_parcel.year_built < 1980, "building_age" + ] = "1940-1980" + building_groupby_parcel.loc[ + building_groupby_parcel.year_built < 1940, "building_age" + ] = "before 1940" + + building_groupby_parcel["has_old_building"] = False + building_groupby_parcel.loc[ + building_groupby_parcel.building_age == "before 1940", "has_old_building" + ] = True + logger.info( + "Parcel statistics by the age of the oldest building: \n {}".format( + building_groupby_parcel.building_age.value_counts() + ) + ) # Identify single-family parcels smaller than 0.5 acre - building_groupby_parcel['small_HS_parcel'] = False - small_HS_idx = (building_groupby_parcel.residential_units == 1.0) & (building_groupby_parcel.ACRES < 0.5) - building_groupby_parcel.loc[small_HS_idx, 'small_HS_parcel'] = True - logger.info("Small single-family parcel statistics: \n {}".format(building_groupby_parcel.small_HS_parcel.value_counts())) + building_groupby_parcel["small_HS_parcel"] = False + small_HS_idx = (building_groupby_parcel.residential_units == 1.0) & ( + building_groupby_parcel.ACRES < 0.5 + ) + building_groupby_parcel.loc[small_HS_idx, "small_HS_parcel"] = True + logger.info( + "Small single-family parcel statistics: \n {}".format( + building_groupby_parcel.small_HS_parcel.value_counts() + ) + ) # Identify parcels smaller than 2000 sqft - building_groupby_parcel['small_parcel'] = False + building_groupby_parcel["small_parcel"] = False small_parcel_idx = (building_groupby_parcel.ACRES * SQUARE_FEET_PER_ACRE) < 2000 - building_groupby_parcel.loc[small_parcel_idx, 'small_parcel'] = True - logger.info("Small parcel (<2000 sqft) statistics: \n {}".format(building_groupby_parcel.small_parcel.value_counts())) + building_groupby_parcel.loc[small_parcel_idx, "small_parcel"] = True + logger.info( + "Small parcel (<2000 sqft) statistics: \n {}".format( + building_groupby_parcel.small_parcel.value_counts() + ) + ) # Calculate parcel's investment-land ratio - building_groupby_parcel['ILR'] = building_groupby_parcel['improvement_value'] / building_groupby_parcel['LAND_VALUE'] - building_groupby_parcel.loc[building_groupby_parcel['LAND_VALUE'] == 0, 'ILR'] = 'n/a' - + building_groupby_parcel["ILR"] = ( + building_groupby_parcel["improvement_value"] + / building_groupby_parcel["LAND_VALUE"] + ) + building_groupby_parcel.loc[ + building_groupby_parcel["LAND_VALUE"] == 0, "ILR" + ] = "n/a" # join to raw capacity dataframe - capacity_with_building = pd.merge(left=capacity_raw, - right=building_groupby_parcel, - how="left", - on="PARCEL_ID") + capacity_with_building = pd.merge( + left=capacity_raw, right=building_groupby_parcel, how="left", on="PARCEL_ID" + ) # Identify under-built parcels and calculate the net units capacity for under-built parcels - new_units = (capacity_with_building['zoned_du_' + boc_source] - - capacity_with_building['residential_units'] - - 
capacity_with_building['non_residential_sqft'] / SQUARE_FEET_PER_DU).clip(lower=0) - ratio = (new_units / capacity_with_building['residential_units']).replace(np.inf, 1) - capacity_with_building['is_under_built_' + boc_source] = ratio > 0.5 - logger.info('Under_built parcel statistics ({}): \n {}'.format(boc_source, - (capacity_with_building['is_under_built_' + boc_source].value_counts()))) - + new_units = ( + capacity_with_building["zoned_du_" + boc_source] + - capacity_with_building["residential_units"] + - capacity_with_building["non_residential_sqft"] / SQUARE_FEET_PER_DU + ).clip(lower=0) + ratio = (new_units / capacity_with_building["residential_units"]).replace(np.inf, 1) + capacity_with_building["is_under_built_" + boc_source] = ratio > 0.5 + logger.info( + "Under-built parcel statistics ({}): \n {}".format( + boc_source, + (capacity_with_building["is_under_built_" + boc_source].value_counts()), + ) + ) # Calculate existing capacity to zoned capacity ratio # ratio of existing res units to zoned res units - capacity_with_building['res_zoned_existing_ratio_' + boc_source] = \ - (capacity_with_building['residential_units'] / capacity_with_building['zoned_du_' + boc_source]).replace(np.inf, 1).clip(lower=0) + capacity_with_building["res_zoned_existing_ratio_" + boc_source] = ( + ( + capacity_with_building["residential_units"] + / capacity_with_building["zoned_du_" + boc_source] + ) + .replace(np.inf, 1) + .clip(lower=0) + ) # ratio of existing non-res sqft to zoned non-res sqft - capacity_with_building['nonres_zoned_existing_ratio_' + boc_source] = \ - (capacity_with_building['non_residential_sqft'] / capacity_with_building['zoned_sqft_' + boc_source]).replace(np.inf, 1).clip(lower=0) - + capacity_with_building["nonres_zoned_existing_ratio_" + boc_source] = ( + ( + capacity_with_building["non_residential_sqft"] + / capacity_with_building["zoned_sqft_" + boc_source] + ) + .replace(np.inf, 1) + .clip(lower=0) + ) # calculate net capacity by different criteria # 1. only of vacant parcels for capacity_type in ["zoned_du", "zoned_sqft", "zoned_Ksqft", "job_spaces"]: - capacity_with_building[capacity_type+'_vacant_'+boc_source] = capacity_with_building[capacity_type+'_'+boc_source] - capacity_with_building.loc[capacity_with_building.parcel_vacant == False, - capacity_type+'_vacant_'+boc_source] = 0 + capacity_with_building[ + capacity_type + "_vacant_" + boc_source + ] = capacity_with_building[capacity_type + "_" + boc_source] + capacity_with_building.loc[ + capacity_with_building.parcel_vacant == False, + capacity_type + "_vacant_" + boc_source, + ] = 0 # 2. only of under-built parcels for capacity_type in ["zoned_du", "zoned_sqft", "zoned_Ksqft", "job_spaces"]: - capacity_with_building[capacity_type+'_underbuild_'+boc_source] = capacity_with_building[capacity_type+'_'+boc_source] - capacity_with_building.loc[capacity_with_building['is_under_built_' + boc_source] == False, - capacity_type+'_underbuild_'+boc_source] = 0 + capacity_with_building[ + capacity_type + "_underbuild_" + boc_source + ] = capacity_with_building[capacity_type + "_" + boc_source] + capacity_with_building.loc[ + capacity_with_building["is_under_built_" + boc_source] == False, + capacity_type + "_underbuild_" + boc_source, + ] = 0 # 3. 
of under-built but no protected parcels (with old building or single-family parcel < 0.5 acre) for capacity_type in ["zoned_du", "zoned_sqft", "zoned_Ksqft", "job_spaces"]: - capacity_with_building[capacity_type+'_underbuild_noProt_'+boc_source] = capacity_with_building[capacity_type+'_'+boc_source] - capacity_with_building.loc[(capacity_with_building['is_under_built_' + boc_source] == False) | - (capacity_with_building.has_old_building == True) | - (capacity_with_building.small_HS_parcel == True) | - (capacity_with_building.small_parcel == True), - capacity_type+'_underbuild_noProt_'+boc_source] = 0 - - keep_cols = ['PARCEL_ID'] + net_pass_thru_cols + \ - [ - "zoned_du_vacant_" + boc_source, - "zoned_sqft_vacant_" + boc_source, - "zoned_Ksqft_vacant_" + boc_source, - "job_spaces_vacant_" + boc_source, - - "zoned_du_underbuild_" + boc_source, - "zoned_sqft_underbuild_" + boc_source, - "zoned_Ksqft_underbuild_" + boc_source, - "job_spaces_underbuild_" + boc_source, - - "zoned_du_underbuild_noProt_" + boc_source, - "zoned_sqft_underbuild_noProt_" + boc_source, - "zoned_Ksqft_underbuild_noProt_" + boc_source, - "job_spaces_underbuild_noProt_" + boc_source, - - "parcel_vacant", - "has_old_building", - 'small_HS_parcel', - "ILR", - - 'is_under_built_' + boc_source, - 'res_zoned_existing_ratio_' + boc_source, - 'nonres_zoned_existing_ratio_' + boc_source - ] + capacity_with_building[ + capacity_type + "_underbuild_noProt_" + boc_source + ] = capacity_with_building[capacity_type + "_" + boc_source] + capacity_with_building.loc[ + (capacity_with_building["is_under_built_" + boc_source] == False) + | (capacity_with_building.has_old_building == True) + | (capacity_with_building.small_HS_parcel == True) + | (capacity_with_building.small_parcel == True), + capacity_type + "_underbuild_noProt_" + boc_source, + ] = 0 + + keep_cols = ( + ["PARCEL_ID"] + + net_pass_thru_cols + + [ + "zoned_du_vacant_" + boc_source, + "zoned_sqft_vacant_" + boc_source, + "zoned_Ksqft_vacant_" + boc_source, + "job_spaces_vacant_" + boc_source, + "zoned_du_underbuild_" + boc_source, + "zoned_sqft_underbuild_" + boc_source, + "zoned_Ksqft_underbuild_" + boc_source, + "job_spaces_underbuild_" + boc_source, + "zoned_du_underbuild_noProt_" + boc_source, + "zoned_sqft_underbuild_noProt_" + boc_source, + "zoned_Ksqft_underbuild_noProt_" + boc_source, + "job_spaces_underbuild_noProt_" + boc_source, + "parcel_vacant", + "has_old_building", + "small_HS_parcel", + "ILR", + "is_under_built_" + boc_source, + "res_zoned_existing_ratio_" + boc_source, + "nonres_zoned_existing_ratio_" + boc_source, + ] + ) return capacity_with_building[keep_cols] -if __name__ == '__main__': +if __name__ == "__main__": - """ + """ # create logger logger = logging.getLogger(__name__) logger.setLevel('DEBUG') @@ -419,20 +560,33 @@ def calculate_net_capacity(logger, df_original,boc_source,nodev_source, logger.info("data_output_dir = {}".format(data_output_dir)) """ - - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) - parser.add_argument('hybrid_zoning', metavar="hybrid_zoning.csv", help="Input hybrid zoning") - parser.add_argument('hybrid_version', metavar="hybrid_version", help="Version of input hybrid zoning") - parser.add_argument('capacity_output', metavar="capacity_output", help="Capacity output folder") - #parser.add_argument('upzoning_zmods', metavar='zoningmods.csv', help='Zoningmods for upzoning') - parser.add_argument('upzoning_scenario',metavar='upzoning_scenario', help="Scenario of input 
upzoning zoning") + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "hybrid_zoning", metavar="hybrid_zoning.csv", help="Input hybrid zoning" + ) + parser.add_argument( + "hybrid_version", + metavar="hybrid_version", + help="Version of input hybrid zoning", + ) + parser.add_argument( + "capacity_output", metavar="capacity_output", help="Capacity output folder" + ) + # parser.add_argument('upzoning_zmods', metavar='zoningmods.csv', help='Zoningmods for upzoning') + parser.add_argument( + "upzoning_scenario", + metavar="upzoning_scenario", + help="Scenario of input upzoning zoning", + ) args = parser.parse_args() - print(" {:15}: {}".format('hybrid_zoning', args.hybrid_zoning)) - print(" {:15}: {}".format('hybrid_version', args.hybrid_version)) - print(" {:15}: {}".format('capacity_output', args.capacity_output)) - #print(" {:15}: {}".format('upzoning', args.upzoning_zmods)) - print(" {:15}: {}".format('upzoning_scenario',args.upzoning_scenario)) + print(" {:15}: {}".format("hybrid_zoning", args.hybrid_zoning)) + print(" {:15}: {}".format("hybrid_version", args.hybrid_version)) + print(" {:15}: {}".format("capacity_output", args.capacity_output)) + # print(" {:15}: {}".format('upzoning', args.upzoning_zmods)) + print(" {:15}: {}".format("upzoning_scenario", args.upzoning_scenario)) zoning_to_capacity(args.hybrid_zoning, args.hybrid_version, args.capacity_output) - diff --git a/policies/plu/extract_area_basezoning.py b/policies/plu/extract_area_basezoning.py index ea83945..4e880c8 100644 --- a/policies/plu/extract_area_basezoning.py +++ b/policies/plu/extract_area_basezoning.py @@ -1,4 +1,4 @@ -USAGE=""" +USAGE = """ Extract base zoning data for a given jurisdiction or county. 
@@ -14,73 +14,181 @@ import numpy as np import argparse, os, glob, logging, sys, time -today = time.strftime('%Y_%m_%d') +today = time.strftime("%Y_%m_%d") -if os.getenv('USERNAME') =='ywang': - M_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint' - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - GITHUB_URBANSIM_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim\\data'.format(os.getenv('USERNAME')) +if os.getenv("USERNAME") == "ywang": + M_DIR = "M:\\Data\\Urban\\BAUS\\PBA50\\Draft_Blueprint" + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + GITHUB_URBANSIM_DIR = ( + "C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim\\data".format( + os.getenv("USERNAME") + ) + ) # input -M_BASEZONING_DIR = os.path.join(M_DIR, 'Base zoning', 'output') -BASEZONING_FILE = os.path.join(M_BASEZONING_DIR, '2020_06_03_p10_plu_boc_allAttrs.csv') +M_BASEZONING_DIR = os.path.join(M_DIR, "Base zoning", "output") +BASEZONING_FILE = os.path.join(M_BASEZONING_DIR, "2020_06_03_p10_plu_boc_allAttrs.csv") # output -BOX_BASEZONING_DIR = os.path.join(BOX_DIR, 'Policies', 'Base zoning', 'outputs') -AREA_BASEZONING_FILE = os.path.join(BOX_BASEZONING_DIR, '{}_basezoning'.format(today)) - -juris_list = ['livermore', 'hayward', 'unincorporated_sonoma', 'fremont', - 'pleasanton', 'dublin', 'unincorporated_contra_costa', 'brentwood', - 'san_ramon', 'oakley', 'antioch', 'unincorporated_napa', - 'san_francisco', 'unincorporated_san_mateo', 'petaluma', - 'santa_rosa', 'rohnert_park', 'unincorporated_marin', 'richmond', - 'pittsburg', 'orinda', 'alameda', 'napa', 'hercules', 'newark', - 'unincorporated_alameda', 'martinez', 'danville', 'healdsburg', - 'concord', 'sunnyvale', 'clayton', 'daly_city', 'rio_vista', - 'oakland', 'lafayette', 'san_pablo', 'walnut_creek', - 'pleasant_hill', 'union_city', 'brisbane', 'cloverdale', - 'san_leandro', 'pinole', 'fairfield', 'san_jose', - 'south_san_francisco', 'palo_alto', 'novato', 'hillsborough', - 'half_moon_bay', 'berkeley', 'unincorporated_solano', 'milpitas', - 'american_canyon', 'redwood_city', 'mountain_view', 'sonoma', - 'fairfax', 'santa_clara', 'vallejo', 'woodside', - 'unincorporated_santa_clara', 'windsor', 'moraga', 'dixon', - 'vacaville', 'gilroy', 'morgan_hill', 'cupertino', 'benicia', - 'larkspur', 'piedmont', 'san_mateo', 'san_rafael', 'san_bruno', - 'calistoga', 'cotati', 'mill_valley', 'san_anselmo', 'los_altos', - 'el_cerrito', 'saratoga', 'suisun_city', 'sebastopol', 'campbell', - 'st_helena', 'albany', 'los_gatos', 'menlo_park', 'san_carlos', - 'los_altos_hills', 'sausalito', 'pacifica', 'belmont', 'tiburon', - 'east_palo_alto', 'emeryville', 'corte_madera', 'foster_city', - 'millbrae', 'burlingame', 'atherton', 'portola_valley', - 'monte_sereno', 'ross', 'yountville', 'colma', 'belvedere'] -county_list = ['Alameda', 'Sonoma', 'Contra Costa', 'Napa', 'San Francisco', - 'San Mateo', 'Marin', 'Santa Clara', 'Solano'] - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) - parser.add_argument('--area_list', nargs='+', help='list of jurisdiction or county name') +BOX_BASEZONING_DIR = os.path.join(BOX_DIR, "Policies", "Base zoning", "outputs") +AREA_BASEZONING_FILE = os.path.join(BOX_BASEZONING_DIR, "{}_basezoning".format(today)) + +juris_list = [ + "livermore", + "hayward", + "unincorporated_sonoma", + "fremont", + 
"pleasanton", + "dublin", + "unincorporated_contra_costa", + "brentwood", + "san_ramon", + "oakley", + "antioch", + "unincorporated_napa", + "san_francisco", + "unincorporated_san_mateo", + "petaluma", + "santa_rosa", + "rohnert_park", + "unincorporated_marin", + "richmond", + "pittsburg", + "orinda", + "alameda", + "napa", + "hercules", + "newark", + "unincorporated_alameda", + "martinez", + "danville", + "healdsburg", + "concord", + "sunnyvale", + "clayton", + "daly_city", + "rio_vista", + "oakland", + "lafayette", + "san_pablo", + "walnut_creek", + "pleasant_hill", + "union_city", + "brisbane", + "cloverdale", + "san_leandro", + "pinole", + "fairfield", + "san_jose", + "south_san_francisco", + "palo_alto", + "novato", + "hillsborough", + "half_moon_bay", + "berkeley", + "unincorporated_solano", + "milpitas", + "american_canyon", + "redwood_city", + "mountain_view", + "sonoma", + "fairfax", + "santa_clara", + "vallejo", + "woodside", + "unincorporated_santa_clara", + "windsor", + "moraga", + "dixon", + "vacaville", + "gilroy", + "morgan_hill", + "cupertino", + "benicia", + "larkspur", + "piedmont", + "san_mateo", + "san_rafael", + "san_bruno", + "calistoga", + "cotati", + "mill_valley", + "san_anselmo", + "los_altos", + "el_cerrito", + "saratoga", + "suisun_city", + "sebastopol", + "campbell", + "st_helena", + "albany", + "los_gatos", + "menlo_park", + "san_carlos", + "los_altos_hills", + "sausalito", + "pacifica", + "belmont", + "tiburon", + "east_palo_alto", + "emeryville", + "corte_madera", + "foster_city", + "millbrae", + "burlingame", + "atherton", + "portola_valley", + "monte_sereno", + "ross", + "yountville", + "colma", + "belvedere", +] +county_list = [ + "Alameda", + "Sonoma", + "Contra Costa", + "Napa", + "San Francisco", + "San Mateo", + "Marin", + "Santa Clara", + "Solano", +] + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--area_list", nargs="+", help="list of jurisdiction or county name" + ) args = parser.parse_args() # read basezoning data basezoning_all = pd.read_csv(BASEZONING_FILE) - print('Read {} records from {}'.format(len(BASEZONING_FILE), BASEZONING_FILE)) + print("Read {} records from {}".format(len(BASEZONING_FILE), BASEZONING_FILE)) # extract data for areas in the list for area in args.area_list: - print('Select records for {}'.format(area)) + print("Select records for {}".format(area)) if area in juris_list: area_basezoning = basezoning_all.loc[basezoning_all.juris_zmod == area] if area in county_list: area_basezoning = basezoning_all.loc[basezoning_all.county_name == area] if (area not in juris_list) & (area not in county_list): - print('Wrong jurisdiction or county name: {}'.format(area)) + print("Wrong jurisdiction or county name: {}".format(area)) continue - export_file_name = '{}_{}.csv'.format(AREA_BASEZONING_FILE, area) - print('Export {} records of basezoning for {} to {}'.format(len(area_basezoning), - area, - export_file_name)) - area_basezoning.to_csv(export_file_name, index = False) \ No newline at end of file + export_file_name = "{}_{}.csv".format(AREA_BASEZONING_FILE, area) + print( + "Export {} records of basezoning for {} to {}".format( + len(area_basezoning), area, export_file_name + ) + ) + area_basezoning.to_csv(export_file_name, index=False) diff --git a/policies/plu/parcel_BlueprintGeos_index.py b/policies/plu/parcel_BlueprintGeos_index.py index 1bd0734..c757dfc 100644 --- 
a/policies/plu/parcel_BlueprintGeos_index.py +++ b/policies/plu/parcel_BlueprintGeos_index.py @@ -1,169 +1,206 @@ - import pandas as pd import numpy as np import os, glob, logging, sys, time -#p10_PDA.csv is created in ArcGIS through spatial join of p10 polygons and Draft Blueprint growth geography polygons. +# p10_PDA.csv is created in ArcGIS through spatial join of p10 polygons and Draft Blueprint growth geography polygons. -#p10 : M:\Data\GIS layers\UrbanSim smelt\2020 03 12\smelt.gdb -#PDA : http://opendata.mtc.ca.gov/datasets/priority-development-areas-current?geometry=-129.633%2C36.372%2C-114.945%2C39.406 -#Spatial join rule : Centers of p10 polygons fall into PDA polygons -#Spatial join output: M:\Data\GIS layers\Blueprint Land Use Strategies\Blueprint Land Use Strategies.gdb\p10_PDA +# p10 : M:\Data\GIS layers\UrbanSim smelt\2020 03 12\smelt.gdb +# PDA : http://opendata.mtc.ca.gov/datasets/priority-development-areas-current?geometry=-129.633%2C36.372%2C-114.945%2C39.406 +# Spatial join rule : Centers of p10 polygons fall into PDA polygons +# Spatial join output: M:\Data\GIS layers\Blueprint Land Use Strategies\Blueprint Land Use Strategies.gdb\p10_PDA -#Then run script 'https://github.com/BayAreaMetro/petrale/blob/master/basemap/export_filegdb_layers.py' -#to export the 'p10_PDA' layer to .csv format: p10_PDA_09172020.csv +# Then run script 'https://github.com/BayAreaMetro/petrale/blob/master/basemap/export_filegdb_layers.py' +# to export the 'p10_PDA' layer to .csv format: p10_PDA_09172020.csv NOW = time.strftime("%Y_%m%d_%H%M") -today = time.strftime('%Y_%m_%d') - +today = time.strftime("%Y_%m_%d") + + +if os.getenv("USERNAME") == "ywang": + WORK_DIR = ( + "M:\\Data\\GIS layers\\Blueprint Land Use Strategies\\ID_idx\\Final Blueprint" + ) + GROWTH_GEOGRAPHY_DIR = ( + "M:\\Data\\Urban\\BAUS\\PBA50\\Final_Blueprint\\Zoning Modifications" + ) + URBANSIM_DIR = "C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim".format( + os.getenv("USERNAME") + ) + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim".format( + os.getenv("USERNAME") + ) -if os.getenv('USERNAME')=='ywang': - WORK_DIR = 'M:\\Data\\GIS layers\\Blueprint Land Use Strategies\\ID_idx\\Final Blueprint' - GROWTH_GEOGRAPHY_DIR= 'M:\\Data\\Urban\\BAUS\\PBA50\\Final_Blueprint\\Zoning Modifications' - URBANSIM_DIR = 'C:\\Users\\{}\\Documents\\GitHub\\bayarea_urbansim'.format(os.getenv('USERNAME')) - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim'.format(os.getenv('USERNAME')) - # output folders # PBA50_PARCEL_GEO_DIR = os.path.join(WORK_DIR, 'Final Blueprint') # URBANSIM_INPUT_DIR = os.path.join(URBANSIM_DIR, 'data') -LOG_FILE = os.path.join(WORK_DIR,'{}_parcel_BlueprintGeos_idx.log'.format(today)) - +LOG_FILE = os.path.join(WORK_DIR, "{}_parcel_BlueprintGeos_idx.log".format(today)) -if __name__ == '__main__': +if __name__ == "__main__": # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', 
datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("WORK_DIR = {}".format(WORK_DIR)) - # Read input - p10_pda_file = os.path.join(WORK_DIR, 'p10_pda_09172020.csv') + p10_pda_file = os.path.join(WORK_DIR, "p10_pda_09172020.csv") p10_pda_raw = pd.read_csv(p10_pda_file) p10_pda_raw.PARCEL_ID = p10_pda_raw.PARCEL_ID.apply(lambda x: int(round(x))) - logger.info('Read {:,} rows from p10_PDA_file'.format(len(p10_pda_raw))) - logger.info('{:,} unique PARCEL_IDs'.format(len(p10_pda_raw.PARCEL_ID.unique()))) - logger.info('Columns: {}'.format(list(p10_pda_raw))) + logger.info("Read {:,} rows from p10_PDA_file".format(len(p10_pda_raw))) + logger.info("{:,} unique PARCEL_IDs".format(len(p10_pda_raw.PARCEL_ID.unique()))) + logger.info("Columns: {}".format(list(p10_pda_raw))) # Keep needed fields and name them as needed - p10_pda = p10_pda_raw[['PARCEL_ID','geom_id_s', 'ACRES', 'pda_name']] + p10_pda = p10_pda_raw[["PARCEL_ID", "geom_id_s", "ACRES", "pda_name"]] # Get new jurisdiction, tra_id, hra_id - pba50_attr_file = os.path.join(GROWTH_GEOGRAPHY_DIR, 'p10_pba50_attr_20200915.csv') - pba50_attr = pd.read_csv(pba50_attr_file, - usecols = ['PARCEL_ID', 'juris', 'fbp_tra_id', 'fbp_sesit_']) - pba50_attr.rename(columns = {'fbp_tra_id': 'tra_id', - 'fbp_sesit_': 'sesit_id'}, inplace=True) + pba50_attr_file = os.path.join(GROWTH_GEOGRAPHY_DIR, "p10_pba50_attr_20200915.csv") + pba50_attr = pd.read_csv( + pba50_attr_file, usecols=["PARCEL_ID", "juris", "fbp_tra_id", "fbp_sesit_"] + ) + pba50_attr.rename( + columns={"fbp_tra_id": "tra_id", "fbp_sesit_": "sesit_id"}, inplace=True + ) pba50_attr.PARCEL_ID = pba50_attr.PARCEL_ID.apply(lambda x: int(round(x))) - p10_pda = p10_pda.merge(pba50_attr[['PARCEL_ID', 'juris']], on = 'PARCEL_ID', how = 'outer') + p10_pda = p10_pda.merge( + pba50_attr[["PARCEL_ID", "juris"]], on="PARCEL_ID", how="outer" + ) # Get unique PDAs - pdas = p10_pda[['juris','pda_name']].drop_duplicates() - pdas = pdas.loc[(pdas.pda_name.notnull()) & (pdas.pda_name != ' ')] - pdas.sort_values(by = ['juris','pda_name'], inplace = True) - logger.info('There are {:,} unique PDAs in {} jurisdictions'.format(len(pdas), \ - len(pdas.juris.unique()))) + pdas = p10_pda[["juris", "pda_name"]].drop_duplicates() + pdas = pdas.loc[(pdas.pda_name.notnull()) & (pdas.pda_name != " ")] + pdas.sort_values(by=["juris", "pda_name"], inplace=True) + logger.info( + "There are {:,} unique PDAs in {} jurisdictions".format( + len(pdas), len(pdas.juris.unique()) + ) + ) logger.info(pdas.head()) # Assign index to PDAs by jurisdiction - pdas['idx'] = pdas.groupby(['juris']).cumcount()+1 + pdas["idx"] = pdas.groupby(["juris"]).cumcount() + 1 logger.info(pdas.head()) # Join the index back to p10 parcels - p10_pda_idx = p10_pda.merge(pdas, - on = ['pda_name','juris'], - how = 'left') + p10_pda_idx = p10_pda.merge(pdas, on=["pda_name", "juris"], how="left") - p10_pda_idx['PARCEL_ID'] = p10_pda_idx['PARCEL_ID'].apply(lambda x: int(round(x))) + p10_pda_idx["PARCEL_ID"] = p10_pda_idx["PARCEL_ID"].apply(lambda x: int(round(x))) # create pda_id by concatenating jurisdiction name and pda idx in_pda_idx = p10_pda_idx.pda_name.notnull() - p10_pda_idx.loc[in_pda_idx,'idx'] = p10_pda_idx.loc[in_pda_idx,'idx'].apply(lambda x: str(int(x))) - p10_pda_idx.loc[in_pda_idx,'pda_id_pba50'] = \ - p10_pda_idx.loc[in_pda_idx,'juris'] + 
p10_pda_idx.loc[in_pda_idx,'idx'] + p10_pda_idx.loc[in_pda_idx, "idx"] = p10_pda_idx.loc[in_pda_idx, "idx"].apply( + lambda x: str(int(x)) + ) + p10_pda_idx.loc[in_pda_idx, "pda_id_pba50"] = ( + p10_pda_idx.loc[in_pda_idx, "juris"] + p10_pda_idx.loc[in_pda_idx, "idx"] + ) logger.info(p10_pda_idx.head()) - logger.info('Export {:,} rows to p10_pda_idx'.format(len(p10_pda_idx))) - logger.info('{:,} unique PARCEL_IDs'.format(len(p10_pda_idx.PARCEL_ID.unique()))) - p10_pda_idx.to_csv(os.path.join(WORK_DIR, today+'_p10_pda_idx.csv'),index = False) + logger.info("Export {:,} rows to p10_pda_idx".format(len(p10_pda_idx))) + logger.info("{:,} unique PARCEL_IDs".format(len(p10_pda_idx.PARCEL_ID.unique()))) + p10_pda_idx.to_csv(os.path.join(WORK_DIR, today + "_p10_pda_idx.csv"), index=False) # Simple stats - logger.info('Number of PDAs by Jurisdiction:') - pda_count_juris = pdas.groupby(['juris'])['pda_name'].count().reset_index() - pda_count_juris.rename(columns = {'pda_name':'pda_count'}, inplace = True) + logger.info("Number of PDAs by Jurisdiction:") + pda_count_juris = pdas.groupby(["juris"])["pda_name"].count().reset_index() + pda_count_juris.rename(columns={"pda_name": "pda_count"}, inplace=True) logger.info(pda_count_juris) - pda_count_juris.to_csv(os.path.join(WORK_DIR, today+'_pda_count_juris.csv'),index = False) - - logger.info('Parcels Acreage by PDAs:') - p10_acr_pda = p10_pda_idx.groupby(['juris','pda_name'])['ACRES'].sum().reset_index() + pda_count_juris.to_csv( + os.path.join(WORK_DIR, today + "_pda_count_juris.csv"), index=False + ) + + logger.info("Parcels Acreage by PDAs:") + p10_acr_pda = ( + p10_pda_idx.groupby(["juris", "pda_name"])["ACRES"].sum().reset_index() + ) logger.info(p10_acr_pda) - p10_acr_pda.to_csv(os.path.join(WORK_DIR, today+'_p10_acr_pda.csv'),index = False) + p10_acr_pda.to_csv(os.path.join(WORK_DIR, today + "_p10_acr_pda.csv"), index=False) # double check data quality before export - logger.info('Double check total number of PDAs:') - logger.info(pda_count_juris['pda_count'].sum()) + logger.info("Double check total number of PDAs:") + logger.info(pda_count_juris["pda_count"].sum()) # export to Urbansim input - pda_id_2020 = p10_pda_idx[['PARCEL_ID','pda_id_pba50']] - pda_id_2020.rename(columns = {'pda_id_pba50':'pda_id', - 'PARCEL_ID': 'parcel_id'}, inplace = True) + pda_id_2020 = p10_pda_idx[["PARCEL_ID", "pda_id_pba50"]] + pda_id_2020.rename( + columns={"pda_id_pba50": "pda_id", "PARCEL_ID": "parcel_id"}, inplace=True + ) logger.info(pda_id_2020.head()) - pda_id_2020.to_csv(os.path.join(WORK_DIR, 'pda_id_2020.csv'), index = False) - - + pda_id_2020.to_csv(os.path.join(WORK_DIR, "pda_id_2020.csv"), index=False) ## p10 PARCEL_ID - TRA_ID index - pba50_tra = pba50_attr[['PARCEL_ID','tra_id','juris']] - pba50_tra.rename(columns = {'PARCEL_ID': 'parcel_id'}, inplace = True) + pba50_tra = pba50_attr[["PARCEL_ID", "tra_id", "juris"]] + pba50_tra.rename(columns={"PARCEL_ID": "parcel_id"}, inplace=True) - tra_id = pd.DataFrame({'tra_id':['tra3', 'tra2', 'tra1', 'tra3c2', 'tra2c1', 'tra3c1'], - '20_22' :['tra3', 'tra2', 'tra1', 'tra3', 'tra2', 'tra3' ], - '23' :['tra3', 'tra2', 'tra1', 'tra2', 'tra1', 'tra1' ]}) + tra_id = pd.DataFrame( + { + "tra_id": ["tra3", "tra2", "tra1", "tra3c2", "tra2c1", "tra3c1"], + "20_22": ["tra3", "tra2", "tra1", "tra3", "tra2", "tra3"], + "23": ["tra3", "tra2", "tra1", "tra2", "tra1", "tra1"], + } + ) - pba50_tra = pba50_tra.merge(tra_id, on = 'tra_id', how = 'left') + pba50_tra = pba50_tra.merge(tra_id, on="tra_id", how="left") in_tra_idx 
= pba50_tra.tra_id.notnull() - pba50_tra_noCrossing = pba50_tra[['parcel_id','juris','20_22']] - pba50_tra_noCrossing.rename(columns = {'20_22': 'tra_id'}, inplace = True) - pba50_tra_noCrossing.loc[in_tra_idx,'juris_tra'] = \ - pba50_tra_noCrossing.loc[in_tra_idx,'juris'] + '_' + pba50_tra_noCrossing.loc[in_tra_idx,'tra_id'] - pba50_tra_noCrossing.drop(columns = ['juris'], inplace = True) + pba50_tra_noCrossing = pba50_tra[["parcel_id", "juris", "20_22"]] + pba50_tra_noCrossing.rename(columns={"20_22": "tra_id"}, inplace=True) + pba50_tra_noCrossing.loc[in_tra_idx, "juris_tra"] = ( + pba50_tra_noCrossing.loc[in_tra_idx, "juris"] + + "_" + + pba50_tra_noCrossing.loc[in_tra_idx, "tra_id"] + ) + pba50_tra_noCrossing.drop(columns=["juris"], inplace=True) logger.info(pba50_tra_noCrossing.head()) - pba50_tra_crossing = pba50_tra[['parcel_id','juris','23']] - pba50_tra_crossing.rename(columns = {'23': 'tra_id'}, inplace = True) - pba50_tra_crossing.loc[in_tra_idx,'juris_tra'] = \ - pba50_tra_crossing.loc[in_tra_idx,'juris'] + '_' + pba50_tra_crossing.loc[in_tra_idx,'tra_id'] - pba50_tra_crossing.drop(columns = ['juris'], inplace = True) + pba50_tra_crossing = pba50_tra[["parcel_id", "juris", "23"]] + pba50_tra_crossing.rename(columns={"23": "tra_id"}, inplace=True) + pba50_tra_crossing.loc[in_tra_idx, "juris_tra"] = ( + pba50_tra_crossing.loc[in_tra_idx, "juris"] + + "_" + + pba50_tra_crossing.loc[in_tra_idx, "tra_id"] + ) + pba50_tra_crossing.drop(columns=["juris"], inplace=True) logger.info(pba50_tra_crossing.head()) - pba50_tra_noCrossing.to_csv(os.path.join(WORK_DIR, 'tra_id_2020_s202122.csv'), index = False) - pba50_tra_crossing.to_csv(os.path.join(WORK_DIR, 'tra_id_2020_s23.csv'), index = False) - - + pba50_tra_noCrossing.to_csv( + os.path.join(WORK_DIR, "tra_id_2020_s202122.csv"), index=False + ) + pba50_tra_crossing.to_csv( + os.path.join(WORK_DIR, "tra_id_2020_s23.csv"), index=False + ) ## p10 PARCEL_ID - HRA_ID index - pba50_hra = pba50_attr[['PARCEL_ID','sesit_id','juris']] - pba50_hra.rename(columns = {'PARCEL_ID': 'parcel_id'}, inplace = True) + pba50_hra = pba50_attr[["PARCEL_ID", "sesit_id", "juris"]] + pba50_hra.rename(columns={"PARCEL_ID": "parcel_id"}, inplace=True) in_hra_idx = pba50_hra.sesit_id.notnull() - pba50_hra.loc[in_hra_idx, 'juris_sesit'] = \ - pba50_hra.loc[in_hra_idx, 'juris'] + '_' + pba50_hra.loc[in_hra_idx, 'sesit_id'] - pba50_hra.drop(columns = ['juris'], inplace = True) + pba50_hra.loc[in_hra_idx, "juris_sesit"] = ( + pba50_hra.loc[in_hra_idx, "juris"] + "_" + pba50_hra.loc[in_hra_idx, "sesit_id"] + ) + pba50_hra.drop(columns=["juris"], inplace=True) logger.info(pba50_hra.head()) - pba50_hra.to_csv(os.path.join(WORK_DIR, 'hra_id_2020.csv'), index = False) \ No newline at end of file + pba50_hra.to_csv(os.path.join(WORK_DIR, "hra_id_2020.csv"), index=False) diff --git a/policies/plu/update_parcels_geography.py b/policies/plu/update_parcels_geography.py index 414e856..8d9781b 100644 --- a/policies/plu/update_parcels_geography.py +++ b/policies/plu/update_parcels_geography.py @@ -5,228 +5,378 @@ import os, glob, logging, sys, time NOW = time.strftime("%Y_%m%d_%H%M") -today = time.strftime('%Y_%m_%d') - - -if os.getenv('USERNAME')=='ywang': - M_WORKING_DIR = 'M:\\Data\\Urban\\BAUS\\PBA50' - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim'.format(os.getenv('USERNAME')) - BOX_SMELT_DIR = 'C:\\Users\\{}\\Box\\baydata\\smelt\\2020 03 12'.format(os.getenv('USERNAME')) - GITHUB_PETRALE_DIR = 
'C:\\Users\\{}\\Documents\\GitHub\\petrale'.format(os.getenv('USERNAME')) - M_ID_DIR = 'M:\\Data\\GIS layers\\Blueprint Land Use Strategies\\ID_idx' +today = time.strftime("%Y_%m_%d") + + +if os.getenv("USERNAME") == "ywang": + M_WORKING_DIR = "M:\\Data\\Urban\\BAUS\\PBA50" + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim".format( + os.getenv("USERNAME") + ) + BOX_SMELT_DIR = "C:\\Users\\{}\\Box\\baydata\\smelt\\2020 03 12".format( + os.getenv("USERNAME") + ) + GITHUB_PETRALE_DIR = "C:\\Users\\{}\\Documents\\GitHub\\petrale".format( + os.getenv("USERNAME") + ) + M_ID_DIR = "M:\\Data\\GIS layers\\Blueprint Land Use Strategies\\ID_idx" # input file locations -HORIZON_ZONING_BOX_DIR = os.path.join(M_WORKING_DIR, 'Horizon', 'Large General Input Data') -PBA50_ZONINGMOD_DIR = os.path.join(M_WORKING_DIR, 'Final_Blueprint', 'Zoning Modifications') -EIR_ZONINGMOD_DIR = os.path.join(M_WORKING_DIR, 'EIR', 'Zoning Modifications') -JURIS_CODE_DIR = os.path.join(GITHUB_PETRALE_DIR, 'zones', 'jurisdictions') -M_ID_DB_DIR = os.path.join(M_ID_DIR, 'Draft Blueprint') -M_ID_FB_DIR = os.path.join(M_ID_DIR, 'Final Blueprint') -M_ID_EIR_DIR = os.path.join(M_ID_DIR, 'EIR') +HORIZON_ZONING_BOX_DIR = os.path.join( + M_WORKING_DIR, "Horizon", "Large General Input Data" +) +PBA50_ZONINGMOD_DIR = os.path.join( + M_WORKING_DIR, "Final_Blueprint", "Zoning Modifications" +) +EIR_ZONINGMOD_DIR = os.path.join(M_WORKING_DIR, "EIR", "Zoning Modifications") +JURIS_CODE_DIR = os.path.join(GITHUB_PETRALE_DIR, "zones", "jurisdictions") +M_ID_DB_DIR = os.path.join(M_ID_DIR, "Draft Blueprint") +M_ID_FB_DIR = os.path.join(M_ID_DIR, "Final Blueprint") +M_ID_EIR_DIR = os.path.join(M_ID_DIR, "EIR") # outputs locations -PBA50_LARGE_INPUT_DIR = os.path.join(BOX_DIR, 'PBA50', 'Current PBA50 Large General Input Data') -M_LARGE_INPUT_FB_DIR = os.path.join(M_WORKING_DIR, 'Final_Blueprint', 'Large General Input Data') -M_LARGE_INPUT_EIR_DIR = os.path.join(M_WORKING_DIR, 'EIR', 'Large General Input Data') -LOG_FILE = os.path.join(M_LARGE_INPUT_EIR_DIR,'{}_update_parcels_geography.log'.format(today)) +PBA50_LARGE_INPUT_DIR = os.path.join( + BOX_DIR, "PBA50", "Current PBA50 Large General Input Data" +) +M_LARGE_INPUT_FB_DIR = os.path.join( + M_WORKING_DIR, "Final_Blueprint", "Large General Input Data" +) +M_LARGE_INPUT_EIR_DIR = os.path.join(M_WORKING_DIR, "EIR", "Large General Input Data") +LOG_FILE = os.path.join( + M_LARGE_INPUT_EIR_DIR, "{}_update_parcels_geography.log".format(today) +) -if __name__ == '__main__': +if __name__ == "__main__": # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("PBA50_LARGE_INPUT_DIR = {}".format(PBA50_LARGE_INPUT_DIR)) - ## Basemap parcels - 
basemap_p10_file = os.path.join(BOX_SMELT_DIR, 'p10.csv') - basemap_p10 = pd.read_csv(basemap_p10_file, - usecols =['PARCEL_ID','geom_id_s','ACRES','LAND_VALUE']) + basemap_p10_file = os.path.join(BOX_SMELT_DIR, "p10.csv") + basemap_p10 = pd.read_csv( + basemap_p10_file, usecols=["PARCEL_ID", "geom_id_s", "ACRES", "LAND_VALUE"] + ) # Make sure PARCEL_ID and geom_id_s are integer: - basemap_p10['PARCEL_ID'] = basemap_p10['PARCEL_ID'].apply(lambda x: int(round(x))) - basemap_p10['geom_id_s'] = basemap_p10['geom_id_s'].apply(lambda x: int(round(x))) + basemap_p10["PARCEL_ID"] = basemap_p10["PARCEL_ID"].apply(lambda x: int(round(x))) + basemap_p10["geom_id_s"] = basemap_p10["geom_id_s"].apply(lambda x: int(round(x))) - logger.info('Read {} records from {}, with {} unique Parcel IDs, and header: \n {}'.format( - len(basemap_p10), + logger.info( + "Read {} records from {}, with {} unique Parcel IDs, and header: \n {}".format( + len(basemap_p10), basemap_p10_file, len(basemap_p10.PARCEL_ID.unique()), - basemap_p10.head())) + basemap_p10.head(), + ) + ) logger.info(basemap_p10.dtypes) ## Read PBA40 parcels_geography file - pg_pba40_file = os.path.join(HORIZON_ZONING_BOX_DIR, '07_11_2019_parcels_geography.csv') - pg_pba40_cols = ['geom_id', 'pda_id', 'tpp_id', 'exp_id', 'opp_id', 'zoningmodcat', 'perffoot', 'perfarea', 'urbanized', - 'hra_id', 'trich_id', 'cat_id', 'zoninghzcat'] - pg_pba40 = pd.read_csv(pg_pba40_file, - usecols = pg_pba40_cols) - pg_pba40.rename(columns = {'pda_id': 'pda_id_pba40'}, inplace = True) - - logger.info('Read {} records from {}, with header: \n {}'.format( - len(pg_pba40), - pg_pba40_file, - pg_pba40.head())) + pg_pba40_file = os.path.join( + HORIZON_ZONING_BOX_DIR, "07_11_2019_parcels_geography.csv" + ) + pg_pba40_cols = [ + "geom_id", + "pda_id", + "tpp_id", + "exp_id", + "opp_id", + "zoningmodcat", + "perffoot", + "perfarea", + "urbanized", + "hra_id", + "trich_id", + "cat_id", + "zoninghzcat", + ] + pg_pba40 = pd.read_csv(pg_pba40_file, usecols=pg_pba40_cols) + pg_pba40.rename(columns={"pda_id": "pda_id_pba40"}, inplace=True) + + logger.info( + "Read {} records from {}, with header: \n {}".format( + len(pg_pba40), pg_pba40_file, pg_pba40.head() + ) + ) logger.info(pg_pba40.dtypes) ## Read PBA50 attributes - #pba50_attrs_file = os.path.join(PBA50_ZONINGMOD_DIR, 'p10_pba50_FBP_attr_20201110.csv') - pba50_attrs_file = os.path.join(EIR_ZONINGMOD_DIR, 'p10_pba50_EIR_attr_20210224.csv') - pba50_attrs_cols = ['geom_id_s', 'juris_id', 'juris', 'gg_id', 'tra_id', 'sesit_id', 'ppa_id', - 'exp2020_id', 'pba50chcat', 'exsfd_id', 'chcatwsfd', 'pba50zonin', 'nodev', - 'fbp_gg_id', 'fbp_tra_id', 'fbp_sesit_', 'fbp_ppa_id', 'fbp_exp202', - 'fbpchcat', 'fbp_exsfd_', 'fbpchcatws', 'fbpzoningm', - 'eir_gg_id', 'eir_tra_id', 'eir_sesit_', 'eir_ppa_id', - 'eir_exp202', 'eir_coc_id', 'ex_res_bldg', 'eirzoningm'] - pba50_attrs = pd.read_csv(pba50_attrs_file, - usecols = pba50_attrs_cols) + # pba50_attrs_file = os.path.join(PBA50_ZONINGMOD_DIR, 'p10_pba50_FBP_attr_20201110.csv') + pba50_attrs_file = os.path.join( + EIR_ZONINGMOD_DIR, "p10_pba50_EIR_attr_20210224.csv" + ) + pba50_attrs_cols = [ + "geom_id_s", + "juris_id", + "juris", + "gg_id", + "tra_id", + "sesit_id", + "ppa_id", + "exp2020_id", + "pba50chcat", + "exsfd_id", + "chcatwsfd", + "pba50zonin", + "nodev", + "fbp_gg_id", + "fbp_tra_id", + "fbp_sesit_", + "fbp_ppa_id", + "fbp_exp202", + "fbpchcat", + "fbp_exsfd_", + "fbpchcatws", + "fbpzoningm", + "eir_gg_id", + "eir_tra_id", + "eir_sesit_", + "eir_ppa_id", + "eir_exp202", + 
"eir_coc_id", + "ex_res_bldg", + "eirzoningm", + ] + pba50_attrs = pd.read_csv(pba50_attrs_file, usecols=pba50_attrs_cols) pba50_attrs.geom_id_s = pba50_attrs.geom_id_s.apply(lambda x: int(round(x))) - pba50_attrs.rename(columns = {'pba50zonin': 'pba50zoningmodcat', - - 'fbpzoningm': 'fbpzoningmodcat', - 'fbp_sesit_': 'fbp_sesit_id', - 'fbp_exp202': 'fbp_exp2020_id', - 'fbp_exsfd_': 'fbp_exsfd_id', - 'fbpchcatws': 'fbpchcatwsfd', - - 'eir_sesit_': 'eir_sesit_id', - 'eir_exp202': 'eir_exp2020_id', - 'eirzoningm': 'eirzoningmodcat'}, inplace=True) - - logger.info('Read {} records from {}, with header: \n {}'.format( - len(pba50_attrs), - pba50_attrs_file, - pba50_attrs.head())) + pba50_attrs.rename( + columns={ + "pba50zonin": "pba50zoningmodcat", + "fbpzoningm": "fbpzoningmodcat", + "fbp_sesit_": "fbp_sesit_id", + "fbp_exp202": "fbp_exp2020_id", + "fbp_exsfd_": "fbp_exsfd_id", + "fbpchcatws": "fbpchcatwsfd", + "eir_sesit_": "eir_sesit_id", + "eir_exp202": "eir_exp2020_id", + "eirzoningm": "eirzoningmodcat", + }, + inplace=True, + ) + + logger.info( + "Read {} records from {}, with header: \n {}".format( + len(pba50_attrs), pba50_attrs_file, pba50_attrs.head() + ) + ) logger.info(pba50_attrs.dtypes) ## Read new PBA50 PDA IDs - pda_pba50_db_file = os.path.join(M_ID_DB_DIR, 'pda_id_2020.csv') + pda_pba50_db_file = os.path.join(M_ID_DB_DIR, "pda_id_2020.csv") pda_pba50_db = pd.read_csv(pda_pba50_db_file) - pda_pba50_db.rename(columns = {'pda_id': 'pda_id_pba50_db'}, inplace = True) + pda_pba50_db.rename(columns={"pda_id": "pda_id_pba50_db"}, inplace=True) - logger.info('Read {} records from {}, with {} unique Parcel IDs, header: \n {}'.format( + logger.info( + "Read {} records from {}, with {} unique Parcel IDs, header: \n {}".format( len(pda_pba50_db), pda_pba50_db_file, len(pda_pba50_db.parcel_id.unique()), - pda_pba50_db.head())) + pda_pba50_db.head(), + ) + ) logger.info(pda_pba50_db.dtypes) - pda_pba50_fb_file = os.path.join(M_ID_FB_DIR, 'pda_id_2020.csv') + pda_pba50_fb_file = os.path.join(M_ID_FB_DIR, "pda_id_2020.csv") pda_pba50_fb = pd.read_csv(pda_pba50_fb_file) - pda_pba50_fb.rename(columns = {'pda_id': 'pda_id_pba50_fb'}, inplace = True) + pda_pba50_fb.rename(columns={"pda_id": "pda_id_pba50_fb"}, inplace=True) - logger.info('Read {} records from {}, with {} unique Parcel IDs, header: \n {}'.format( + logger.info( + "Read {} records from {}, with {} unique Parcel IDs, header: \n {}".format( len(pda_pba50_fb), pda_pba50_fb_file, len(pda_pba50_fb.parcel_id.unique()), - pda_pba50_fb.head())) + pda_pba50_fb.head(), + ) + ) logger.info(pda_pba50_fb.dtypes) ## Read jurisdiction code file - juris_code_file = os.path.join(JURIS_CODE_DIR, 'juris_county_id.csv') - juris_code = pd.read_csv(juris_code_file, usecols = ['jurisdiction_id','juris_id','juris_name_full']) - + juris_code_file = os.path.join(JURIS_CODE_DIR, "juris_county_id.csv") + juris_code = pd.read_csv( + juris_code_file, usecols=["jurisdiction_id", "juris_id", "juris_name_full"] + ) ## Join pab50 attributes to pba40 parcel set - pg_pba50_merge = pg_pba40.merge(pba50_attrs, - left_on = 'geom_id', - right_on = 'geom_id_s', - how = 'left').merge(juris_code, - on = 'juris_id', - how = 'left').merge(basemap_p10, - on = 'geom_id_s', - how = 'left').merge(pda_pba50_db, - left_on = 'PARCEL_ID', - right_on = 'parcel_id', - how = 'left').merge(pda_pba50_fb, - left_on = 'PARCEL_ID', - right_on = 'parcel_id', - how = 'left') + pg_pba50_merge = ( + pg_pba40.merge(pba50_attrs, left_on="geom_id", right_on="geom_id_s", how="left") + 
.merge(juris_code, on="juris_id", how="left") + .merge(basemap_p10, on="geom_id_s", how="left") + .merge(pda_pba50_db, left_on="PARCEL_ID", right_on="parcel_id", how="left") + .merge(pda_pba50_fb, left_on="PARCEL_ID", right_on="parcel_id", how="left") + ) ## additional updates on 'nodev' - nodev_folder = os.path.join(PBA50_ZONINGMOD_DIR,'*.csv') + nodev_folder = os.path.join(PBA50_ZONINGMOD_DIR, "*.csv") for filename in list(glob.glob(nodev_folder)): - if 'noDev_parcels_' in filename: - logger.info('Update nodev based on {}'.format(filename)) + if "noDev_parcels_" in filename: + logger.info("Update nodev based on {}".format(filename)) nodev_parcel = pd.read_csv(filename) nodev_parcel.drop_duplicates(inplace=True) - nodev_parcel.rename(columns = {'nodev': 'nodev_update'}, inplace=True) - - if 'PARCEL_ID' in nodev_parcel: - logger.info('update {} parcels using PARCEL_ID'.format(len(nodev_parcel))) - nodev_parcel.PARCEL_ID = nodev_parcel.PARCEL_ID.apply(lambda x: int(round(x))) - pg_pba50_merge = pg_pba50_merge.merge(nodev_parcel[['PARCEL_ID','nodev_update']], - on='PARCEL_ID', - how='left') - logger.info(pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull()][['PARCEL_ID','nodev','nodev_update']]) - pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull(), 'nodev'] = \ - pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull(), 'nodev_update'] - pg_pba50_merge.drop(columns=['nodev_update'], inplace=True) - - elif 'geom_id' in nodev_parcel: - logger.info('update {} parcels using geom_id'.format(len(nodev_parcel))) - nodev_parcel.geom_id = nodev_parcel.geom_id.apply(lambda x: int(round(x))) - pg_pba50_merge = pg_pba50_merge.merge(nodev_parcel[['geom_id','nodev_update']], - on='geom_id', - how='left') - logger.info(pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull()][['geom_id','nodev','nodev_update']]) - pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull(), 'nodev'] = \ - pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull(), 'nodev_update'] - pg_pba50_merge.drop(columns=['nodev_update'], inplace=True) + nodev_parcel.rename(columns={"nodev": "nodev_update"}, inplace=True) + + if "PARCEL_ID" in nodev_parcel: + logger.info( + "update {} parcels using PARCEL_ID".format(len(nodev_parcel)) + ) + nodev_parcel.PARCEL_ID = nodev_parcel.PARCEL_ID.apply( + lambda x: int(round(x)) + ) + pg_pba50_merge = pg_pba50_merge.merge( + nodev_parcel[["PARCEL_ID", "nodev_update"]], + on="PARCEL_ID", + how="left", + ) + logger.info( + pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull()][ + ["PARCEL_ID", "nodev", "nodev_update"] + ] + ) + pg_pba50_merge.loc[ + pg_pba50_merge.nodev_update.notnull(), "nodev" + ] = pg_pba50_merge.loc[ + pg_pba50_merge.nodev_update.notnull(), "nodev_update" + ] + pg_pba50_merge.drop(columns=["nodev_update"], inplace=True) + + elif "geom_id" in nodev_parcel: + logger.info("update {} parcels using geom_id".format(len(nodev_parcel))) + nodev_parcel.geom_id = nodev_parcel.geom_id.apply( + lambda x: int(round(x)) + ) + pg_pba50_merge = pg_pba50_merge.merge( + nodev_parcel[["geom_id", "nodev_update"]], on="geom_id", how="left" + ) + logger.info( + pg_pba50_merge.loc[pg_pba50_merge.nodev_update.notnull()][ + ["geom_id", "nodev", "nodev_update"] + ] + ) + pg_pba50_merge.loc[ + pg_pba50_merge.nodev_update.notnull(), "nodev" + ] = pg_pba50_merge.loc[ + pg_pba50_merge.nodev_update.notnull(), "nodev_update" + ] + pg_pba50_merge.drop(columns=["nodev_update"], inplace=True) else: - logger.info('Missing PARCEL_ID and geom_id, cannot update.') - + logger.info("Missing PARCEL_ID and 
geom_id, cannot update.") ## export needed fields # Parcel attribute: - p_att = ['PARCEL_ID', 'geom_id', 'jurisdiction_id', 'juris_name_full', 'juris_id', 'juris','ACRES'] + p_att = [ + "PARCEL_ID", + "geom_id", + "jurisdiction_id", + "juris_name_full", + "juris_id", + "juris", + "ACRES", + ] # PBA40 fields: - pba40_att = ['pda_id_pba40', 'tpp_id', 'exp_id', 'opp_id', - 'zoningmodcat', 'perffoot', 'perfarea', 'urbanized'] + pba40_att = [ + "pda_id_pba40", + "tpp_id", + "exp_id", + "opp_id", + "zoningmodcat", + "perffoot", + "perfarea", + "urbanized", + ] # Horizon fields: - hor_att = ['hra_id', 'trich_id', 'cat_id', 'zoninghzcat'] + hor_att = ["hra_id", "trich_id", "cat_id", "zoninghzcat"] # PBA50 Draft Blueprint fields: - pba50_db_att = ['gg_id', 'pda_id_pba50_db', 'tra_id', 'sesit_id', 'ppa_id', - 'exp2020_id', 'exsfd_id', 'pba50zoningmodcat', 'pba50chcat'] + pba50_db_att = [ + "gg_id", + "pda_id_pba50_db", + "tra_id", + "sesit_id", + "ppa_id", + "exp2020_id", + "exsfd_id", + "pba50zoningmodcat", + "pba50chcat", + ] # PBA50 Final Blueprint fields: - pba50_fb_att = ['fbp_gg_id', 'pda_id_pba50_fb', 'fbp_tra_id', 'fbp_sesit_id', 'fbp_ppa_id', - 'fbp_exp2020_id', 'fbp_exsfd_id', 'fbpzoningmodcat', 'fbpchcat'] + pba50_fb_att = [ + "fbp_gg_id", + "pda_id_pba50_fb", + "fbp_tra_id", + "fbp_sesit_id", + "fbp_ppa_id", + "fbp_exp2020_id", + "fbp_exsfd_id", + "fbpzoningmodcat", + "fbpchcat", + ] # PBA50 EIR fields: - pba50_eir_att = ['eir_gg_id', 'pda_id_pba50_fb', 'eir_tra_id', 'eir_sesit_id', 'eir_coc_id', - 'eir_ppa_id', 'eir_exp2020_id', 'ex_res_bldg', 'eirzoningmodcat'] - + pba50_eir_att = [ + "eir_gg_id", + "pda_id_pba50_fb", + "eir_tra_id", + "eir_sesit_id", + "eir_coc_id", + "eir_ppa_id", + "eir_exp2020_id", + "ex_res_bldg", + "eirzoningmodcat", + ] # PBA50 shared fields: - pba50_att_both = ['nodev'] + pba50_att_both = ["nodev"] # export: - pg_all = pg_pba50_merge[p_att + pba40_att + hor_att + pba50_db_att + pba50_fb_att + pba50_eir_att + pba50_att_both] + pg_all = pg_pba50_merge[ + p_att + + pba40_att + + hor_att + + pba50_db_att + + pba50_fb_att + + pba50_eir_att + + pba50_att_both + ] pg_pba50_fb_only = pg_pba50_merge[p_att + pba50_fb_att + pba50_att_both] - logger.info('Export {} records with {} unique PARCEL IDs to {} with the following fields: \n {}'.format(len(pg_all), - len(pg_all.PARCEL_ID.unique()), - PBA50_LARGE_INPUT_DIR, - pg_all.dtypes)) - pg_all.to_csv(os.path.join(PBA50_LARGE_INPUT_DIR, today+'_parcels_geography.csv'), index = False) - #pg_pba50_fb_only.to_csv(os.path.join(M_LARGE_INPUT_DIR, today+'_parcels_geography_fb_only.csv'), index = False) - \ No newline at end of file + logger.info( + "Export {} records with {} unique PARCEL IDs to {} with the following fields: \n {}".format( + len(pg_all), + len(pg_all.PARCEL_ID.unique()), + PBA50_LARGE_INPUT_DIR, + pg_all.dtypes, + ) + ) + pg_all.to_csv( + os.path.join(PBA50_LARGE_INPUT_DIR, today + "_parcels_geography.csv"), + index=False, + ) + # pg_pba50_fb_only.to_csv(os.path.join(M_LARGE_INPUT_DIR, today+'_parcels_geography_fb_only.csv'), index = False) diff --git a/policies/plu/update_zoning_parcels.py b/policies/plu/update_zoning_parcels.py index b0d5be3..f782ca8 100644 --- a/policies/plu/update_zoning_parcels.py +++ b/policies/plu/update_zoning_parcels.py @@ -12,68 +12,75 @@ import numpy as np -if os.getenv('USERNAME')=='ywang': - BOX_DIR = 'C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50'.format(os.getenv('USERNAME')) - HORIZON_DIR = os.path.join(BOX_DIR, 'OLD Horizon Large General Input 
Data') - DRAFTBLUEPRINT_DIR = os.path.join(BOX_DIR, 'Current PBA50 Large General Input Data') - PBA50_ZONING_DIR = os.path.join(BOX_DIR, 'Policies', 'Zoning Modifications') +if os.getenv("USERNAME") == "ywang": + BOX_DIR = "C:\\Users\\{}\\Box\\Modeling and Surveys\\Urban Modeling\\Bay Area UrbanSim\\PBA50".format( + os.getenv("USERNAME") + ) + HORIZON_DIR = os.path.join(BOX_DIR, "OLD Horizon Large General Input Data") + DRAFTBLUEPRINT_DIR = os.path.join(BOX_DIR, "Current PBA50 Large General Input Data") + PBA50_ZONING_DIR = os.path.join(BOX_DIR, "Policies", "Zoning Modifications") -if __name__ == '__main__': +if __name__ == "__main__": # Horizon zoning_parcels.csv - pz_horizon_file = os.path.join(HORIZON_DIR, '2015_12_21_zoning_parcels.csv') - pz_horizon_cols = ['geom_id','zoning_id','zoning','juris', - 'prop','tablename','nodev_pba40'] - pz_horizon = pd.read_csv(pz_horizon_file, - usecols = pz_horizon_cols) - - print('Read {} rows from {}'.format(len(pz_horizon), pz_horizon_file)) - print('Header: {}'.format(pz_horizon.head())) - print('Number of unique zoning_id is {}'.format(len(pz_horizon.zoning_id.unique()))) - - pz_horizon.rename(columns={'nodev':'nodev_pba40'},inplace = True) - + pz_horizon_file = os.path.join(HORIZON_DIR, "2015_12_21_zoning_parcels.csv") + pz_horizon_cols = [ + "geom_id", + "zoning_id", + "zoning", + "juris", + "prop", + "tablename", + "nodev_pba40", + ] + pz_horizon = pd.read_csv(pz_horizon_file, usecols=pz_horizon_cols) + + print("Read {} rows from {}".format(len(pz_horizon), pz_horizon_file)) + print("Header: {}".format(pz_horizon.head())) + print("Number of unique zoning_id is {}".format(len(pz_horizon.zoning_id.unique()))) + + pz_horizon.rename(columns={"nodev": "nodev_pba40"}, inplace=True) # PBA50 zoning input - pz_pba50_file = os.path.join(PBA50_ZONING_DIR, 'p10_pba50_attr_20200416.csv') - pz_pba50_cols = ['PARCEL_ID','geom_id_s','nodev','juris_id'] - pz_pba50 = pd.read_csv(pz_pba50_file, - usecols=pz_pba50_cols) + pz_pba50_file = os.path.join(PBA50_ZONING_DIR, "p10_pba50_attr_20200416.csv") + pz_pba50_cols = ["PARCEL_ID", "geom_id_s", "nodev", "juris_id"] + pz_pba50 = pd.read_csv(pz_pba50_file, usecols=pz_pba50_cols) - print('Read {} rows from {}'.format(len(pz_pba50), pz_pba50_file)) - print('Header: {}'.format(pz_pba50.head())) + print("Read {} rows from {}".format(len(pz_pba50), pz_pba50_file)) + print("Header: {}".format(pz_pba50.head())) - pz_pba50['geom_id_s'] = pd.to_numeric(pz_pba50['geom_id_s']) - pz_pba50.rename(columns = {'geom_id_s':'geom_id'},inplace = True) + pz_pba50["geom_id_s"] = pd.to_numeric(pz_pba50["geom_id_s"]) + pz_pba50.rename(columns={"geom_id_s": "geom_id"}, inplace=True) - # Merge - merg = pz_horizon.merge(pz_pba50, - on = 'geom_id', - how = 'left') + # Merge + merg = pz_horizon.merge(pz_pba50, on="geom_id", how="left") - for i in ['zoning_id','PARCEL_ID','nodev']: + for i in ["zoning_id", "PARCEL_ID", "nodev"]: try: merg[i] = merg[i].fillna(-1).astype(np.int64) - merg[i].replace(-1, None, inplace = True) + merg[i].replace(-1, None, inplace=True) except: print(i) pass - print('Merge pz_pba50 value with pz_horizon zoning_parcels') + print("Merge pz_pba50 value with pz_horizon zoning_parcels") display(merg.dtypes) - merg.to_csv(os.path.join(DRAFTBLUEPRINT_DIR, 'zoning_parcels.csv')) - + merg.to_csv(os.path.join(DRAFTBLUEPRINT_DIR, "zoning_parcels.csv")) # For QA/QC of the nodev field # Parcels whose 'nodev' value is 1 in Horizon but 0 in Draft Blueprint - nodev_pba40_1_pba50_0 = merg.loc[(merg['nodev_pba40'] == 1) & (merg['nodev'] 
== 0)] - nodev_pba40_1_pba50_0.drop(columns = 'geom_id',inplace = True) - nodev_pba40_1_pba50_0.to_csv(os.path.join(PBA50_ZONING_DIR, 'nodev_pba40_1_pba50_0.csv')) + nodev_pba40_1_pba50_0 = merg.loc[(merg["nodev_pba40"] == 1) & (merg["nodev"] == 0)] + nodev_pba40_1_pba50_0.drop(columns="geom_id", inplace=True) + nodev_pba40_1_pba50_0.to_csv( + os.path.join(PBA50_ZONING_DIR, "nodev_pba40_1_pba50_0.csv") + ) # Parcels whose 'nodev' value is 0 in Horizon but 1 in Draft Blueprint - nodev_pba40_0_pba50_1 = merg.loc[(merg['nodev_pba40'] == 0) & (merg['nodev'] == 1)] - #nodev_pba40_0_pba50_1 - nodev_pba40_0_pba50_1.drop(columns = 'geom_id',inplace = True) - nodev_pba40_0_pba50_1.to_csv(os.path.join(PBA50_ZONING_DIR, 'nodev_pba40_0_pba50_1.csv')) \ No newline at end of file + nodev_pba40_0_pba50_1 = merg.loc[(merg["nodev_pba40"] == 0) & (merg["nodev"] == 1)] + # nodev_pba40_0_pba50_1 + nodev_pba40_0_pba50_1.drop(columns="geom_id", inplace=True) + nodev_pba40_0_pba50_1.to_csv( + os.path.join(PBA50_ZONING_DIR, "nodev_pba40_0_pba50_1.csv") + ) diff --git a/policies/plu/zoningmods_map.py b/policies/plu/zoningmods_map.py index ae91b96..ba139db 100644 --- a/policies/plu/zoningmods_map.py +++ b/policies/plu/zoningmods_map.py @@ -19,21 +19,21 @@ # python zoningmods_map.py -folder . # -input_gdb "M:\Data\GIS layers\UrbanSim smelt\2020 03 12\smelt.gdb" # -p10_layer p10 -# -parcels_geography "%FBP_DIR%\2020_09_21_parcels_geography.csv" +# -parcels_geography "%FBP_DIR%\2020_09_21_parcels_geography.csv" # -zmods_csv "%BAUS_DIR%\data\zoning_mods_24.csv" # -zmodcat_col fbpzoningmodcat -# -join_field PARCEL_ID +# -join_field PARCEL_ID # -join_type KEEP_ALL -# -output_gdb "FinalBlueprint_ZoningMods_20201002.gdb" +# -output_gdb "FinalBlueprint_ZoningMods_20201002.gdb" # # Draft Blueprint release: https://github.com/BayAreaMetro/bayarea_urbansim/releases/tag/v1.9 (July 31, 2020) # commit: 7183846409013a6175e613f11f032513e7dbe51d # -# Note: though the v1.9 datasources.py (https://github.com/BayAreaMetro/bayarea_urbansim/blob/7183846409013a6175e613f11f032513e7dbe51d/baus/datasources.py#L492) +# Note: though the v1.9 datasources.py (https://github.com/BayAreaMetro/bayarea_urbansim/blob/7183846409013a6175e613f11f032513e7dbe51d/baus/datasources.py#L492) # says parcels_geography is 2020_07_10_parcels_geography.csv # But the "v1.7.1- FINAL DRAFT BLUEPRINT" run98 was on June 22, 2020 # So assuming the parcels_geography used was 2020_04_17_parcels_geography.csv -# +# # set BAUS_DIR=%USERPROFILE%\Documents\bayarea_urbansim # set DBP_DIR=%USERPROFILE%\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\Draft Blueprint Large Input Data # python zoningmods_map.py -folder . @@ -50,59 +50,88 @@ import arcpy, pandas -if __name__ == '__main__': +if __name__ == "__main__": start = time.time() - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) - parser.add_argument("-folder", metavar="folder", help="Working folder") - parser.add_argument("-input_gdb", metavar="input.gdb", help="Input geodatabase") - parser.add_argument("-output_gdb", metavar="output.gdb", help="Output geodatabase") - parser.add_argument("-p10_layer", metavar="p10_layer", help="p10 parcel layer") - parser.add_argument("-parcels_geography", help="Parcels geography layer (maps parcels to zoning mod category)") - parser.add_argument("-zmods_csv", metavar="zmods.csv", help="Zoning mods definition for zoning mod categories") - parser.add_argument("-zmodcat_col", help="Zoning mod category column. e.g. 
pba50zoningmodcat or fbpzoningmodcat") - parser.add_argument("-join_field", metavar="join_field", help="Join field for parcel-zmods join") - parser.add_argument("-join_type", choices=["KEEP_ALL","KEEP_COMMON"], default="KEEP_ALL", - help="Outer join vs inner join. Default is KEEP_ALL, or outer") + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("-folder", metavar="folder", help="Working folder") + parser.add_argument("-input_gdb", metavar="input.gdb", help="Input geodatabase") + parser.add_argument("-output_gdb", metavar="output.gdb", help="Output geodatabase") + parser.add_argument("-p10_layer", metavar="p10_layer", help="p10 parcel layer") + parser.add_argument( + "-parcels_geography", + help="Parcels geography layer (maps parcels to zoning mod category)", + ) + parser.add_argument( + "-zmods_csv", + metavar="zmods.csv", + help="Zoning mods definition for zoning mod categories", + ) + parser.add_argument( + "-zmodcat_col", + help="Zoning mod category column. e.g. pba50zoningmodcat or fbpzoningmodcat", + ) + parser.add_argument( + "-join_field", metavar="join_field", help="Join field for parcel-zmods join" + ) + parser.add_argument( + "-join_type", + choices=["KEEP_ALL", "KEEP_COMMON"], + default="KEEP_ALL", + help="Outer join vs inner join. Default is KEEP_ALL, or outer", + ) args = parser.parse_args() args.folder = os.path.abspath(args.folder) - print(" {:18}: {}".format("folder", args.folder)) - print(" {:18}: {}".format("input_gdb", args.input_gdb)) - print(" {:18}: {}".format("output_gdb", args.output_gdb)) - print(" {:18}: {}".format("p10_layer", args.p10_layer)) + print(" {:18}: {}".format("folder", args.folder)) + print(" {:18}: {}".format("input_gdb", args.input_gdb)) + print(" {:18}: {}".format("output_gdb", args.output_gdb)) + print(" {:18}: {}".format("p10_layer", args.p10_layer)) print(" {:18}: {}".format("parcels_geography", args.parcels_geography)) - print(" {:18}: {}".format("zmods_csv", args.zmods_csv)) - print(" {:18}: {}".format("zmodcat_col", args.zmodcat_col)) - print(" {:18}: {}".format("join_field", args.join_field)) - print(" {:18}: {}".format("join_type", args.join_type)) - + print(" {:18}: {}".format("zmods_csv", args.zmods_csv)) + print(" {:18}: {}".format("zmodcat_col", args.zmodcat_col)) + print(" {:18}: {}".format("join_field", args.join_field)) + print(" {:18}: {}".format("join_type", args.join_type)) # create output_gdb if not exists already - if not os.path.exists(os.path.join(args.folder,args.output_gdb)): - (head,tail) = os.path.split(os.path.join(args.folder,args.output_gdb)) + if not os.path.exists(os.path.join(args.folder, args.output_gdb)): + (head, tail) = os.path.split(os.path.join(args.folder, args.output_gdb)) print("head: {} tail: {}".format(head, tail)) - if head=="": head="." + if head == "": + head = "." 
arcpy.CreateFileGDB_management(head, tail) - print("Created {}".format(os.path.join(args.folder,args.output_gdb))) + print("Created {}".format(os.path.join(args.folder, args.output_gdb))) - arcpy.env.workspace = os.path.join(args.folder,args.output_gdb) + arcpy.env.workspace = os.path.join(args.folder, args.output_gdb) ########## Join zmods_attr layer to p10 parcel layer ########## # read zmods_attr file zmod_cols = [args.join_field, args.zmodcat_col] - zmod_attr = pandas.read_csv(os.path.join(args.folder, args.parcels_geography), - usecols = zmod_cols) - print("Read {} records from {}\nwith {} unique {} and {} unique {}".format( - len(zmod_attr), os.path.join(args.folder, args.parcels_geography), - len(zmod_attr[args.join_field].unique()), args.join_field, - len(zmod_attr[args.zmodcat_col].unique()), args.zmodcat_col)) + zmod_attr = pandas.read_csv( + os.path.join(args.folder, args.parcels_geography), usecols=zmod_cols + ) + print( + "Read {} records from {}\nwith {} unique {} and {} unique {}".format( + len(zmod_attr), + os.path.join(args.folder, args.parcels_geography), + len(zmod_attr[args.join_field].unique()), + args.join_field, + len(zmod_attr[args.zmodcat_col].unique()), + args.zmodcat_col, + ) + ) # copy the table to output_gdb - print("Copy {} to {}".format(args.parcels_geography, - os.path.join(args.folder, args.output_gdb))) - + print( + "Copy {} to {}".format( + args.parcels_geography, os.path.join(args.folder, args.output_gdb) + ) + ) + # Note: can't rename after args.parcels_geography because ArcGIS errors for some table names (e.g. those starting with a number) zmod_attr_table = "parcel_geography" print("zmod_attr_table={}".format(zmod_attr_table)) @@ -116,16 +145,18 @@ zmod_attr_values.dtype.names = tuple(zmod_attr.dtypes.index.tolist()) zmod_attr_table_path = os.path.join(args.folder, args.output_gdb, zmod_attr_table) arcpy.da.NumPyArrayToTable(zmod_attr_values, zmod_attr_table_path) - print("Created {} with {} records".format(zmod_attr_table_path, - arcpy.GetCount_management(zmod_attr_table_path))) + print( + "Created {} with {} records".format( + zmod_attr_table_path, arcpy.GetCount_management(zmod_attr_table_path) + ) + ) # target layer p10 = os.path.join(args.folder, args.input_gdb, args.p10_layer) print("Target layer: {}".format(p10)) # copy the layer to output_gdb - print("Copy {} to {}".format(p10, - os.path.join(args.folder, args.output_gdb))) + print("Copy {} to {}".format(p10, os.path.join(args.folder, args.output_gdb))) # delete the layer if it already exists in the output gdb if arcpy.Exists(args.p10_layer): @@ -133,16 +164,26 @@ print("Found {} -- deleting".format(args.p10_layer)) # copy the input to output_gdb with the same name - arcpy.CopyFeatures_management(os.path.join(args.folder, args.input_gdb, args.p10_layer), - os.path.join(args.folder, args.output_gdb, args.p10_layer)) + arcpy.CopyFeatures_management( + os.path.join(args.folder, args.input_gdb, args.p10_layer), + os.path.join(args.folder, args.output_gdb, args.p10_layer), + ) # join table to the target layer - print("Joining {} with {}".format(os.path.join(args.folder, args.output_gdb, args.p10_layer), - os.path.join(args.folder, args.output_gdb, zmod_attr_table))) - - p_zmod_attr_join = arcpy.AddJoin_management(args.p10_layer, args.join_field, - zmod_attr_table, args.join_field, - join_type=args.join_type) + print( + "Joining {} with {}".format( + os.path.join(args.folder, args.output_gdb, args.p10_layer), + os.path.join(args.folder, args.output_gdb, zmod_attr_table), + ) + ) + + p_zmod_attr_join 
= arcpy.AddJoin_management( + args.p10_layer, + args.join_field, + zmod_attr_table, + args.join_field, + join_type=args.join_type, + ) p_zmod_attr_joined = "p10_zmod_attr_joined" @@ -152,47 +193,67 @@ print("Found {} -- deleting".format(p_zmod_attr_joined)) # save it - arcpy.CopyFeatures_management(p_zmod_attr_join, os.path.join(args.folder, args.output_gdb, p_zmod_attr_joined)) - print("Completed creation of {}".format(os.path.join(args.folder, args.output_gdb, p_zmod_attr_joined))) + arcpy.CopyFeatures_management( + p_zmod_attr_join, os.path.join(args.folder, args.output_gdb, p_zmod_attr_joined) + ) + print( + "Completed creation of {}".format( + os.path.join(args.folder, args.output_gdb, p_zmod_attr_joined) + ) + ) field_names = [f.name for f in arcpy.ListFields(p_zmod_attr_joined)] - print("{} has the following fields: {}".format(p_zmod_attr_joined, - field_names)) + print("{} has the following fields: {}".format(p_zmod_attr_joined, field_names)) ########## Dissolve the joint parcel-zmods layer by zoningmod category ########## - print("Dissolve {} on field: {}".format(p_zmod_attr_joined, - [zmod_attr_table+'_'+args.zmodcat_col])) - #p_zmod_dissolved = 'p10_zmods_dissolved_{}'.format(zmod_attr_version) - p_zmod_dissolved = 'p10_zmods_dissolved' + print( + "Dissolve {} on field: {}".format( + p_zmod_attr_joined, [zmod_attr_table + "_" + args.zmodcat_col] + ) + ) + # p_zmod_dissolved = 'p10_zmods_dissolved_{}'.format(zmod_attr_version) + p_zmod_dissolved = "p10_zmods_dissolved" # delete the layer if it already exists in the output gdb if arcpy.Exists(p_zmod_dissolved): arcpy.Delete_management(p_zmod_dissolved) print("Found {} -- deleting".format(p_zmod_dissolved)) - arcpy.Dissolve_management(p_zmod_attr_joined, - os.path.join(args.folder, args.output_gdb, p_zmod_dissolved), - [zmod_attr_table+'_'+args.zmodcat_col], "") + arcpy.Dissolve_management( + p_zmod_attr_joined, + os.path.join(args.folder, args.output_gdb, p_zmod_dissolved), + [zmod_attr_table + "_" + args.zmodcat_col], + "", + ) field_names = [f.name for f in arcpy.ListFields(p_zmod_dissolved)] - print("Dissolve completed; {} has {} records and the following fields \n{}".format( - p_zmod_dissolved, - arcpy.GetCount_management(p_zmod_dissolved), - field_names)) + print( + "Dissolve completed; {} has {} records and the following fields \n{}".format( + p_zmod_dissolved, arcpy.GetCount_management(p_zmod_dissolved), field_names + ) + ) ########## Join the dissolved parcels to zoning_mods ########## - + # read zoning_mods file zmods = pandas.read_csv(args.zmods_csv) - print("Read {} records from {}, with {} unique {} and the following fields: \n{}".format( - len(zmods), args.zmods_csv, - len(zmods[args.zmodcat_col].unique()), args.zmodcat_col, - list(zmods))) + print( + "Read {} records from {}, with {} unique {} and the following fields: \n{}".format( + len(zmods), + args.zmods_csv, + len(zmods[args.zmodcat_col].unique()), + args.zmodcat_col, + list(zmods), + ) + ) # copy the table to output_gdb - print("Copy {} to {}".format(args.zmods_csv, - os.path.join(args.folder, args.output_gdb))) + print( + "Copy {} to {}".format( + args.zmods_csv, os.path.join(args.folder, args.output_gdb) + ) + ) zmods_table = os.path.split(args.zmods_csv)[1] # remove directory if full path zmods_table = os.path.splitext(zmods_table)[0] # remove file extension @@ -203,18 +264,28 @@ zmods_values = numpy.array(numpy.rec.fromrecords(zmods.values)) zmods_values.dtype.names = tuple(zmods.dtypes.index.tolist()) - arcpy.da.NumPyArrayToTable(zmods_values, 
os.path.join(args.folder, args.output_gdb, zmods_table)) + arcpy.da.NumPyArrayToTable( + zmods_values, os.path.join(args.folder, args.output_gdb, zmods_table) + ) print("Created {}".format(os.path.join(args.folder, args.output_gdb, zmods_table))) # join table to the dissolved layer - print("Joining {} with {}".format(os.path.join(args.folder, args.output_gdb, p_zmod_dissolved), - os.path.join(args.folder, args.output_gdb, zmods_table))) - - p_zmods_join = arcpy.AddJoin_management(p_zmod_dissolved, zmod_attr_table+'_'+args.zmodcat_col, - zmods_table, args.zmodcat_col, - join_type=args.join_type) - - zmods_version = args.zmods_csv.split('.')[0].split('_')[-1] + print( + "Joining {} with {}".format( + os.path.join(args.folder, args.output_gdb, p_zmod_dissolved), + os.path.join(args.folder, args.output_gdb, zmods_table), + ) + ) + + p_zmods_join = arcpy.AddJoin_management( + p_zmod_dissolved, + zmod_attr_table + "_" + args.zmodcat_col, + zmods_table, + args.zmodcat_col, + join_type=args.join_type, + ) + + zmods_version = args.zmods_csv.split(".")[0].split("_")[-1] p_zmods_joined = "p10_zoningmods_{}".format(zmods_version) # delete the layer if it already exists in the output gdb @@ -223,10 +294,19 @@ print("Found {} -- deleting".format(p_zmods_joined)) # save it - arcpy.CopyFeatures_management(p_zmods_join, os.path.join(args.folder, args.output_gdb, p_zmods_joined)) - print("Completed creation of {}".format(os.path.join(args.folder, args.output_gdb, p_zmods_joined))) - - print("{} has {} records".format(p_zmods_joined, - arcpy.GetCount_management(p_zmods_joined)[0])) - - print("Script took {0:0.1f} minutes".format((time.time()-start)/60.0)) \ No newline at end of file + arcpy.CopyFeatures_management( + p_zmods_join, os.path.join(args.folder, args.output_gdb, p_zmods_joined) + ) + print( + "Completed creation of {}".format( + os.path.join(args.folder, args.output_gdb, p_zmods_joined) + ) + ) + + print( + "{} has {} records".format( + p_zmods_joined, arcpy.GetCount_management(p_zmods_joined)[0] + ) + ) + + print("Script took {0:0.1f} minutes".format((time.time() - start) / 60.0)) diff --git a/scripts/h8_santa_clara_analysis_part2.py b/scripts/h8_santa_clara_analysis_part2.py index c31b3cb..08faa7f 100644 --- a/scripts/h8_santa_clara_analysis_part2.py +++ b/scripts/h8_santa_clara_analysis_part2.py @@ -1,47 +1,57 @@ import glob, os, sys, time import arcpy, pandas, numpy -if __name__ == '__main__': +if __name__ == "__main__": - if os.getenv('USERNAME')=='ywang': - file_dir = 'C:\\Users\\{}\\Documents\\ArcGIS\\Projects\\SantaClara_subsidized_blgs'.format(os.getenv('USERNAME')) - gdb_dir = 'C:\\Users\\{}\\Documents\\ArcGIS\\Projects\\SantaClara_subsidized_blgs\\SantaClara_subsidized_blgs.gdb'.format(os.getenv('USERNAME')) + if os.getenv("USERNAME") == "ywang": + file_dir = "C:\\Users\\{}\\Documents\\ArcGIS\\Projects\\SantaClara_subsidized_blgs".format( + os.getenv("USERNAME") + ) + gdb_dir = "C:\\Users\\{}\\Documents\\ArcGIS\\Projects\\SantaClara_subsidized_blgs\\SantaClara_subsidized_blgs.gdb".format( + os.getenv("USERNAME") + ) arcpy.env.workspace = gdb_dir - files = {'v2.1.csv': 'v2_1', - 'v2.2.1.csv': 'v2_2_1', - 'v2.3.csv': 'v2_3', - 'v2.3.1.csv': 'v2_3_1', - 'v2.4.csv': 'v2_4', - 'v2.5.csv': 'v2_5', - 'v2.6.csv': 'v2_6', - 'v2.7.csv': 'v2_7', - 'v2.8.csv': 'v2_8', - 'v2.9.csv': 'v2_9'} + files = { + "v2.1.csv": "v2_1", + "v2.2.1.csv": "v2_2_1", + "v2.3.csv": "v2_3", + "v2.3.1.csv": "v2_3_1", + "v2.4.csv": "v2_4", + "v2.5.csv": "v2_5", + "v2.6.csv": "v2_6", + "v2.7.csv": "v2_7", + "v2.8.csv": 
"v2_8", + "v2.9.csv": "v2_9", + } - for filename in list(glob.glob(file_dir+'\\*.csv')): + for filename in list(glob.glob(file_dir + "\\*.csv")): -# start = time.time() + # start = time.time() - if 'parcel_summary' in filename: - print('Process {}'.format(filename)) + if "parcel_summary" in filename: + print("Process {}".format(filename)) base_name = os.path.basename(filename) print(base_name) parcel_summary = pandas.read_csv(filename) # copy the table to gdb_dir - print('Copy {} to {}'.format(filename, gdb_dir)) + print("Copy {} to {}".format(filename, gdb_dir)) # delete table if there's already one there by that name - parcel_summary_cp = 'parcel_summary_cp_'+files[base_name.split('_')[2]] + parcel_summary_cp = "parcel_summary_cp_" + files[base_name.split("_")[2]] if arcpy.Exists(parcel_summary_cp): arcpy.Delete_management(parcel_summary_cp) print("Found {} -- deleting".format(parcel_summary_cp)) - - parcel_summary_values = numpy.array(numpy.rec.fromrecords(parcel_summary.values)) - parcel_summary_values.dtype.names = tuple(parcel_summary.dtypes.index.tolist()) + + parcel_summary_values = numpy.array( + numpy.rec.fromrecords(parcel_summary.values) + ) + parcel_summary_values.dtype.names = tuple( + parcel_summary.dtypes.index.tolist() + ) parcel_summary_table_path = os.path.join(gdb_dir, parcel_summary_cp) arcpy.da.NumPyArrayToTable(parcel_summary_values, parcel_summary_table_path) print("Created {}".format(parcel_summary_table_path)) @@ -49,24 +59,26 @@ # make a copy of the p10_scl layer as the join target layer # delete the layer if it already exists - if arcpy.Exists('p10_scl_cp'): - arcpy.Delete_management('p10_scl_cp') - print("Found {} -- deleting".format('p10_scl_cp')) - - arcpy.CopyFeatures_management('p10_scl', - 'p10_scl_cp') + if arcpy.Exists("p10_scl_cp"): + arcpy.Delete_management("p10_scl_cp") + print("Found {} -- deleting".format("p10_scl_cp")) + + arcpy.CopyFeatures_management("p10_scl", "p10_scl_cp") # join table to p10_scl layer - print('Joining {} with {}'.format('p10_scl_cp', - parcel_summary_table_path)) + print("Joining {} with {}".format("p10_scl_cp", parcel_summary_table_path)) - parcel_summary_join = arcpy.AddJoin_management('p10_scl_cp', 'PARCEL_ID', - parcel_summary_cp, 'parcel_id', - join_type="KEEP_ALL") + parcel_summary_join = arcpy.AddJoin_management( + "p10_scl_cp", + "PARCEL_ID", + parcel_summary_cp, + "parcel_id", + join_type="KEEP_ALL", + ) - parcel_summary_version = files[base_name.split('_')[2]] - parcel_summary_rename = 'p10_scl_{}'.format(parcel_summary_version) - print('Save {}'.format(parcel_summary_rename)) + parcel_summary_version = files[base_name.split("_")[2]] + parcel_summary_rename = "p10_scl_{}".format(parcel_summary_version) + print("Save {}".format(parcel_summary_rename)) # delete the layer if it already exists in the output gdb if arcpy.Exists(parcel_summary_rename): @@ -74,11 +86,20 @@ print("Found {} -- deleting".format(parcel_summary_rename)) # save it - arcpy.CopyFeatures_management(parcel_summary_join, os.path.join(gdb_dir, parcel_summary_rename)) - print("Completed creation of {}".format(os.path.join(gdb_dir, parcel_summary_rename))) + arcpy.CopyFeatures_management( + parcel_summary_join, os.path.join(gdb_dir, parcel_summary_rename) + ) + print( + "Completed creation of {}".format( + os.path.join(gdb_dir, parcel_summary_rename) + ) + ) field_names = [f.name for f in arcpy.ListFields(parcel_summary_rename)] - print("{} has the following fields: {}".format(parcel_summary_rename, - field_names)) + print( + "{} has the following 
fields: {}".format( + parcel_summary_rename, field_names + ) + ) # print('{} took {0:0.1f} minutes'.format(base_name, # (time.time()-start)/60.0)) diff --git a/scripts/pba50_metrics.py b/scripts/pba50_metrics.py index b555e5d..6b2e13e 100644 --- a/scripts/pba50_metrics.py +++ b/scripts/pba50_metrics.py @@ -24,650 +24,1691 @@ from collections import OrderedDict, defaultdict -def calculate_urbansim_highlevelmetrics(runid, dbp, parcel_sum_df, county_sum_df, metrics_dict): +def calculate_urbansim_highlevelmetrics( + runid, dbp, parcel_sum_df, county_sum_df, metrics_dict +): metric_id = "Overall" #################### Housing # all households - metrics_dict[runid,metric_id,'TotHH_region',y2,dbp] = parcel_sum_df['tothh_2050'].sum() - metrics_dict[runid,metric_id,'TotHH_region',y1,dbp] = parcel_sum_df['tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_growth_region',y_diff,dbp] = metrics_dict[runid,metric_id,'TotHH_region',y2,dbp] / metrics_dict[runid,metric_id,'TotHH_region',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] = parcel_sum_df['tothh_2050'].sum() - parcel_sum_df['tothh_2015'].sum() + metrics_dict[runid, metric_id, "TotHH_region", y2, dbp] = parcel_sum_df[ + "tothh_2050" + ].sum() + metrics_dict[runid, metric_id, "TotHH_region", y1, dbp] = parcel_sum_df[ + "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_growth_region", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_region", y2, dbp] + / metrics_dict[runid, metric_id, "TotHH_region", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] = ( + parcel_sum_df["tothh_2050"].sum() - parcel_sum_df["tothh_2015"].sum() + ) # HH growth by county - for index,row in county_sum_df.iterrows(): - metrics_dict[runid,metric_id,'TotHH_county_growth_%s' % row['county'],y_diff,dbp] = row['tothh_growth'] - metrics_dict[runid,metric_id,'TotHH_county_shareofgrowth_%s' % row['county'],y_diff,dbp] = row['tothh_growth'] / metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] + for index, row in county_sum_df.iterrows(): + metrics_dict[ + runid, metric_id, "TotHH_county_growth_%s" % row["county"], y_diff, dbp + ] = row["tothh_growth"] + metrics_dict[ + runid, + metric_id, + "TotHH_county_shareofgrowth_%s" % row["county"], + y_diff, + dbp, + ] = ( + row["tothh_growth"] + / metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] + ) # HH Growth in all GGs - metrics_dict[runid,metric_id,'TotHH_GG',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('GG', na=False), 'tothh_2050'].sum() - metrics_dict[runid,metric_id,'TotHH_GG',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('GG', na=False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_GG_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotHH_GG',y2,dbp] / metrics_dict[runid,metric_id,'TotHH_GG',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotHH_GG_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotHH_GG',y2,dbp] - metrics_dict[runid,metric_id,'TotHH_GG',y1,dbp]) / metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotHH_GG", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("GG", na=False), "tothh_2050" + ].sum() + metrics_dict[runid, metric_id, "TotHH_GG", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("GG", na=False), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_GG_growth", y_diff, dbp] = ( + 
metrics_dict[runid, metric_id, "TotHH_GG", y2, dbp] + / metrics_dict[runid, metric_id, "TotHH_GG", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotHH_GG_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_GG", y2, dbp] + - metrics_dict[runid, metric_id, "TotHH_GG", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] # HH Growth in PDAs - metrics_dict[runid,metric_id,'TotHH_PDA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pda_id'].str.contains('', na=False), 'tothh_2050'].sum() - metrics_dict[runid,metric_id,'TotHH_PDA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pda_id'].str.contains('', na=False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_PDA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotHH_PDA',y2,dbp] / metrics_dict[runid,metric_id,'TotHH_PDA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotHH_PDA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotHH_PDA',y2,dbp] - metrics_dict[runid,metric_id,'TotHH_PDA',y1,dbp]) / metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotHH_PDA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pda_id"].str.contains("", na=False), "tothh_2050" + ].sum() + metrics_dict[runid, metric_id, "TotHH_PDA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pda_id"].str.contains("", na=False), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_PDA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_PDA", y2, dbp] + / metrics_dict[runid, metric_id, "TotHH_PDA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotHH_PDA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_PDA", y2, dbp] + - metrics_dict[runid, metric_id, "TotHH_PDA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] # HH Growth in GGs that are not PDAs - metrics_dict[runid,metric_id,'TotHH_GG_notPDA',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('GG', na=False)) & \ - (parcel_sum_df['pda_id'].str.contains('', na=False)==0), 'tothh_2050'].sum() - metrics_dict[runid,metric_id,'TotHH_GG_notPDA',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('GG', na=False)) & \ - (parcel_sum_df['pda_id'].str.contains('', na=False)==0), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_GG_notPDA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotHH_GG_notPDA',y2,dbp] / metrics_dict[runid,metric_id,'TotHH_GG_notPDA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotHH_GG_notPDA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotHH_GG_notPDA',y2,dbp] - metrics_dict[runid,metric_id,'TotHH_GG_notPDA',y1,dbp]) / metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] - + metrics_dict[runid, metric_id, "TotHH_GG_notPDA", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("GG", na=False)) + & (parcel_sum_df["pda_id"].str.contains("", na=False) == 0), + "tothh_2050", + ].sum() + metrics_dict[runid, metric_id, "TotHH_GG_notPDA", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("GG", na=False)) + & (parcel_sum_df["pda_id"].str.contains("", na=False) == 0), + "tothh_2015", + ].sum() + metrics_dict[runid, metric_id, "TotHH_GG_notPDA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_GG_notPDA", y2, dbp] + / metrics_dict[runid, metric_id, "TotHH_GG_notPDA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotHH_GG_notPDA_shareofgrowth", y_diff, dbp] = ( + 
metrics_dict[runid, metric_id, "TotHH_GG_notPDA", y2, dbp] + - metrics_dict[runid, metric_id, "TotHH_GG_notPDA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] # HH Growth in HRAs - metrics_dict[runid,metric_id,'TotHH_HRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'tothh_2050'].sum() - metrics_dict[runid,metric_id,'TotHH_HRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_HRA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotHH_HRA',y2,dbp] / metrics_dict[runid,metric_id,'TotHH_HRA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotHH_HRA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotHH_HRA',y2,dbp] - metrics_dict[runid,metric_id,'TotHH_HRA',y1,dbp]) / metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotHH_HRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "tothh_2050" + ].sum() + metrics_dict[runid, metric_id, "TotHH_HRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_HRA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_HRA", y2, dbp] + / metrics_dict[runid, metric_id, "TotHH_HRA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotHH_HRA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_HRA", y2, dbp] + - metrics_dict[runid, metric_id, "TotHH_HRA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] # HH Growth in TRAs - metrics_dict[runid,metric_id,'TotHH_TRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'tothh_2050'].sum() - metrics_dict[runid,metric_id,'TotHH_TRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_TRA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotHH_TRA',y2,dbp] / metrics_dict[runid,metric_id,'TotHH_TRA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotHH_TRA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotHH_TRA',y2,dbp] - metrics_dict[runid,metric_id,'TotHH_TRA',y1,dbp]) / metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotHH_TRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "tothh_2050" + ].sum() + metrics_dict[runid, metric_id, "TotHH_TRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_TRA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_TRA", y2, dbp] + / metrics_dict[runid, metric_id, "TotHH_TRA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotHH_TRA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_TRA", y2, dbp] + - metrics_dict[runid, metric_id, "TotHH_TRA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] # HH Growth in areas that are both HRAs and TRAs - metrics_dict[runid,metric_id,'TotHH_HRAandTRA',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) &\ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) , 'tothh_2050'].sum() - 
metrics_dict[runid,metric_id,'TotHH_HRAandTRA',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) &\ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) , 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_HRAandTRA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotHH_HRAandTRA',y2,dbp] / metrics_dict[runid,metric_id,'TotHH_HRAandTRA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotHH_HRAandTRA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotHH_HRAandTRA',y2,dbp] - metrics_dict[runid,metric_id,'TotHH_HRAandTRA',y1,dbp]) / metrics_dict[runid,metric_id,'TotHH_growth_region_number',y_diff,dbp] - - + metrics_dict[runid, metric_id, "TotHH_HRAandTRA", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)), + "tothh_2050", + ].sum() + metrics_dict[runid, metric_id, "TotHH_HRAandTRA", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)), + "tothh_2015", + ].sum() + metrics_dict[runid, metric_id, "TotHH_HRAandTRA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_HRAandTRA", y2, dbp] + / metrics_dict[runid, metric_id, "TotHH_HRAandTRA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotHH_HRAandTRA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotHH_HRAandTRA", y2, dbp] + - metrics_dict[runid, metric_id, "TotHH_HRAandTRA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotHH_growth_region_number", y_diff, dbp] #################### Jobs - # all jobs - metrics_dict[runid,metric_id,'TotJobs_region',y2,dbp] = parcel_sum_df['totemp_2050'].sum() - metrics_dict[runid,metric_id,'TotJobs_region',y1,dbp] = parcel_sum_df['totemp_2015'].sum() - metrics_dict[runid,metric_id,'TotJobs_growth_region',y_diff,dbp] = metrics_dict[runid,metric_id,'TotJobs_region',y2,dbp] / metrics_dict[runid,metric_id,'TotJobs_region',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] = parcel_sum_df['totemp_2050'].sum() - parcel_sum_df['totemp_2015'].sum() - #Job growth by county - for index,row in county_sum_df.iterrows(): - metrics_dict[runid,metric_id,'TotJobs_growth_%s' % row['county'],y_diff,dbp] = row['totemp_growth'] - metrics_dict[runid,metric_id,'TotJobs_county_shareofgrowth_%s' % row['county'],y_diff,dbp] = row['totemp_growth'] / metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotJobs_region", y2, dbp] = parcel_sum_df[ + "totemp_2050" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_region", y1, dbp] = parcel_sum_df[ + "totemp_2015" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_growth_region", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_region", y2, dbp] + / metrics_dict[runid, metric_id, "TotJobs_region", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp] = ( + parcel_sum_df["totemp_2050"].sum() - parcel_sum_df["totemp_2015"].sum() + ) + # Job growth by county + for index, row in county_sum_df.iterrows(): + metrics_dict[ + runid, metric_id, "TotJobs_growth_%s" % row["county"], y_diff, dbp + ] = row["totemp_growth"] + metrics_dict[ + runid, + metric_id, + "TotJobs_county_shareofgrowth_%s" % row["county"], + y_diff, + dbp, + ] = ( + row["totemp_growth"] + / metrics_dict[ + runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp + ] + ) # Job 
Growth in all GGs - metrics_dict[runid,metric_id,'TotJobs_GG',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('GG', na=False), 'totemp_2050'].sum() - metrics_dict[runid,metric_id,'TotJobs_GG',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('GG', na=False), 'totemp_2015'].sum() - metrics_dict[runid,metric_id,'TotJobs_GG_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotJobs_GG',y2,dbp] / metrics_dict[runid,metric_id,'TotJobs_GG',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotJobs_GG_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotJobs_GG',y2,dbp] - metrics_dict[runid,metric_id,'TotJobs_GG',y1,dbp]) / metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotJobs_GG", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("GG", na=False), "totemp_2050" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_GG", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("GG", na=False), "totemp_2015" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_GG_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_GG", y2, dbp] + / metrics_dict[runid, metric_id, "TotJobs_GG", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotJobs_GG_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_GG", y2, dbp] + - metrics_dict[runid, metric_id, "TotJobs_GG", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp] # Job Growth in PDAs - metrics_dict[runid,metric_id,'TotJobs_PDA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pda_id'].str.contains('', na=False), 'totemp_2050'].sum() - metrics_dict[runid,metric_id,'TotJobs_PDA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pda_id'].str.contains('', na=False), 'totemp_2015'].sum() - metrics_dict[runid,metric_id,'TotJobs_PDA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotJobs_PDA',y2,dbp] / metrics_dict[runid,metric_id,'TotJobs_PDA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotJobs_PDA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotJobs_PDA',y2,dbp] - metrics_dict[runid,metric_id,'TotJobs_PDA',y1,dbp]) / metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotJobs_PDA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pda_id"].str.contains("", na=False), "totemp_2050" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_PDA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pda_id"].str.contains("", na=False), "totemp_2015" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_PDA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_PDA", y2, dbp] + / metrics_dict[runid, metric_id, "TotJobs_PDA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotJobs_PDA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_PDA", y2, dbp] + - metrics_dict[runid, metric_id, "TotJobs_PDA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp] # Job Growth in GGs that are not PDAs - metrics_dict[runid,metric_id,'TotJobs_GG_notPDA',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('GG', na=False)) & \ - (parcel_sum_df['pda_id'].str.contains('', na=False)==0), 'totemp_2050'].sum() - metrics_dict[runid,metric_id,'TotJobs_GG_notPDA',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('GG', na=False)) & \ - (parcel_sum_df['pda_id'].str.contains('', na=False)==0), 'totemp_2015'].sum() - 
metrics_dict[runid,metric_id,'TotJobs_GG_notPDA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotJobs_GG_notPDA',y2,dbp] / metrics_dict[runid,metric_id,'TotJobs_GG_notPDA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotJobs_GG_notPDA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotJobs_GG_notPDA',y2,dbp] - metrics_dict[runid,metric_id,'TotJobs_GG_notPDA',y1,dbp]) / metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotJobs_GG_notPDA", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("GG", na=False)) + & (parcel_sum_df["pda_id"].str.contains("", na=False) == 0), + "totemp_2050", + ].sum() + metrics_dict[runid, metric_id, "TotJobs_GG_notPDA", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("GG", na=False)) + & (parcel_sum_df["pda_id"].str.contains("", na=False) == 0), + "totemp_2015", + ].sum() + metrics_dict[runid, metric_id, "TotJobs_GG_notPDA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_GG_notPDA", y2, dbp] + / metrics_dict[runid, metric_id, "TotJobs_GG_notPDA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotJobs_GG_notPDA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_GG_notPDA", y2, dbp] + - metrics_dict[runid, metric_id, "TotJobs_GG_notPDA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp] # Job Growth in HRAs - metrics_dict[runid,metric_id,'TotJobs_HRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'totemp_2050'].sum() - metrics_dict[runid,metric_id,'TotJobs_HRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'totemp_2015'].sum() - metrics_dict[runid,metric_id,'TotJobs_HRA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotJobs_HRA',y2,dbp] / metrics_dict[runid,metric_id,'TotJobs_HRA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotJobs_HRA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotJobs_HRA',y2,dbp] - metrics_dict[runid,metric_id,'TotJobs_HRA',y1,dbp]) / metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotJobs_HRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "totemp_2050" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_HRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "totemp_2015" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_HRA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_HRA", y2, dbp] + / metrics_dict[runid, metric_id, "TotJobs_HRA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotJobs_HRA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_HRA", y2, dbp] + - metrics_dict[runid, metric_id, "TotJobs_HRA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp] # Job Growth in TRAs - metrics_dict[runid,metric_id,'TotJobs_TRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'totemp_2050'].sum() - metrics_dict[runid,metric_id,'TotJobs_TRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'totemp_2015'].sum() - metrics_dict[runid,metric_id,'TotJobs_TRA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotJobs_TRA',y2,dbp] / metrics_dict[runid,metric_id,'TotJobs_TRA',y1,dbp] - 1 - 
metrics_dict[runid,metric_id,'TotJobs_TRA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotJobs_TRA',y2,dbp] - metrics_dict[runid,metric_id,'TotJobs_TRA',y1,dbp]) / metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] + metrics_dict[runid, metric_id, "TotJobs_TRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "totemp_2050" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_TRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "totemp_2015" + ].sum() + metrics_dict[runid, metric_id, "TotJobs_TRA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_TRA", y2, dbp] + / metrics_dict[runid, metric_id, "TotJobs_TRA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotJobs_TRA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_TRA", y2, dbp] + - metrics_dict[runid, metric_id, "TotJobs_TRA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp] # Job Growth in areas that are both HRAs and TRAs - metrics_dict[runid,metric_id,'TotJobs_HRAandTRA',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) &\ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) , 'totemp_2050'].sum() - metrics_dict[runid,metric_id,'TotJobs_HRAandTRA',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) &\ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) , 'totemp_2015'].sum() - metrics_dict[runid,metric_id,'TotJobs_HRAandTRA_growth',y_diff,dbp] = metrics_dict[runid,metric_id,'TotJobs_HRAandTRA',y2,dbp] / metrics_dict[runid,metric_id,'TotJobs_HRAandTRA',y1,dbp] - 1 - metrics_dict[runid,metric_id,'TotJobs_HRAandTRA_shareofgrowth',y_diff,dbp] = (metrics_dict[runid,metric_id,'TotJobs_HRAandTRA',y2,dbp] - metrics_dict[runid,metric_id,'TotJobs_HRAandTRA',y1,dbp]) / metrics_dict[runid,metric_id,'TotJobs_growth_region_number',y_diff,dbp] - + metrics_dict[runid, metric_id, "TotJobs_HRAandTRA", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)), + "totemp_2050", + ].sum() + metrics_dict[runid, metric_id, "TotJobs_HRAandTRA", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)), + "totemp_2015", + ].sum() + metrics_dict[runid, metric_id, "TotJobs_HRAandTRA_growth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_HRAandTRA", y2, dbp] + / metrics_dict[runid, metric_id, "TotJobs_HRAandTRA", y1, dbp] + - 1 + ) + metrics_dict[runid, metric_id, "TotJobs_HRAandTRA_shareofgrowth", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "TotJobs_HRAandTRA", y2, dbp] + - metrics_dict[runid, metric_id, "TotJobs_HRAandTRA", y1, dbp] + ) / metrics_dict[runid, metric_id, "TotJobs_growth_region_number", y_diff, dbp] ############################ # LIHH - metrics_dict[runid,metric_id,'LIHH_share_2050',y2,dbp] = (parcel_sum_df['hhq1_2050'].sum() + parcel_sum_df['hhq2_2050'].sum()) / parcel_sum_df['totemp_2050'].sum() - metrics_dict[runid,metric_id,'LIHH_share_2015',y1,dbp] = (parcel_sum_df['hhq1_2015'].sum() + parcel_sum_df['hhq2_2050'].sum()) / parcel_sum_df['totemp_2015'].sum() - metrics_dict[runid,metric_id,'LIHH_growth_region',y_diff,dbp] = (parcel_sum_df['hhq1_2050'].sum() + parcel_sum_df['hhq2_2050'].sum()) / (parcel_sum_df['hhq1_2015'].sum() + 
parcel_sum_df['hhq2_2050'].sum()) - for index,row in county_sum_df.iterrows(): - metrics_dict[runid,metric_id,'LIHH_growth_%s' % row["county"],y_diff,dbp] = row['LIHH_growth'] - - # all jobs - metrics_dict[runid,metric_id,'tot_jobs_2050',y2,dbp] = parcel_sum_df['totemp_2050'].sum() - metrics_dict[runid,metric_id,'tot_jobs_2015',y1,dbp] = parcel_sum_df['totemp_2015'].sum() - metrics_dict[runid,metric_id,'jobs_growth_region',y_diff,dbp] = (parcel_sum_df['totemp_2050'].sum() / parcel_sum_df['totemp_2015'].sum()) - for index,row in county_sum_df.iterrows(): - metrics_dict[runid,metric_id,'jobs_growth_%s' % row["county"],y_diff,dbp] = row['totemp_growth'] + metrics_dict[runid, metric_id, "LIHH_share_2050", y2, dbp] = ( + parcel_sum_df["hhq1_2050"].sum() + parcel_sum_df["hhq2_2050"].sum() + ) / parcel_sum_df["totemp_2050"].sum() + metrics_dict[runid, metric_id, "LIHH_share_2015", y1, dbp] = ( + parcel_sum_df["hhq1_2015"].sum() + parcel_sum_df["hhq2_2050"].sum() + ) / parcel_sum_df["totemp_2015"].sum() + metrics_dict[runid, metric_id, "LIHH_growth_region", y_diff, dbp] = ( + parcel_sum_df["hhq1_2050"].sum() + parcel_sum_df["hhq2_2050"].sum() + ) / (parcel_sum_df["hhq1_2015"].sum() + parcel_sum_df["hhq2_2050"].sum()) + for index, row in county_sum_df.iterrows(): + metrics_dict[ + runid, metric_id, "LIHH_growth_%s" % row["county"], y_diff, dbp + ] = row["LIHH_growth"] -def calculate_tm_highlevelmetrics(runid, dbp, parcel_sum_df, county_sum_df, metrics_dict): + # all jobs + metrics_dict[runid, metric_id, "tot_jobs_2050", y2, dbp] = parcel_sum_df[ + "totemp_2050" + ].sum() + metrics_dict[runid, metric_id, "tot_jobs_2015", y1, dbp] = parcel_sum_df[ + "totemp_2015" + ].sum() + metrics_dict[runid, metric_id, "jobs_growth_region", y_diff, dbp] = ( + parcel_sum_df["totemp_2050"].sum() / parcel_sum_df["totemp_2015"].sum() + ) + for index, row in county_sum_df.iterrows(): + metrics_dict[ + runid, metric_id, "jobs_growth_%s" % row["county"], y_diff, dbp + ] = row["totemp_growth"] + + +def calculate_tm_highlevelmetrics( + runid, dbp, parcel_sum_df, county_sum_df, metrics_dict +): metric_id = "Overall_TM" # TBD -def calculate_normalize_factor_Q1Q2(parcel_sum_df): - return ((parcel_sum_df['hhq1_2050'].sum() + parcel_sum_df['hhq2_2050'].sum()) / parcel_sum_df['tothh_2050'].sum()) \ - / ((parcel_sum_df['hhq1_2015'].sum() + parcel_sum_df['hhq2_2015'].sum()) / parcel_sum_df['tothh_2015'].sum()) -def calculate_normalize_factor_Q1(parcel_sum_df): - return (parcel_sum_df['hhq1_2050'].sum() / parcel_sum_df['tothh_2050'].sum()) \ - / (parcel_sum_df['hhq1_2015'].sum() / parcel_sum_df['tothh_2015'].sum()) +def calculate_normalize_factor_Q1Q2(parcel_sum_df): + return ( + (parcel_sum_df["hhq1_2050"].sum() + parcel_sum_df["hhq2_2050"].sum()) + / parcel_sum_df["tothh_2050"].sum() + ) / ( + (parcel_sum_df["hhq1_2015"].sum() + parcel_sum_df["hhq2_2015"].sum()) + / parcel_sum_df["tothh_2015"].sum() + ) -def calculate_Affordable1_transportation_costs(runid, year, dbp, tm_scen_metrics_df, tm_auto_owned_df, tm_auto_times_df, tm_travel_cost_df, metrics_dict): +def calculate_normalize_factor_Q1(parcel_sum_df): + return (parcel_sum_df["hhq1_2050"].sum() / parcel_sum_df["tothh_2050"].sum()) / ( + parcel_sum_df["hhq1_2015"].sum() / parcel_sum_df["tothh_2015"].sum() + ) + + +def calculate_Affordable1_transportation_costs( + runid, + year, + dbp, + tm_scen_metrics_df, + tm_auto_owned_df, + tm_auto_times_df, + tm_travel_cost_df, + metrics_dict, +): metric_id = "A1" days_per_year = 300 # Total number of households - tm_tot_hh = 
tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'].str.contains("total_households_inc") == True), 'value'].sum() - tm_tot_hh_inc1 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_households_inc1"),'value'].item() - tm_tot_hh_inc2 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_households_inc2"),'value'].item() + tm_tot_hh = tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"].str.contains("total_households_inc") + == True + ), + "value", + ].sum() + tm_tot_hh_inc1 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_households_inc1"), "value" + ].item() + tm_tot_hh_inc2 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_households_inc2"), "value" + ].item() # Total household income (model outputs are in 2000$, annual) - tm_total_hh_inc = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'].str.contains("total_hh_inc") == True), 'value'].sum() - tm_total_hh_inc_inc1 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_hh_inc_inc1"),'value'].item() - tm_total_hh_inc_inc2 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_hh_inc_inc2"),'value'].item() + tm_total_hh_inc = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"].str.contains("total_hh_inc") == True), + "value", + ].sum() + tm_total_hh_inc_inc1 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_hh_inc_inc1"), "value" + ].item() + tm_total_hh_inc_inc2 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_hh_inc_inc2"), "value" + ].item() # Total transit fares (model outputs are in 2000$, per day) - tm_tot_transit_fares = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'].str.contains("total_transit_fares") == True), 'value'].sum() * days_per_year - tm_tot_transit_fares_inc1 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_transit_fares_inc1"),'value'].item() * days_per_year - tm_tot_transit_fares_inc2 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_transit_fares_inc2"),'value'].item() * days_per_year + tm_tot_transit_fares = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"].str.contains("total_transit_fares") + == True + ), + "value", + ].sum() + * days_per_year + ) + tm_tot_transit_fares_inc1 = ( + tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_transit_fares_inc1"), "value" + ].item() + * days_per_year + ) + tm_tot_transit_fares_inc2 = ( + tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_transit_fares_inc2"), "value" + ].item() + * days_per_year + ) # Total auto op cost (model outputs are in 2000$, per day) - tm_tot_auto_op_cost = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'].str.contains("total_auto_cost_inc") == True), 'value'].sum() * days_per_year - tm_tot_auto_op_cost_inc1 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_auto_cost_inc1"),'value'].item() * days_per_year - tm_tot_auto_op_cost_inc2 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_auto_cost_inc2"),'value'].item() * days_per_year + tm_tot_auto_op_cost = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"].str.contains("total_auto_cost_inc") + == True + ), + "value", + ].sum() + * days_per_year + ) + tm_tot_auto_op_cost_inc1 = ( + tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_auto_cost_inc1"), "value" + ].item() + * days_per_year + ) + tm_tot_auto_op_cost_inc2 = ( + 
tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_auto_cost_inc2"), "value" + ].item() + * days_per_year + ) # Total auto parking cost (model outputs are in 2000$, per day, in cents) - #tm_travel_cost_df['park_cost'] = (tm_travel_cost_df['pcost_indiv']+tm_travel_cost_df['pcost_joint']) * tm_travel_cost_df['freq'] - tm_tot_auto_park_cost = (tm_travel_cost_df.pcost_indiv.sum() + tm_travel_cost_df.pcost_joint.sum()) * days_per_year / 100 - tm_tot_auto_park_cost_inc1 = (tm_travel_cost_df.loc[(tm_travel_cost_df['incQ'] == 1),'pcost_indiv'].sum() + tm_travel_cost_df.loc[(tm_travel_cost_df['incQ'] == 1),'pcost_joint'].sum()) * days_per_year / 100 - tm_tot_auto_park_cost_inc2 = (tm_travel_cost_df.loc[(tm_travel_cost_df['incQ'] == 2),'pcost_indiv'].sum() + tm_travel_cost_df.loc[(tm_travel_cost_df['incQ'] == 2),'pcost_joint'].sum()) * days_per_year / 100 + # tm_travel_cost_df['park_cost'] = (tm_travel_cost_df['pcost_indiv']+tm_travel_cost_df['pcost_joint']) * tm_travel_cost_df['freq'] + tm_tot_auto_park_cost = ( + (tm_travel_cost_df.pcost_indiv.sum() + tm_travel_cost_df.pcost_joint.sum()) + * days_per_year + / 100 + ) + tm_tot_auto_park_cost_inc1 = ( + ( + tm_travel_cost_df.loc[(tm_travel_cost_df["incQ"] == 1), "pcost_indiv"].sum() + + tm_travel_cost_df.loc[ + (tm_travel_cost_df["incQ"] == 1), "pcost_joint" + ].sum() + ) + * days_per_year + / 100 + ) + tm_tot_auto_park_cost_inc2 = ( + ( + tm_travel_cost_df.loc[(tm_travel_cost_df["incQ"] == 2), "pcost_indiv"].sum() + + tm_travel_cost_df.loc[ + (tm_travel_cost_df["incQ"] == 2), "pcost_joint" + ].sum() + ) + * days_per_year + / 100 + ) # Calculating number of autos owned from autos_owned.csv - tm_auto_owned_df['tot_autos'] = tm_auto_owned_df['autos'] * tm_auto_owned_df['households'] - tm_tot_autos_owned = tm_auto_owned_df['tot_autos'].sum() - tm_tot_autos_owned_inc1 = tm_auto_owned_df.loc[(tm_auto_owned_df['incQ'] == 1), 'tot_autos'].sum() - tm_tot_autos_owned_inc2 = tm_auto_owned_df.loc[(tm_auto_owned_df['incQ'] == 2), 'tot_autos'].sum() + tm_auto_owned_df["tot_autos"] = ( + tm_auto_owned_df["autos"] * tm_auto_owned_df["households"] + ) + tm_tot_autos_owned = tm_auto_owned_df["tot_autos"].sum() + tm_tot_autos_owned_inc1 = tm_auto_owned_df.loc[ + (tm_auto_owned_df["incQ"] == 1), "tot_autos" + ].sum() + tm_tot_autos_owned_inc2 = tm_auto_owned_df.loc[ + (tm_auto_owned_df["incQ"] == 2), "tot_autos" + ].sum() # Total auto ownership cost in 2000$ - tm_tot_auto_owner_cost = tm_tot_autos_owned * auto_ownership_cost * inflation_18_20 / inflation_00_20 - tm_tot_auto_owner_cost_inc1 = tm_tot_autos_owned_inc1 * auto_ownership_cost_inc1 * inflation_18_20 / inflation_00_20 - tm_tot_auto_owner_cost_inc2 = tm_tot_autos_owned_inc2 * auto_ownership_cost_inc2 * inflation_18_20 / inflation_00_20 + tm_tot_auto_owner_cost = ( + tm_tot_autos_owned * auto_ownership_cost * inflation_18_20 / inflation_00_20 + ) + tm_tot_auto_owner_cost_inc1 = ( + tm_tot_autos_owned_inc1 + * auto_ownership_cost_inc1 + * inflation_18_20 + / inflation_00_20 + ) + tm_tot_auto_owner_cost_inc2 = ( + tm_tot_autos_owned_inc2 + * auto_ownership_cost_inc2 + * inflation_18_20 + / inflation_00_20 + ) # Total Transportation Cost (in 2000$) - tp_cost = tm_tot_auto_op_cost + tm_tot_transit_fares + tm_tot_auto_owner_cost + tm_tot_auto_park_cost - tp_cost_inc1 = tm_tot_auto_op_cost_inc1 + tm_tot_transit_fares_inc1 + tm_tot_auto_owner_cost_inc1 + tm_tot_auto_park_cost_inc1 - tp_cost_inc2 = tm_tot_auto_op_cost_inc2 + tm_tot_transit_fares_inc2 + tm_tot_auto_owner_cost_inc2 + 
tm_tot_auto_park_cost_inc2 + tp_cost = ( + tm_tot_auto_op_cost + + tm_tot_transit_fares + + tm_tot_auto_owner_cost + + tm_tot_auto_park_cost + ) + tp_cost_inc1 = ( + tm_tot_auto_op_cost_inc1 + + tm_tot_transit_fares_inc1 + + tm_tot_auto_owner_cost_inc1 + + tm_tot_auto_park_cost_inc1 + ) + tp_cost_inc2 = ( + tm_tot_auto_op_cost_inc2 + + tm_tot_transit_fares_inc2 + + tm_tot_auto_owner_cost_inc2 + + tm_tot_auto_park_cost_inc2 + ) # Mean transportation cost per household in 2020$ - tp_cost_mean = tp_cost / tm_tot_hh * inflation_00_20 + tp_cost_mean = tp_cost / tm_tot_hh * inflation_00_20 tp_cost_mean_inc1 = tp_cost_inc1 / tm_tot_hh_inc1 * inflation_00_20 tp_cost_mean_inc2 = tp_cost_inc2 / tm_tot_hh_inc2 * inflation_00_20 - metrics_dict[runid,metric_id,'mean_transportation_cost_2020$',year,dbp] = tp_cost_mean - metrics_dict[runid,metric_id,'mean_transportation_cost_2020$_inc1',year,dbp] = tp_cost_mean_inc1 - metrics_dict[runid,metric_id,'mean_transportation_cost_2020$_inc2',year,dbp] = tp_cost_mean_inc2 - - # Transportation cost % of income - tp_cost_pct_inc = tp_cost / tm_total_hh_inc - tp_cost_pct_inc_inc1 = tp_cost_inc1 / tm_total_hh_inc_inc1 - tp_cost_pct_inc_inc2 = tp_cost_inc2 / tm_total_hh_inc_inc2 - tp_cost_pct_inc_inc1and2 = (tp_cost_inc1+tp_cost_inc2) / (tm_total_hh_inc_inc1+tm_total_hh_inc_inc2) + metrics_dict[ + runid, metric_id, "mean_transportation_cost_2020$", year, dbp + ] = tp_cost_mean + metrics_dict[ + runid, metric_id, "mean_transportation_cost_2020$_inc1", year, dbp + ] = tp_cost_mean_inc1 + metrics_dict[ + runid, metric_id, "mean_transportation_cost_2020$_inc2", year, dbp + ] = tp_cost_mean_inc2 - - # Transportation cost % of income metrics - metrics_dict[runid,metric_id,'transportation_cost_pct_income',year,dbp] = tp_cost_pct_inc - metrics_dict[runid,metric_id,'transportation_cost_pct_income_inc1',year,dbp] = tp_cost_pct_inc_inc1 - metrics_dict[runid,metric_id,'transportation_cost_pct_income_inc2',year,dbp] = tp_cost_pct_inc_inc2 - metrics_dict[runid,metric_id,'transportation_cost_pct_income_inc1and2',year,dbp] = tp_cost_pct_inc_inc1and2 + # Transportation cost % of income + tp_cost_pct_inc = tp_cost / tm_total_hh_inc + tp_cost_pct_inc_inc1 = tp_cost_inc1 / tm_total_hh_inc_inc1 + tp_cost_pct_inc_inc2 = tp_cost_inc2 / tm_total_hh_inc_inc2 + tp_cost_pct_inc_inc1and2 = (tp_cost_inc1 + tp_cost_inc2) / ( + tm_total_hh_inc_inc1 + tm_total_hh_inc_inc2 + ) + + # Transportation cost % of income metrics + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income", year, dbp + ] = tp_cost_pct_inc + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_inc1", year, dbp + ] = tp_cost_pct_inc_inc1 + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_inc2", year, dbp + ] = tp_cost_pct_inc_inc2 + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_inc1and2", year, dbp + ] = tp_cost_pct_inc_inc1and2 # Transportation cost % of income metrics; split by cost bucket - metrics_dict[runid,metric_id,'transportation_cost_pct_income_autoop',year,dbp] = tm_tot_auto_op_cost / tm_total_hh_inc - metrics_dict[runid,metric_id,'transportation_cost_pct_income_autopark',year,dbp] = tm_tot_auto_park_cost / tm_total_hh_inc - metrics_dict[runid,metric_id,'transportation_cost_pct_income_transitfare',year,dbp] = tm_tot_transit_fares / tm_total_hh_inc - metrics_dict[runid,metric_id,'transportation_cost_pct_income_autoown',year,dbp] = tm_tot_auto_owner_cost / tm_total_hh_inc - + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_autoop", year, dbp + 
] = (tm_tot_auto_op_cost / tm_total_hh_inc) + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_autopark", year, dbp + ] = (tm_tot_auto_park_cost / tm_total_hh_inc) + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_transitfare", year, dbp + ] = (tm_tot_transit_fares / tm_total_hh_inc) + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_autoown", year, dbp + ] = (tm_tot_auto_owner_cost / tm_total_hh_inc) + # Add housing costs from Shimon's outputs - housing_costs_2050_df = pd.read_csv('C:/Users/ATapase/Box/Horizon and Plan Bay Area 2050/Equity and Performance/7_Analysis/Metrics/metrics_files/2050 Share of Income Spent on Housing.csv') - housing_costs_2015_df = pd.read_csv('C:/Users/ATapase/Box/Horizon and Plan Bay Area 2050/Equity and Performance/7_Analysis/Metrics/metrics_files/2015 Share of Income Spent on Housing.csv') - housing_costs_2015_df['totcosts'] = housing_costs_2015_df['share_income'] * housing_costs_2015_df['households'] + housing_costs_2050_df = pd.read_csv( + "C:/Users/ATapase/Box/Horizon and Plan Bay Area 2050/Equity and Performance/7_Analysis/Metrics/metrics_files/2050 Share of Income Spent on Housing.csv" + ) + housing_costs_2015_df = pd.read_csv( + "C:/Users/ATapase/Box/Horizon and Plan Bay Area 2050/Equity and Performance/7_Analysis/Metrics/metrics_files/2015 Share of Income Spent on Housing.csv" + ) + housing_costs_2015_df["totcosts"] = ( + housing_costs_2015_df["share_income"] * housing_costs_2015_df["households"] + ) if year == "2050": - metrics_dict[runid,metric_id,'housing_cost_pct_income',year,dbp] = housing_costs_2050_df['w_all'].sum() - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc1',year,dbp] = housing_costs_2050_df['w_q1'].sum() - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc2',year,dbp] = housing_costs_2050_df['w_q2'].sum() - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc1and2',year,dbp] = housing_costs_2050_df['w_q1_q2'].sum() + metrics_dict[ + runid, metric_id, "housing_cost_pct_income", year, dbp + ] = housing_costs_2050_df["w_all"].sum() + metrics_dict[ + runid, metric_id, "housing_cost_pct_income_inc1", year, dbp + ] = housing_costs_2050_df["w_q1"].sum() + metrics_dict[ + runid, metric_id, "housing_cost_pct_income_inc2", year, dbp + ] = housing_costs_2050_df["w_q2"].sum() + metrics_dict[ + runid, metric_id, "housing_cost_pct_income_inc1and2", year, dbp + ] = housing_costs_2050_df["w_q1_q2"].sum() elif year == "2015": - metrics_dict[runid,metric_id,'housing_cost_pct_income',year,dbp] = housing_costs_2015_df.loc[(housing_costs_2015_df['tenure'].str.contains("Total")), 'totcosts'].sum() / \ - housing_costs_2015_df.loc[(housing_costs_2015_df['tenure'].str.contains("Total")), 'households'].sum() - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc1',year,dbp] = housing_costs_2015_df.loc[(housing_costs_2015_df['short_name'].str.contains("q1t")), 'share_income'].sum() - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc2',year,dbp] = housing_costs_2015_df.loc[(housing_costs_2015_df['short_name'].str.contains("q2t")), 'share_income'].sum() - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc1and2',year,dbp] = (housing_costs_2015_df.loc[(housing_costs_2015_df['short_name'].str.contains("q1t")), 'totcosts'].sum() + housing_costs_2015_df.loc[(housing_costs_2015_df['short_name'].str.contains("q2t")), 'totcosts'].sum()) / \ - (housing_costs_2015_df.loc[(housing_costs_2015_df['short_name'].str.contains("q1t")), 'households'].sum() + 
housing_costs_2015_df.loc[(housing_costs_2015_df['short_name'].str.contains("q2t")), 'households'].sum()) + metrics_dict[runid, metric_id, "housing_cost_pct_income", year, dbp] = ( + housing_costs_2015_df.loc[ + (housing_costs_2015_df["tenure"].str.contains("Total")), "totcosts" + ].sum() + / housing_costs_2015_df.loc[ + (housing_costs_2015_df["tenure"].str.contains("Total")), "households" + ].sum() + ) + metrics_dict[ + runid, metric_id, "housing_cost_pct_income_inc1", year, dbp + ] = housing_costs_2015_df.loc[ + (housing_costs_2015_df["short_name"].str.contains("q1t")), "share_income" + ].sum() + metrics_dict[ + runid, metric_id, "housing_cost_pct_income_inc2", year, dbp + ] = housing_costs_2015_df.loc[ + (housing_costs_2015_df["short_name"].str.contains("q2t")), "share_income" + ].sum() + metrics_dict[ + runid, metric_id, "housing_cost_pct_income_inc1and2", year, dbp + ] = ( + housing_costs_2015_df.loc[ + (housing_costs_2015_df["short_name"].str.contains("q1t")), "totcosts" + ].sum() + + housing_costs_2015_df.loc[ + (housing_costs_2015_df["short_name"].str.contains("q2t")), "totcosts" + ].sum() + ) / ( + housing_costs_2015_df.loc[ + (housing_costs_2015_df["short_name"].str.contains("q1t")), "households" + ].sum() + + housing_costs_2015_df.loc[ + (housing_costs_2015_df["short_name"].str.contains("q2t")), "households" + ].sum() + ) # Total H+T Costs pct of income - metrics_dict[runid,metric_id,'HplusT_cost_pct_income',year,dbp] = metrics_dict[runid,metric_id,'transportation_cost_pct_income',year,dbp] + \ - metrics_dict[runid,metric_id,'housing_cost_pct_income',year,dbp] - metrics_dict[runid,metric_id,'HplusT_cost_pct_income_inc1',year,dbp] = metrics_dict[runid,metric_id,'transportation_cost_pct_income_inc1',year,dbp] + \ - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc1',year,dbp] - metrics_dict[runid,metric_id,'HplusT_cost_pct_income_inc2',year,dbp] = metrics_dict[runid,metric_id,'transportation_cost_pct_income_inc2',year,dbp] + \ - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc2',year,dbp] - metrics_dict[runid,metric_id,'HplusT_cost_pct_income_inc1and2',year,dbp] = metrics_dict[runid,metric_id,'transportation_cost_pct_income_inc1and2',year,dbp] + \ - metrics_dict[runid,metric_id,'housing_cost_pct_income_inc1and2',year,dbp] - - - + metrics_dict[runid, metric_id, "HplusT_cost_pct_income", year, dbp] = ( + metrics_dict[runid, metric_id, "transportation_cost_pct_income", year, dbp] + + metrics_dict[runid, metric_id, "housing_cost_pct_income", year, dbp] + ) + metrics_dict[runid, metric_id, "HplusT_cost_pct_income_inc1", year, dbp] = ( + metrics_dict[runid, metric_id, "transportation_cost_pct_income_inc1", year, dbp] + + metrics_dict[runid, metric_id, "housing_cost_pct_income_inc1", year, dbp] + ) + metrics_dict[runid, metric_id, "HplusT_cost_pct_income_inc2", year, dbp] = ( + metrics_dict[runid, metric_id, "transportation_cost_pct_income_inc2", year, dbp] + + metrics_dict[runid, metric_id, "housing_cost_pct_income_inc2", year, dbp] + ) + metrics_dict[runid, metric_id, "HplusT_cost_pct_income_inc1and2", year, dbp] = ( + metrics_dict[ + runid, metric_id, "transportation_cost_pct_income_inc1and2", year, dbp + ] + + metrics_dict[runid, metric_id, "housing_cost_pct_income_inc1and2", year, dbp] + ) # Tolls & Fares - + # Reading auto times file - tm_auto_times_df = tm_auto_times_df.sum(level='Income') + tm_auto_times_df = tm_auto_times_df.sum(level="Income") # Calculating Total Tolls per day = bridge tolls + value tolls (2000$) total_tolls = OrderedDict() - for inc_level in 
range(1,5): - total_tolls['inc%d' % inc_level] = tm_auto_times_df.loc['inc%d' % inc_level, ['Bridge Tolls', 'Value Tolls']].sum()/100 # cents -> dollars + for inc_level in range(1, 5): + total_tolls["inc%d" % inc_level] = ( + tm_auto_times_df.loc[ + "inc%d" % inc_level, ["Bridge Tolls", "Value Tolls"] + ].sum() + / 100 + ) # cents -> dollars total_tolls_allHH = sum(total_tolls.values()) - total_tolls_LIHH = total_tolls['inc1'] + total_tolls['inc2'] - + total_tolls_LIHH = total_tolls["inc1"] + total_tolls["inc2"] + # Average Daily Tolls per household - metrics_dict[runid,metric_id,'tolls_per_HH',year,dbp] = total_tolls_allHH / tm_tot_hh * inflation_00_20 - metrics_dict[runid,metric_id,'tolls_per_LIHH',year,dbp] = total_tolls_LIHH / (tm_tot_hh_inc1+tm_tot_hh_inc2) * inflation_00_20 - metrics_dict[runid,metric_id,'tolls_per_inc1HH',year,dbp] = total_tolls['inc1'] / tm_tot_hh_inc1 * inflation_00_20 + metrics_dict[runid, metric_id, "tolls_per_HH", year, dbp] = ( + total_tolls_allHH / tm_tot_hh * inflation_00_20 + ) + metrics_dict[runid, metric_id, "tolls_per_LIHH", year, dbp] = ( + total_tolls_LIHH / (tm_tot_hh_inc1 + tm_tot_hh_inc2) * inflation_00_20 + ) + metrics_dict[runid, metric_id, "tolls_per_inc1HH", year, dbp] = ( + total_tolls["inc1"] / tm_tot_hh_inc1 * inflation_00_20 + ) # Average Daily Fares per Household (note: transit fares totals calculated above are annual and need to be divided by days_per_year) - metrics_dict[runid,metric_id,'fares_per_HH',year,dbp] = tm_tot_transit_fares / tm_tot_hh * inflation_00_20 / days_per_year - metrics_dict[runid,metric_id,'fares_per_LIHH',year,dbp] = (tm_tot_transit_fares_inc1 + tm_tot_transit_fares_inc2) / (tm_tot_hh_inc1+tm_tot_hh_inc2) * inflation_00_20 / days_per_year - metrics_dict[runid,metric_id,'fares_per_inc1HH',year,dbp] = tm_tot_transit_fares_inc1 / tm_tot_hh_inc1 * inflation_00_20 / days_per_year + metrics_dict[runid, metric_id, "fares_per_HH", year, dbp] = ( + tm_tot_transit_fares / tm_tot_hh * inflation_00_20 / days_per_year + ) + metrics_dict[runid, metric_id, "fares_per_LIHH", year, dbp] = ( + (tm_tot_transit_fares_inc1 + tm_tot_transit_fares_inc2) + / (tm_tot_hh_inc1 + tm_tot_hh_inc2) + * inflation_00_20 + / days_per_year + ) + metrics_dict[runid, metric_id, "fares_per_inc1HH", year, dbp] = ( + tm_tot_transit_fares_inc1 / tm_tot_hh_inc1 * inflation_00_20 / days_per_year + ) - # per trip # Total auto trips per day (model outputs are in trips, per day) - tm_tot_auto_trips = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'].str.contains("total_auto_trips") == True), 'value'].sum() - tm_tot_auto_trips_inc1 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_auto_trips_inc1"),'value'].item() - tm_tot_auto_trips_inc2 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_auto_trips_inc2"),'value'].item() + tm_tot_auto_trips = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"].str.contains("total_auto_trips") == True), + "value", + ].sum() + tm_tot_auto_trips_inc1 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_auto_trips_inc1"), "value" + ].item() + tm_tot_auto_trips_inc2 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_auto_trips_inc2"), "value" + ].item() # Total transit trips per day (model outputs are in trips, per day) - tm_tot_transit_trips = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'].str.contains("total_transit_trips") == True), 'value'].sum() - tm_tot_transit_trips_inc1 = 
tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_transit_trips_inc1"),'value'].item() - tm_tot_transit_trips_inc2 = tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "total_transit_trips_inc2"),'value'].item() + tm_tot_transit_trips = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"].str.contains("total_transit_trips") == True), + "value", + ].sum() + tm_tot_transit_trips_inc1 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_transit_trips_inc1"), "value" + ].item() + tm_tot_transit_trips_inc2 = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "total_transit_trips_inc2"), "value" + ].item() # Average Tolls per trip (total_tolls_xx is calculated above as per day tolls in 2000 dollars) - metrics_dict[runid,metric_id,'tolls_per_trip',year,dbp] = total_tolls_allHH / tm_tot_auto_trips * inflation_00_20 - metrics_dict[runid,metric_id,'tolls_per_trip_inc1and2',year,dbp] = total_tolls_LIHH / (tm_tot_auto_trips_inc1+tm_tot_auto_trips_inc2) * inflation_00_20 - metrics_dict[runid,metric_id,'tolls_per_trip_inc1',year,dbp] = total_tolls['inc1'] / tm_tot_auto_trips_inc1 * inflation_00_20 + metrics_dict[runid, metric_id, "tolls_per_trip", year, dbp] = ( + total_tolls_allHH / tm_tot_auto_trips * inflation_00_20 + ) + metrics_dict[runid, metric_id, "tolls_per_trip_inc1and2", year, dbp] = ( + total_tolls_LIHH + / (tm_tot_auto_trips_inc1 + tm_tot_auto_trips_inc2) + * inflation_00_20 + ) + metrics_dict[runid, metric_id, "tolls_per_trip_inc1", year, dbp] = ( + total_tolls["inc1"] / tm_tot_auto_trips_inc1 * inflation_00_20 + ) # Total auto operating cost per trip (tm_tot_auto_op_cost and tm_tot_auto_park_cost are calculated above as annual costs in 2000 dollars) - metrics_dict[runid,metric_id,'autocost_per_trip',year,dbp] = (tm_tot_auto_op_cost + tm_tot_auto_park_cost) / tm_tot_auto_trips * inflation_00_20 / days_per_year - metrics_dict[runid,metric_id,'autocost_per_trip_inc1and2',year,dbp] = (tm_tot_auto_op_cost_inc1 + tm_tot_auto_op_cost_inc2 + tm_tot_auto_park_cost_inc1 + tm_tot_auto_park_cost_inc2) / (tm_tot_auto_trips_inc1+tm_tot_auto_trips_inc2) * inflation_00_20 / days_per_year - metrics_dict[runid,metric_id,'autocost_per_trip_inc1',year,dbp] = (tm_tot_auto_op_cost_inc1 + tm_tot_auto_park_cost_inc1) / tm_tot_auto_trips_inc1 * inflation_00_20 / days_per_year + metrics_dict[runid, metric_id, "autocost_per_trip", year, dbp] = ( + (tm_tot_auto_op_cost + tm_tot_auto_park_cost) + / tm_tot_auto_trips + * inflation_00_20 + / days_per_year + ) + metrics_dict[runid, metric_id, "autocost_per_trip_inc1and2", year, dbp] = ( + ( + tm_tot_auto_op_cost_inc1 + + tm_tot_auto_op_cost_inc2 + + tm_tot_auto_park_cost_inc1 + + tm_tot_auto_park_cost_inc2 + ) + / (tm_tot_auto_trips_inc1 + tm_tot_auto_trips_inc2) + * inflation_00_20 + / days_per_year + ) + metrics_dict[runid, metric_id, "autocost_per_trip_inc1", year, dbp] = ( + (tm_tot_auto_op_cost_inc1 + tm_tot_auto_park_cost_inc1) + / tm_tot_auto_trips_inc1 + * inflation_00_20 + / days_per_year + ) # Average Fares per trip (note: transit fares totals calculated above are annual and need to be divided by days_per_year) - metrics_dict[runid,metric_id,'fares_per_trip',year,dbp] = tm_tot_transit_fares / tm_tot_transit_trips * inflation_00_20 / days_per_year - metrics_dict[runid,metric_id,'fares_per_trip_inc1and2',year,dbp] = (tm_tot_transit_fares_inc1 + tm_tot_transit_fares_inc2) / (tm_tot_transit_trips_inc1+tm_tot_transit_trips_inc2) * inflation_00_20 / days_per_year - 
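The toll, fare, and auto operating-cost metrics in this section all apply one unit conversion: the model totals are in 2000 dollars (daily for tolls, annual for operating costs and fares), so each metric divides by the relevant household or trip count, scales to 2020 dollars with inflation_00_20, and divides the annual totals by days_per_year. A minimal sketch of that pattern with hypothetical inputs (inflation_00_20 and days_per_year are the constants already used in the code above):
# Hypothetical inputs for illustration only.
annual_fares_2000d = 500.0e6     # annual transit fares, 2000 dollars
daily_transit_trips = 1.5e6      # transit trips per day
fares_per_trip_2020d = (
    annual_fares_2000d / daily_transit_trips * inflation_00_20 / days_per_year
)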
metrics_dict[runid,metric_id,'fares_per_trip_inc1',year,dbp] = tm_tot_transit_fares_inc1 / tm_tot_transit_trips_inc1 * inflation_00_20 / days_per_year - - -def calculate_Affordable2_deed_restricted_housing(runid, dbp, parcel_sum_df, metrics_dict): + metrics_dict[runid, metric_id, "fares_per_trip", year, dbp] = ( + tm_tot_transit_fares / tm_tot_transit_trips * inflation_00_20 / days_per_year + ) + metrics_dict[runid, metric_id, "fares_per_trip_inc1and2", year, dbp] = ( + (tm_tot_transit_fares_inc1 + tm_tot_transit_fares_inc2) + / (tm_tot_transit_trips_inc1 + tm_tot_transit_trips_inc2) + * inflation_00_20 + / days_per_year + ) + metrics_dict[runid, metric_id, "fares_per_trip_inc1", year, dbp] = ( + tm_tot_transit_fares_inc1 + / tm_tot_transit_trips_inc1 + * inflation_00_20 + / days_per_year + ) + + +def calculate_Affordable2_deed_restricted_housing( + runid, dbp, parcel_sum_df, metrics_dict +): metric_id = "A2" # totals for 2050 and 2015 - metrics_dict[runid,metric_id,'deed_restricted_total',y2,dbp] = parcel_sum_df['deed_restricted_units_2050'].sum() - metrics_dict[runid,metric_id,'deed_restricted_total',y1,dbp] = parcel_sum_df['deed_restricted_units_2015'].sum() - metrics_dict[runid,metric_id,'residential_units_total',y2,dbp] = parcel_sum_df['residential_units_2050'].sum() - metrics_dict[runid,metric_id,'residential_units_total',y1,dbp] = parcel_sum_df['residential_units_2015'].sum() - metrics_dict[runid,metric_id,'deed_restricted_HRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'deed_restricted_units_2050'].sum() - metrics_dict[runid,metric_id,'deed_restricted_HRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'deed_restricted_units_2015'].sum() - metrics_dict[runid,metric_id,'residential_units_HRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'residential_units_2050'].sum() - metrics_dict[runid,metric_id,'residential_units_HRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'residential_units_2015'].sum() - metrics_dict[runid,metric_id,'deed_restricted_TRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'deed_restricted_units_2050'].sum() - metrics_dict[runid,metric_id,'deed_restricted_TRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'deed_restricted_units_2015'].sum() - metrics_dict[runid,metric_id,'residential_units_TRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'residential_units_2050'].sum() - metrics_dict[runid,metric_id,'residential_units_TRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'residential_units_2015'].sum() - metrics_dict[runid,metric_id,'deed_restricted_CoC',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['coc_flag_pba2050']==1, 'deed_restricted_units_2050'].sum() - metrics_dict[runid,metric_id,'deed_restricted_CoC',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['coc_flag_pba2050']==1, 'deed_restricted_units_2015'].sum() - metrics_dict[runid,metric_id,'residential_units_CoC',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['coc_flag_pba2050']==1, 'residential_units_2050'].sum() - metrics_dict[runid,metric_id,'residential_units_CoC',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['coc_flag_pba2050']==1, 'residential_units_2015'].sum() + metrics_dict[runid, metric_id, "deed_restricted_total", y2, dbp] = parcel_sum_df[ + "deed_restricted_units_2050" + ].sum() + 
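Note that y1, y2, and y_diff appear to be labels defined outside this function (for the 2015 totals, the 2050 totals, and the 2015-to-2050 change respectively), so each block below records the two point-in-time totals first and then stores their difference under y_diff.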
metrics_dict[runid, metric_id, "deed_restricted_total", y1, dbp] = parcel_sum_df[ + "deed_restricted_units_2015" + ].sum() + metrics_dict[runid, metric_id, "residential_units_total", y2, dbp] = parcel_sum_df[ + "residential_units_2050" + ].sum() + metrics_dict[runid, metric_id, "residential_units_total", y1, dbp] = parcel_sum_df[ + "residential_units_2015" + ].sum() + metrics_dict[runid, metric_id, "deed_restricted_HRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), + "deed_restricted_units_2050", + ].sum() + metrics_dict[runid, metric_id, "deed_restricted_HRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), + "deed_restricted_units_2015", + ].sum() + metrics_dict[ + runid, metric_id, "residential_units_HRA", y2, dbp + ] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), + "residential_units_2050", + ].sum() + metrics_dict[ + runid, metric_id, "residential_units_HRA", y1, dbp + ] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), + "residential_units_2015", + ].sum() + metrics_dict[runid, metric_id, "deed_restricted_TRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), + "deed_restricted_units_2050", + ].sum() + metrics_dict[runid, metric_id, "deed_restricted_TRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), + "deed_restricted_units_2015", + ].sum() + metrics_dict[ + runid, metric_id, "residential_units_TRA", y2, dbp + ] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), + "residential_units_2050", + ].sum() + metrics_dict[ + runid, metric_id, "residential_units_TRA", y1, dbp + ] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), + "residential_units_2015", + ].sum() + metrics_dict[runid, metric_id, "deed_restricted_CoC", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["coc_flag_pba2050"] == 1, "deed_restricted_units_2050" + ].sum() + metrics_dict[runid, metric_id, "deed_restricted_CoC", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["coc_flag_pba2050"] == 1, "deed_restricted_units_2015" + ].sum() + metrics_dict[ + runid, metric_id, "residential_units_CoC", y2, dbp + ] = parcel_sum_df.loc[ + parcel_sum_df["coc_flag_pba2050"] == 1, "residential_units_2050" + ].sum() + metrics_dict[ + runid, metric_id, "residential_units_CoC", y1, dbp + ] = parcel_sum_df.loc[ + parcel_sum_df["coc_flag_pba2050"] == 1, "residential_units_2015" + ].sum() # diff between 2050 and 2015 - metrics_dict[runid,metric_id,'deed_restricted_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_total',y2,dbp] - metrics_dict[runid,metric_id,'deed_restricted_total',y1,dbp] - metrics_dict[runid,metric_id,'residential_units_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'residential_units_total',y2,dbp] - metrics_dict[runid,metric_id,'residential_units_total',y1,dbp] - metrics_dict[runid,metric_id,'deed_restricted_HRA_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_HRA',y2,dbp] - metrics_dict[runid,metric_id,'deed_restricted_HRA',y1,dbp] - metrics_dict[runid,metric_id,'residential_units_HRA_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'residential_units_HRA',y2,dbp] - metrics_dict[runid,metric_id,'residential_units_HRA',y1,dbp] - metrics_dict[runid,metric_id,'deed_restricted_TRA_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_TRA',y2,dbp] - 
metrics_dict[runid,metric_id,'deed_restricted_TRA',y1,dbp] - metrics_dict[runid,metric_id,'residential_units_TRA_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'residential_units_TRA',y2,dbp] - metrics_dict[runid,metric_id,'residential_units_TRA',y1,dbp] - metrics_dict[runid,metric_id,'deed_restricted_nonHRA_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_diff',y_diff,dbp] - metrics_dict[runid,metric_id,'deed_restricted_HRA_diff',y_diff,dbp] - metrics_dict[runid,metric_id,'residential_units_nonHRA_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'residential_units_diff',y_diff,dbp] - metrics_dict[runid,metric_id,'residential_units_HRA_diff',y_diff,dbp] - metrics_dict[runid,metric_id,'deed_restricted_CoC_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_CoC',y2,dbp] - metrics_dict[runid,metric_id,'deed_restricted_CoC',y1,dbp] - metrics_dict[runid,metric_id,'residential_units_CoC_diff',y_diff,dbp] = metrics_dict[runid,metric_id,'residential_units_CoC',y2,dbp] - metrics_dict[runid,metric_id,'residential_units_CoC',y1,dbp] + metrics_dict[runid, metric_id, "deed_restricted_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_total", y2, dbp] + - metrics_dict[runid, metric_id, "deed_restricted_total", y1, dbp] + ) + metrics_dict[runid, metric_id, "residential_units_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "residential_units_total", y2, dbp] + - metrics_dict[runid, metric_id, "residential_units_total", y1, dbp] + ) + metrics_dict[runid, metric_id, "deed_restricted_HRA_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_HRA", y2, dbp] + - metrics_dict[runid, metric_id, "deed_restricted_HRA", y1, dbp] + ) + metrics_dict[runid, metric_id, "residential_units_HRA_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "residential_units_HRA", y2, dbp] + - metrics_dict[runid, metric_id, "residential_units_HRA", y1, dbp] + ) + metrics_dict[runid, metric_id, "deed_restricted_TRA_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_TRA", y2, dbp] + - metrics_dict[runid, metric_id, "deed_restricted_TRA", y1, dbp] + ) + metrics_dict[runid, metric_id, "residential_units_TRA_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "residential_units_TRA", y2, dbp] + - metrics_dict[runid, metric_id, "residential_units_TRA", y1, dbp] + ) + metrics_dict[runid, metric_id, "deed_restricted_nonHRA_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_diff", y_diff, dbp] + - metrics_dict[runid, metric_id, "deed_restricted_HRA_diff", y_diff, dbp] + ) + metrics_dict[runid, metric_id, "residential_units_nonHRA_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "residential_units_diff", y_diff, dbp] + - metrics_dict[runid, metric_id, "residential_units_HRA_diff", y_diff, dbp] + ) + metrics_dict[runid, metric_id, "deed_restricted_CoC_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_CoC", y2, dbp] + - metrics_dict[runid, metric_id, "deed_restricted_CoC", y1, dbp] + ) + metrics_dict[runid, metric_id, "residential_units_CoC_diff", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "residential_units_CoC", y2, dbp] + - metrics_dict[runid, metric_id, "residential_units_CoC", y1, dbp] + ) # metric: deed restricted % of total units: overall, HRA and non-HRA - metrics_dict[runid,metric_id,'deed_restricted_pct_new_units',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_diff',y_diff,dbp] / metrics_dict[runid,metric_id,'residential_units_diff',y_diff,dbp] - 
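The headline A2 metric is the deed-restricted share of net new units: deed-restricted units added between 2015 and 2050 divided by total units added, computed region-wide and then for the HRA, TRA, non-HRA, and CoC subsets. A small sketch with hypothetical inputs (the code here assumes the denominator is nonzero; a subset with no net new units would divide by zero):
dr_added = dr_units_2050 - dr_units_2015            # hypothetical deed-restricted units added
units_added = total_units_2050 - total_units_2015   # hypothetical total units added
dr_pct_new_units = dr_added / units_added if units_added else float("nan")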
metrics_dict[runid,metric_id,'deed_restricted_pct_new_units_HRA',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_HRA_diff',y_diff,dbp]/metrics_dict[runid,metric_id,'residential_units_HRA_diff',y_diff,dbp] - metrics_dict[runid,metric_id,'deed_restricted_pct_new_units_TRA',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_TRA_diff',y_diff,dbp]/metrics_dict[runid,metric_id,'residential_units_TRA_diff',y_diff,dbp] - metrics_dict[runid,metric_id,'deed_restricted_pct_new_units_nonHRA',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_nonHRA_diff',y_diff,dbp]/metrics_dict[runid,metric_id,'residential_units_nonHRA_diff',y_diff,dbp] - metrics_dict[runid,metric_id,'deed_restricted_pct_new_units_CoC',y_diff,dbp] = metrics_dict[runid,metric_id,'deed_restricted_CoC_diff',y_diff,dbp]/metrics_dict[runid,metric_id,'residential_units_CoC_diff',y_diff,dbp] - - print('********************A2 Affordable********************') - print('DR pct of new units %s' % dbp,metrics_dict[runid,metric_id,'deed_restricted_pct_new_units',y_diff,dbp] ) - print('DR pct of new units in HRAs %s' % dbp,metrics_dict[runid,metric_id,'deed_restricted_pct_new_units_HRA',y_diff,dbp] ) - print('DR pct of new units in TRAs %s' % dbp,metrics_dict[runid,metric_id,'deed_restricted_pct_new_units_TRA',y_diff,dbp] ) - print('DR pct of new units outside of HRAs %s' % dbp,metrics_dict[runid,metric_id,'deed_restricted_pct_new_units_nonHRA',y_diff,dbp]) - + metrics_dict[runid, metric_id, "deed_restricted_pct_new_units", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_diff", y_diff, dbp] + / metrics_dict[runid, metric_id, "residential_units_diff", y_diff, dbp] + ) + metrics_dict[runid, metric_id, "deed_restricted_pct_new_units_HRA", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_HRA_diff", y_diff, dbp] + / metrics_dict[runid, metric_id, "residential_units_HRA_diff", y_diff, dbp] + ) + metrics_dict[runid, metric_id, "deed_restricted_pct_new_units_TRA", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_TRA_diff", y_diff, dbp] + / metrics_dict[runid, metric_id, "residential_units_TRA_diff", y_diff, dbp] + ) + metrics_dict[ + runid, metric_id, "deed_restricted_pct_new_units_nonHRA", y_diff, dbp + ] = ( + metrics_dict[runid, metric_id, "deed_restricted_nonHRA_diff", y_diff, dbp] + / metrics_dict[runid, metric_id, "residential_units_nonHRA_diff", y_diff, dbp] + ) + metrics_dict[runid, metric_id, "deed_restricted_pct_new_units_CoC", y_diff, dbp] = ( + metrics_dict[runid, metric_id, "deed_restricted_CoC_diff", y_diff, dbp] + / metrics_dict[runid, metric_id, "residential_units_CoC_diff", y_diff, dbp] + ) + + print("********************A2 Affordable********************") + print( + "DR pct of new units %s" % dbp, + metrics_dict[runid, metric_id, "deed_restricted_pct_new_units", y_diff, dbp], + ) + print( + "DR pct of new units in HRAs %s" % dbp, + metrics_dict[ + runid, metric_id, "deed_restricted_pct_new_units_HRA", y_diff, dbp + ], + ) + print( + "DR pct of new units in TRAs %s" % dbp, + metrics_dict[ + runid, metric_id, "deed_restricted_pct_new_units_TRA", y_diff, dbp + ], + ) + print( + "DR pct of new units outside of HRAs %s" % dbp, + metrics_dict[ + runid, metric_id, "deed_restricted_pct_new_units_nonHRA", y_diff, dbp + ], + ) # Forcing preservation metrics - metrics_dict[runid,metric_id,'preservation_affordable_housing',y_diff,dbp] = 1 + metrics_dict[runid, metric_id, "preservation_affordable_housing", y_diff, dbp] = 1 -def 
calculate_Connected1_accessibility(runid, year, dbp, tm_scen_metrics_df, metrics_dict): - +def calculate_Connected1_accessibility( + runid, year, dbp, tm_scen_metrics_df, metrics_dict +): + metric_id = "C1" # % of Jobs accessible by 30 min car OR 45 min transit - metrics_dict[runid,metric_id,'pct_jobs_acc_by_allmodes',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_accessible_job_share"), 'value'].item() - metrics_dict[runid,metric_id,'pct_jobs_acc_by_allmodes_coc',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_accessible_job_share_coc"), 'value'].item() - metrics_dict[runid,metric_id,'pct_jobs_acc_by_allmodes_noncoc',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_accessible_job_share_noncoc"), 'value'].item() - + metrics_dict[ + runid, metric_id, "pct_jobs_acc_by_allmodes", year, dbp + ] = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "jobacc_accessible_job_share"), "value" + ].item() + metrics_dict[ + runid, metric_id, "pct_jobs_acc_by_allmodes_coc", year, dbp + ] = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "jobacc_accessible_job_share_coc"), + "value", + ].item() + metrics_dict[ + runid, metric_id, "pct_jobs_acc_by_allmodes_noncoc", year, dbp + ] = tm_scen_metrics_df.loc[ + (tm_scen_metrics_df["metric_name"] == "jobacc_accessible_job_share_noncoc"), + "value", + ].item() + # % of Jobs accessible by 30 min car only - metrics_dict[runid,metric_id,'pct_jobs_acc_by_drv_only',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_drv_only_acc_accessible_job_share"), 'value'].item() \ - + tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_drv_acc_accessible_job_share"), 'value'].item() - - metrics_dict[runid,metric_id,'pct_jobs_acc_by_drv_only_coc',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_drv_only_acc_accessible_job_share_coc"), 'value'].item() \ - + tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_drv_acc_accessible_job_share_coc"), 'value'].item() - metrics_dict[runid,metric_id,'pct_jobs_acc_by_drv_only_noncoc',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_drv_only_acc_accessible_job_share_noncoc"), 'value'].item() \ - + tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_drv_acc_accessible_job_share_noncoc"), 'value'].item() - - # % of Jobs accessible by 45 min transit only - metrics_dict[runid,metric_id,'pct_jobs_acc_by_trn_only',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_only_acc_accessible_job_share"), 'value'].item() \ - + tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_drv_acc_accessible_job_share"), 'value'].item() - - metrics_dict[runid,metric_id,'pct_jobs_acc_by_trn_only_coc',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_only_acc_accessible_job_share_coc"), 'value'].item() \ - + tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_drv_acc_accessible_job_share_coc"), 'value'].item() - - metrics_dict[runid,metric_id,'pct_jobs_acc_by_trn_only_noncoc',year,dbp] = \ - tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_only_acc_accessible_job_share_noncoc"), 'value'].item() \ - + tm_scen_metrics_df.loc[(tm_scen_metrics_df['metric_name'] == "jobacc_trn_drv_acc_accessible_job_share_noncoc"), 'value'].item() + 
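In this accessibility block the model reports mutually exclusive job shares (reachable by drive only, by transit only, or by both), so the metric stored as pct_jobs_acc_by_drv_only appears to be the share of jobs reachable within 30 minutes by car regardless of transit access, and the transit metric is assembled the same way:
# Composition of the shares, assuming the three model outputs are exclusive:
# drive-reachable   = jobacc_drv_only_acc_accessible_job_share + jobacc_trn_drv_acc_accessible_job_share
# transit-reachable = jobacc_trn_only_acc_accessible_job_share + jobacc_trn_drv_acc_accessible_job_share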
metrics_dict[runid, metric_id, "pct_jobs_acc_by_drv_only", year, dbp] = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_drv_only_acc_accessible_job_share" + ), + "value", + ].item() + + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_drv_acc_accessible_job_share" + ), + "value", + ].item() + ) + + metrics_dict[runid, metric_id, "pct_jobs_acc_by_drv_only_coc", year, dbp] = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_drv_only_acc_accessible_job_share_coc" + ), + "value", + ].item() + + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_drv_acc_accessible_job_share_coc" + ), + "value", + ].item() + ) + metrics_dict[runid, metric_id, "pct_jobs_acc_by_drv_only_noncoc", year, dbp] = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_drv_only_acc_accessible_job_share_noncoc" + ), + "value", + ].item() + + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_drv_acc_accessible_job_share_noncoc" + ), + "value", + ].item() + ) + + # % of Jobs accessible by 45 min transit only + metrics_dict[runid, metric_id, "pct_jobs_acc_by_trn_only", year, dbp] = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_only_acc_accessible_job_share" + ), + "value", + ].item() + + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_drv_acc_accessible_job_share" + ), + "value", + ].item() + ) + + metrics_dict[runid, metric_id, "pct_jobs_acc_by_trn_only_coc", year, dbp] = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_only_acc_accessible_job_share_coc" + ), + "value", + ].item() + + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_drv_acc_accessible_job_share_coc" + ), + "value", + ].item() + ) + + metrics_dict[runid, metric_id, "pct_jobs_acc_by_trn_only_noncoc", year, dbp] = ( + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_only_acc_accessible_job_share_noncoc" + ), + "value", + ].item() + + tm_scen_metrics_df.loc[ + ( + tm_scen_metrics_df["metric_name"] + == "jobacc_trn_drv_acc_accessible_job_share_noncoc" + ), + "value", + ].item() + ) def calculate_Connected1_proximity(runid, year, dbp, tm_scen_metrics_df, metrics_dict): - - metric_id = "C1" - - + metric_id = "C1" def calculate_Connected2_crowding(runid, year, dbp, transit_operator_df, metrics_dict): - - metric_id = "C2" - - if "2015" in runid: tm_run_location = tm_run_location_ipa - else: tm_run_location = tm_run_location_bp - tm_crowding_df = pd.read_csv(tm_run_location+runid+'/OUTPUT/metrics/transit_crowding_complete.csv') - - tm_crowding_df = tm_crowding_df[['TIME','SYSTEM','ABNAMESEQ','period','load_standcap','AB_VOL']] - tm_crowding_df = tm_crowding_df.loc[tm_crowding_df['period'] == "AM"] - tm_crowding_df['time_overcapacity'] = tm_crowding_df.apply (lambda row: row['TIME'] if (row['load_standcap']>1) else 0, axis=1) - tm_crowding_df['time_crowded'] = tm_crowding_df.apply (lambda row: row['TIME'] if (row['load_standcap']>0.85) else 0, axis=1) - tm_crowding_df['person_hrs_total'] = tm_crowding_df['TIME'] * tm_crowding_df['AB_VOL'] - tm_crowding_df['person_hrs_overcap'] = tm_crowding_df['time_overcapacity'] * tm_crowding_df['AB_VOL'] - tm_crowding_df['person_hrs_crowded'] = tm_crowding_df['time_crowded'] * tm_crowding_df['AB_VOL'] - - - tm_crowding_df = pd.merge(left=tm_crowding_df, right=transit_operator_df, 
left_on="SYSTEM", right_on="SYSTEM", how="left") - - system_crowding_df = tm_crowding_df[['person_hrs_total','person_hrs_overcap','person_hrs_crowded']].groupby(tm_crowding_df['operator']).sum().reset_index() - system_crowding_df['pct_overcapacity'] = system_crowding_df['person_hrs_overcap'] / system_crowding_df['person_hrs_total'] - system_crowding_df['pct_crowded'] = system_crowding_df['person_hrs_crowded'] / system_crowding_df['person_hrs_total'] - - for index,row in system_crowding_df.iterrows(): - if row['operator'] in ['AC Transit Local','AC Transit Transbay','SFMTA LRT','SFMTA Bus','VTA Bus Local','VTA LRT','BART','Caltrain','SamTrans Local','GGT Express','WETA']: - metrics_dict[runid,metric_id,'crowded_pct_personhrs_AM_%s' % row['operator'],year,dbp] = row['pct_crowded'] + metric_id = "C2" -def calculate_Connected2_hwy_traveltimes(runid, year, dbp, hwy_corridor_links_df, metrics_dict): + if "2015" in runid: + tm_run_location = tm_run_location_ipa + else: + tm_run_location = tm_run_location_bp + tm_crowding_df = pd.read_csv( + tm_run_location + runid + "/OUTPUT/metrics/transit_crowding_complete.csv" + ) + + tm_crowding_df = tm_crowding_df[ + ["TIME", "SYSTEM", "ABNAMESEQ", "period", "load_standcap", "AB_VOL"] + ] + tm_crowding_df = tm_crowding_df.loc[tm_crowding_df["period"] == "AM"] + tm_crowding_df["time_overcapacity"] = tm_crowding_df.apply( + lambda row: row["TIME"] if (row["load_standcap"] > 1) else 0, axis=1 + ) + tm_crowding_df["time_crowded"] = tm_crowding_df.apply( + lambda row: row["TIME"] if (row["load_standcap"] > 0.85) else 0, axis=1 + ) + tm_crowding_df["person_hrs_total"] = ( + tm_crowding_df["TIME"] * tm_crowding_df["AB_VOL"] + ) + tm_crowding_df["person_hrs_overcap"] = ( + tm_crowding_df["time_overcapacity"] * tm_crowding_df["AB_VOL"] + ) + tm_crowding_df["person_hrs_crowded"] = ( + tm_crowding_df["time_crowded"] * tm_crowding_df["AB_VOL"] + ) + + tm_crowding_df = pd.merge( + left=tm_crowding_df, + right=transit_operator_df, + left_on="SYSTEM", + right_on="SYSTEM", + how="left", + ) + + system_crowding_df = ( + tm_crowding_df[["person_hrs_total", "person_hrs_overcap", "person_hrs_crowded"]] + .groupby(tm_crowding_df["operator"]) + .sum() + .reset_index() + ) + system_crowding_df["pct_overcapacity"] = ( + system_crowding_df["person_hrs_overcap"] + / system_crowding_df["person_hrs_total"] + ) + system_crowding_df["pct_crowded"] = ( + system_crowding_df["person_hrs_crowded"] + / system_crowding_df["person_hrs_total"] + ) + + for index, row in system_crowding_df.iterrows(): + if row["operator"] in [ + "AC Transit Local", + "AC Transit Transbay", + "SFMTA LRT", + "SFMTA Bus", + "VTA Bus Local", + "VTA LRT", + "BART", + "Caltrain", + "SamTrans Local", + "GGT Express", + "WETA", + ]: + metrics_dict[ + runid, + metric_id, + "crowded_pct_personhrs_AM_%s" % row["operator"], + year, + dbp, + ] = row["pct_crowded"] + + +def calculate_Connected2_hwy_traveltimes( + runid, year, dbp, hwy_corridor_links_df, metrics_dict +): metric_id = "C2" - if "2015" in runid: tm_run_location = tm_run_location_ipa - else: tm_run_location = tm_run_location_bp - tm_loaded_network_df = pd.read_csv(tm_run_location+runid+'/OUTPUT/avgload5period.csv') + if "2015" in runid: + tm_run_location = tm_run_location_ipa + else: + tm_run_location = tm_run_location_bp + tm_loaded_network_df = pd.read_csv( + tm_run_location + runid + "/OUTPUT/avgload5period.csv" + ) # Keeping essential columns of loaded highway network: node A and B, distance, free flow time, congested time tm_loaded_network_df = 
tm_loaded_network_df.rename(columns=lambda x: x.strip()) - tm_loaded_network_df = tm_loaded_network_df[['a','b','distance','fft','ctimAM']] - tm_loaded_network_df['link'] = tm_loaded_network_df['a'].astype(str) + "_" + tm_loaded_network_df['b'].astype(str) + tm_loaded_network_df = tm_loaded_network_df[["a", "b", "distance", "fft", "ctimAM"]] + tm_loaded_network_df["link"] = ( + tm_loaded_network_df["a"].astype(str) + + "_" + + tm_loaded_network_df["b"].astype(str) + ) # merging df that has the list of all - hwy_corridor_links_df = pd.merge(left=hwy_corridor_links_df, right=tm_loaded_network_df, left_on="link", right_on="link", how="left") - corridor_travel_times_df = hwy_corridor_links_df[['distance','fft','ctimAM']].groupby(hwy_corridor_links_df['route']).sum().reset_index() - - for index,row in corridor_travel_times_df.iterrows(): - metrics_dict[runid,metric_id,'travel_time_AM_%s' % row['route'],year,dbp] = row['ctimAM'] - -def calculate_Connected2_trn_traveltimes(runid, year, dbp, transit_operator_df, metrics_dict): + hwy_corridor_links_df = pd.merge( + left=hwy_corridor_links_df, + right=tm_loaded_network_df, + left_on="link", + right_on="link", + how="left", + ) + corridor_travel_times_df = ( + hwy_corridor_links_df[["distance", "fft", "ctimAM"]] + .groupby(hwy_corridor_links_df["route"]) + .sum() + .reset_index() + ) + + for index, row in corridor_travel_times_df.iterrows(): + metrics_dict[ + runid, metric_id, "travel_time_AM_%s" % row["route"], year, dbp + ] = row["ctimAM"] + + +def calculate_Connected2_trn_traveltimes( + runid, year, dbp, transit_operator_df, metrics_dict +): metric_id = "C2" - if "2015" in runid: tm_run_location = tm_run_location_ipa - else: tm_run_location = tm_run_location_bp - tm_trn_line_df = pd.read_csv(tm_run_location+runid+'/OUTPUT/trn/trnline.csv') + if "2015" in runid: + tm_run_location = tm_run_location_ipa + else: + tm_run_location = tm_run_location_bp + tm_trn_line_df = pd.read_csv(tm_run_location + runid + "/OUTPUT/trn/trnline.csv") # It doesn't really matter which path ID we pick, as long as it is AM - tm_trn_line_df = tm_trn_line_df.loc[tm_trn_line_df['path id'] == "am_wlk_loc_wlk"] - tm_trn_line_df = pd.merge(left=tm_trn_line_df, right=transit_operator_df, left_on="mode", right_on="mode", how="left") + tm_trn_line_df = tm_trn_line_df.loc[tm_trn_line_df["path id"] == "am_wlk_loc_wlk"] + tm_trn_line_df = pd.merge( + left=tm_trn_line_df, + right=transit_operator_df, + left_on="mode", + right_on="mode", + how="left", + ) # grouping by transit operator, and summing all line times and distances, to get metric of "time per unit distance", in minutes/mile - trn_operator_travel_times_df = tm_trn_line_df[['line time','line dist']].groupby(tm_trn_line_df['operator']).sum().reset_index() - trn_operator_travel_times_df['time_per_dist_AM'] = trn_operator_travel_times_df['line time'] / trn_operator_travel_times_df['line dist'] + trn_operator_travel_times_df = ( + tm_trn_line_df[["line time", "line dist"]] + .groupby(tm_trn_line_df["operator"]) + .sum() + .reset_index() + ) + trn_operator_travel_times_df["time_per_dist_AM"] = ( + trn_operator_travel_times_df["line time"] + / trn_operator_travel_times_df["line dist"] + ) # grouping by mode, and summing all line times and distances, to get metric of "time per unit distance", in minutes/mile - trn_mode_travel_times_df = tm_trn_line_df[['line time','line dist']].groupby(tm_trn_line_df['mode_name']).sum().reset_index() - trn_mode_travel_times_df['time_per_dist_AM'] = trn_mode_travel_times_df['line time'] / 
trn_mode_travel_times_df['line dist'] - - for index,row in trn_operator_travel_times_df.iterrows(): - if row['operator'] in ['AC Transit Local','AC Transit Transbay','SFMTA LRT','SFMTA Bus','VTA Bus Local','VTA LRT','BART','Caltrain','SamTrans Local']: - metrics_dict[runid,metric_id,'time_per_dist_AM_%s' % row['operator'],year,dbp] = row['time_per_dist_AM'] - - for index,row in trn_mode_travel_times_df.iterrows(): - metrics_dict[runid,metric_id,'time_per_dist_AM_%s' % row['mode_name'],year,dbp] = row['time_per_dist_AM'] - - - -def calculate_Diverse1_LIHHinHRAs(runid, dbp, parcel_sum_df, tract_sum_df, GG_sum_df, normalize_factor_Q1Q2, normalize_factor_Q1, metrics_dict): + trn_mode_travel_times_df = ( + tm_trn_line_df[["line time", "line dist"]] + .groupby(tm_trn_line_df["mode_name"]) + .sum() + .reset_index() + ) + trn_mode_travel_times_df["time_per_dist_AM"] = ( + trn_mode_travel_times_df["line time"] / trn_mode_travel_times_df["line dist"] + ) + + for index, row in trn_operator_travel_times_df.iterrows(): + if row["operator"] in [ + "AC Transit Local", + "AC Transit Transbay", + "SFMTA LRT", + "SFMTA Bus", + "VTA Bus Local", + "VTA LRT", + "BART", + "Caltrain", + "SamTrans Local", + ]: + metrics_dict[ + runid, metric_id, "time_per_dist_AM_%s" % row["operator"], year, dbp + ] = row["time_per_dist_AM"] + + for index, row in trn_mode_travel_times_df.iterrows(): + metrics_dict[ + runid, metric_id, "time_per_dist_AM_%s" % row["mode_name"], year, dbp + ] = row["time_per_dist_AM"] + + +def calculate_Diverse1_LIHHinHRAs( + runid, + dbp, + parcel_sum_df, + tract_sum_df, + GG_sum_df, + normalize_factor_Q1Q2, + normalize_factor_Q1, + metrics_dict, +): metric_id = "D1" # Share of region's LIHH households that are in HRAs - metrics_dict[runid,metric_id,'LIHH_total',y2,dbp] = parcel_sum_df['hhq1_2050'].sum() + parcel_sum_df['hhq2_2050'].sum() - metrics_dict[runid,metric_id,'LIHH_total',y1,dbp] = parcel_sum_df['hhq1_2015'].sum() + parcel_sum_df['hhq2_2015'].sum() - metrics_dict[runid,metric_id,'LIHH_inHRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'hhq1_2050'].sum() + parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'hhq2_2050'].sum() - metrics_dict[runid,metric_id,'LIHH_inHRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'hhq1_2015'].sum() + parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'hhq2_2015'].sum() - metrics_dict[runid,metric_id,'LIHH_shareinHRA',y2,dbp] = metrics_dict[runid,metric_id,'LIHH_inHRA',y2,dbp] / metrics_dict[runid,metric_id,'LIHH_total',y2,dbp] - metrics_dict[runid,metric_id,'LIHH_shareinHRA',y1,dbp] = metrics_dict[runid,metric_id,'LIHH_inHRA',y1,dbp] / metrics_dict[runid,metric_id,'LIHH_total',y1,dbp] + metrics_dict[runid, metric_id, "LIHH_total", y2, dbp] = ( + parcel_sum_df["hhq1_2050"].sum() + parcel_sum_df["hhq2_2050"].sum() + ) + metrics_dict[runid, metric_id, "LIHH_total", y1, dbp] = ( + parcel_sum_df["hhq1_2015"].sum() + parcel_sum_df["hhq2_2015"].sum() + ) + metrics_dict[runid, metric_id, "LIHH_inHRA", y2, dbp] = ( + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "hhq1_2050" + ].sum() + + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "hhq2_2050" + ].sum() + ) + metrics_dict[runid, metric_id, "LIHH_inHRA", y1, dbp] = ( + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "hhq1_2015" + ].sum() + + parcel_sum_df.loc[ + 
parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "hhq2_2015" + ].sum() + ) + metrics_dict[runid, metric_id, "LIHH_shareinHRA", y2, dbp] = ( + metrics_dict[runid, metric_id, "LIHH_inHRA", y2, dbp] + / metrics_dict[runid, metric_id, "LIHH_total", y2, dbp] + ) + metrics_dict[runid, metric_id, "LIHH_shareinHRA", y1, dbp] = ( + metrics_dict[runid, metric_id, "LIHH_inHRA", y1, dbp] + / metrics_dict[runid, metric_id, "LIHH_total", y1, dbp] + ) # normalizing for overall growth in LIHH - metrics_dict[runid,metric_id,'LIHH_shareinHRA_normalized',y1,dbp] = metrics_dict[runid,metric_id,'LIHH_shareinHRA',y1,dbp] * normalize_factor_Q1Q2 + metrics_dict[runid, metric_id, "LIHH_shareinHRA_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "LIHH_shareinHRA", y1, dbp] + * normalize_factor_Q1Q2 + ) # Total number of Households # Total HHs in HRAs, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inHRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inHRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inHRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_inHRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "tothh_2050" + ].sum() # Total HHs in TRAs, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inTRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inTRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inTRA", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_inTRA", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "tothh_2050" + ].sum() # Total HHs in HRAs only, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inHRAonly',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) & \ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inHRAonly',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) & \ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False), 'tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inHRAonly", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False), + "tothh_2015", + ].sum() + metrics_dict[runid, metric_id, "TotHH_inHRAonly", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False), + "tothh_2050", + ].sum() # Total HHs in TRAs only, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inTRAonly',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) & \ - (parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inTRAonly',y2,dbp] = 
parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) & \ - (parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False), 'tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inTRAonly", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False), + "tothh_2015", + ].sum() + metrics_dict[runid, metric_id, "TotHH_inTRAonly", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False), + "tothh_2050", + ].sum() # Total HHs in HRA/TRAs, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inHRATRA',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) & \ - (parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inHRATRA',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) & \ - (parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)), 'tothh_2050'].sum() - # Total HHs in DR Tracts, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inDRTracts',y1,dbp] = tract_sum_df.loc[(tract_sum_df['DispRisk'] == 1), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inDRTracts',y2,dbp] = tract_sum_df.loc[(tract_sum_df['DispRisk'] == 1), 'tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inHRATRA", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)), + "tothh_2015", + ].sum() + metrics_dict[runid, metric_id, "TotHH_inHRATRA", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)), + "tothh_2050", + ].sum() + # Total HHs in DR Tracts, in 2015 and 2050 + metrics_dict[runid, metric_id, "TotHH_inDRTracts", y1, dbp] = tract_sum_df.loc[ + (tract_sum_df["DispRisk"] == 1), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_inDRTracts", y2, dbp] = tract_sum_df.loc[ + (tract_sum_df["DispRisk"] == 1), "tothh_2050" + ].sum() # Total HHs in CoC Tracts, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inCoCTracts',y1,dbp] = tract_sum_df.loc[(tract_sum_df['coc_flag_pba2050'] == 1), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inCoCTracts',y2,dbp] = tract_sum_df.loc[(tract_sum_df['coc_flag_pba2050'] == 1), 'tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inCoCTracts", y1, dbp] = tract_sum_df.loc[ + (tract_sum_df["coc_flag_pba2050"] == 1), "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_inCoCTracts", y2, dbp] = tract_sum_df.loc[ + (tract_sum_df["coc_flag_pba2050"] == 1), "tothh_2050" + ].sum() # Total HHs in remainder of region (RoR); i.e. 
not HRA or TRA or CoC or DR - metrics_dict[runid,metric_id,'TotHH_inRoR',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('DR', na=False) == False) & \ - (parcel_sum_df['coc_flag_pba2050'] == 0), 'tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inRoR',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('DR', na=False) == False) & \ - (parcel_sum_df['coc_flag_pba2050'] == 0), 'tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inRoR", y1, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("DR", na=False) == False) + & (parcel_sum_df["coc_flag_pba2050"] == 0), + "tothh_2015", + ].sum() + metrics_dict[runid, metric_id, "TotHH_inRoR", y2, dbp] = parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("DR", na=False) == False) + & (parcel_sum_df["coc_flag_pba2050"] == 0), + "tothh_2050", + ].sum() # Total HHs in GGs, in 2015 and 2050 - metrics_dict[runid,metric_id,'TotHH_inGGs',y1,dbp] = GG_sum_df['tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inGGs',y2,dbp] = GG_sum_df['tothh_2050'].sum() + metrics_dict[runid, metric_id, "TotHH_inGGs", y1, dbp] = GG_sum_df[ + "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_inGGs", y2, dbp] = GG_sum_df[ + "tothh_2050" + ].sum() # Total HHs in Transit Rich GGs, in 2015 and 2050 - GG_TRich_sum_df = GG_sum_df[GG_sum_df['Designation']=="Transit-Rich"] - metrics_dict[runid,metric_id,'TotHH_inTRichGGs',y1,dbp] = GG_TRich_sum_df['tothh_2015'].sum() - metrics_dict[runid,metric_id,'TotHH_inTRichGGs',y2,dbp] = GG_TRich_sum_df['tothh_2050'].sum() - - + GG_TRich_sum_df = GG_sum_df[GG_sum_df["Designation"] == "Transit-Rich"] + metrics_dict[runid, metric_id, "TotHH_inTRichGGs", y1, dbp] = GG_TRich_sum_df[ + "tothh_2015" + ].sum() + metrics_dict[runid, metric_id, "TotHH_inTRichGGs", y2, dbp] = GG_TRich_sum_df[ + "tothh_2050" + ].sum() ########### Tracking movement of Q1 households: Q1 share of Households # Share of Households that are Q1, within each geography type in this order: # Overall Region; HRAs; TRAs, DR Tracts; CoCs; Rest of Region; and also GGs and TRichGGs - metrics_dict[runid,metric_id,'Q1HH_shareofRegion',y1,dbp] = parcel_sum_df['hhq1_2015'].sum() / parcel_sum_df['tothh_2015'].sum() - metrics_dict[runid,metric_id,'Q1HH_shareofRegion_normalized',y1,dbp] = parcel_sum_df['hhq1_2015'].sum() / parcel_sum_df['tothh_2015'].sum() * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofRegion',y2,dbp] = parcel_sum_df['hhq1_2050'].sum() / parcel_sum_df['tothh_2050'].sum() - - metrics_dict[runid,metric_id,'Q1HH_shareofHRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inHRA',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofHRA_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofHRA',y1,dbp] * normalize_factor_Q1 - 
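As the earlier "normalizing for overall growth" comment notes, the *_normalized variants rescale the 2015 shares to account for region-wide growth in low-income households so that the 2015 and 2050 shares are comparable; the pattern throughout is simply share_2015 * normalize_factor_Q1 (or normalize_factor_Q1Q2 when Q1 and Q2 households are combined), with the 2050 share left unscaled.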
metrics_dict[runid,metric_id,'Q1HH_shareofHRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('HRA', na=False), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inHRA',y2,dbp] - - metrics_dict[runid,metric_id,'Q1HH_shareofTRA',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inTRA',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofTRA_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofTRA',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofTRA',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('tra', na=False), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inTRA',y2,dbp] - - metrics_dict[runid,metric_id,'Q1HH_shareofHRAonly',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) & (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inHRAonly',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofHRAonly_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofHRAonly',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofHRAonly',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) & (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inHRAonly',y2,dbp] - - metrics_dict[runid,metric_id,'Q1HH_shareofTRAonly',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) & (parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inTRAonly',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofTRAonly_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofTRAonly',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofTRAonly',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('tra', na=False)) & (parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inTRAonly',y2,dbp] - - metrics_dict[runid,metric_id,'Q1HH_shareofHRATRA',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) & (parcel_sum_df['pba50chcat'].str.contains('tra', na=False)), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inHRATRA',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofHRATRA_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofHRATRA',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofHRATRA',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False)) & (parcel_sum_df['pba50chcat'].str.contains('tra', na=False)), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inHRATRA',y2,dbp] - - metrics_dict[runid,metric_id,'Q1HH_shareofDRTracts',y1,dbp] = tract_sum_df.loc[(tract_sum_df['DispRisk'] == 1), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inDRTracts',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofDRTracts_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofDRTracts',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofDRTracts',y2,dbp] = tract_sum_df.loc[(tract_sum_df['DispRisk'] == 1), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inDRTracts',y2,dbp] - - 
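Here DispRisk == 1 flags tracts identified as being at displacement risk and coc_flag_pba2050 == 1 flags Communities of Concern tracts, so these two tract-based blocks track the Q1 household share the same way the parcel-based geographies (HRA, TRA, rest of region) are handled above.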
metrics_dict[runid,metric_id,'Q1HH_shareofCoCTracts',y1,dbp] = tract_sum_df.loc[(tract_sum_df['coc_flag_pba2050'] == 1), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inCoCTracts',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofCoCTracts_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofCoCTracts',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofCoCTracts',y2,dbp] = tract_sum_df.loc[(tract_sum_df['coc_flag_pba2050'] == 1), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inCoCTracts',y2,dbp] - - metrics_dict[runid,metric_id,'Q1HH_shareofRoR',y1,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('DR', na=False) == False) & \ - (parcel_sum_df['coc_flag_pba2050'] == 0), 'hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inRoR',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofRoR_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofRoR',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofRoR',y2,dbp] = parcel_sum_df.loc[(parcel_sum_df['pba50chcat'].str.contains('HRA', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('tra', na=False) == False) & \ - (parcel_sum_df['pba50chcat'].str.contains('DR', na=False) == False) & \ - (parcel_sum_df['coc_flag_pba2050'] == 0), 'hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inRoR',y2,dbp] - - - metrics_dict[runid,metric_id,'Q1HH_shareofGGs',y1,dbp] = GG_sum_df['hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inGGs',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofGGs_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofGGs',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofGGs',y2,dbp] = GG_sum_df['hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inGGs',y2,dbp] - - metrics_dict[runid,metric_id,'Q1HH_shareofTRichGGs',y1,dbp] = GG_TRich_sum_df['hhq1_2015'].sum() / metrics_dict[runid,metric_id,'TotHH_inTRichGGs',y1,dbp] - metrics_dict[runid,metric_id,'Q1HH_shareofTRichGGs_normalized',y1,dbp] = metrics_dict[runid,metric_id,'Q1HH_shareofTRichGGs',y1,dbp] * normalize_factor_Q1 - metrics_dict[runid,metric_id,'Q1HH_shareofTRichGGs',y2,dbp] = GG_TRich_sum_df['hhq1_2050'].sum() / metrics_dict[runid,metric_id,'TotHH_inTRichGGs',y2,dbp] - - - - ''' + metrics_dict[runid, metric_id, "Q1HH_shareofRegion", y1, dbp] = ( + parcel_sum_df["hhq1_2015"].sum() / parcel_sum_df["tothh_2015"].sum() + ) + metrics_dict[runid, metric_id, "Q1HH_shareofRegion_normalized", y1, dbp] = ( + parcel_sum_df["hhq1_2015"].sum() + / parcel_sum_df["tothh_2015"].sum() + * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofRegion", y2, dbp] = ( + parcel_sum_df["hhq1_2050"].sum() / parcel_sum_df["tothh_2050"].sum() + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofHRA", y1, dbp] = ( + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "hhq1_2015" + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inHRA", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofHRA_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofHRA", y1, dbp] * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofHRA", y2, dbp] = ( + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("HRA", na=False), "hhq1_2050" + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inHRA", y2, dbp] + ) + + 
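All of the metric assignments in these functions share the same five-part key, (runid, metric_id, metric_name, year, dbp). A minimal sketch of how such a dict can be flattened into a long-format table for export; the column names and output path are illustrative, not necessarily what the script uses downstream:
import pandas as pd

metrics_df = pd.DataFrame(
    [key + (value,) for key, value in metrics_dict.items()],
    columns=["runid", "metric_id", "name", "year", "dbp", "value"],
)
metrics_df.to_csv("metrics.csv", index=False)  # illustrative output path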
metrics_dict[runid, metric_id, "Q1HH_shareofTRA", y1, dbp] = ( + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "hhq1_2015" + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inTRA", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofTRA_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofTRA", y1, dbp] * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofTRA", y2, dbp] = ( + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("tra", na=False), "hhq1_2050" + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inTRA", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofHRAonly", y1, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False), + "hhq1_2015", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inHRAonly", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofHRAonly_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofHRAonly", y1, dbp] + * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofHRAonly", y2, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False), + "hhq1_2050", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inHRAonly", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofTRAonly", y1, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False), + "hhq1_2015", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inTRAonly", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofTRAonly_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofTRAonly", y1, dbp] + * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofTRAonly", y2, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False), + "hhq1_2050", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inTRAonly", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofHRATRA", y1, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)), + "hhq1_2015", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inHRATRA", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofHRATRA_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofHRATRA", y1, dbp] + * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofHRATRA", y2, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False)) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False)), + "hhq1_2050", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inHRATRA", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofDRTracts", y1, dbp] = ( + tract_sum_df.loc[(tract_sum_df["DispRisk"] == 1), "hhq1_2015"].sum() + / metrics_dict[runid, metric_id, "TotHH_inDRTracts", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofDRTracts_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofDRTracts", y1, dbp] + * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofDRTracts", y2, dbp] = ( + tract_sum_df.loc[(tract_sum_df["DispRisk"] == 1), 
"hhq1_2050"].sum() + / metrics_dict[runid, metric_id, "TotHH_inDRTracts", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofCoCTracts", y1, dbp] = ( + tract_sum_df.loc[(tract_sum_df["coc_flag_pba2050"] == 1), "hhq1_2015"].sum() + / metrics_dict[runid, metric_id, "TotHH_inCoCTracts", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofCoCTracts_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofCoCTracts", y1, dbp] + * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofCoCTracts", y2, dbp] = ( + tract_sum_df.loc[(tract_sum_df["coc_flag_pba2050"] == 1), "hhq1_2050"].sum() + / metrics_dict[runid, metric_id, "TotHH_inCoCTracts", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofRoR", y1, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("DR", na=False) == False) + & (parcel_sum_df["coc_flag_pba2050"] == 0), + "hhq1_2015", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inRoR", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofRoR_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofRoR", y1, dbp] * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofRoR", y2, dbp] = ( + parcel_sum_df.loc[ + (parcel_sum_df["pba50chcat"].str.contains("HRA", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("tra", na=False) == False) + & (parcel_sum_df["pba50chcat"].str.contains("DR", na=False) == False) + & (parcel_sum_df["coc_flag_pba2050"] == 0), + "hhq1_2050", + ].sum() + / metrics_dict[runid, metric_id, "TotHH_inRoR", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofGGs", y1, dbp] = ( + GG_sum_df["hhq1_2015"].sum() + / metrics_dict[runid, metric_id, "TotHH_inGGs", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofGGs_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofGGs", y1, dbp] * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofGGs", y2, dbp] = ( + GG_sum_df["hhq1_2050"].sum() + / metrics_dict[runid, metric_id, "TotHH_inGGs", y2, dbp] + ) + + metrics_dict[runid, metric_id, "Q1HH_shareofTRichGGs", y1, dbp] = ( + GG_TRich_sum_df["hhq1_2015"].sum() + / metrics_dict[runid, metric_id, "TotHH_inTRichGGs", y1, dbp] + ) + metrics_dict[runid, metric_id, "Q1HH_shareofTRichGGs_normalized", y1, dbp] = ( + metrics_dict[runid, metric_id, "Q1HH_shareofTRichGGs", y1, dbp] + * normalize_factor_Q1 + ) + metrics_dict[runid, metric_id, "Q1HH_shareofTRichGGs", y2, dbp] = ( + GG_TRich_sum_df["hhq1_2050"].sum() + / metrics_dict[runid, metric_id, "TotHH_inTRichGGs", y2, dbp] + ) + + """ print('********************D1 Diverse********************') print('Growth of LIHH share of population (normalize factor))',normalize_factor_Q1Q2 ) print('LIHH Share in HRA 2050 %s' % dbp,metrics_dict[runid,metric_id,'LIHH_shareinHRA',y2,dbp] ) print('LIHH Share in HRA 2015 %s' % dbp,metrics_dict[runid,metric_id,'LIHH_shareinHRA_normalized',y1,dbp] ) print('LIHH Share of HRA 2050 %s' % dbp,metrics_dict[runid,metric_id,'LIHH_shareofHRA',y2,dbp]) print('LIHH Share of HRA 2015 %s' % dbp,metrics_dict[runid,metric_id,'LIHH_shareofHRA_normalized',y1,dbp] ) - ''' - + """ -def calculate_Diverse2_LIHH_Displacement(runid, dbp, parcel_sum_df, tract_sum_df, TRA_sum_df, GG_sum_df, normalize_factor_Q1Q2, normalize_factor_Q1, metrics_dict): +def calculate_Diverse2_LIHH_Displacement( + runid, + dbp, + 
parcel_sum_df, + tract_sum_df, + TRA_sum_df, + GG_sum_df, + normalize_factor_Q1Q2, + normalize_factor_Q1, + metrics_dict, +): metric_id = "D2" # For reference: total number of LIHH in tracts - metrics_dict[runid,metric_id,'LIHH_inDR',y2,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('DR', na=False), 'hhq1_2050'].sum() - metrics_dict[runid,metric_id,'LIHH_inDR',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('DR', na=False), 'hhq1_2015'].sum() - metrics_dict[runid,metric_id,'LIHH_inDR_normalized',y1,dbp] = parcel_sum_df.loc[parcel_sum_df['pba50chcat'].str.contains('DR', na=False), 'hhq1_2015'].sum() * normalize_factor_Q1 - - print('********************D2 Diverse********************') - print('Total Number of LIHH in DR tracts in 2050',metrics_dict[runid,metric_id,'LIHH_inDR',y2,dbp] ) - print('Number of LIHH in DR tracts in 2015',metrics_dict[runid,metric_id,'LIHH_inDR',y1,dbp] ) - print('Number of LIHH in DR tracts in normalized',metrics_dict[runid,metric_id,'LIHH_inDR_normalized',y1,dbp] ) - + metrics_dict[runid, metric_id, "LIHH_inDR", y2, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("DR", na=False), "hhq1_2050" + ].sum() + metrics_dict[runid, metric_id, "LIHH_inDR", y1, dbp] = parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("DR", na=False), "hhq1_2015" + ].sum() + metrics_dict[runid, metric_id, "LIHH_inDR_normalized", y1, dbp] = ( + parcel_sum_df.loc[ + parcel_sum_df["pba50chcat"].str.contains("DR", na=False), "hhq1_2015" + ].sum() + * normalize_factor_Q1 + ) + + print("********************D2 Diverse********************") + print( + "Total Number of LIHH in DR tracts in 2050", + metrics_dict[runid, metric_id, "LIHH_inDR", y2, dbp], + ) + print( + "Number of LIHH in DR tracts in 2015", + metrics_dict[runid, metric_id, "LIHH_inDR", y1, dbp], + ) + print( + "Number of LIHH in DR tracts in normalized", + metrics_dict[runid, metric_id, "LIHH_inDR_normalized", y1, dbp], + ) ###### Displacement at Tract Level (for Displacement Risk Tracts and CoC Tracts and HRA Tracts) # Total number of DR, CoC, HRA Tracts - metrics_dict[runid,metric_id,'Num_DRtracts_total',y1,dbp] = tract_sum_df.loc[(tract_sum_df['DispRisk'] == 1), 'tract_id'].nunique() - metrics_dict[runid,metric_id,'Num_CoCtracts_total',y1,dbp] = tract_sum_df.loc[(tract_sum_df['coc_flag_pba2050'] == 1), 'tract_id'].nunique() - metrics_dict[runid,metric_id,'Num_HRAtracts_total',y1,dbp] = tract_sum_df.loc[(tract_sum_df['hra'] == 1), 'tract_id'].nunique() - + metrics_dict[runid, metric_id, "Num_DRtracts_total", y1, dbp] = tract_sum_df.loc[ + (tract_sum_df["DispRisk"] == 1), "tract_id" + ].nunique() + metrics_dict[runid, metric_id, "Num_CoCtracts_total", y1, dbp] = tract_sum_df.loc[ + (tract_sum_df["coc_flag_pba2050"] == 1), "tract_id" + ].nunique() + metrics_dict[runid, metric_id, "Num_HRAtracts_total", y1, dbp] = tract_sum_df.loc[ + (tract_sum_df["hra"] == 1), "tract_id" + ].nunique() # Calculating share of Q1 households at tract level / we are not going to normalize this since we want to check impacts at neighborhood level - #tract_sum_df['hhq1_pct_2015_normalized'] = tract_sum_df['hhq1_2015'] / tract_sum_df['tothh_2015'] * normalize_factor_Q1 - tract_sum_df['hhq1_pct_2050'] = tract_sum_df['hhq1_2050'] / tract_sum_df['tothh_2050'] - tract_sum_df['hhq1_pct_2015'] = tract_sum_df['hhq1_2015'] / tract_sum_df['tothh_2015'] + # tract_sum_df['hhq1_pct_2015_normalized'] = tract_sum_df['hhq1_2015'] / tract_sum_df['tothh_2015'] * normalize_factor_Q1 + 
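[Editor's note] The code below computes each tract's Q1-household share in 2015 and 2050 and then flags tracts that lost share; the original check_losthhq1_share comparison is truncated in this diff, so the following sketch only illustrates the idea, with an assumed threshold parameter standing in for the missing constant:

def lost_hhq1_share(row, threshold=1.0):
    # a tract with no Q1 households in 2015 cannot lose share
    if row["hhq1_pct_2015"] == 0:
        return 0
    # flag the tract if its 2050 share fell below `threshold` times its 2015 share
    return 1 if (row["hhq1_pct_2050"] / row["hhq1_pct_2015"]) < threshold else 0

# e.g. tract_sum_df["lost_hhq1"] = tract_sum_df.apply(lost_hhq1_share, axis=1)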
tract_sum_df["hhq1_pct_2050"] = ( + tract_sum_df["hhq1_2050"] / tract_sum_df["tothh_2050"] + ) + tract_sum_df["hhq1_pct_2015"] = ( + tract_sum_df["hhq1_2015"] / tract_sum_df["tothh_2015"] + ) - # Creating functions to check if rows of a dataframe lost hhq1 share or absolute; applied to tract_summary_df and TRA_summary_df - def check_losthhq1_share(row,j): - if (row['hhq1_pct_2015'] == 0): return 0 - elif ((row['hhq1_pct_2050']/row['hhq1_pct_2015']) {}_majorbuf'.format(prefix)) - major = arcpy.SelectLayerByAttribute_management(input_layer, "NEW_SELECTION", '"major_stop" = 1') - arcpy.CopyFeatures_management(major, prefix+"_major") # save selection to new feature class - major_result = arcpy.GetCount_management(prefix+"_major") + logger.info( + "Select buffer for major transit stops => {}_majorbuf".format(prefix) + ) + major = arcpy.SelectLayerByAttribute_management( + input_layer, "NEW_SELECTION", '"major_stop" = 1' + ) + arcpy.CopyFeatures_management( + major, prefix + "_major" + ) # save selection to new feature class + major_result = arcpy.GetCount_management(prefix + "_major") logger.info(" {}_major has {} rows".format(prefix, major_result[0])) - arcpy.Buffer_analysis(prefix+"_major", prefix+"_majorbuf", "0.5 Miles", "FULL", "ROUND", "ALL", None, "PLANAR") - + arcpy.Buffer_analysis( + prefix + "_major", + prefix + "_majorbuf", + "0.5 Miles", + "FULL", + "ROUND", + "ALL", + None, + "PLANAR", + ) + else: - if transit_type=="noplan": + if transit_type == "noplan": logger.info('Selecting "Under Construction" or "Open" stops for no plan') - new_major = arcpy.management.SelectLayerByAttribute(input_layer, "NEW_SELECTION", - '"status" = \'Under Construction\' Or status=\'Open\'') - elif transit_type=="blueprint": - logger.info('Selecting "Under Construction" or "Open" or "Final Blueprint" stops for no plan') - new_major = arcpy.management.SelectLayerByAttribute(input_layer, "NEW_SELECTION", - '"status" = \'Under Construction\' Or status=\'Open\' Or status=\'Final Blueprint\'') - - arcpy.CopyFeatures_management(new_major, prefix+"_new_major") # save selection to new feature class - new_major_result = arcpy.GetCount_management(prefix+"_new_major") + new_major = arcpy.management.SelectLayerByAttribute( + input_layer, + "NEW_SELECTION", + "\"status\" = 'Under Construction' Or status='Open'", + ) + elif transit_type == "blueprint": + logger.info( + 'Selecting "Under Construction" or "Open" or "Final Blueprint" stops for no plan' + ) + new_major = arcpy.management.SelectLayerByAttribute( + input_layer, + "NEW_SELECTION", + "\"status\" = 'Under Construction' Or status='Open' Or status='Final Blueprint'", + ) + + arcpy.CopyFeatures_management( + new_major, prefix + "_new_major" + ) # save selection to new feature class + new_major_result = arcpy.GetCount_management(prefix + "_new_major") logger.info(" {}_new_major has {} rows".format(prefix, new_major_result[0])) - arcpy.Buffer_analysis(prefix+"_new_major", prefix+"_newmajorbuf", "0.5 Miles", "FULL", "ROUND", "ALL", None, "PLANAR") + arcpy.Buffer_analysis( + prefix + "_new_major", + prefix + "_newmajorbuf", + "0.5 Miles", + "FULL", + "ROUND", + "ALL", + None, + "PLANAR", + ) # merge new major buffered with original major buffered - arcpy.Merge_management([curprefix+"_majorbuf",prefix+"_newmajorbuf"], prefix+"_majorbuf_predissolve") + arcpy.Merge_management( + [curprefix + "_majorbuf", prefix + "_newmajorbuf"], + prefix + "_majorbuf_predissolve", + ) # dissolve - arcpy.management.Dissolve(prefix+"_majorbuf_predissolve", prefix+"_majorbuf", - 
dissolve_field=None, statistics_fields=None, multi_part="MULTI_PART", unsplit_lines="DISSOLVE_LINES") - + arcpy.management.Dissolve( + prefix + "_majorbuf_predissolve", + prefix + "_majorbuf", + dissolve_field=None, + statistics_fields=None, + multi_part="MULTI_PART", + unsplit_lines="DISSOLVE_LINES", + ) + ### buffered hdway_15min stops - logger.info('Creating buffer for stops with headway < 15min => {}_hdwy15buf'.format(prefix)) + logger.info( + "Creating buffer for stops with headway < 15min => {}_hdwy15buf".format(prefix) + ) - if transit_type=="current": - hdwy15 = arcpy.SelectLayerByAttribute_management(input_layer, "NEW_SELECTION", "am_av_hdwy <= 15 And pm_av_hdwy <= 15") - arcpy.CopyFeatures_management(hdwy15, prefix+"_hdwy15") # save selection to new feature class - hdwy15_result = arcpy.GetCount_management(prefix+"_hdwy15") + if transit_type == "current": + hdwy15 = arcpy.SelectLayerByAttribute_management( + input_layer, "NEW_SELECTION", "am_av_hdwy <= 15 And pm_av_hdwy <= 15" + ) + arcpy.CopyFeatures_management( + hdwy15, prefix + "_hdwy15" + ) # save selection to new feature class + hdwy15_result = arcpy.GetCount_management(prefix + "_hdwy15") logger.info(" {}_hdwy15 has {} rows".format(prefix, hdwy15_result[0])) - arcpy.Buffer_analysis(prefix+"_hdwy15", prefix+"_hdwy15buf", "0.25 Miles", "FULL", "ROUND", "ALL", None, "PLANAR") + arcpy.Buffer_analysis( + prefix + "_hdwy15", + prefix + "_hdwy15buf", + "0.25 Miles", + "FULL", + "ROUND", + "ALL", + None, + "PLANAR", + ) else: # copy current - arcpy.CopyFeatures_management(curprefix+"_hdwy15buf", prefix+"_hdwy15buf") + arcpy.CopyFeatures_management(curprefix + "_hdwy15buf", prefix + "_hdwy15buf") ### buffered hdway_30min stops - logger.info('Creating buffer for stops with headway 15-30 min => {}_hdwy30buf'.format(prefix)) + logger.info( + "Creating buffer for stops with headway 15-30 min => {}_hdwy30buf".format( + prefix + ) + ) - if transit_type=="current": - hdwy30 = arcpy.SelectLayerByAttribute_management(input_layer, "NEW_SELECTION", "am_av_hdwy > 15 And am_av_hdwy <= 30 And pm_av_hdwy > 15 And pm_av_hdwy <= 30") - arcpy.CopyFeatures_management(hdwy30, prefix+"_hdwy30") # save selection to new feature class - hdwy30_result = arcpy.GetCount_management(prefix+"_hdwy30") + if transit_type == "current": + hdwy30 = arcpy.SelectLayerByAttribute_management( + input_layer, + "NEW_SELECTION", + "am_av_hdwy > 15 And am_av_hdwy <= 30 And pm_av_hdwy > 15 And pm_av_hdwy <= 30", + ) + arcpy.CopyFeatures_management( + hdwy30, prefix + "_hdwy30" + ) # save selection to new feature class + hdwy30_result = arcpy.GetCount_management(prefix + "_hdwy30") logger.info(" {}_hdwy30 has {} rows".format(prefix, hdwy30_result[0])) - arcpy.Buffer_analysis(prefix+"_hdwy30", prefix+"_hdwy30buf", "0.25 Miles", "FULL", "ROUND", "ALL", None, "PLANAR") + arcpy.Buffer_analysis( + prefix + "_hdwy30", + prefix + "_hdwy30buf", + "0.25 Miles", + "FULL", + "ROUND", + "ALL", + None, + "PLANAR", + ) else: # copy current - arcpy.CopyFeatures_management(curprefix+"_hdwy30buf", prefix+"_hdwy30buf") + arcpy.CopyFeatures_management(curprefix + "_hdwy30buf", prefix + "_hdwy30buf") ### buffered hdway_30plus stops - logger.info('Creating buffer for stops with headway 30+ min => {}_hdwy30plusbuf'.format(prefix)) + logger.info( + "Creating buffer for stops with headway 30+ min => {}_hdwy30plusbuf".format( + prefix + ) + ) - if transit_type=="current": - hdwy30plus = arcpy.SelectLayerByAttribute_management(input_layer, "NEW_SELECTION", "am_av_hdwy > 30 Or pm_av_hdwy > 30") - 
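[Editor's note] For reference, the headway selections above partition stops as: both AM and PM average headways at or under 15 minutes; both between 15 and 30 minutes; either one above 30 minutes. A pandas restatement of those thresholds, illustrative only (`stops_df` and the label strings are assumed; the am_av_hdwy / pm_av_hdwy fields come from the queries above):

import numpy as np
import pandas as pd

def classify_headway(stops_df: pd.DataFrame) -> pd.Series:
    conditions = [
        (stops_df["am_av_hdwy"] <= 15) & (stops_df["pm_av_hdwy"] <= 15),
        (stops_df["am_av_hdwy"] > 15) & (stops_df["am_av_hdwy"] <= 30)
        & (stops_df["pm_av_hdwy"] > 15) & (stops_df["pm_av_hdwy"] <= 30),
        (stops_df["am_av_hdwy"] > 30) | (stops_df["pm_av_hdwy"] > 30),
    ]
    labels = ["hdwy15", "hdwy30", "hdwy30plus"]
    # stops matching none of the queries (e.g. AM <= 15 but PM between 15 and 30)
    # stay unclassified, just as they fall outside all three selections above
    return pd.Series(np.select(conditions, labels, default="unclassified"),
                     index=stops_df.index)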
arcpy.CopyFeatures_management(hdwy30plus, prefix+"_hdwy30plus") # save selection to new feature class - hdwy30plus_result = arcpy.GetCount_management(prefix+"_hdwy30plus") + if transit_type == "current": + hdwy30plus = arcpy.SelectLayerByAttribute_management( + input_layer, "NEW_SELECTION", "am_av_hdwy > 30 Or pm_av_hdwy > 30" + ) + arcpy.CopyFeatures_management( + hdwy30plus, prefix + "_hdwy30plus" + ) # save selection to new feature class + hdwy30plus_result = arcpy.GetCount_management(prefix + "_hdwy30plus") logger.info(" {}_hdwy30plus has {} rows".format(prefix, hdwy30plus_result[0])) - arcpy.Buffer_analysis(prefix+"_hdwy30plus", prefix+"_hdwy30plusbuf", "0.25 Miles", "FULL", "ROUND", "ALL", None, "PLANAR") + arcpy.Buffer_analysis( + prefix + "_hdwy30plus", + prefix + "_hdwy30plusbuf", + "0.25 Miles", + "FULL", + "ROUND", + "ALL", + None, + "PLANAR", + ) else: # copy current - arcpy.CopyFeatures_management(curprefix+"_hdwy30plusbuf", prefix+"_hdwy30plusbuf") + arcpy.CopyFeatures_management( + curprefix + "_hdwy30plusbuf", prefix + "_hdwy30plusbuf" + ) ### Make them disjoint -- first one wins - logger.info('Isolate {}_hdwy15buf => {}_hdwy15buf_only'.format(prefix,prefix)) - arcpy.Erase_analysis(in_features=prefix+"_hdwy15buf", erase_features=prefix+"_majorbuf", - out_feature_class=prefix+"_hdwy15buf_only") - - logger.info('Isolate {}_hdwy30buf => {}_hdwy30buf_only'.format(prefix,prefix)) - arcpy.Erase_analysis(prefix+"_hdwy30buf", prefix+"_majorbuf", prefix+"_hdwy30buf_1") - arcpy.Erase_analysis(prefix+"_hdwy30buf_1", prefix+"_hdwy15buf", prefix+"_hdwy30buf_only") - arcpy.Delete_management([prefix+"_hdwy30buf_1"]) - - logger.info('Isolate {}_hdwy30plusbuf => {}_hdwy30plusbuf_only'.format(prefix,prefix)) - arcpy.Erase_analysis(prefix+"_hdwy30plusbuf", prefix+"_majorbuf", prefix+"_hdwy30plusbuf_1") - arcpy.Erase_analysis(prefix+"_hdwy30plusbuf_1", prefix+"_hdwy15buf", prefix+"_hdwy30plusbuf_2") - arcpy.Erase_analysis(prefix+"_hdwy30plusbuf_2", prefix+"_hdwy30buf", prefix+"_hdwy30plusbuf_only") - arcpy.Delete_management([prefix+"_hdwy30plusbuf_1", - prefix+"_hdwy30plusbuf_2"]) - - logger.info('Rest of Bay Area => {}_none'.format(prefix)) - arcpy.Erase_analysis("BAcounty_expand", prefix+"_majorbuf", "BAcounty_expand_1") - arcpy.Erase_analysis("BAcounty_expand_1", prefix+"_hdwy15buf", "BAcounty_expand_2") - arcpy.Erase_analysis("BAcounty_expand_2", prefix+"_hdwy30buf", "BAcounty_expand_3") - arcpy.Erase_analysis("BAcounty_expand_3", prefix+"_hdwy30plusbuf", prefix+"_none") - arcpy.Delete_management(["BAcounty_expand_1", - "BAcounty_expand_2", - "BAcounty_expand_3"]) - - - logger.info('Merge into one feature class => {}_cat5'.format(prefix)) - arcpy.Merge_management([prefix+"_none", - prefix+"_hdwy30plusbuf_only", - prefix+"_hdwy30buf_only", - prefix+"_hdwy15buf_only", - prefix+"_majorbuf"], - prefix+"_cat5", add_source="ADD_SOURCE_INFO") + logger.info("Isolate {}_hdwy15buf => {}_hdwy15buf_only".format(prefix, prefix)) + arcpy.Erase_analysis( + in_features=prefix + "_hdwy15buf", + erase_features=prefix + "_majorbuf", + out_feature_class=prefix + "_hdwy15buf_only", + ) + + logger.info("Isolate {}_hdwy30buf => {}_hdwy30buf_only".format(prefix, prefix)) + arcpy.Erase_analysis( + prefix + "_hdwy30buf", prefix + "_majorbuf", prefix + "_hdwy30buf_1" + ) + arcpy.Erase_analysis( + prefix + "_hdwy30buf_1", prefix + "_hdwy15buf", prefix + "_hdwy30buf_only" + ) + arcpy.Delete_management([prefix + "_hdwy30buf_1"]) + + logger.info( + "Isolate {}_hdwy30plusbuf => {}_hdwy30plusbuf_only".format(prefix, 
prefix) + ) + arcpy.Erase_analysis( + prefix + "_hdwy30plusbuf", prefix + "_majorbuf", prefix + "_hdwy30plusbuf_1" + ) + arcpy.Erase_analysis( + prefix + "_hdwy30plusbuf_1", prefix + "_hdwy15buf", prefix + "_hdwy30plusbuf_2" + ) + arcpy.Erase_analysis( + prefix + "_hdwy30plusbuf_2", + prefix + "_hdwy30buf", + prefix + "_hdwy30plusbuf_only", + ) + arcpy.Delete_management([prefix + "_hdwy30plusbuf_1", prefix + "_hdwy30plusbuf_2"]) + + logger.info("Rest of Bay Area => {}_none".format(prefix)) + arcpy.Erase_analysis("BAcounty_expand", prefix + "_majorbuf", "BAcounty_expand_1") + arcpy.Erase_analysis( + "BAcounty_expand_1", prefix + "_hdwy15buf", "BAcounty_expand_2" + ) + arcpy.Erase_analysis( + "BAcounty_expand_2", prefix + "_hdwy30buf", "BAcounty_expand_3" + ) + arcpy.Erase_analysis( + "BAcounty_expand_3", prefix + "_hdwy30plusbuf", prefix + "_none" + ) + arcpy.Delete_management( + ["BAcounty_expand_1", "BAcounty_expand_2", "BAcounty_expand_3"] + ) + + logger.info("Merge into one feature class => {}_cat5".format(prefix)) + arcpy.Merge_management( + [ + prefix + "_none", + prefix + "_hdwy30plusbuf_only", + prefix + "_hdwy30buf_only", + prefix + "_hdwy15buf_only", + prefix + "_majorbuf", + ], + prefix + "_cat5", + add_source="ADD_SOURCE_INFO", + ) # create Service_Level from MERGE_SRC - arcpy.AddField_management(prefix +"_cat5", "Service_Level", "TEXT","","", 200) - with arcpy.da.UpdateCursor(prefix +"_cat5", ["Service_Level", "MERGE_SRC"]) as cursor: - for row in cursor: - if 'none' in row[1]: - row[0] = 'No_Fixed_Route_Transit' - elif 'hdwy30plusbuf' in row[1]: - row[0] = 'Bus_31plus_min' - elif 'hdwy30buf' in row[1]: - row[0] = 'Bus_15_30min' - elif 'hdwy15buf' in row[1]: - row[0] = 'Bus_<15min' - elif 'majorbuf' in row[1]: - row[0] = 'Major_Transit_Stop' - cursor.updateRow(row) - -if __name__ == '__main__': - + arcpy.AddField_management(prefix + "_cat5", "Service_Level", "TEXT", "", "", 200) + with arcpy.da.UpdateCursor( + prefix + "_cat5", ["Service_Level", "MERGE_SRC"] + ) as cursor: + for row in cursor: + if "none" in row[1]: + row[0] = "No_Fixed_Route_Transit" + elif "hdwy30plusbuf" in row[1]: + row[0] = "Bus_31plus_min" + elif "hdwy30buf" in row[1]: + row[0] = "Bus_15_30min" + elif "hdwy15buf" in row[1]: + row[0] = "Bus_<15min" + elif "majorbuf" in row[1]: + row[0] = "Major_Transit_Stop" + cursor.updateRow(row) + + +if __name__ == "__main__": + # create logger logger = logging.getLogger(__name__) - logger.setLevel('DEBUG') + logger.setLevel("DEBUG") # console handler ch = logging.StreamHandler() - ch.setLevel('INFO') - ch.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + ch.setLevel("INFO") + ch.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(ch) # file handler - fh = logging.FileHandler(LOG_FILE, mode='w') - fh.setLevel('DEBUG') - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')) + fh = logging.FileHandler(LOG_FILE, mode="w") + fh.setLevel("DEBUG") + fh.setFormatter( + logging.Formatter( + "%(asctime)s - %(levelname)s - %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p" + ) + ) logger.addHandler(fh) logger.info("WORKING_DIR = {}".format(WORKING_DIR)) logger.info("WORKSPACE_GDB = {}".format(WORKSPACE_GDB)) arcpy.CreateFileGDB_management(WORKING_DIR, WORKSPACE_GDB) - arcpy.env.workspace = os.path.join(WORKING_DIR,WORKSPACE_GDB) + arcpy.env.workspace = os.path.join(WORKING_DIR, 
WORKSPACE_GDB) arcpy.env.overwriteOutput = True - transit_layer = arcpy.MakeFeatureLayer_management(MTC_ONLINE_TRANSIT_URL,'transit_layer') - bacounty_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_BACOUNTY_URL,'bacounty_portal') - taz_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_TAZ_URL,'taz_portal') - tract_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_TRACT_URL,'tract_portal') - - bacounty = os.path.join(arcpy.env.workspace, "bacounty") - taz = os.path.join(arcpy.env.workspace, "taz") - tract = os.path.join(arcpy.env.workspace, "tract") - - if arcpy.Exists(bacounty): arcpy.Delete_management(bacounty) - if arcpy.Exists(taz): arcpy.Delete_management(taz) - if arcpy.Exists(tract): arcpy.Delete_management(tract) - arcpy.FeatureClassToFeatureClass_conversion(bacounty_portal, arcpy.env.workspace,'bacounty') - arcpy.FeatureClassToFeatureClass_conversion(taz_portal, arcpy.env.workspace,'taz') - arcpy.FeatureClassToFeatureClass_conversion(tract_portal, arcpy.env.workspace,'tract') - - transit_current = arcpy.SelectLayerByAttribute_management(transit_layer, "NEW_SELECTION", "status = 'Existing/Built'") - arcpy.CopyFeatures_management(transit_current, 'transit_current') - - transit_potential = arcpy.SelectLayerByAttribute_management(transit_layer, "NEW_SELECTION", "status <> 'Existing/Built'") - arcpy.CopyFeatures_management(transit_potential, 'transit_potential') + transit_layer = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_TRANSIT_URL, "transit_layer" + ) + bacounty_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_BACOUNTY_URL, "bacounty_portal" + ) + taz_portal = arcpy.MakeFeatureLayer_management(MTC_ONLINE_TAZ_URL, "taz_portal") + tract_portal = arcpy.MakeFeatureLayer_management( + MTC_ONLINE_TRACT_URL, "tract_portal" + ) + + bacounty = os.path.join(arcpy.env.workspace, "bacounty") + taz = os.path.join(arcpy.env.workspace, "taz") + tract = os.path.join(arcpy.env.workspace, "tract") + + if arcpy.Exists(bacounty): + arcpy.Delete_management(bacounty) + if arcpy.Exists(taz): + arcpy.Delete_management(taz) + if arcpy.Exists(tract): + arcpy.Delete_management(tract) + arcpy.FeatureClassToFeatureClass_conversion( + bacounty_portal, arcpy.env.workspace, "bacounty" + ) + arcpy.FeatureClassToFeatureClass_conversion(taz_portal, arcpy.env.workspace, "taz") + arcpy.FeatureClassToFeatureClass_conversion( + tract_portal, arcpy.env.workspace, "tract" + ) + + transit_current = arcpy.SelectLayerByAttribute_management( + transit_layer, "NEW_SELECTION", "status = 'Existing/Built'" + ) + arcpy.CopyFeatures_management(transit_current, "transit_current") + + transit_potential = arcpy.SelectLayerByAttribute_management( + transit_layer, "NEW_SELECTION", "status <> 'Existing/Built'" + ) + arcpy.CopyFeatures_management(transit_potential, "transit_potential") # log info about the workspace log_workspace_contents(logger) - #Using county boundaries alone did not capture all parcels (1,952,484/1,956,212). With buffering 0.5 mi 1,956,044/1,956,212 99.99% of all parcels - logger.info('Create buffered bacounty => bacounty_expand') - arcpy.analysis.Buffer("bacounty", "bacounty_expand", "0.5 Miles", "FULL", "ROUND", "ALL", None, "PLANAR") - - - # create trncur_cat layer, which has 5 disjoint features which together make bacounty_exapdn: + # Using county boundaries alone did not capture all parcels (1,952,484/1,956,212). 
With buffering 0.5 mi 1,956,044/1,956,212 99.99% of all parcels + logger.info("Create buffered bacounty => bacounty_expand") + arcpy.analysis.Buffer( + "bacounty", + "bacounty_expand", + "0.5 Miles", + "FULL", + "ROUND", + "ALL", + None, + "PLANAR", + ) + + # create trncur_cat layer, which has 5 disjoint features which together make bacounty_exapdn: # majorbuf, hdwy15buf_only hdwy30buf_only, hdwy30plusbuf_only, none create_transit_features(logger, "current") @@ -288,41 +474,58 @@ def create_transit_features(logger, transit_type): ### Bring in urbansim results all_prox = pd.DataFrame() - taz_prox = pd.DataFrame() + taz_prox = pd.DataFrame() for us_runid in list_us_runid: logger.info("") logger.info("==== Processing UrbanSim run {} ====".format(us_runid)) - urbansim_runid = os.path.join(urbansim_run_location,us_runid) + urbansim_runid = os.path.join(urbansim_run_location, us_runid) for model_year in (2015, 2050): if model_year == 2050: if us_runid == us_2050_FBP_Final: - parcel_file = urbansim_runid+'_parcel_data_{}_UBI.csv'.format(model_year) + parcel_file = urbansim_runid + "_parcel_data_{}_UBI.csv".format( + model_year + ) else: - parcel_file = urbansim_runid+'_parcel_data_{}.csv'.format(model_year) - else: - parcel_file = urbansim_runid+'_parcel_data_{}.csv'.format(model_year) - logger.info('Reading {} parcel data from {}'.format(model_year, parcel_file)) - parcel_output = pd.read_csv(parcel_file, engine='python' ) - - logger.info(' Read {} rows'.format(len(parcel_output))) + parcel_file = urbansim_runid + "_parcel_data_{}.csv".format( + model_year + ) + else: + parcel_file = urbansim_runid + "_parcel_data_{}.csv".format(model_year) + logger.info( + "Reading {} parcel data from {}".format(model_year, parcel_file) + ) + parcel_output = pd.read_csv(parcel_file, engine="python") + + logger.info(" Read {} rows".format(len(parcel_output))) # keep essential columns - parcel_output.drop(['geom_id','total_job_spaces','zoned_du', - 'zoned_du_underbuild', 'zoned_du_underbuild_nodev', 'first_building_type'], axis=1, inplace=True) + parcel_output.drop( + [ + "geom_id", + "total_job_spaces", + "zoned_du", + "zoned_du_underbuild", + "zoned_du_underbuild_nodev", + "first_building_type", + ], + axis=1, + inplace=True, + ) logger.info("Head:\n{}".format(parcel_output.head())) - parcel_output['totemp' ] = parcel_output['totemp' ].fillna(0) - parcel_output['totemp' ] = parcel_output['totemp' ].round(0).astype('int') - parcel_output['RETEMPN'] = parcel_output['RETEMPN'].fillna(0) - parcel_output['RETEMPN'] = parcel_output['RETEMPN'].round(0).astype('int') - parcel_output['MWTEMPN'] = parcel_output['MWTEMPN'].fillna(0) - parcel_output['MWTEMPN'] = parcel_output['MWTEMPN'].round(0).astype('int') - + parcel_output["totemp"] = parcel_output["totemp"].fillna(0) + parcel_output["totemp"] = parcel_output["totemp"].round(0).astype("int") + parcel_output["RETEMPN"] = parcel_output["RETEMPN"].fillna(0) + parcel_output["RETEMPN"] = parcel_output["RETEMPN"].round(0).astype("int") + parcel_output["MWTEMPN"] = parcel_output["MWTEMPN"].fillna(0) + parcel_output["MWTEMPN"] = parcel_output["MWTEMPN"].round(0).astype("int") + # save as table in gdb parcel_table = os.path.join(arcpy.env.workspace, "parcel_table") - if arcpy.Exists(parcel_table): arcpy.management.Delete(parcel_table) + if arcpy.Exists(parcel_table): + arcpy.management.Delete(parcel_table) parcel_array = np.array(np.rec.fromrecords(parcel_output.values)) parcel_array.dtype.names = tuple(parcel_output.dtypes.index.tolist()) @@ -330,30 +533,51 @@ def 
create_transit_features(logger, transit_type): logger.info("Saved to {} in {}".format(parcel_table, arcpy.env.workspace)) # convert to point feature class in GDB - if arcpy.Exists('parcel_fc'): arcpy.management.Delete('parcel_fc') - arcpy.management.XYTableToPoint(in_table=parcel_table, out_feature_class='parcel_fc',x_field='x',y_field='y') - logger.info("Saved to {} in {}".format('parcel_fc', arcpy.env.workspace)) + if arcpy.Exists("parcel_fc"): + arcpy.management.Delete("parcel_fc") + arcpy.management.XYTableToPoint( + in_table=parcel_table, + out_feature_class="parcel_fc", + x_field="x", + y_field="y", + ) + logger.info("Saved to {} in {}".format("parcel_fc", arcpy.env.workspace)) if model_year == 2015: # current - transit_features = ['trn_cur_cat5'] + transit_features = ["trn_cur_cat5"] elif model_year == 2050: # no plan and blueprint - transit_features = ['trn_np_cat5', 'trn_fp_cat5'] + transit_features = ["trn_np_cat5", "trn_fp_cat5"] for transit_feature in transit_features: - logger.info('Summarizing {} parcel data proximity to {}'.format(model_year, transit_feature)) + logger.info( + "Summarizing {} parcel data proximity to {}".format( + model_year, transit_feature + ) + ) log_workspace_contents(logger) try: logger.info("feature classes no paths") - arcpy.SummarizeWithin_analysis(transit_feature, 'parcel_fc', 'prox', keep_all_polygons="KEEP_ALL", - sum_fields=[['tothh','SUM'], ['hhq1','SUM'],['totemp','SUM'],['RETEMPN','SUM'],['MWTEMPN','SUM']]) + arcpy.SummarizeWithin_analysis( + transit_feature, + "parcel_fc", + "prox", + keep_all_polygons="KEEP_ALL", + sum_fields=[ + ["tothh", "SUM"], + ["hhq1", "SUM"], + ["totemp", "SUM"], + ["RETEMPN", "SUM"], + ["MWTEMPN", "SUM"], + ], + ) # hasn't worked, see comments below logger.info("SUCCESS") except: - # Get the tool error messages + # Get the tool error messages msgs = arcpy.GetMessages(2) logger.error("Exception occured; msgs: {}".format(msgs)) @@ -362,11 +586,17 @@ def create_transit_features(logger, transit_type): tbinfo = traceback.format_tb(tb)[0] # Concatenate information together concerning the error into a message string - logger.error("Traceback info:\n{}\nError Info:\n{}".format(tbinfo, str(sys.exc_info()[1]))) - logger.error("It's ok though -- we'll do this another way, but still trying the easy way") + logger.error( + "Traceback info:\n{}\nError Info:\n{}".format( + tbinfo, str(sys.exc_info()[1]) + ) + ) + logger.error( + "It's ok though -- we'll do this another way, but still trying the easy way" + ) # Something related to arcpy.SummarizeWithin_analysis() is buggy - # The following attempts have failed with + # The following attempts have failed with # ERROR 000187: Only supports Geodatabase tables and feature classes # * use the method with feature layers as inputs, with full paths and without # * use the method with feature classes as inputs, with full paths and without @@ -374,7 +604,7 @@ def create_transit_features(logger, transit_type): # * copy feature classes to arcpy.env.scratchGDB and summarizeWithin there # # HOWEVER, after this script fails, running the following on the command line succeeds: - # + # # >>> import arcpy # >>> arcpy.env.workspace='M:\Data\GIS layers\JobsHousingTransitProximity\workspace_2020_1007_1737.gdb' # >>> arcpy.SummarizeWithin_analysis('trn_cur_cat5', 'parcel_fc', 'prox', keep_all_polygons='KEEP_ALL', sum_fields=[['tothh','SUM'], ['hhq1','SUM'],['totemp','SUM'],['RETEMPN','SUM'],['MWTEMPN','SUM']]) @@ -382,104 +612,193 @@ def create_transit_features(logger, transit_type): # >>> prox_sdf = 
pd.DataFrame.spatial.from_featureclass('prox') # # so we'll summarize within ourselves and use spatial join instead - if arcpy.Exists('parcel_fc_join_trn'): arcpy.management.Delete('parcel_fc_join_trn') + if arcpy.Exists("parcel_fc_join_trn"): + arcpy.management.Delete("parcel_fc_join_trn") logger.info("spatial joining parcel_fc with {}".format(transit_feature)) - arcpy.SpatialJoin_analysis(target_features='parcel_fc', join_features=transit_feature, - out_feature_class='parcel_fc_join_trn') + arcpy.SpatialJoin_analysis( + target_features="parcel_fc", + join_features=transit_feature, + out_feature_class="parcel_fc_join_trn", + ) logger.info("spatial joining parcel_fc_join_trn with {}".format(taz)) - arcpy.SpatialJoin_analysis(target_features='parcel_fc_join_trn', join_features=taz, - out_feature_class='parcel_fc_join_trn_taz') - - logger.info("spatial joining parcel_fc_join_trn_taz with {}".format(tract)) - arcpy.SpatialJoin_analysis(target_features='parcel_fc_join_trn_taz', join_features=tract, - out_feature_class='parcel_fc_join_trn_taz_tract') + arcpy.SpatialJoin_analysis( + target_features="parcel_fc_join_trn", + join_features=taz, + out_feature_class="parcel_fc_join_trn_taz", + ) + + logger.info( + "spatial joining parcel_fc_join_trn_taz with {}".format(tract) + ) + arcpy.SpatialJoin_analysis( + target_features="parcel_fc_join_trn_taz", + join_features=tract, + out_feature_class="parcel_fc_join_trn_taz_tract", + ) logger.info(" ...complete") - prox_sdf_ba = pd.DataFrame.spatial.from_featureclass('parcel_fc_join_trn_taz_tract') - prox_sdf = prox_sdf_ba.groupby('Service_Level').agg({'tothh':'sum', 'hhq1':'sum', - 'totemp':'sum', 'RETEMPN':'sum', - 'MWTEMPN':'sum'}).reset_index() - - prox_sdf['tothh_share' ] = round(prox_sdf.tothh /prox_sdf.tothh.sum(), 2) - prox_sdf['hhq1_share' ] = round(prox_sdf.hhq1 /prox_sdf.hhq1.sum() , 2) - prox_sdf['totemp_share' ] = round(prox_sdf.totemp /prox_sdf.totemp.sum(), 2) - prox_sdf['RETEMPN_share'] = round(prox_sdf.RETEMPN/prox_sdf.RETEMPN.sum(),2) - prox_sdf['MWTEMPN_share'] = round(prox_sdf.MWTEMPN/prox_sdf.MWTEMPN.sum(),2) - prox_sdf['year' ] = str(model_year) - prox_sdf['modelrunID' ] = us_runid - prox_sdf['transit' ] = transit_feature - prox_sdf['area' ] = 'Region' + prox_sdf_ba = pd.DataFrame.spatial.from_featureclass( + "parcel_fc_join_trn_taz_tract" + ) + prox_sdf = ( + prox_sdf_ba.groupby("Service_Level") + .agg( + { + "tothh": "sum", + "hhq1": "sum", + "totemp": "sum", + "RETEMPN": "sum", + "MWTEMPN": "sum", + } + ) + .reset_index() + ) + + prox_sdf["tothh_share"] = round( + prox_sdf.tothh / prox_sdf.tothh.sum(), 2 + ) + prox_sdf["hhq1_share"] = round(prox_sdf.hhq1 / prox_sdf.hhq1.sum(), 2) + prox_sdf["totemp_share"] = round( + prox_sdf.totemp / prox_sdf.totemp.sum(), 2 + ) + prox_sdf["RETEMPN_share"] = round( + prox_sdf.RETEMPN / prox_sdf.RETEMPN.sum(), 2 + ) + prox_sdf["MWTEMPN_share"] = round( + prox_sdf.MWTEMPN / prox_sdf.MWTEMPN.sum(), 2 + ) + prox_sdf["year"] = str(model_year) + prox_sdf["modelrunID"] = us_runid + prox_sdf["transit"] = transit_feature + prox_sdf["area"] = "Region" logger.info("prox_sdf:\n{}".format(prox_sdf)) all_prox = all_prox.append(prox_sdf) prox_sdf_coc = prox_sdf_ba[prox_sdf_ba.coc == 1] - prox_sdf_coc = prox_sdf_coc.groupby('Service_Level').agg({'tothh':'sum', 'hhq1':'sum', - 'totemp':'sum', 'RETEMPN':'sum', - 'MWTEMPN':'sum'}).reset_index() - - prox_sdf_coc['tothh_share' ] = round(prox_sdf_coc.tothh /prox_sdf_coc.tothh.sum(), 2) - prox_sdf_coc['hhq1_share' ] = round(prox_sdf_coc.hhq1 /prox_sdf_coc.hhq1.sum() , 2) 
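[Editor's note] The Region, CoCs, and HRAs blocks in this hunk repeat the same computation: group the joined parcels by Service_Level, sum the household and employment columns, convert each to a share of the slice total rounded to two decimals, and tag the result with the run, year, transit layer, and area. A sketch of that shared pattern as a helper; the function is hypothetical and not part of the script:

def summarize_shares(df, group_cols, model_year, us_runid, transit_feature, area):
    sum_cols = ["tothh", "hhq1", "totemp", "RETEMPN", "MWTEMPN"]
    out = df.groupby(group_cols)[sum_cols].sum().reset_index()
    for col in sum_cols:
        # each category's share of this slice's total, rounded as in the script
        out[col + "_share"] = round(out[col] / out[col].sum(), 2)
    out["year"] = str(model_year)
    out["modelrunID"] = us_runid
    out["transit"] = transit_feature
    out["area"] = area
    return out

# e.g. all_prox = all_prox.append(
#     summarize_shares(prox_sdf_ba[prox_sdf_ba.coc == 1], "Service_Level",
#                      model_year, us_runid, transit_feature, "CoCs"))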
- prox_sdf_coc['totemp_share' ] = round(prox_sdf_coc.totemp /prox_sdf_coc.totemp.sum(), 2) - prox_sdf_coc['RETEMPN_share'] = round(prox_sdf_coc.RETEMPN/prox_sdf_coc.RETEMPN.sum(),2) - prox_sdf_coc['MWTEMPN_share'] = round(prox_sdf_coc.MWTEMPN/prox_sdf_coc.MWTEMPN.sum(),2) - prox_sdf_coc['year' ] = str(model_year) - prox_sdf_coc['modelrunID' ] = us_runid - prox_sdf_coc['transit' ] = transit_feature - prox_sdf_coc['area' ] = 'CoCs' + prox_sdf_coc = ( + prox_sdf_coc.groupby("Service_Level") + .agg( + { + "tothh": "sum", + "hhq1": "sum", + "totemp": "sum", + "RETEMPN": "sum", + "MWTEMPN": "sum", + } + ) + .reset_index() + ) + + prox_sdf_coc["tothh_share"] = round( + prox_sdf_coc.tothh / prox_sdf_coc.tothh.sum(), 2 + ) + prox_sdf_coc["hhq1_share"] = round( + prox_sdf_coc.hhq1 / prox_sdf_coc.hhq1.sum(), 2 + ) + prox_sdf_coc["totemp_share"] = round( + prox_sdf_coc.totemp / prox_sdf_coc.totemp.sum(), 2 + ) + prox_sdf_coc["RETEMPN_share"] = round( + prox_sdf_coc.RETEMPN / prox_sdf_coc.RETEMPN.sum(), 2 + ) + prox_sdf_coc["MWTEMPN_share"] = round( + prox_sdf_coc.MWTEMPN / prox_sdf_coc.MWTEMPN.sum(), 2 + ) + prox_sdf_coc["year"] = str(model_year) + prox_sdf_coc["modelrunID"] = us_runid + prox_sdf_coc["transit"] = transit_feature + prox_sdf_coc["area"] = "CoCs" logger.info("prox_sdf_coc:\n{}".format(prox_sdf_coc)) all_prox = all_prox.append(prox_sdf_coc) prox_sdf_hra = prox_sdf_ba[prox_sdf_ba.hra == 1] - prox_sdf_hra = prox_sdf_hra.groupby('Service_Level').agg({'tothh':'sum', 'hhq1':'sum', - 'totemp':'sum', 'RETEMPN':'sum', - 'MWTEMPN':'sum'}).reset_index() - - prox_sdf_hra['tothh_share' ] = round(prox_sdf_hra.tothh /prox_sdf_hra.tothh.sum(), 2) - prox_sdf_hra['hhq1_share' ] = round(prox_sdf_hra.hhq1 /prox_sdf_hra.hhq1.sum() , 2) - prox_sdf_hra['totemp_share' ] = round(prox_sdf_hra.totemp /prox_sdf_hra.totemp.sum(), 2) - prox_sdf_hra['RETEMPN_share'] = round(prox_sdf_hra.RETEMPN/prox_sdf_hra.RETEMPN.sum(),2) - prox_sdf_hra['MWTEMPN_share'] = round(prox_sdf_hra.MWTEMPN/prox_sdf_hra.MWTEMPN.sum(),2) - prox_sdf_hra['year' ] = str(model_year) - prox_sdf_hra['modelrunID' ] = us_runid - prox_sdf_hra['transit' ] = transit_feature - prox_sdf_hra['area' ] = 'HRAs' + prox_sdf_hra = ( + prox_sdf_hra.groupby("Service_Level") + .agg( + { + "tothh": "sum", + "hhq1": "sum", + "totemp": "sum", + "RETEMPN": "sum", + "MWTEMPN": "sum", + } + ) + .reset_index() + ) + + prox_sdf_hra["tothh_share"] = round( + prox_sdf_hra.tothh / prox_sdf_hra.tothh.sum(), 2 + ) + prox_sdf_hra["hhq1_share"] = round( + prox_sdf_hra.hhq1 / prox_sdf_hra.hhq1.sum(), 2 + ) + prox_sdf_hra["totemp_share"] = round( + prox_sdf_hra.totemp / prox_sdf_hra.totemp.sum(), 2 + ) + prox_sdf_hra["RETEMPN_share"] = round( + prox_sdf_hra.RETEMPN / prox_sdf_hra.RETEMPN.sum(), 2 + ) + prox_sdf_hra["MWTEMPN_share"] = round( + prox_sdf_hra.MWTEMPN / prox_sdf_hra.MWTEMPN.sum(), 2 + ) + prox_sdf_hra["year"] = str(model_year) + prox_sdf_hra["modelrunID"] = us_runid + prox_sdf_hra["transit"] = transit_feature + prox_sdf_hra["area"] = "HRAs" logger.info("prox_sdf_hra:\n{}".format(prox_sdf_hra)) all_prox = all_prox.append(prox_sdf_hra) logger.info("all_prox:\n{}".format(all_prox)) - prox_sdf_taz = prox_sdf_ba.groupby(['area_type','Service_Level']).agg({'tothh':'sum', 'hhq1':'sum', - 'totemp':'sum', 'RETEMPN':'sum', - 'MWTEMPN':'sum'}).reset_index() - - prox_sdf_taz['tothh_share' ] = round(prox_sdf_taz.tothh /prox_sdf_taz.tothh.sum(), 2) - prox_sdf_taz['hhq1_share' ] = round(prox_sdf_taz.hhq1 /prox_sdf_taz.hhq1.sum() , 2) - prox_sdf_taz['totemp_share' ] = 
round(prox_sdf_taz.totemp /prox_sdf_taz.totemp.sum(), 2) - prox_sdf_taz['RETEMPN_share'] = round(prox_sdf_taz.RETEMPN/prox_sdf_taz.RETEMPN.sum(),2) - prox_sdf_taz['MWTEMPN_share'] = round(prox_sdf_taz.MWTEMPN/prox_sdf_taz.MWTEMPN.sum(),2) - prox_sdf_taz['year' ] = str(model_year) - prox_sdf_taz['modelrunID' ] = us_runid - prox_sdf_taz['transit' ] = transit_feature + prox_sdf_taz = ( + prox_sdf_ba.groupby(["area_type", "Service_Level"]) + .agg( + { + "tothh": "sum", + "hhq1": "sum", + "totemp": "sum", + "RETEMPN": "sum", + "MWTEMPN": "sum", + } + ) + .reset_index() + ) + + prox_sdf_taz["tothh_share"] = round( + prox_sdf_taz.tothh / prox_sdf_taz.tothh.sum(), 2 + ) + prox_sdf_taz["hhq1_share"] = round( + prox_sdf_taz.hhq1 / prox_sdf_taz.hhq1.sum(), 2 + ) + prox_sdf_taz["totemp_share"] = round( + prox_sdf_taz.totemp / prox_sdf_taz.totemp.sum(), 2 + ) + prox_sdf_taz["RETEMPN_share"] = round( + prox_sdf_taz.RETEMPN / prox_sdf_taz.RETEMPN.sum(), 2 + ) + prox_sdf_taz["MWTEMPN_share"] = round( + prox_sdf_taz.MWTEMPN / prox_sdf_taz.MWTEMPN.sum(), 2 + ) + prox_sdf_taz["year"] = str(model_year) + prox_sdf_taz["modelrunID"] = us_runid + prox_sdf_taz["transit"] = transit_feature logger.info("prox_sdf:\n{}".format(prox_sdf_taz)) taz_prox = taz_prox.append(prox_sdf_taz) logger.info("taz_prox:\n{}".format(taz_prox)) - - # write it - outfile_ba = 'metrics_proximity_{}.csv'.format(NOW) + outfile_ba = "metrics_proximity_{}.csv".format(NOW) logger.info("") - all_prox.to_csv('metrics_proximity_{}.csv'.format(NOW), index=False) + all_prox.to_csv("metrics_proximity_{}.csv".format(NOW), index=False) logger.info("Wrote {}".format(outfile_ba)) - outfile_taz = 'metrics_proximity__taz{}.csv'.format(NOW) + outfile_taz = "metrics_proximity__taz{}.csv".format(NOW) logger.info("") - taz_prox.to_csv('metrics_proximity_taz_{}.csv'.format(NOW), index=False) + taz_prox.to_csv("metrics_proximity_taz_{}.csv".format(NOW), index=False) logger.info("Wrote {}".format(outfile_taz)) - diff --git a/scripts/urbansim_output_post_processing.py b/scripts/urbansim_output_post_processing.py index a965e89..8340cd6 100644 --- a/scripts/urbansim_output_post_processing.py +++ b/scripts/urbansim_output_post_processing.py @@ -1,8 +1,7 @@ - # coding: utf-8 -#The script is used for process geograpy summary files and combine different run results for visualization in Tableau +# The script is used for process geograpy summary files and combine different run results for visualization in Tableau import pandas as pd import numpy as np import os @@ -10,700 +9,927 @@ from functools import reduce -#PBA40 folder -PBA40_DIR = os.path.join(os.environ["USERPROFILE"], - "Box/Modeling and Surveys/Share Data/plan-bay-area-2040/RTP17 UrbanSim Output/r7224c") +# PBA40 folder +PBA40_DIR = os.path.join( + os.environ["USERPROFILE"], + "Box/Modeling and Surveys/Share Data/plan-bay-area-2040/RTP17 UrbanSim Output/r7224c", +) # The location of Urbansim outputs -URBANSIM_OUTPUT_BOX_DIR = os.path.join(os.environ["USERPROFILE"], - "Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50") +URBANSIM_OUTPUT_BOX_DIR = os.path.join( + os.environ["USERPROFILE"], + "Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim/PBA50", +) # Final draft blueprint output -DBP_DIR = "Draft Blueprint runs/Blueprint Plus Crossing (s23)/v1.7.1- FINAL DRAFT BLUEPRINT" +DBP_DIR = ( + "Draft Blueprint runs/Blueprint Plus Crossing (s23)/v1.7.1- FINAL DRAFT BLUEPRINT" +) # Add new runs here: for comparison -- using v1.8 as a placeholder for now -FBP_v3 = "Final Blueprint runs/Final 
Blueprint (s24)/BAUS v2.2.1 (growth summary updates)" -FBP_v4 = 'Final Blueprint runs/Final Blueprint (s24)/BAUS v2.3.1 (devproj updates)' -FBP_v5 = 'Final Blueprint runs/Final Blueprint (s24)/BAUS v2.4' -FBP_v6 = 'Final Blueprint runs/Final Blueprint (s24)/BAUS v2.5' -FBP_v7 = 'Final Blueprint runs/Final Blueprint (s24)/BAUS v2.6.1 (growth summary updates)' -FBP_v8 = 'Final Blueprint runs/Final Blueprint (s24)/BAUS v2.7' -FBP_v9 = 'Final Blueprint runs/Final Blueprint (s24)/BAUS v2.8' -FBP_v10 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.8.1 (parcels geography update)" -FBP_v11 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.9" -FBP_v12 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.10" -FBP_v13 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.11" -FBP_v14 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.12" -FBP_v15 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.15" -FBP_v16 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.16" -FBP_v19 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.19" -FBP_v20 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.20.1 (adds project to devproj)" -FBP_v23 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.23" -FBP_v24 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.24" -FBP_v25 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.25" -FBP_v26 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.26" +FBP_v3 = ( + "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.2.1 (growth summary updates)" +) +FBP_v4 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.3.1 (devproj updates)" +FBP_v5 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.4" +FBP_v6 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.5" +FBP_v7 = ( + "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.6.1 (growth summary updates)" +) +FBP_v8 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.7" +FBP_v9 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.8" +FBP_v10 = ( + "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.8.1 (parcels geography update)" +) +FBP_v11 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.9" +FBP_v12 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.10" +FBP_v13 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.11" +FBP_v14 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.12" +FBP_v15 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.15" +FBP_v16 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.16" +FBP_v19 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.19" +FBP_v20 = ( + "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.20.1 (adds project to devproj)" +) +FBP_v23 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.23" +FBP_v24 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.24" +FBP_v25 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.25" +FBP_v26 = "Final Blueprint runs/Final Blueprint (s24)/BAUS v2.26" # A list of paths of runs, which would be read and produce summaries altogether -PATH_LIST = [PBA40_DIR, DBP_DIR, FBP_v25, FBP_v26] # ---Add new run paths to this list--- -PATH_LIST_PARCEL = [DBP_DIR, FBP_v25, FBP_v26] - -#Visualization folder -VIZ = "Visualizations" - -#Output will into this workbook -OUTPUT_FILE = os.path.join(URBANSIM_OUTPUT_BOX_DIR, VIZ, - "PBA50_growth_{}_allruns.csv") - -#Parcel_geography -PARCEL_GEO_DIR = "Current PBA50 Large General Input Data/2020_10_27_parcels_geography.csv" - -juris_to_county = {'alameda' : 'Alameda', -'albany' : 'Alameda', -'american_canyon' : 'Napa', -'antioch' : 'Contra Costa', -'atherton' : 'San 
Mateo', -'belmont' : 'San Mateo', -'belvedere' : 'Marin', -'benicia' : 'Solano', -'berkeley' : 'Alameda', -'brentwood' : 'Contra Costa', -'brisbane' : 'San Mateo', -'burlingame' : 'San Mateo', -'calistoga' : 'Napa', -'campbell' : 'Santa Clara', -'clayton' : 'Contra Costa', -'cloverdale' : 'Sonoma', -'colma' : 'San Mateo', -'concord' : 'Contra Costa', -'corte_madera' : 'Marin', -'cotati' : 'Sonoma', -'cupertino' : 'Santa Clara', -'daly_city' : 'San Mateo', -'danville' : 'Contra Costa', -'dixon' : 'Solano', -'dublin' : 'Alameda', -'east_palo_alto' : 'San Mateo', -'el_cerrito' : 'Contra Costa', -'emeryville' : 'Alameda', -'fairfax' : 'Marin', -'fairfield' : 'Solano', -'foster_city' : 'San Mateo', -'fremont' : 'Alameda', -'gilroy' : 'Santa Clara', -'half_moon_bay' : 'San Mateo', -'hayward' : 'Alameda', -'healdsburg' : 'Sonoma', -'hercules' : 'Contra Costa', -'hillsborough' : 'San Mateo', -'lafayette' : 'Contra Costa', -'larkspur' : 'Marin', -'livermore' : 'Alameda', -'los_altos' : 'Santa Clara', -'los_altos_hills' : 'Santa Clara', -'los_gatos' : 'Santa Clara', -'martinez' : 'Contra Costa', -'menlo_park' : 'San Mateo', -'millbrae' : 'San Mateo', -'mill_valley' : 'Marin', -'milpitas' : 'Santa Clara', -'monte_sereno' : 'Santa Clara', -'moraga' : 'Contra Costa', -'morgan_hill' : 'Santa Clara', -'mountain_view' : 'Santa Clara', -'napa' : 'Napa', -'newark' : 'Alameda', -'novato' : 'Marin', -'oakland' : 'Alameda', -'oakley' : 'Contra Costa', -'orinda' : 'Contra Costa', -'pacifica' : 'San Mateo', -'palo_alto' : 'Santa Clara', -'petaluma' : 'Sonoma', -'piedmont' : 'Alameda', -'pinole' : 'Contra Costa', -'pittsburg' : 'Contra Costa', -'pleasant_hill' : 'Contra Costa', -'pleasanton' : 'Alameda', -'portola_valley' : 'San Mateo', -'redwood_city' : 'San Mateo', -'richmond' : 'Contra Costa', -'rio_vista' : 'Solano', -'rohnert_park' : 'Sonoma', -'ross' : 'Marin', -'st_helena' : 'Napa', -'san_anselmo' : 'Marin', -'san_bruno' : 'San Mateo', -'san_carlos' : 'San Mateo', -'san_francisco' : 'San Francisco', -'san_jose' : 'Santa Clara', -'san_leandro' : 'Alameda', -'san_mateo' : 'San Mateo', -'san_pablo' : 'Contra Costa', -'san_rafael' : 'Marin', -'san_ramon' : 'Contra Costa', -'santa_clara' : 'Santa Clara', -'santa_rosa' : 'Sonoma', -'saratoga' : 'Santa Clara', -'sausalito' : 'Marin', -'sebastopol' : 'Sonoma', -'sonoma' : 'Sonoma', -'south_san_francisco' : 'San Mateo', -'suisun_city' : 'Solano', -'sunnyvale' : 'Santa Clara', -'tiburon' : 'Marin', -'union_city' : 'Alameda', -'vacaville' : 'Solano', -'vallejo' : 'Solano', -'walnut_creek' : 'Contra Costa', -'windsor' : 'Sonoma', -'woodside' : 'San Mateo', -'yountville' : 'Napa', -'unincorporated_alameda' : "Alameda", -'unincorporated_contra_costa' : "Contra Costa", -'unincorporated_marin' : "Marin", -'unincorporated_napa' : "Napa", -'unincorporated_san_francisco' : "San Francisco", -'unincorporated_san_mateo' : "San Mateo", -'unincorporated_santa_clara' : "Santa Clara", -'unincorporated_solano' : "Solano", -'unincorporated_sonoma' : "Sonoma"} - -cn_to_county = {1 : 'San Francisco', - 2 : 'San Mateo', - 3 : 'Santa Clara', - 4 : 'Alameda', - 5 : 'Contra Costa', - 6 : 'Solano', - 7 : 'Napa', - 8 : 'Sonoma', - 9 : 'Marin'} - -sd_mapping = {1 : 'Greater Downtown San Francisco', - 2 : 'San Francisco Richmond District', - 3 : 'San Francisco Mission District', - 4 : 'San Francisco Sunset District', - 5 : 'Daly City and San Bruno', - 6 : 'San Mateo and Burlingame', - 7 : 'Redwood City and Menlo Park', - 8 : 'Palo Alto and Los Altos', - 9 : 'Sunnyvale and Mountain View', - 10 
: 'Cupertino and Saratoga', - 11 : 'Central San Jose', - 12 : 'Milpitas and East San Jose', - 13 : 'South San Jose', - 14 : 'Gilroy and Morgan Hill', - 15 : 'Livermore and Pleasanton', - 16 : 'Fremont and Union City', - 17 : 'Hayward and San Leandro', - 18 : 'Oakland and Alameda', - 19 : 'Berkeley and Albany', - 20 : 'Richmond and El Cerrito', - 21 : 'Concord and Martinez', - 22 : 'Walnut Creek', - 23 : 'Danville and San Ramon', - 24 : 'Antioch and Pittsburg', - 25 : 'Vallejo and Benicia', - 26 : 'Fairfield and Vacaville', - 27 : 'Napa', - 28 : 'St Helena', - 29 : 'Petaluma and Rohnert Park', - 30 : 'Santa Rosa and Sebastopol', - 31 : 'Healdsburg and cloverdale', - 32 : 'Novato', - 33 : 'San Rafael', - 34 : 'Mill Valley and Sausalito'} - -#calculate growth at the regional level for main variables using taz summaries +PATH_LIST = [ + PBA40_DIR, + DBP_DIR, + FBP_v25, + FBP_v26, +] # ---Add new run paths to this list--- +PATH_LIST_PARCEL = [DBP_DIR, FBP_v25, FBP_v26] + +# Visualization folder +VIZ = "Visualizations" + +# Output will into this workbook +OUTPUT_FILE = os.path.join(URBANSIM_OUTPUT_BOX_DIR, VIZ, "PBA50_growth_{}_allruns.csv") + +# Parcel_geography +PARCEL_GEO_DIR = ( + "Current PBA50 Large General Input Data/2020_10_27_parcels_geography.csv" +) + +juris_to_county = { + "alameda": "Alameda", + "albany": "Alameda", + "american_canyon": "Napa", + "antioch": "Contra Costa", + "atherton": "San Mateo", + "belmont": "San Mateo", + "belvedere": "Marin", + "benicia": "Solano", + "berkeley": "Alameda", + "brentwood": "Contra Costa", + "brisbane": "San Mateo", + "burlingame": "San Mateo", + "calistoga": "Napa", + "campbell": "Santa Clara", + "clayton": "Contra Costa", + "cloverdale": "Sonoma", + "colma": "San Mateo", + "concord": "Contra Costa", + "corte_madera": "Marin", + "cotati": "Sonoma", + "cupertino": "Santa Clara", + "daly_city": "San Mateo", + "danville": "Contra Costa", + "dixon": "Solano", + "dublin": "Alameda", + "east_palo_alto": "San Mateo", + "el_cerrito": "Contra Costa", + "emeryville": "Alameda", + "fairfax": "Marin", + "fairfield": "Solano", + "foster_city": "San Mateo", + "fremont": "Alameda", + "gilroy": "Santa Clara", + "half_moon_bay": "San Mateo", + "hayward": "Alameda", + "healdsburg": "Sonoma", + "hercules": "Contra Costa", + "hillsborough": "San Mateo", + "lafayette": "Contra Costa", + "larkspur": "Marin", + "livermore": "Alameda", + "los_altos": "Santa Clara", + "los_altos_hills": "Santa Clara", + "los_gatos": "Santa Clara", + "martinez": "Contra Costa", + "menlo_park": "San Mateo", + "millbrae": "San Mateo", + "mill_valley": "Marin", + "milpitas": "Santa Clara", + "monte_sereno": "Santa Clara", + "moraga": "Contra Costa", + "morgan_hill": "Santa Clara", + "mountain_view": "Santa Clara", + "napa": "Napa", + "newark": "Alameda", + "novato": "Marin", + "oakland": "Alameda", + "oakley": "Contra Costa", + "orinda": "Contra Costa", + "pacifica": "San Mateo", + "palo_alto": "Santa Clara", + "petaluma": "Sonoma", + "piedmont": "Alameda", + "pinole": "Contra Costa", + "pittsburg": "Contra Costa", + "pleasant_hill": "Contra Costa", + "pleasanton": "Alameda", + "portola_valley": "San Mateo", + "redwood_city": "San Mateo", + "richmond": "Contra Costa", + "rio_vista": "Solano", + "rohnert_park": "Sonoma", + "ross": "Marin", + "st_helena": "Napa", + "san_anselmo": "Marin", + "san_bruno": "San Mateo", + "san_carlos": "San Mateo", + "san_francisco": "San Francisco", + "san_jose": "Santa Clara", + "san_leandro": "Alameda", + "san_mateo": "San Mateo", + "san_pablo": "Contra 
Costa", + "san_rafael": "Marin", + "san_ramon": "Contra Costa", + "santa_clara": "Santa Clara", + "santa_rosa": "Sonoma", + "saratoga": "Santa Clara", + "sausalito": "Marin", + "sebastopol": "Sonoma", + "sonoma": "Sonoma", + "south_san_francisco": "San Mateo", + "suisun_city": "Solano", + "sunnyvale": "Santa Clara", + "tiburon": "Marin", + "union_city": "Alameda", + "vacaville": "Solano", + "vallejo": "Solano", + "walnut_creek": "Contra Costa", + "windsor": "Sonoma", + "woodside": "San Mateo", + "yountville": "Napa", + "unincorporated_alameda": "Alameda", + "unincorporated_contra_costa": "Contra Costa", + "unincorporated_marin": "Marin", + "unincorporated_napa": "Napa", + "unincorporated_san_francisco": "San Francisco", + "unincorporated_san_mateo": "San Mateo", + "unincorporated_santa_clara": "Santa Clara", + "unincorporated_solano": "Solano", + "unincorporated_sonoma": "Sonoma", +} + +cn_to_county = { + 1: "San Francisco", + 2: "San Mateo", + 3: "Santa Clara", + 4: "Alameda", + 5: "Contra Costa", + 6: "Solano", + 7: "Napa", + 8: "Sonoma", + 9: "Marin", +} + +sd_mapping = { + 1: "Greater Downtown San Francisco", + 2: "San Francisco Richmond District", + 3: "San Francisco Mission District", + 4: "San Francisco Sunset District", + 5: "Daly City and San Bruno", + 6: "San Mateo and Burlingame", + 7: "Redwood City and Menlo Park", + 8: "Palo Alto and Los Altos", + 9: "Sunnyvale and Mountain View", + 10: "Cupertino and Saratoga", + 11: "Central San Jose", + 12: "Milpitas and East San Jose", + 13: "South San Jose", + 14: "Gilroy and Morgan Hill", + 15: "Livermore and Pleasanton", + 16: "Fremont and Union City", + 17: "Hayward and San Leandro", + 18: "Oakland and Alameda", + 19: "Berkeley and Albany", + 20: "Richmond and El Cerrito", + 21: "Concord and Martinez", + 22: "Walnut Creek", + 23: "Danville and San Ramon", + 24: "Antioch and Pittsburg", + 25: "Vallejo and Benicia", + 26: "Fairfield and Vacaville", + 27: "Napa", + 28: "St Helena", + 29: "Petaluma and Rohnert Park", + 30: "Santa Rosa and Sebastopol", + 31: "Healdsburg and cloverdale", + 32: "Novato", + 33: "San Rafael", + 34: "Mill Valley and Sausalito", +} + +# calculate growth at the regional level for main variables using taz summaries def county_calculator(DF1, DF2): - if ('zone_id' in DF1.columns) & ('zone_id' in DF2.columns): - DF1.rename(columns={'zone_id': 'TAZ'}, inplace=True) - DF2.rename(columns={'zone_id': 'TAZ'}, inplace=True) - - if ('TAZ' in DF1.columns) & ('TAZ' in DF2.columns): - DF_merge = DF1.merge(DF2, on = 'TAZ').fillna(0) - DF_merge['TOTPOP GROWTH'] = DF_merge['TOTPOP_y']-DF_merge['TOTPOP_x'] - DF_merge['TOTEMP GROWTH'] = DF_merge['TOTEMP_y']-DF_merge['TOTEMP_x'] - DF_merge['AGREMPN GROWTH'] = DF_merge['AGREMPN_y']-DF_merge['AGREMPN_x'] - DF_merge['FPSEMPN GROWTH'] = DF_merge['FPSEMPN_y']-DF_merge['FPSEMPN_x'] - DF_merge['HEREMPN GROWTH'] = DF_merge['HEREMPN_y']-DF_merge['HEREMPN_x'] - DF_merge['MWTEMPN GROWTH'] = DF_merge['MWTEMPN_y']-DF_merge['MWTEMPN_x'] - DF_merge['OTHEMPN GROWTH'] = DF_merge['OTHEMPN_y']-DF_merge['OTHEMPN_x'] - DF_merge['RETEMPN GROWTH'] = DF_merge['RETEMPN_y']-DF_merge['RETEMPN_x'] - DF_merge['TOTHH GROWTH'] = DF_merge['TOTHH_y']-DF_merge['TOTHH_x'] - DF_merge['HHINCQ1 GROWTH'] = DF_merge['HHINCQ1_y']-DF_merge['HHINCQ1_x'] - DF_merge['HHINCQ2 GROWTH'] = DF_merge['HHINCQ2_y']-DF_merge['HHINCQ2_x'] - DF_merge['HHINCQ3 GROWTH'] = DF_merge['HHINCQ3_y']-DF_merge['HHINCQ3_x'] - DF_merge['HHINCQ4 GROWTH'] = DF_merge['HHINCQ4_y']-DF_merge['HHINCQ4_x'] - - if 'COUNTY_x' in DF_merge.columns: - 
DF_merge['COUNTY_NAME_x'] = DF_merge['COUNTY_x'].map(cn_to_county) - DF_CO_GRWTH = DF_merge.groupby(['COUNTY_NAME_x']).sum().reset_index() - if 'COUNTY_NAME_x' in DF_merge.columns: - DF_CO_GRWTH = DF_merge.groupby(['COUNTY_NAME_x']).sum().reset_index() - - DF_CO_GRWTH['TOTEMP GROWTH SHR'] = DF_CO_GRWTH['TOTEMP GROWTH']/(DF_CO_GRWTH['TOTEMP_y'].sum()-DF_CO_GRWTH['TOTEMP_x'].sum()) - DF_CO_GRWTH['TOTHH GROWTH SHR'] = DF_CO_GRWTH['TOTHH GROWTH']/(DF_CO_GRWTH['TOTHH_y'].sum()-DF_CO_GRWTH['TOTHH_x'].sum()) - - DF_CO_GRWTH['TOTEMP PCT GROWTH'] = DF_CO_GRWTH['TOTEMP_y']/DF_CO_GRWTH['TOTEMP_x']-1 - DF_CO_GRWTH['TOTHH PCT GROWTH'] = DF_CO_GRWTH['TOTHH_y']/DF_CO_GRWTH['TOTHH_x']-1 - - DF_CO_GRWTH['TOTEMP SHR CHNG'] = DF_CO_GRWTH['TOTEMP_y']/DF_CO_GRWTH['TOTEMP_y'].sum()-DF_CO_GRWTH['TOTEMP_x']/DF_CO_GRWTH['TOTEMP_x'].sum() - DF_CO_GRWTH['TOTHH SHR CHNG'] = DF_CO_GRWTH['TOTHH_y']/DF_CO_GRWTH['TOTHH_y'].sum()-DF_CO_GRWTH['TOTHH_x']/DF_CO_GRWTH['TOTHH_x'].sum() - - DF_COLUMNS = ['COUNTY_NAME_x', - 'TOTPOP GROWTH', - 'TOTEMP GROWTH', - 'AGREMPN GROWTH', - 'FPSEMPN GROWTH', - 'HEREMPN GROWTH', - 'MWTEMPN GROWTH', - 'OTHEMPN GROWTH', - 'RETEMPN GROWTH', - 'TOTHH GROWTH', - 'HHINCQ1 GROWTH', - 'HHINCQ2 GROWTH', - 'HHINCQ3 GROWTH', - 'HHINCQ4 GROWTH', - 'TOTEMP GROWTH SHR', - 'TOTHH GROWTH SHR', - 'TOTEMP PCT GROWTH', - 'TOTHH PCT GROWTH', - 'TOTEMP SHR CHNG', - 'TOTHH SHR CHNG'] + if ("zone_id" in DF1.columns) & ("zone_id" in DF2.columns): + DF1.rename(columns={"zone_id": "TAZ"}, inplace=True) + DF2.rename(columns={"zone_id": "TAZ"}, inplace=True) + + if ("TAZ" in DF1.columns) & ("TAZ" in DF2.columns): + DF_merge = DF1.merge(DF2, on="TAZ").fillna(0) + DF_merge["TOTPOP GROWTH"] = DF_merge["TOTPOP_y"] - DF_merge["TOTPOP_x"] + DF_merge["TOTEMP GROWTH"] = DF_merge["TOTEMP_y"] - DF_merge["TOTEMP_x"] + DF_merge["AGREMPN GROWTH"] = DF_merge["AGREMPN_y"] - DF_merge["AGREMPN_x"] + DF_merge["FPSEMPN GROWTH"] = DF_merge["FPSEMPN_y"] - DF_merge["FPSEMPN_x"] + DF_merge["HEREMPN GROWTH"] = DF_merge["HEREMPN_y"] - DF_merge["HEREMPN_x"] + DF_merge["MWTEMPN GROWTH"] = DF_merge["MWTEMPN_y"] - DF_merge["MWTEMPN_x"] + DF_merge["OTHEMPN GROWTH"] = DF_merge["OTHEMPN_y"] - DF_merge["OTHEMPN_x"] + DF_merge["RETEMPN GROWTH"] = DF_merge["RETEMPN_y"] - DF_merge["RETEMPN_x"] + DF_merge["TOTHH GROWTH"] = DF_merge["TOTHH_y"] - DF_merge["TOTHH_x"] + DF_merge["HHINCQ1 GROWTH"] = DF_merge["HHINCQ1_y"] - DF_merge["HHINCQ1_x"] + DF_merge["HHINCQ2 GROWTH"] = DF_merge["HHINCQ2_y"] - DF_merge["HHINCQ2_x"] + DF_merge["HHINCQ3 GROWTH"] = DF_merge["HHINCQ3_y"] - DF_merge["HHINCQ3_x"] + DF_merge["HHINCQ4 GROWTH"] = DF_merge["HHINCQ4_y"] - DF_merge["HHINCQ4_x"] + + if "COUNTY_x" in DF_merge.columns: + DF_merge["COUNTY_NAME_x"] = DF_merge["COUNTY_x"].map(cn_to_county) + DF_CO_GRWTH = DF_merge.groupby(["COUNTY_NAME_x"]).sum().reset_index() + if "COUNTY_NAME_x" in DF_merge.columns: + DF_CO_GRWTH = DF_merge.groupby(["COUNTY_NAME_x"]).sum().reset_index() + + DF_CO_GRWTH["TOTEMP GROWTH SHR"] = DF_CO_GRWTH["TOTEMP GROWTH"] / ( + DF_CO_GRWTH["TOTEMP_y"].sum() - DF_CO_GRWTH["TOTEMP_x"].sum() + ) + DF_CO_GRWTH["TOTHH GROWTH SHR"] = DF_CO_GRWTH["TOTHH GROWTH"] / ( + DF_CO_GRWTH["TOTHH_y"].sum() - DF_CO_GRWTH["TOTHH_x"].sum() + ) + + DF_CO_GRWTH["TOTEMP PCT GROWTH"] = ( + DF_CO_GRWTH["TOTEMP_y"] / DF_CO_GRWTH["TOTEMP_x"] - 1 + ) + DF_CO_GRWTH["TOTHH PCT GROWTH"] = ( + DF_CO_GRWTH["TOTHH_y"] / DF_CO_GRWTH["TOTHH_x"] - 1 + ) + + DF_CO_GRWTH["TOTEMP SHR CHNG"] = ( + DF_CO_GRWTH["TOTEMP_y"] / DF_CO_GRWTH["TOTEMP_y"].sum() + - DF_CO_GRWTH["TOTEMP_x"] / 
DF_CO_GRWTH["TOTEMP_x"].sum() + ) + DF_CO_GRWTH["TOTHH SHR CHNG"] = ( + DF_CO_GRWTH["TOTHH_y"] / DF_CO_GRWTH["TOTHH_y"].sum() + - DF_CO_GRWTH["TOTHH_x"] / DF_CO_GRWTH["TOTHH_x"].sum() + ) + + DF_COLUMNS = [ + "COUNTY_NAME_x", + "TOTPOP GROWTH", + "TOTEMP GROWTH", + "AGREMPN GROWTH", + "FPSEMPN GROWTH", + "HEREMPN GROWTH", + "MWTEMPN GROWTH", + "OTHEMPN GROWTH", + "RETEMPN GROWTH", + "TOTHH GROWTH", + "HHINCQ1 GROWTH", + "HHINCQ2 GROWTH", + "HHINCQ3 GROWTH", + "HHINCQ4 GROWTH", + "TOTEMP GROWTH SHR", + "TOTHH GROWTH SHR", + "TOTEMP PCT GROWTH", + "TOTHH PCT GROWTH", + "TOTEMP SHR CHNG", + "TOTHH SHR CHNG", + ] DF_CO_GRWTH = DF_CO_GRWTH[DF_COLUMNS].copy() - DF_CO_GRWTH = DF_CO_GRWTH.rename(columns={'COUNTY_NAME_x': 'county'}) + DF_CO_GRWTH = DF_CO_GRWTH.rename(columns={"COUNTY_NAME_x": "county"}) return DF_CO_GRWTH else: - print ('Merge cannot be performed') + print("Merge cannot be performed") + -#calculate the growth between 2015 and 2050 for taz summaries +# calculate the growth between 2015 and 2050 for taz summaries def taz_calculator(DF1, DF2): - #PBA40 has a couple of different columns - if ('total_residential_units' in DF1.columns) & ('total_residential_units' in DF2.columns): - DF1.rename(columns={'total_residential_units': 'RES_UNITS'}, inplace=True) - DF2.rename(columns={'total_residential_units': 'RES_UNITS'}, inplace=True) - - if ('zone_id' in DF1.columns) & ('zone_id' in DF2.columns): - DF1.rename(columns={'zone_id': 'TAZ'}, inplace=True) - DF2.rename(columns={'zone_id': 'TAZ'}, inplace=True) - - if ('TAZ' in DF1.columns) & ('TAZ' in DF2.columns): - DF_merge = DF1.merge(DF2, on = 'TAZ').fillna(0) - DF_merge['AGREMPN GROWTH'] = DF_merge['AGREMPN_y']-DF_merge['AGREMPN_x'] - DF_merge['FPSEMPN GROWTH'] = DF_merge['FPSEMPN_y']-DF_merge['FPSEMPN_x'] - DF_merge['HEREMPN GROWTH'] = DF_merge['HEREMPN_y']-DF_merge['HEREMPN_x'] - DF_merge['MWTEMPN GROWTH'] = DF_merge['MWTEMPN_y']-DF_merge['MWTEMPN_x'] - DF_merge['OTHEMPN GROWTH'] = DF_merge['OTHEMPN_y']-DF_merge['OTHEMPN_x'] - DF_merge['RETEMPN GROWTH'] = DF_merge['RETEMPN_y']-DF_merge['RETEMPN_x'] - DF_merge['TOTEMP GROWTH'] = DF_merge['TOTEMP_y']-DF_merge['TOTEMP_x'] - DF_merge['HHINCQ1 GROWTH'] = DF_merge['HHINCQ1_y']-DF_merge['HHINCQ1_x'] - DF_merge['HHINCQ2 GROWTH'] = DF_merge['HHINCQ2_y']-DF_merge['HHINCQ2_x'] - DF_merge['HHINCQ3 GROWTH'] = DF_merge['HHINCQ3_y']-DF_merge['HHINCQ3_x'] - DF_merge['HHINCQ4 GROWTH'] = DF_merge['HHINCQ4_y']-DF_merge['HHINCQ4_x'] - DF_merge['TOTHH GROWTH'] = DF_merge['TOTHH_y']-DF_merge['TOTHH_x'] - DF_merge['TOTPOP GROWTH'] = DF_merge['TOTPOP_y']-DF_merge['TOTPOP_x'] - DF_merge['RES_UNITS GROWTH'] = DF_merge['RES_UNITS_y']-DF_merge['RES_UNITS_x'] - DF_merge['MFDU GROWTH'] = DF_merge['MFDU_y']-DF_merge['MFDU_x'] - DF_merge['SFDU GROWTH'] = DF_merge['SFDU_y']-DF_merge['SFDU_x'] - - DF_merge['TOTEMP GROWTH SHR'] = DF_merge['TOTEMP GROWTH']/(DF_merge['TOTEMP_y'].sum()/DF_merge['TOTEMP_x'].sum()) - DF_merge['TOTHH GROWTH SHR'] = DF_merge['TOTHH GROWTH']/(DF_merge['TOTHH_y'].sum()/DF_merge['TOTHH_x'].sum()) - - DF_merge['TOTEMP PCT GROWTH'] = DF_merge['TOTEMP_y']/DF_merge['TOTEMP_x']-1 - DF_merge['TOTHH PCT GROWTH'] = DF_merge['TOTHH_y']/DF_merge['TOTHH_x']-1 - - DF_merge['TOTEMP SHR CHNG'] = DF_merge['TOTEMP_y']/DF_merge['TOTEMP_y'].sum()-DF_merge['TOTEMP_x']/DF_merge['TOTEMP_x'].sum() - DF_merge['TOTHH SHR CHNG'] = DF_merge['TOTHH_y']/DF_merge['TOTHH_y'].sum()-DF_merge['TOTHH_x']/DF_merge['TOTHH_x'].sum() - - TAZ_DF_COLUMNS = ['TAZ', - 'SD_x', - 'ZONE_x', - 'COUNTY_x', - 'AGREMPN GROWTH', - 'FPSEMPN GROWTH', 
- 'HEREMPN GROWTH', - 'MWTEMPN GROWTH', - 'OTHEMPN GROWTH', - 'RETEMPN GROWTH', - 'TOTEMP GROWTH', - 'HHINCQ1 GROWTH', - 'HHINCQ2 GROWTH', - 'HHINCQ3 GROWTH', - 'HHINCQ4 GROWTH', - 'TOTHH GROWTH', - 'TOTPOP GROWTH', - 'RES_UNITS GROWTH', - 'MFDU GROWTH', - 'TOTEMP GROWTH SHR', - 'TOTHH GROWTH SHR', - 'TOTEMP PCT GROWTH', - 'TOTHH PCT GROWTH', - 'TOTEMP SHR CHNG', - 'TOTHH SHR CHNG'] - + # PBA40 has a couple of different columns + if ("total_residential_units" in DF1.columns) & ( + "total_residential_units" in DF2.columns + ): + DF1.rename(columns={"total_residential_units": "RES_UNITS"}, inplace=True) + DF2.rename(columns={"total_residential_units": "RES_UNITS"}, inplace=True) + + if ("zone_id" in DF1.columns) & ("zone_id" in DF2.columns): + DF1.rename(columns={"zone_id": "TAZ"}, inplace=True) + DF2.rename(columns={"zone_id": "TAZ"}, inplace=True) + + if ("TAZ" in DF1.columns) & ("TAZ" in DF2.columns): + DF_merge = DF1.merge(DF2, on="TAZ").fillna(0) + DF_merge["AGREMPN GROWTH"] = DF_merge["AGREMPN_y"] - DF_merge["AGREMPN_x"] + DF_merge["FPSEMPN GROWTH"] = DF_merge["FPSEMPN_y"] - DF_merge["FPSEMPN_x"] + DF_merge["HEREMPN GROWTH"] = DF_merge["HEREMPN_y"] - DF_merge["HEREMPN_x"] + DF_merge["MWTEMPN GROWTH"] = DF_merge["MWTEMPN_y"] - DF_merge["MWTEMPN_x"] + DF_merge["OTHEMPN GROWTH"] = DF_merge["OTHEMPN_y"] - DF_merge["OTHEMPN_x"] + DF_merge["RETEMPN GROWTH"] = DF_merge["RETEMPN_y"] - DF_merge["RETEMPN_x"] + DF_merge["TOTEMP GROWTH"] = DF_merge["TOTEMP_y"] - DF_merge["TOTEMP_x"] + DF_merge["HHINCQ1 GROWTH"] = DF_merge["HHINCQ1_y"] - DF_merge["HHINCQ1_x"] + DF_merge["HHINCQ2 GROWTH"] = DF_merge["HHINCQ2_y"] - DF_merge["HHINCQ2_x"] + DF_merge["HHINCQ3 GROWTH"] = DF_merge["HHINCQ3_y"] - DF_merge["HHINCQ3_x"] + DF_merge["HHINCQ4 GROWTH"] = DF_merge["HHINCQ4_y"] - DF_merge["HHINCQ4_x"] + DF_merge["TOTHH GROWTH"] = DF_merge["TOTHH_y"] - DF_merge["TOTHH_x"] + DF_merge["TOTPOP GROWTH"] = DF_merge["TOTPOP_y"] - DF_merge["TOTPOP_x"] + DF_merge["RES_UNITS GROWTH"] = DF_merge["RES_UNITS_y"] - DF_merge["RES_UNITS_x"] + DF_merge["MFDU GROWTH"] = DF_merge["MFDU_y"] - DF_merge["MFDU_x"] + DF_merge["SFDU GROWTH"] = DF_merge["SFDU_y"] - DF_merge["SFDU_x"] + + DF_merge["TOTEMP GROWTH SHR"] = DF_merge["TOTEMP GROWTH"] / ( + DF_merge["TOTEMP_y"].sum() / DF_merge["TOTEMP_x"].sum() + ) + DF_merge["TOTHH GROWTH SHR"] = DF_merge["TOTHH GROWTH"] / ( + DF_merge["TOTHH_y"].sum() / DF_merge["TOTHH_x"].sum() + ) + + DF_merge["TOTEMP PCT GROWTH"] = DF_merge["TOTEMP_y"] / DF_merge["TOTEMP_x"] - 1 + DF_merge["TOTHH PCT GROWTH"] = DF_merge["TOTHH_y"] / DF_merge["TOTHH_x"] - 1 + + DF_merge["TOTEMP SHR CHNG"] = ( + DF_merge["TOTEMP_y"] / DF_merge["TOTEMP_y"].sum() + - DF_merge["TOTEMP_x"] / DF_merge["TOTEMP_x"].sum() + ) + DF_merge["TOTHH SHR CHNG"] = ( + DF_merge["TOTHH_y"] / DF_merge["TOTHH_y"].sum() + - DF_merge["TOTHH_x"] / DF_merge["TOTHH_x"].sum() + ) + + TAZ_DF_COLUMNS = [ + "TAZ", + "SD_x", + "ZONE_x", + "COUNTY_x", + "AGREMPN GROWTH", + "FPSEMPN GROWTH", + "HEREMPN GROWTH", + "MWTEMPN GROWTH", + "OTHEMPN GROWTH", + "RETEMPN GROWTH", + "TOTEMP GROWTH", + "HHINCQ1 GROWTH", + "HHINCQ2 GROWTH", + "HHINCQ3 GROWTH", + "HHINCQ4 GROWTH", + "TOTHH GROWTH", + "TOTPOP GROWTH", + "RES_UNITS GROWTH", + "MFDU GROWTH", + "TOTEMP GROWTH SHR", + "TOTHH GROWTH SHR", + "TOTEMP PCT GROWTH", + "TOTHH PCT GROWTH", + "TOTEMP SHR CHNG", + "TOTHH SHR CHNG", + ] + DF_TAZ_GROWTH = DF_merge[TAZ_DF_COLUMNS].copy() - DF_TAZ_GROWTH = DF_TAZ_GROWTH.rename(columns={'SD_x': 'SD', 'ZONE_x': 'ZONE', 'COUNTY_x': 'COUNTY'}) - DF_TAZ_GROWTH['SD_NAME'] = 
DF_TAZ_GROWTH['SD'].map(sd_mapping)
-    DF_TAZ_GROWTH['CNTY_NAME'] = DF_TAZ_GROWTH['COUNTY'].map(cn_to_county)
+    DF_TAZ_GROWTH = DF_TAZ_GROWTH.rename(
+        columns={"SD_x": "SD", "ZONE_x": "ZONE", "COUNTY_x": "COUNTY"}
+    )
+    DF_TAZ_GROWTH["SD_NAME"] = DF_TAZ_GROWTH["SD"].map(sd_mapping)
+    DF_TAZ_GROWTH["CNTY_NAME"] = DF_TAZ_GROWTH["COUNTY"].map(cn_to_county)
         return DF_TAZ_GROWTH
     else:
-        print ('Merge cannot be performed')
+        print("Merge cannot be performed")

-#A separate calculator for juris, pda, and superdistrct summaries, because they have different columns
+
+# A separate calculator for juris, pda, and superdistrict summaries, because they have different columns
 def nontaz_calculator(DF1, DF2):
-    DF_COLUMNS = ['agrempn growth',
-                  'fpsempn growth',
-                  'herempn growth',
-                  'mwtempn growth',
-                  'othempn growth',
-                  'retempn growth',
-                  'totemp growth',
-                  'hhincq1 growth',
-                  'hhincq2 growth',
-                  'hhincq3 growth',
-                  'hhincq4 growth',
-                  'tothh growth',
-                  'mfdu growth',
-                  'sfdu growth',
-                  'nonres_sqft growth',
-                  'dr_units growth',
-                  'incl_units growth',
-                  'subsd_units growth',
-                  'totemp growth shr',
-                  'tothh growth shr',
-                  'totemp pct growth',
-                  'tothh pct growth',
-                  'totemp shr chng',
-                  'tothh shr chng']
-
-    if ('juris' in DF1.columns) & ('juris' in DF2.columns):
-        DF_merge = DF1.merge(DF2, on = 'juris').fillna(0)
-        DF_COLUMNS = ['juris'] + DF_COLUMNS
-    elif ('superdistrict' in DF1.columns) & ('superdistrict' in DF2.columns):
-        DF_merge = DF1.merge(DF2, on = 'superdistrict').fillna(0)
-        DF_merge['sd_name'] = DF_merge['superdistrict'].map(sd_mapping)
-        DF_COLUMNS = ['superdistrict','sd_name'] + DF_COLUMNS
+    DF_COLUMNS = [
+        "agrempn growth",
+        "fpsempn growth",
+        "herempn growth",
+        "mwtempn growth",
+        "othempn growth",
+        "retempn growth",
+        "totemp growth",
+        "hhincq1 growth",
+        "hhincq2 growth",
+        "hhincq3 growth",
+        "hhincq4 growth",
+        "tothh growth",
+        "mfdu growth",
+        "sfdu growth",
+        "nonres_sqft growth",
+        "dr_units growth",
+        "incl_units growth",
+        "subsd_units growth",
+        "totemp growth shr",
+        "tothh growth shr",
+        "totemp pct growth",
+        "tothh pct growth",
+        "totemp shr chng",
+        "tothh shr chng",
+    ]
+
+    if ("juris" in DF1.columns) & ("juris" in DF2.columns):
+        DF_merge = DF1.merge(DF2, on="juris").fillna(0)
+        DF_COLUMNS = ["juris"] + DF_COLUMNS
+    elif ("superdistrict" in DF1.columns) & ("superdistrict" in DF2.columns):
+        DF_merge = DF1.merge(DF2, on="superdistrict").fillna(0)
+        DF_merge["sd_name"] = DF_merge["superdistrict"].map(sd_mapping)
+        DF_COLUMNS = ["superdistrict", "sd_name"] + DF_COLUMNS
     else:
-        print ('Merge cannot be performed')
-
-    DF_merge['agrempn growth'] = DF_merge['agrempn_y']-DF_merge['agrempn_x']
-    DF_merge['fpsempn growth'] = DF_merge['fpsempn_y']-DF_merge['fpsempn_x']
-    DF_merge['herempn growth'] = DF_merge['herempn_y']-DF_merge['herempn_x']
-    DF_merge['mwtempn growth'] = DF_merge['mwtempn_y']-DF_merge['mwtempn_x']
-    DF_merge['othempn growth'] = DF_merge['othempn_y']-DF_merge['othempn_x']
-    DF_merge['retempn growth'] = DF_merge['retempn_y']-DF_merge['retempn_x']
-    DF_merge['totemp growth'] = DF_merge['totemp_y']-DF_merge['totemp_x']
-    DF_merge['hhincq1 growth'] = DF_merge['hhincq1_y']-DF_merge['hhincq1_x']
-    DF_merge['hhincq2 growth'] = DF_merge['hhincq2_y']-DF_merge['hhincq2_x']
-    DF_merge['hhincq3 growth'] = DF_merge['hhincq3_y']-DF_merge['hhincq3_x']
-    DF_merge['hhincq4 growth'] = DF_merge['hhincq4_y']-DF_merge['hhincq4_x']
-    DF_merge['tothh growth'] = DF_merge['tothh_y']-DF_merge['tothh_x']
-    DF_merge['mfdu growth'] = DF_merge['mfdu_y']-DF_merge['mfdu_x']
-    
DF_merge['sfdu growth'] = DF_merge['sfdu_y']-DF_merge['sfdu_x'] - DF_merge['nonres_sqft growth'] = DF_merge['non_residential_sqft_y']-DF_merge['non_residential_sqft_x'] - DF_merge['dr_units growth'] = DF_merge['deed_restricted_units_y']-DF_merge['deed_restricted_units_x'] - DF_merge['incl_units growth'] = DF_merge['inclusionary_units_y']-DF_merge['inclusionary_units_x'] - DF_merge['subsd_units growth'] = DF_merge['subsidized_units_y']-DF_merge['subsidized_units_x'] - - DF_merge['totemp growth shr'] = DF_merge['totemp growth']/(DF_merge['totemp_y'].sum()-DF_merge['totemp_x'].sum()) - DF_merge['tothh growth shr'] = DF_merge['tothh growth']/(DF_merge['tothh_y'].sum()-DF_merge['tothh_x'].sum()) - - DF_merge['totemp pct growth'] = DF_merge['totemp_y']/DF_merge['totemp_x']-1 - DF_merge['tothh pct growth'] = DF_merge['tothh_y']/DF_merge['tothh_x']-1 - - DF_merge['totemp shr chng'] = DF_merge['totemp_y']/DF_merge['totemp_y'].sum()-DF_merge['totemp_x']/DF_merge['totemp_x'].sum() - DF_merge['tothh shr chng'] = DF_merge['tothh_y']/DF_merge['tothh_y'].sum()-DF_merge['tothh_x']/DF_merge['tothh_x'].sum() + print("Merge cannot be performed") + + DF_merge["agrempn growth"] = DF_merge["agrempn_y"] - DF_merge["agrempn_x"] + DF_merge["fpsempn growth"] = DF_merge["fpsempn_y"] - DF_merge["fpsempn_x"] + DF_merge["herempn growth"] = DF_merge["herempn_y"] - DF_merge["herempn_x"] + DF_merge["mwtempn growth"] = DF_merge["mwtempn_y"] - DF_merge["mwtempn_x"] + DF_merge["othempn growth"] = DF_merge["othempn_y"] - DF_merge["othempn_x"] + DF_merge["retempn growth"] = DF_merge["retempn_y"] - DF_merge["retempn_x"] + DF_merge["totemp growth"] = DF_merge["totemp_y"] - DF_merge["totemp_x"] + DF_merge["hhincq1 growth"] = DF_merge["hhincq1_y"] - DF_merge["hhincq1_x"] + DF_merge["hhincq2 growth"] = DF_merge["hhincq2_y"] - DF_merge["hhincq2_x"] + DF_merge["hhincq3 growth"] = DF_merge["hhincq3_y"] - DF_merge["hhincq3_x"] + DF_merge["hhincq4 growth"] = DF_merge["hhincq4_y"] - DF_merge["hhincq4_x"] + DF_merge["tothh growth"] = DF_merge["tothh_y"] - DF_merge["tothh_x"] + DF_merge["mfdu growth"] = DF_merge["mfdu_y"] - DF_merge["mfdu_x"] + DF_merge["sfdu growth"] = DF_merge["sfdu_y"] - DF_merge["sfdu_x"] + DF_merge["nonres_sqft growth"] = ( + DF_merge["non_residential_sqft_y"] - DF_merge["non_residential_sqft_x"] + ) + DF_merge["dr_units growth"] = ( + DF_merge["deed_restricted_units_y"] - DF_merge["deed_restricted_units_x"] + ) + DF_merge["incl_units growth"] = ( + DF_merge["inclusionary_units_y"] - DF_merge["inclusionary_units_x"] + ) + DF_merge["subsd_units growth"] = ( + DF_merge["subsidized_units_y"] - DF_merge["subsidized_units_x"] + ) + + DF_merge["totemp growth shr"] = DF_merge["totemp growth"] / ( + DF_merge["totemp_y"].sum() - DF_merge["totemp_x"].sum() + ) + DF_merge["tothh growth shr"] = DF_merge["tothh growth"] / ( + DF_merge["tothh_y"].sum() - DF_merge["tothh_x"].sum() + ) + + DF_merge["totemp pct growth"] = DF_merge["totemp_y"] / DF_merge["totemp_x"] - 1 + DF_merge["tothh pct growth"] = DF_merge["tothh_y"] / DF_merge["tothh_x"] - 1 + + DF_merge["totemp shr chng"] = ( + DF_merge["totemp_y"] / DF_merge["totemp_y"].sum() + - DF_merge["totemp_x"] / DF_merge["totemp_x"].sum() + ) + DF_merge["tothh shr chng"] = ( + DF_merge["tothh_y"] / DF_merge["tothh_y"].sum() + - DF_merge["tothh_x"] / DF_merge["tothh_x"].sum() + ) DF_GROWTH = DF_merge[DF_COLUMNS].copy() - + return DF_GROWTH -#This is to have an easier way to read summary files, particularly when you don't know the run id, only the folder -#load the file by looking for the 
file location, geography level, base year, and end year summary files
-#for PBA40, baseyear is 2010, and end year is 2040
-#for PBA50 (DBP and new runs) baseyear is 2015, and end year is 2050
-#return a tuple of baseyear summary file, endyear summary file, and the runid
+# This is to have an easier way to read summary files, particularly when you don't know the run id, only the folder
+
+# load the file by looking for the file location, geography level, base year, and end year summary files
+# for PBA40, baseyear is 2010, and end year is 2040
+# for PBA50 (DBP and new runs) baseyear is 2015, and end year is 2050
+# return a tuple of baseyear summary file, endyear summary file, and the runid
 def FILELOADER(path, geo, baseyear, endyear):
-    localpath = ''
-    #PBA40 has a different pattern for taz files
+    localpath = ""
+    # PBA40 has a different pattern for taz files
     if path == PBA40_DIR:
         localpath = path
-        if geo == 'taz':
-            pattern_baseyear = "(run[0-9]{1,4}c_%s_summaries_%s.csv)"%(geo, baseyear)
-            pattern_endyear = "(run[0-9]{1,4}c_%s_summaries_%s.csv)"%(geo, endyear)
+        if geo == "taz":
+            pattern_baseyear = "(run[0-9]{1,4}c_%s_summaries_%s.csv)" % (geo, baseyear)
+            pattern_endyear = "(run[0-9]{1,4}c_%s_summaries_%s.csv)" % (geo, endyear)
         else:
-            pattern_baseyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)"%(geo, baseyear)
-            pattern_endyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)"%(geo, endyear)
-
-    #DBP has a different pattern from all new BP runs
+            pattern_baseyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)" % (geo, baseyear)
+            pattern_endyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)" % (geo, endyear)
+
+    # DBP has a different pattern from all new BP runs
     else:
-        localpath = os.path.join(URBANSIM_OUTPUT_BOX_DIR, path) #the path is the run folder, must be specificed.
-        pattern_baseyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)"%(geo, baseyear)
-        pattern_endyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)"%(geo, endyear)
-
+        localpath = os.path.join(
+            URBANSIM_OUTPUT_BOX_DIR, path
+        )  # the path is the run folder, must be specified.
+        pattern_baseyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)" % (geo, baseyear)
+        pattern_endyear = "(run[0-9]{1,4}_%s_summaries_%s.csv)" % (geo, endyear)
+
     files_baseyear = [f for f in os.listdir(localpath) if re.match(pattern_baseyear, f)]
     filename_baseyear = files_baseyear[0]
-    filepath_baseyear = os.path.join(localpath,filename_baseyear)
+    filepath_baseyear = os.path.join(localpath, filename_baseyear)
     summary_baseyear = pd.read_csv(filepath_baseyear)

     files_endyear = [f for f in os.listdir(localpath) if re.match(pattern_endyear, f)]
     filename_endyear = files_endyear[0]
-    filepath_endyear = os.path.join(localpath,filename_endyear)
+    filepath_endyear = os.path.join(localpath, filename_endyear)
     summary_endyear = pd.read_csv(filepath_endyear)
-
-    version = path.split('/')[-1]
-
+
+    version = path.split("/")[-1]
+
     return summary_baseyear, summary_endyear, version

-#This is to define a separate fileloader for parcel difference data. With the zoningmod category, we should be able to
-#summarize growth by different geography types that is more nuanced.
+
+# This is to define a separate fileloader for parcel difference data. With the zoningmod category, we should be able to
+# summarize growth by different geography types in a more nuanced way.
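+# It returns a juris-level table of base-to-end-year changes (tothh, totemp, hhq1-hhq4), split into
+# a "yes_<geo>" group (parcels whose fbpchcat string contains the geography tag) and a "no_<geo>"
+# group for all other parcels, with the run version attached.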
def GEO_SUMMARY_LOADER(path, geo, baseyear, endyear): localpath = os.path.join(URBANSIM_OUTPUT_BOX_DIR, path) - parcel_baseyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)"%(baseyear) - parcel_endyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)"%(endyear) - - parcel_baseyear_search = [f for f in os.listdir(localpath) if re.match(parcel_baseyear_pattern, f)] - filename_parcel_baseyear = parcel_baseyear_search[0] - parcel_baseyear_filepath = os.path.join(localpath,filename_parcel_baseyear) + parcel_baseyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)" % (baseyear) + parcel_endyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)" % (endyear) + + parcel_baseyear_search = [ + f for f in os.listdir(localpath) if re.match(parcel_baseyear_pattern, f) + ] + filename_parcel_baseyear = parcel_baseyear_search[0] + parcel_baseyear_filepath = os.path.join(localpath, filename_parcel_baseyear) parcel_baseyear = pd.read_csv(parcel_baseyear_filepath) - - parcel_endyear_search = [f for f in os.listdir(localpath) if re.match(parcel_endyear_pattern, f)] - filename_parcel_endyear = parcel_endyear_search[0] - parcel_endyear_filepath = os.path.join(localpath,filename_parcel_endyear) + + parcel_endyear_search = [ + f for f in os.listdir(localpath) if re.match(parcel_endyear_pattern, f) + ] + filename_parcel_endyear = parcel_endyear_search[0] + parcel_endyear_filepath = os.path.join(localpath, filename_parcel_endyear) parcel_endyear = pd.read_csv(parcel_endyear_filepath) - - version = path.split('/')[-1] - - - parcel_geobase_file = pd.read_csv(os.path.join(URBANSIM_OUTPUT_BOX_DIR, PARCEL_GEO_DIR)) - parcel_geobase_file_r = parcel_geobase_file[['PARCEL_ID','juris','fbpchcat']] - parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] - parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] - parcel_merge = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id').fillna(0) - parcel_data = parcel_geobase_file_r.merge(parcel_merge, left_on = 'PARCEL_ID', right_on = 'parcel_id', how = 'left') - #else: - #parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] - #parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4','juris','pba50chcat']] - #parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0) - - parcel_data['totemp diff'] = parcel_data['totemp_y']-parcel_data['totemp_x'] - parcel_data['tothh diff'] = parcel_data['tothh_y']-parcel_data['tothh_x'] - parcel_data['hhq1 diff'] = parcel_data['hhq1_y']-parcel_data['hhq1_x'] - parcel_data['hhq2 diff'] = parcel_data['hhq2_y']-parcel_data['hhq2_x'] - parcel_data['hhq3 diff'] = parcel_data['hhq3_y']-parcel_data['hhq3_x'] - parcel_data['hhq4 diff'] = parcel_data['hhq4_y']-parcel_data['hhq4_x'] - - parcel_data = parcel_data[['parcel_id','tothh diff','totemp diff','hhq1 diff', 'hhq2 diff','hhq3 diff','hhq4 diff','juris','fbpchcat']].copy() - - #geography summaries - parcel_geo = parcel_data.loc[parcel_data['fbpchcat'].str.contains(geo, na=False)] - parcel_geo = parcel_geo.groupby(['juris']).agg({'tothh diff':'sum','totemp diff':'sum', 'hhq1 diff':'sum', 'hhq2 diff':'sum', 'hhq3 diff':'sum', 'hhq4 diff':'sum', }).reset_index() - parcel_geo['geo_category'] = 'yes_%s'%(geo) - parcel_geo_no = parcel_data.loc[~parcel_data['fbpchcat'].str.contains(geo, na=False)] - parcel_geo_no = parcel_geo_no.groupby(['juris']).agg({'tothh diff':'sum','totemp diff':'sum', 'hhq1 diff':'sum', 'hhq2 diff':'sum', 
'hhq3 diff':'sum', 'hhq4 diff':'sum', }).reset_index() - parcel_geo_no['geo_category'] = 'no_%s'%(geo) - - parcel_geo_summary = pd.concat([parcel_geo, parcel_geo_no], ignore_index = True) - parcel_geo_summary.sort_values(by = 'juris', inplace = True) - parcel_geo_summary['VERSION'] = version - + + version = path.split("/")[-1] + + parcel_geobase_file = pd.read_csv( + os.path.join(URBANSIM_OUTPUT_BOX_DIR, PARCEL_GEO_DIR) + ) + parcel_geobase_file_r = parcel_geobase_file[["PARCEL_ID", "juris", "fbpchcat"]] + parcel_baseyear = parcel_baseyear[ + ["parcel_id", "tothh", "totemp", "hhq1", "hhq2", "hhq3", "hhq4"] + ] + parcel_endyear = parcel_endyear[ + ["parcel_id", "tothh", "totemp", "hhq1", "hhq2", "hhq3", "hhq4"] + ] + parcel_merge = parcel_baseyear.merge(parcel_endyear, on="parcel_id").fillna(0) + parcel_data = parcel_geobase_file_r.merge( + parcel_merge, left_on="PARCEL_ID", right_on="parcel_id", how="left" + ) + # else: + # parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] + # parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4','juris','pba50chcat']] + # parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0) + + parcel_data["totemp diff"] = parcel_data["totemp_y"] - parcel_data["totemp_x"] + parcel_data["tothh diff"] = parcel_data["tothh_y"] - parcel_data["tothh_x"] + parcel_data["hhq1 diff"] = parcel_data["hhq1_y"] - parcel_data["hhq1_x"] + parcel_data["hhq2 diff"] = parcel_data["hhq2_y"] - parcel_data["hhq2_x"] + parcel_data["hhq3 diff"] = parcel_data["hhq3_y"] - parcel_data["hhq3_x"] + parcel_data["hhq4 diff"] = parcel_data["hhq4_y"] - parcel_data["hhq4_x"] + + parcel_data = parcel_data[ + [ + "parcel_id", + "tothh diff", + "totemp diff", + "hhq1 diff", + "hhq2 diff", + "hhq3 diff", + "hhq4 diff", + "juris", + "fbpchcat", + ] + ].copy() + + # geography summaries + parcel_geo = parcel_data.loc[parcel_data["fbpchcat"].str.contains(geo, na=False)] + parcel_geo = ( + parcel_geo.groupby(["juris"]) + .agg( + { + "tothh diff": "sum", + "totemp diff": "sum", + "hhq1 diff": "sum", + "hhq2 diff": "sum", + "hhq3 diff": "sum", + "hhq4 diff": "sum", + } + ) + .reset_index() + ) + parcel_geo["geo_category"] = "yes_%s" % (geo) + parcel_geo_no = parcel_data.loc[ + ~parcel_data["fbpchcat"].str.contains(geo, na=False) + ] + parcel_geo_no = ( + parcel_geo_no.groupby(["juris"]) + .agg( + { + "tothh diff": "sum", + "totemp diff": "sum", + "hhq1 diff": "sum", + "hhq2 diff": "sum", + "hhq3 diff": "sum", + "hhq4 diff": "sum", + } + ) + .reset_index() + ) + parcel_geo_no["geo_category"] = "no_%s" % (geo) + + parcel_geo_summary = pd.concat([parcel_geo, parcel_geo_no], ignore_index=True) + parcel_geo_summary.sort_values(by="juris", inplace=True) + parcel_geo_summary["VERSION"] = version + return parcel_geo_summary ##Similar to above, this is to define a separate fileloader to produce summaries for overlapping geographies. 
W def TWO_GEO_SUMMARY_LOADER(path, geo1, geo2, baseyear, endyear): localpath = os.path.join(URBANSIM_OUTPUT_BOX_DIR, path) - parcel_baseyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)"%(baseyear) - parcel_endyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)"%(endyear) - - parcel_baseyear_search = [f for f in os.listdir(localpath) if re.match(parcel_baseyear_pattern, f)] - filename_parcel_baseyear = parcel_baseyear_search[0] - parcel_baseyear_filepath = os.path.join(localpath,filename_parcel_baseyear) + parcel_baseyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)" % (baseyear) + parcel_endyear_pattern = "(run[0-9]{1,4}_parcel_data_%s.csv)" % (endyear) + + parcel_baseyear_search = [ + f for f in os.listdir(localpath) if re.match(parcel_baseyear_pattern, f) + ] + filename_parcel_baseyear = parcel_baseyear_search[0] + parcel_baseyear_filepath = os.path.join(localpath, filename_parcel_baseyear) parcel_baseyear = pd.read_csv(parcel_baseyear_filepath) - - parcel_endyear_search = [f for f in os.listdir(localpath) if re.match(parcel_endyear_pattern, f)] - filename_parcel_endyear = parcel_endyear_search[0] - parcel_endyear_filepath = os.path.join(localpath,filename_parcel_endyear) + + parcel_endyear_search = [ + f for f in os.listdir(localpath) if re.match(parcel_endyear_pattern, f) + ] + filename_parcel_endyear = parcel_endyear_search[0] + parcel_endyear_filepath = os.path.join(localpath, filename_parcel_endyear) parcel_endyear = pd.read_csv(parcel_endyear_filepath) - - version = path.split('/')[-1] - - #if path == DBP_DIR: - parcel_geobase_file = pd.read_csv(os.path.join(URBANSIM_OUTPUT_BOX_DIR, PARCEL_GEO_DIR)) - parcel_geobase_file_r = parcel_geobase_file[['PARCEL_ID','juris','fbpchcat']] - parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] - parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] - parcel_merge = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id').fillna(0) - parcel_data = parcel_geobase_file_r.merge(parcel_merge, left_on = 'PARCEL_ID', right_on = 'parcel_id', how = 'left') - #else: - #parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] - #parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4','juris','fbpchcat']] - #parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0) - - parcel_data['totemp diff'] = parcel_data['totemp_y']-parcel_data['totemp_x'] - parcel_data['tothh diff'] = parcel_data['tothh_y']-parcel_data['tothh_x'] - parcel_data['hhq1 diff'] = parcel_data['hhq1_y']-parcel_data['hhq1_x'] - parcel_data['hhq2 diff'] = parcel_data['hhq2_y']-parcel_data['hhq2_x'] - parcel_data['hhq3 diff'] = parcel_data['hhq3_y']-parcel_data['hhq3_x'] - parcel_data['hhq4 diff'] = parcel_data['hhq4_y']-parcel_data['hhq4_x'] - - parcel_data = parcel_data[['parcel_id','tothh diff','totemp diff','hhq1 diff', 'hhq2 diff','hhq3 diff','hhq4 diff','juris','fbpchcat']].copy() - - #two geographies - parcel_geo2 = parcel_data.loc[parcel_data['fbpchcat'].str.contains(geo1, na=False)] - parcel_geo2 = parcel_geo2.loc[parcel_geo2['fbpchcat'].str.contains(geo2, na=False)] - parcel_geo2_group = parcel_geo2.groupby(['juris']).agg({'tothh diff':'sum','totemp diff':'sum', 'hhq1 diff':'sum', 'hhq2 diff':'sum', 'hhq3 diff':'sum', 'hhq4 diff':'sum', }).reset_index() - parcel_geo2_group['geo_category'] = 'yes_%s'%(geo1+geo2) - - parcel_geo2_no_1 = parcel_data.loc[parcel_data['fbpchcat'].str.contains(geo1, 
na=False)] - parcel_geo2_no_1 = parcel_geo2_no_1.loc[~parcel_geo2_no_1['fbpchcat'].str.contains(geo2, na=False)] - parcel_geo2_no_2 = parcel_data.loc[parcel_data['fbpchcat'].str.contains(geo2, na=False)] - parcel_geo2_no_2 = parcel_geo2_no_2.loc[~parcel_geo2_no_2['fbpchcat'].str.contains(geo1, na=False)] - parcel_geo2_no_3 = parcel_data.loc[~parcel_data['fbpchcat'].str.contains(geo1 + "|" + geo2, na=False)] - - parcel_geo2_no = pd.concat([parcel_geo2_no_1, parcel_geo2_no_2, parcel_geo2_no_3], ignore_index = True) - parcel_geo2_no_group = parcel_geo2_no.groupby(['juris']).agg({'tothh diff':'sum','totemp diff':'sum', 'hhq1 diff':'sum', 'hhq2 diff':'sum', 'hhq3 diff':'sum', 'hhq4 diff':'sum', }).reset_index() - parcel_geo2_no_group['geo_category'] = 'no_%s'%(geo1+geo2) - - parcel_geo2_summary = pd.concat([parcel_geo2_group, parcel_geo2_no_group], ignore_index = True) - parcel_geo2_summary['VERSION'] = version - + + version = path.split("/")[-1] + + # if path == DBP_DIR: + parcel_geobase_file = pd.read_csv( + os.path.join(URBANSIM_OUTPUT_BOX_DIR, PARCEL_GEO_DIR) + ) + parcel_geobase_file_r = parcel_geobase_file[["PARCEL_ID", "juris", "fbpchcat"]] + parcel_baseyear = parcel_baseyear[ + ["parcel_id", "tothh", "totemp", "hhq1", "hhq2", "hhq3", "hhq4"] + ] + parcel_endyear = parcel_endyear[ + ["parcel_id", "tothh", "totemp", "hhq1", "hhq2", "hhq3", "hhq4"] + ] + parcel_merge = parcel_baseyear.merge(parcel_endyear, on="parcel_id").fillna(0) + parcel_data = parcel_geobase_file_r.merge( + parcel_merge, left_on="PARCEL_ID", right_on="parcel_id", how="left" + ) + # else: + # parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4']] + # parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4','juris','fbpchcat']] + # parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0) + + parcel_data["totemp diff"] = parcel_data["totemp_y"] - parcel_data["totemp_x"] + parcel_data["tothh diff"] = parcel_data["tothh_y"] - parcel_data["tothh_x"] + parcel_data["hhq1 diff"] = parcel_data["hhq1_y"] - parcel_data["hhq1_x"] + parcel_data["hhq2 diff"] = parcel_data["hhq2_y"] - parcel_data["hhq2_x"] + parcel_data["hhq3 diff"] = parcel_data["hhq3_y"] - parcel_data["hhq3_x"] + parcel_data["hhq4 diff"] = parcel_data["hhq4_y"] - parcel_data["hhq4_x"] + + parcel_data = parcel_data[ + [ + "parcel_id", + "tothh diff", + "totemp diff", + "hhq1 diff", + "hhq2 diff", + "hhq3 diff", + "hhq4 diff", + "juris", + "fbpchcat", + ] + ].copy() + + # two geographies + parcel_geo2 = parcel_data.loc[parcel_data["fbpchcat"].str.contains(geo1, na=False)] + parcel_geo2 = parcel_geo2.loc[parcel_geo2["fbpchcat"].str.contains(geo2, na=False)] + parcel_geo2_group = ( + parcel_geo2.groupby(["juris"]) + .agg( + { + "tothh diff": "sum", + "totemp diff": "sum", + "hhq1 diff": "sum", + "hhq2 diff": "sum", + "hhq3 diff": "sum", + "hhq4 diff": "sum", + } + ) + .reset_index() + ) + parcel_geo2_group["geo_category"] = "yes_%s" % (geo1 + geo2) + + parcel_geo2_no_1 = parcel_data.loc[ + parcel_data["fbpchcat"].str.contains(geo1, na=False) + ] + parcel_geo2_no_1 = parcel_geo2_no_1.loc[ + ~parcel_geo2_no_1["fbpchcat"].str.contains(geo2, na=False) + ] + parcel_geo2_no_2 = parcel_data.loc[ + parcel_data["fbpchcat"].str.contains(geo2, na=False) + ] + parcel_geo2_no_2 = parcel_geo2_no_2.loc[ + ~parcel_geo2_no_2["fbpchcat"].str.contains(geo1, na=False) + ] + parcel_geo2_no_3 = parcel_data.loc[ + ~parcel_data["fbpchcat"].str.contains(geo1 + "|" + geo2, na=False) + ] 
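+    # the three subsets above cover parcels in geo1 only, geo2 only, and neither geography;
+    # together they make up the "no" group for this geography pair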
+ + parcel_geo2_no = pd.concat( + [parcel_geo2_no_1, parcel_geo2_no_2, parcel_geo2_no_3], ignore_index=True + ) + parcel_geo2_no_group = ( + parcel_geo2_no.groupby(["juris"]) + .agg( + { + "tothh diff": "sum", + "totemp diff": "sum", + "hhq1 diff": "sum", + "hhq2 diff": "sum", + "hhq3 diff": "sum", + "hhq4 diff": "sum", + } + ) + .reset_index() + ) + parcel_geo2_no_group["geo_category"] = "no_%s" % (geo1 + geo2) + + parcel_geo2_summary = pd.concat( + [parcel_geo2_group, parcel_geo2_no_group], ignore_index=True + ) + parcel_geo2_summary["VERSION"] = version + return parcel_geo2_summary + if __name__ == "__main__": - - #process taz file first, since it is different from other files - GEO = 'taz' + + # process taz file first, since it is different from other files + GEO = "taz" DF_LIST = [] DF_COUNTY_LIST = [] - + for path in PATH_LIST: - #Read PBA40 file + # Read PBA40 file if path == PBA40_DIR: baseyear = 2010 - endyear = 2040 + endyear = 2040 else: baseyear = 2015 - endyear = 2050 - - data_summary = FILELOADER(path, GEO, baseyear, endyear) + endyear = 2050 + + data_summary = FILELOADER(path, GEO, baseyear, endyear) summary_baseyear = data_summary[0] - summary_endyear = data_summary[1] - summary_runid = data_summary[2] - - #calculate growth and combine files - DF = taz_calculator(summary_baseyear, summary_endyear) - if summary_runid == 'run7224c': - new_names = [(i,'PBA40_'+ i) for i in DF.iloc[:, 1:].columns.values] - DF.rename(columns = dict(new_names), inplace=True) - elif summary_runid == 'run98': - new_names = [(i,'DBP_'+ i) for i in DF.iloc[:, 1:].columns.values] - DF.rename(columns = dict(new_names), inplace=True) + summary_endyear = data_summary[1] + summary_runid = data_summary[2] + + # calculate growth and combine files + DF = taz_calculator(summary_baseyear, summary_endyear) + if summary_runid == "run7224c": + new_names = [(i, "PBA40_" + i) for i in DF.iloc[:, 1:].columns.values] + DF.rename(columns=dict(new_names), inplace=True) + elif summary_runid == "run98": + new_names = [(i, "DBP_" + i) for i in DF.iloc[:, 1:].columns.values] + DF.rename(columns=dict(new_names), inplace=True) else: - new_names = [(i,summary_runid +'_'+ i) for i in DF.iloc[:, 1:].columns.values] - DF.rename(columns = dict(new_names), inplace=True) - + new_names = [ + (i, summary_runid + "_" + i) for i in DF.iloc[:, 1:].columns.values + ] + DF.rename(columns=dict(new_names), inplace=True) + DF_LIST.append(DF) - + DF_COUNTY = county_calculator(summary_baseyear, summary_endyear) - if summary_runid == 'run7224c': - new_names = [(i,'PBA40_'+ i) for i in DF_COUNTY.iloc[:, 1:].columns.values] - DF_COUNTY.rename(columns = dict(new_names), inplace=True) - elif summary_runid == 'run98': - new_names = [(i,'DBP_'+ i) for i in DF_COUNTY.iloc[:, 1:].columns.values] - DF_COUNTY.rename(columns = dict(new_names), inplace=True) + if summary_runid == "run7224c": + new_names = [ + (i, "PBA40_" + i) for i in DF_COUNTY.iloc[:, 1:].columns.values + ] + DF_COUNTY.rename(columns=dict(new_names), inplace=True) + elif summary_runid == "run98": + new_names = [(i, "DBP_" + i) for i in DF_COUNTY.iloc[:, 1:].columns.values] + DF_COUNTY.rename(columns=dict(new_names), inplace=True) else: - new_names = [(i,summary_runid +'_'+ i) for i in DF_COUNTY.iloc[:, 1:].columns.values] - DF_COUNTY.rename(columns = dict(new_names), inplace=True) + new_names = [ + (i, summary_runid + "_" + i) + for i in DF_COUNTY.iloc[:, 1:].columns.values + ] + DF_COUNTY.rename(columns=dict(new_names), inplace=True) DF_COUNTY_LIST.append(DF_COUNTY) - - DF_MERGE = 
reduce(lambda left,right: pd.merge(left, right, on = 'TAZ', how='outer'), DF_LIST) - DF_MERGE.to_csv(OUTPUT_FILE.format(GEO), index = False) - - DF_COUNTY_MERGE = reduce(lambda left,right: pd.merge(left, right, on = 'county', how='outer'), DF_COUNTY_LIST) - DF_COUNTY_MERGE.to_csv(OUTPUT_FILE.format('county'), index = False) - - #then process other geography summaries - GEO = ['juris','superdistrict'] + + DF_MERGE = reduce( + lambda left, right: pd.merge(left, right, on="TAZ", how="outer"), DF_LIST + ) + DF_MERGE.to_csv(OUTPUT_FILE.format(GEO), index=False) + + DF_COUNTY_MERGE = reduce( + lambda left, right: pd.merge(left, right, on="county", how="outer"), + DF_COUNTY_LIST, + ) + DF_COUNTY_MERGE.to_csv(OUTPUT_FILE.format("county"), index=False) + + # then process other geography summaries + GEO = ["juris", "superdistrict"] DF_LIST = [] - + for geo in GEO: - #PDA summary comparisons should exclude PBA40 and DBP, because they used PBA40 PDAs, - #for PBA50 PDAs, only include new runs + # PDA summary comparisons should exclude PBA40 and DBP, because they used PBA40 PDAs, + # for PBA50 PDAs, only include new runs for path in PATH_LIST: - #Read PBA40 file + # Read PBA40 file if path == PBA40_DIR: baseyear = 2010 - endyear = 2040 + endyear = 2040 else: baseyear = 2015 - endyear = 2050 - - data_summary = FILELOADER(path, geo, baseyear, endyear) + endyear = 2050 + + data_summary = FILELOADER(path, geo, baseyear, endyear) summary_baseyear = data_summary[0] - summary_endyear = data_summary[1] - summary_runid = data_summary[2] - - #calculate growth and combine files - DF = nontaz_calculator(summary_baseyear, summary_endyear) - if summary_runid == 'run7224c': - new_names = [(i,'PBA40_'+ i) for i in DF.iloc[:, 1:].columns.values] - DF.rename(columns = dict(new_names), inplace=True) - elif summary_runid == 'run98': - new_names = [(i,'DBP_'+ i) for i in DF.iloc[:, 1:].columns.values] - DF.rename(columns = dict(new_names), inplace=True) + summary_endyear = data_summary[1] + summary_runid = data_summary[2] + + # calculate growth and combine files + DF = nontaz_calculator(summary_baseyear, summary_endyear) + if summary_runid == "run7224c": + new_names = [(i, "PBA40_" + i) for i in DF.iloc[:, 1:].columns.values] + DF.rename(columns=dict(new_names), inplace=True) + elif summary_runid == "run98": + new_names = [(i, "DBP_" + i) for i in DF.iloc[:, 1:].columns.values] + DF.rename(columns=dict(new_names), inplace=True) else: - new_names = [(i,summary_runid +'_'+ i) for i in DF.iloc[:, 1:].columns.values] - DF.rename(columns = dict(new_names), inplace=True) + new_names = [ + (i, summary_runid + "_" + i) for i in DF.iloc[:, 1:].columns.values + ] + DF.rename(columns=dict(new_names), inplace=True) DF_LIST.append(DF) - - DF_MERGE = reduce(lambda left,right: pd.merge(left, right, on = geo, how='outer'), DF_LIST) - DF_MERGE.to_csv(OUTPUT_FILE.format(geo), index = False) + + DF_MERGE = reduce( + lambda left, right: pd.merge(left, right, on=geo, how="outer"), DF_LIST + ) + DF_MERGE.to_csv(OUTPUT_FILE.format(geo), index=False) DF_LIST = [] - - #summary from parcel data + + # summary from parcel data baseyear = 2015 endyear = 2050 - GEO = ['GG','tra','HRA', 'DIS'] + GEO = ["GG", "tra", "HRA", "DIS"] DF_LIST = [] - #PATH_LIST_PARCEL = filter(lambda x: (x != PBA40_DIR)&(x != DBP__CLEANER_DIR),PATH_LIST) + # PATH_LIST_PARCEL = filter(lambda x: (x != PBA40_DIR)&(x != DBP__CLEANER_DIR),PATH_LIST) for geo in GEO: for path in PATH_LIST_PARCEL: DF_JURIS = GEO_SUMMARY_LOADER(path, geo, baseyear, endyear) DF_LIST.append(DF_JURIS) - + 
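+        # stack the per-run juris summaries for this geography, attach county names, and write one CSV per geography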
DF_UNION = pd.concat(DF_LIST) - DF_UNION['county'] = DF_UNION['juris'].map(juris_to_county) - DF_UNION.sort_values(by = ['VERSION','county','juris','geo_category'],ascending=[False, True, True, False], inplace=True) - DF_UNION.set_index(['VERSION','county','juris','geo_category'], inplace=True) + DF_UNION["county"] = DF_UNION["juris"].map(juris_to_county) + DF_UNION.sort_values( + by=["VERSION", "county", "juris", "geo_category"], + ascending=[False, True, True, False], + inplace=True, + ) + DF_UNION.set_index(["VERSION", "county", "juris", "geo_category"], inplace=True) DF_UNION.to_csv(OUTPUT_FILE.format(geo)) DF_LIST = [] - - #summaries for overlapping geos - geo_1, geo_2, geo_3 = 'tra','DIS','HRA' - DF_LIST =[] + + # summaries for overlapping geos + geo_1, geo_2, geo_3 = "tra", "DIS", "HRA" + DF_LIST = [] DF_LIST_2 = [] for path in PATH_LIST_PARCEL: DF_TWO_GEO = TWO_GEO_SUMMARY_LOADER(path, geo_1, geo_2, baseyear, endyear) DF_TWO_GEO_2 = TWO_GEO_SUMMARY_LOADER(path, geo_1, geo_3, baseyear, endyear) DF_LIST.append(DF_TWO_GEO) DF_LIST_2.append(DF_TWO_GEO_2) - + DF_MERGE = pd.concat(DF_LIST) - DF_MERGE['county'] = DF_MERGE['juris'].map(juris_to_county) - DF_MERGE.sort_values(by = ['VERSION','county','juris','geo_category'],ascending=[False,True, True, False], inplace=True) - DF_MERGE.set_index(['VERSION','county','juris','geo_category'], inplace=True) - DF_MERGE.to_csv(OUTPUT_FILE.format(geo_1+geo_2)) - - DF_MERGE_2 = pd.concat(DF_LIST_2) - DF_MERGE_2['county'] = DF_MERGE_2['juris'].map(juris_to_county) - DF_MERGE_2.sort_values(by = ['VERSION','county','juris','geo_category'],ascending=[False, True, True, False], inplace=True) - DF_MERGE_2.set_index(['VERSION','county','juris','geo_category'], inplace=True) - DF_MERGE_2.to_csv(OUTPUT_FILE.format(geo_1+geo_3)) + DF_MERGE["county"] = DF_MERGE["juris"].map(juris_to_county) + DF_MERGE.sort_values( + by=["VERSION", "county", "juris", "geo_category"], + ascending=[False, True, True, False], + inplace=True, + ) + DF_MERGE.set_index(["VERSION", "county", "juris", "geo_category"], inplace=True) + DF_MERGE.to_csv(OUTPUT_FILE.format(geo_1 + geo_2)) + DF_MERGE_2 = pd.concat(DF_LIST_2) + DF_MERGE_2["county"] = DF_MERGE_2["juris"].map(juris_to_county) + DF_MERGE_2.sort_values( + by=["VERSION", "county", "juris", "geo_category"], + ascending=[False, True, True, False], + inplace=True, + ) + DF_MERGE_2.set_index(["VERSION", "county", "juris", "geo_category"], inplace=True) + DF_MERGE_2.to_csv(OUTPUT_FILE.format(geo_1 + geo_3)) diff --git a/utilities/export_filegdb_layers.py b/utilities/export_filegdb_layers.py index c5017e0..6383b2b 100644 --- a/utilities/export_filegdb_layers.py +++ b/utilities/export_filegdb_layers.py @@ -14,12 +14,19 @@ import argparse, os, sys, time import arcpy -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) - parser.add_argument("geodatabase", metavar="geodatabase.gdb", help="File geodatabase with layer export") +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "geodatabase", + metavar="geodatabase.gdb", + help="File geodatabase with layer export", + ) parser.add_argument("--layer", help="Layer to export") - parser.add_argument("--format", choices=["csv","dbf","shp","geojson"]) + parser.add_argument("--format", choices=["csv", "dbf", "shp", "geojson"]) args = parser.parse_args() @@ -27,16 +34,18 @@ # a dictionary of fields 
with required data type # add to this dictionary as needed - field_types = {'parcel_id': 'LONG', - 'zone_id': 'SHORT', - 'geom_id_s': 'TEXT'} + field_types = {"parcel_id": "LONG", "zone_id": "SHORT", "geom_id_s": "TEXT"} if not args.layer: print("workspace: {}".format(arcpy.env.workspace)) for dataset in arcpy.ListDatasets(): print(" dataset: {}".format(dataset)) - print(" feature classes: {} ".format(arcpy.ListFeatureClasses(feature_dataset=dataset))) - + print( + " feature classes: {} ".format( + arcpy.ListFeatureClasses(feature_dataset=dataset) + ) + ) + print(" feature classes: {} ".format(arcpy.ListFeatureClasses())) print(" tables: {} ".format(arcpy.ListTables())) @@ -51,44 +60,69 @@ # create a new field with the correct data type arcpy.AddField_management(args.layer, "temp", new_type) # calculate the value based on the old field - arcpy.CalculateField_management(args.layer, "temp", 'int(round(!field!))', "PYTHON3") + arcpy.CalculateField_management( + args.layer, "temp", "int(round(!field!))", "PYTHON3" + ) # delete the old field - arcpy.AlterField_management(args.layer, f_name, f_name+"_old", f_aliasName+"_old") + arcpy.AlterField_management( + args.layer, f_name, f_name + "_old", f_aliasName + "_old" + ) # rename the new field arcpy.AlterField_management(args.layer, "temp", f_name, f_aliasName) if args.layer in arcpy.ListFeatureClasses(): result = arcpy.GetCount_management(os.path.join(args.geodatabase, args.layer)) - print("Feature Class [{}] has {} rows".format(os.path.join(args.geodatabase, args.layer), result[0])) + print( + "Feature Class [{}] has {} rows".format( + os.path.join(args.geodatabase, args.layer), result[0] + ) + ) if args.format == "geojson": outfile = "{}.geojson".format(args.layer) - arcpy.FeaturesToJSON_conversion(os.path.join(args.geodatabase, args.layer), outfile, geoJSON='GEOJSON') + arcpy.FeaturesToJSON_conversion( + os.path.join(args.geodatabase, args.layer), outfile, geoJSON="GEOJSON" + ) print("Wrote {}".format(outfile)) if args.format == "shp": outfile = "{}.shp".format(args.layer) - arcpy.FeatureClassToShapefile_conversion(os.path.join(args.geodatabase, args.layer), Output_Folder=".") + arcpy.FeatureClassToShapefile_conversion( + os.path.join(args.geodatabase, args.layer), Output_Folder="." 
+ ) print("Wrote {}".format(outfile)) if args.format == "csv": - outfile = os.path.join(".","{}.csv".format(args.layer)) - arcpy.CopyRows_management(os.path.join(args.geodatabase, args.layer), outfile) + outfile = os.path.join(".", "{}.csv".format(args.layer)) + arcpy.CopyRows_management( + os.path.join(args.geodatabase, args.layer), outfile + ) print("Wrote {}".format(outfile)) if args.layer in arcpy.ListTables(): result = arcpy.GetCount_management(os.path.join(args.geodatabase, args.layer)) - print("Table [{}] has {} rows".format(os.path.join(args.geodatabase, args.layer), result[0])) + print( + "Table [{}] has {} rows".format( + os.path.join(args.geodatabase, args.layer), result[0] + ) + ) if args.format == "csv": outfile = "{}.csv".format(args.layer) - arcpy.TableToTable_conversion(os.path.join(args.geodatabase, args.layer), out_path=".", out_name=outfile) + arcpy.TableToTable_conversion( + os.path.join(args.geodatabase, args.layer), + out_path=".", + out_name=outfile, + ) print("Write {}".format(outfile)) if args.format == "dbf": outfile = "{}.dbf".format(args.layer) - arcpy.TableToTable_conversion(os.path.join(args.geodatabase, args.layer), out_path=".", out_name=outfile) + arcpy.TableToTable_conversion( + os.path.join(args.geodatabase, args.layer), + out_path=".", + out_name=outfile, + ) print("Wrote {}".format(outfile)) - diff --git a/utilities/h5 content.py b/utilities/h5 content.py index 65cc960..d1ba345 100644 --- a/utilities/h5 content.py +++ b/utilities/h5 content.py @@ -1,29 +1,31 @@ - # coding: utf-8 # This code take the building.csv file (output of development project script) and put it in the H5 file holder -import numpy as np +import numpy as np import pandas as pd -#read the latest building file -building = pd.read_csv('C:/Users/blu/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim 1.5/H5 Contents/h5 contents/buildings_2020Mar20.1512.csv') +# read the latest building file +building = pd.read_csv( + "C:/Users/blu/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim 1.5/H5 Contents/h5 contents/buildings_2020Mar20.1512.csv" +) -#read store -store = pd.HDFStore('C:/Users/blu/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim 1.5/PBA50/Current PBA50 Large General Input Data/2020_03_17_bayarea_v5.h5') +# read store +store = pd.HDFStore( + "C:/Users/blu/Box/Modeling and Surveys/Urban Modeling/Bay Area UrbanSim 1.5/PBA50/Current PBA50 Large General Input Data/2020_03_17_bayarea_v5.h5" +) -#open the store +# open the store store.open() -#check if H5 has alread the building file +# check if H5 has alread the building file keys = store.keys() -if '/buildings' in keys: - store.remove('buildings') - store.put('buildings',building,format = 'table', append = False) +if "/buildings" in keys: + store.remove("buildings") + store.put("buildings", building, format="table", append=False) else: - store.put('buildings',building,format = 'table', append = False) + store.put("buildings", building, format="table", append=False) -#close the store +# close the store store.close() - diff --git a/utilities/import_filegdb_layers.py b/utilities/import_filegdb_layers.py index a400485..fc11632 100644 --- a/utilities/import_filegdb_layers.py +++ b/utilities/import_filegdb_layers.py @@ -9,33 +9,48 @@ import argparse, os, sys, time import arcpy, numpy, pandas -if __name__ == '__main__': +if __name__ == "__main__": start = time.time() - parser = argparse.ArgumentParser(description=USAGE, formatter_class=argparse.RawDescriptionHelpFormatter,) - parser.add_argument("input_gdb", 
metavar="input.gdb", help="Input geodatabase") - parser.add_argument("input_layer", metavar="input_layer", help="Geometry layer in input geodatabase") - parser.add_argument("input_field", metavar="input_layer", help="Join field in input_layer") - parser.add_argument("join_csv", metavar="join.csv", help="CSV layer for joining") - parser.add_argument("join_field", metavar="join_field", help="Join field in join_csv") - parser.add_argument("join_type", choices=["KEEP_ALL","KEEP_COMMON"], default="KEEP_ALL", help="Outer join vs inner join. Default is KEEP_ALL, or outer") - parser.add_argument("output_gdb", metavar="output.gdb", help="Output geodatabase ") + parser = argparse.ArgumentParser( + description=USAGE, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("input_gdb", metavar="input.gdb", help="Input geodatabase") + parser.add_argument( + "input_layer", metavar="input_layer", help="Geometry layer in input geodatabase" + ) + parser.add_argument( + "input_field", metavar="input_layer", help="Join field in input_layer" + ) + parser.add_argument("join_csv", metavar="join.csv", help="CSV layer for joining") + parser.add_argument( + "join_field", metavar="join_field", help="Join field in join_csv" + ) + parser.add_argument( + "join_type", + choices=["KEEP_ALL", "KEEP_COMMON"], + default="KEEP_ALL", + help="Outer join vs inner join. Default is KEEP_ALL, or outer", + ) + parser.add_argument("output_gdb", metavar="output.gdb", help="Output geodatabase ") args = parser.parse_args() - print(" {:15}: {}".format("input_gdb", args.input_gdb)) + print(" {:15}: {}".format("input_gdb", args.input_gdb)) print(" {:15}: {}".format("input_layer", args.input_layer)) print(" {:15}: {}".format("input_field", args.input_field)) - print(" {:15}: {}".format("join_csv", args.join_csv)) - print(" {:15}: {}".format("join_field", args.join_field)) - print(" {:15}: {}".format("join_type", args.join_type)) - print(" {:15}: {}".format("output_gdb", args.output_gdb)) + print(" {:15}: {}".format("join_csv", args.join_csv)) + print(" {:15}: {}".format("join_field", args.join_field)) + print(" {:15}: {}".format("join_type", args.join_type)) + print(" {:15}: {}".format("output_gdb", args.output_gdb)) # our workspace will be the output_gdb if not os.path.exists(args.output_gdb): - (head,tail) = os.path.split(args.output_gdb) + (head, tail) = os.path.split(args.output_gdb) print("head: {} tail: {}".format(head, tail)) - if head=="": head="." + if head == "": + head = "." arcpy.CreateFileGDB_management(head, tail) print("Created {}".format(args.output_gdb)) @@ -43,7 +58,11 @@ # read the csv df = pandas.read_csv(args.join_csv) - print("Read {} lines from {}. Head:\n{}Dtypes:\n{}".format(len(df), args.join_csv, df.head(), df.dtypes)) + print( + "Read {} lines from {}. 
Head:\n{}Dtypes:\n{}".format( + len(df), args.join_csv, df.head(), df.dtypes + ) + ) # copy to the output_gdb as a table table_name = os.path.split(args.join_csv)[1] @@ -65,7 +84,7 @@ # we shall pare it down to just the Geometry and join_field fields = arcpy.ListFields(os.path.join(args.input_gdb, args.input_layer)) delete_fields = [] - keep_fields = [] + keep_fields = [] for field in fields: # keep Geometry and join_field if field.type == "Geometry" or field.name == args.input_field or field.required: @@ -77,8 +96,8 @@ print("Keeping fields {}".format(keep_fields)) print("Deleting fields {}".format(delete_fields)) # make sure we found both a geometry field and the join_field - assert(len(keep_fields)>=2) - assert(args.input_field in keep_fields) + assert len(keep_fields) >= 2 + assert args.input_field in keep_fields # delete the fields post join since the join might reduce the size substantially if it's an inner join # delete the layer if it already exists in the output gdb @@ -87,13 +106,23 @@ print("Found {} -- deleting".format(args.input_layer)) # copy the input to output_gdb with the same name - arcpy.CopyFeatures_management(os.path.join(args.input_gdb, args.input_layer), - os.path.join(args.output_gdb, args.input_layer)) + arcpy.CopyFeatures_management( + os.path.join(args.input_gdb, args.input_layer), + os.path.join(args.output_gdb, args.input_layer), + ) # create join layer with input_layer and join_table - print("Joining {} with {}".format(os.path.join(args.output_gdb, args.input_layer), table_name)) - joined_table = arcpy.AddJoin_management(os.path.join(args.output_gdb, args.input_layer), args.input_field, - os.path.join(args.output_gdb, table_name), args.join_field, - join_type=args.join_type) + print( + "Joining {} with {}".format( + os.path.join(args.output_gdb, args.input_layer), table_name + ) + ) + joined_table = arcpy.AddJoin_management( + os.path.join(args.output_gdb, args.input_layer), + args.input_field, + os.path.join(args.output_gdb, table_name), + args.join_field, + join_type=args.join_type, + ) new_table_name = "{}_joined".format(table_name) @@ -103,14 +132,20 @@ print("Found {} -- deleting".format(new_table_name)) # save it - arcpy.CopyFeatures_management(joined_table, os.path.join(args.output_gdb, new_table_name)) - print("Completed creation of {}".format(os.path.join(args.output_gdb, new_table_name))) + arcpy.CopyFeatures_management( + joined_table, os.path.join(args.output_gdb, new_table_name) + ) + print( + "Completed creation of {}".format(os.path.join(args.output_gdb, new_table_name)) + ) # NOW delete fields # do these one at a time since sometimes they fail for field in delete_fields: try: - arcpy.DeleteField_management(os.path.join(args.output_gdb, new_table_name), [field]) + arcpy.DeleteField_management( + os.path.join(args.output_gdb, new_table_name), [field] + ) print("Deleted field {}".format(field)) except: print("Error deleting field {}: {}".format(field, sys.exc_info())) @@ -118,4 +153,4 @@ num_rows = arcpy.GetCount_management(os.path.join(args.output_gdb, new_table_name)) print("{} has {} records".format(new_table_name, num_rows[0])) - print("Script took {0:0.1f} minutes".format((time.time()-start)/60.0)) \ No newline at end of file + print("Script took {0:0.1f} minutes".format((time.time() - start) / 60.0))