From b06a73f8a939f572ffaeb2f4943bd362002cee5f Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 3 Sep 2024 09:34:27 -0700 Subject: [PATCH 1/7] add slr parcels to parcel summaries --- baus/slr.py | 12 +++++++++++- baus/summaries/core_summaries.py | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/baus/slr.py b/baus/slr.py index 043c6d4d8..19b86a017 100644 --- a/baus/slr.py +++ b/baus/slr.py @@ -10,7 +10,7 @@ @orca.step() -def slr_inundate(slr_progression, slr_parcel_inundation, year, parcels): +def slr_inundate(slr_progression, slr_parcel_inundation, year, parcels, initial_year): # inundated parcels are all parcels at or below the SLR progression level in that year slr_progression = slr_progression.to_frame() @@ -33,6 +33,16 @@ def slr_inundate(slr_progression, slr_parcel_inundation, year, parcels): orca.add_column('parcels', 'slr_nodev', slr_nodev) parcels = orca.get_table("parcels") + # also track how many parcels were mitigated for summary purposes + # because of the way the inputs are setup, all mitigated parcels are mitigated from the start of the simulation + if year != initial_year: + return + mitigation_parcels = slr_parcel_inundation.query('inundation==100').astype('bool') + slr_mitigation = pd.Series(False, parcels.index) + mitigation = pd.Series(mitigation_parcels['inundation']) + slr_mitigation.update(mitigation) + orca.add_column('parcels', 'slr_mitigation', slr_mitigation) + @orca.step() def slr_remove_dev(buildings, households, jobs): diff --git a/baus/summaries/core_summaries.py b/baus/summaries/core_summaries.py index 3cddb572e..2bf9c2ae2 100644 --- a/baus/summaries/core_summaries.py +++ b/baus/summaries/core_summaries.py @@ -11,7 +11,7 @@ def parcel_summary(run_name, parcels, buildings, households, jobs, year, initial if year not in [initial_summary_year, final_year] + interim_summary_years: return - df = parcels.to_frame(["geom_id", "x", "y", 'max_dua', 'built_dua', 'max_far', 'built_far']) + df = 
parcels.to_frame(["geom_id", "x", "y", 'max_dua', 'built_dua', 'max_far', 'built_far', 'slr_nodev', 'slr_mitigation']) # add building data for parcels building_df = orca.merge_tables('buildings', [parcels, buildings], columns=['parcel_id', 'residential_units', 'deed_restricted_units', From 6a6a739d15c4b61f90201698331226cff1728487 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 3 Sep 2024 13:07:02 -0700 Subject: [PATCH 2/7] output parcel level slr information to a separate table --- baus/summaries/core_summaries.py | 2 +- baus/summaries/hazards_summaries.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/baus/summaries/core_summaries.py b/baus/summaries/core_summaries.py index 2bf9c2ae2..3cddb572e 100644 --- a/baus/summaries/core_summaries.py +++ b/baus/summaries/core_summaries.py @@ -11,7 +11,7 @@ def parcel_summary(run_name, parcels, buildings, households, jobs, year, initial if year not in [initial_summary_year, final_year] + interim_summary_years: return - df = parcels.to_frame(["geom_id", "x", "y", 'max_dua', 'built_dua', 'max_far', 'built_far', 'slr_nodev', 'slr_mitigation']) + df = parcels.to_frame(["geom_id", "x", "y", 'max_dua', 'built_dua', 'max_far', 'built_far']) # add building data for parcels building_df = orca.merge_tables('buildings', [parcels, buildings], columns=['parcel_id', 'residential_units', 'deed_restricted_units', diff --git a/baus/summaries/hazards_summaries.py b/baus/summaries/hazards_summaries.py index d5608e63e..b915ca653 100644 --- a/baus/summaries/hazards_summaries.py +++ b/baus/summaries/hazards_summaries.py @@ -7,7 +7,13 @@ @orca.step() -def hazards_slr_summary(run_setup, run_name, year): +def hazards_slr_summary(run_setup, run_name, parcels, year): + + # first export parcel level information on inundated and mitigated parcels + slr_parcel_summary = parcels.to_frame(['parcel_id', 'slr_nodev', 'slr_mitigation']) + hazsumm_output_dir = pathlib.Path(orca.get_injectable("outputs_dir")) / 
"hazards_summaries" + hazsumm_output_dir.mkdir(parents=True, exist_ok=True) + slr_parcel_summary.to_csv(hazsumm_output_dir / f"{run_name}_slr_parcel_summary_{year}.csv") if not run_setup['run_slr']: return @@ -54,8 +60,6 @@ def hazards_slr_summary(run_setup, run_name, year): for empsix in ['AGREMPN', 'MWTEMPN', 'RETEMPN', 'FPSEMPN', 'HEREMPN', 'OTHEMPN']: slr_summary["impacted_jobs_"+str(empsix)] = (unplaced_jobs_tot["empsix"] == empsix).sum() - hazsumm_output_dir = pathlib.Path(orca.get_injectable("outputs_dir")) / "hazards_summaries" - hazsumm_output_dir.mkdir(parents=True, exist_ok=True) slr_summary.to_csv(hazsumm_output_dir / f"{run_name}_slr_summary_{year}.csv") From ea0c2f0349d6c5fe42c348a83b0e79f527924763 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Thu, 5 Sep 2024 10:33:51 -0700 Subject: [PATCH 3/7] read SLR metrics inputs from model outputs --- scripts/metrics/metrics_healthy.py | 5 +- scripts/metrics/metrics_utils.py | 109 +++++++---------------------- 2 files changed, 27 insertions(+), 87 deletions(-) diff --git a/scripts/metrics/metrics_healthy.py b/scripts/metrics/metrics_healthy.py index 4df65088d..e0c63197a 100644 --- a/scripts/metrics/metrics_healthy.py +++ b/scripts/metrics/metrics_healthy.py @@ -250,8 +250,9 @@ def slr_protection(rtp, modelrun_alias, modelrun_id, modelrun_data, output_path, geog_name = 'eir_coc_id' if rtp=="RTP2021" else 'epc_id' # SLR parcels - all parcels in the SLR input files that are inundated or mitigated - slr_area = [df.inundation.isin([12,24,10,20,100]), (df.inundation.isin([12,24,10,20,100]) & (df[geog_name].notnull()))] - slr_protected_area = [df.inundation == 100, (df.inundation == 100) & (df[geog_name].notnull())] + slr_area = [((df.slr_nodev == True) | (df.slr_mitigation == True)), + (((df.slr_nodev == True) | (df.slr_mitigation == True)) & (df[geog_name].notnull()))] + slr_protected_area = [df.slr_mitigation == True, ((df.slr_mitigation == True) & (df[geog_name].notnull()))] protected_households_pct = 
[] for slr, slr_protected in zip(slr_area, slr_protected_area): diff --git a/scripts/metrics/metrics_utils.py b/scripts/metrics/metrics_utils.py index 3aaaee686..a854364ef 100644 --- a/scripts/metrics/metrics_utils.py +++ b/scripts/metrics/metrics_utils.py @@ -18,16 +18,10 @@ pba50_geography_crosswalk_df = pd.DataFrame() # parcel -> PBA50 growth geographies for use in rtp2025 metrics -rtp2025_np_parcel_inundation_df = pd.DataFrame() # parcel -> parcel sea level rise inundation -rtp2025_dbp_parcel_inundation_df = pd.DataFrame() # parcel -> parcel sea level rise inundation - rtp2021_tract_crosswalk_df = pd.DataFrame() # parcel -> tracts, including coc/epc, displacement, growth geography, HRA, TRA, PPA rtp2021_pda_crosswalk_df = pd.DataFrame() # parcel -> PDA (pda_id_pba50_fb) rtp2021_geography_crosswalk_df = pd.DataFrame() # parcel -> parcel category (fbpchcat -> growth geog, hra, tra), jurisdiction -rtp2021_np_parcel_inundation_df = pd.DataFrame() # parcel -> parcel sea level rise inundation -rtp2021_fbp_parcel_inundation_df = pd.DataFrame() # parcel -> parcel sea level rise inundation - PARCEL_AREA_FILTERS = { 'RTP2021': { 'HRA' : lambda df: df['hra_id'] == 'HRA', @@ -111,15 +105,11 @@ def load_data_for_runs( global rtp2025_parcel_taz_crosswalk_df global parcel_taz_sd_crosswalk_df - global rtp2025_np_parcel_inundation_df - global rtp2025_dbp_parcel_inundation_df global pba50_geography_crosswalk_df global rtp2021_geography_crosswalk_df global rtp2021_tract_crosswalk_df global rtp2021_pda_crosswalk_df - global rtp2021_np_parcel_inundation_df - global rtp2021_fbp_parcel_inundation_df CROSSWALKS_DIR = M_DRIVE / "urban_modeling" / "baus" / "BAUS Inputs" / "basis_inputs" / "crosswalks" @@ -362,19 +352,6 @@ def load_data_for_runs( logging.debug("rtp2025_parcel_taz_crosswalk_df.head():\n{}".format(rtp2025_parcel_taz_crosswalk_df)) logging.debug("rtp2025_parcel_taz_crosswalk_df.dtypes():\n{}".format(rtp2025_parcel_taz_crosswalk_df.dtypes)) - - - if 
len(rtp2025_np_parcel_inundation_df) == 0: - PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50Plus_NP.csv" - rtp2025_np_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE) - logging.info(" Read {:,} rows from crosswalk {}".format(len(rtp2025_np_parcel_inundation_df), PARCEL_INUNDATION_FILE)) - logging.debug(" rtp2025_np_parcel_inundation_df.head():\n{}".format(rtp2025_np_parcel_inundation_df.head())) - - if len(rtp2025_dbp_parcel_inundation_df) == 0: - PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50Plus_DBP.csv" - rtp2025_dbp_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE) - logging.info(" Read {:,} rows from crosswalk {}".format(len(rtp2025_dbp_parcel_inundation_df), PARCEL_INUNDATION_FILE)) - logging.debug(" rtp2025_dbp_parcel_inundation_df.head():\n{}".format(rtp2025_dbp_parcel_inundation_df.head())) # define analysis years if skip_base_year: @@ -385,6 +362,7 @@ def load_data_for_runs( modelrun_data[2025] = {} # for later interpolation to 2023 modelrun_data[2050] = {} parcel_pattern = "core_summaries/*_parcel_summary_{}.csv" + slr_parcel_pattern = "hazards_summaries/*_slr_parcel_summary_{}.csv" geo_summary_pattern = "geographic_summaries/*_county_summary_{}.csv" taz1_summary_pattern = "travel_model_summaries/*_taz1_summary_{}.csv" taz1_interim_summary_pattern = "core_summaries/*_interim_zone_output_{}.csv" @@ -531,29 +509,18 @@ def load_data_for_runs( rtp2021_geography_crosswalk_df['jurisdiction'] = rtp2021_geography_crosswalk_df.jurisdiction.str.replace("St ","St. ") # St. 
Helena logging.debug(f"rtp2021_geography_crosswalk_df.jurisdiction.value_counts(dropna=False):\n{rtp2021_geography_crosswalk_df.jurisdiction.value_counts(dropna=False)}") - if len(rtp2021_np_parcel_inundation_df) == 0: - PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50_NP.csv" - rtp2021_np_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE) - logging.info(" Read {:,} rows from file {}".format(len(rtp2021_np_parcel_inundation_df), PARCEL_INUNDATION_FILE)) - logging.debug(" rtp2021_np_parcel_inundation_df.head():\n{}".format(rtp2021_np_parcel_inundation_df.head())) - - if len(rtp2021_fbp_parcel_inundation_df) == 0: - PARCEL_INUNDATION_FILE = METRICS_DIR / "metrics_input_files" / "slr_parcel_inundation_PBA50_FBP.csv" - rtp2021_fbp_parcel_inundation_df = pd.read_csv(PARCEL_INUNDATION_FILE) - logging.info(" Read {:,} rows from crosswalk {}".format(len(rtp2021_fbp_parcel_inundation_df), PARCEL_INUNDATION_FILE)) - logging.debug(" rtp2021_fbp_parcel_inundation_df.head():\n{}".format(rtp2021_fbp_parcel_inundation_df.head())) - # define analysis years modelrun_data[2015] = {} modelrun_data[2050] = {} parcel_pattern = "*_parcel_data_{}.csv" + slr_parcel_pattern = "*_slr_parcel_summary_{}.csv" geo_summary_pattern = "*_county_summaries_{}.csv" taz1_summary_pattern = "*_taz_summaries_{}.csv" else: raise ValueError(f"Unrecognized plan: {rtp}") - # Load parcels summaries + # Load parcel summaries for year in sorted(modelrun_data.keys()): # handle RTP2021 hacks if (rtp=="RTP2021") and (year == 2050) and (modelrun_alias=="No Project"): @@ -577,6 +544,24 @@ def load_data_for_runs( logging.debug("Head:\n{}".format(parcel_df.head())) logging.debug("preserved_units.value_counts():\n{}".format(parcel_df['preserved_units'].value_counts(dropna=False))) + # also add parcel-level sea level rise summaries and merge them to the parcels table + logging.debug("Looking for sea level rise parcel data matching 
{}".format(slr_parcel_pattern).format(year)) + file = next(run_directory_path.glob(slr_parcel_pattern.format(year))) + logging.debug(f"Found {file}") + slr_parcel_df = pd.read_csv(file) + logging.info(" Read {:,} rows from slr parcel file {}".format(len(slr_parcel_df), file)) + logging.debug("Head:\n{}".format(slr_parcel_df.head())) + parcel_df = pd.merge( + left = parcel_df, + right = slr_parcel_df, + how = "left", + left_on = "parcel_id", + right_on = "parcel_id", + validate = "one_to_one" + ) + logging.debug("Head after merge with slr_parcel_df:\n{}".format(parcel_df.head())) + logging.debug("slr_parcel_df.dtypes:\n{}".format(parcel_df.dtypes)) + if rtp == "RTP2025": # add geography crosswalk for zoning categories parcel_df = pd.merge( @@ -649,29 +634,6 @@ def load_data_for_runs( logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes)) logging.debug("Head after merge with rtp2025_urban_area_crosswalk_df:\n{}".format(parcel_df.head())) - # add parcel sea level rise inundation based on the Plan scenario - this_modelrun_alias = classify_runid_alias(modelrun_alias) - if this_modelrun_alias == "NP": - parcel_df = pd.merge( - left = parcel_df, - right = rtp2025_np_parcel_inundation_df, - how = "left", - on = "parcel_id", - validate = "one_to_one" - ) - logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes)) - logging.debug("Head after merge with rtp2025_np_parcel_inundation_df:\n{}".format(parcel_df.head())) - elif this_modelrun_alias == "DBP": - parcel_df = pd.merge( - left = parcel_df, - right = rtp2025_dbp_parcel_inundation_df, - how = "left", - on = "parcel_id", - validate = "one_to_one" - ) - logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes)) - logging.debug("Head after merge with rtp2025_dbp_parcel_inundation_df:\n{}".format(parcel_df.head())) - # rtp2025_tract_crosswalk_df.columns should all be ints -- convert cols_int64 = ['tract10','tract20'] cols_int = 
['tract20_epc','tract20_growth_geo','tract20_tra','tract20_hra','tract10_DispRisk','in_urban_area'] @@ -722,29 +684,6 @@ def load_data_for_runs( logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes)) logging.debug("Head after merge with rtp2025_tract_crosswalk_df:\n{}".format(parcel_df.head())) - # add parcel sea level rise inundation *input* based on the scenario - this_modelrun_alias = classify_runid_alias(modelrun_alias) - if this_modelrun_alias == "NP": - parcel_df = pd.merge( - left = parcel_df, - right = rtp2021_np_parcel_inundation_df, - how = "left", - on = "parcel_id", - validate = "one_to_one" - ) - logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes)) - logging.debug("Head after merge with rtp2021_np_parcel_inundation_df:\n{}".format(parcel_df.head())) - else: - parcel_df = pd.merge( - left = parcel_df, - right = rtp2021_fbp_parcel_inundation_df, - how = "left", - on = "parcel_id", - validate = "one_to_one" - ) - logging.debug("parcel_df.dtypes:\n{}".format(parcel_df.dtypes)) - logging.debug("Head after merge with rtp2021_fbp_parcel_inundation_df:\n{}".format(parcel_df.head())) - # Merge the tract and coc crosswalks parcel_df = parcel_df.merge(rtp2021_tract_crosswalk_df, on="parcel_id", how="left") logging.debug("parcel_df after first merge with tract crosswalk:\n{}".format(parcel_df.head(30))) @@ -771,8 +710,8 @@ def load_data_for_runs( # use after may 3 2024 'np','cur','dbp', - # sea level rise column - "inundation"] + # sea level rise columns + "slr_nodev", "slr_mitigation"] parcel_df = parcel_df[columns_to_keep] logging.debug("parcel_df:\n{}".format(parcel_df.head(30))) @@ -877,7 +816,7 @@ def load_data_for_runs( df = df1.copy() for col in df.columns: - if pd.api.types.is_numeric_dtype(df[col]): + if pd.api.types.is_numeric_dtype(df[col]) and not pd.api.types.is_bool_dtype(df[col]): # Long way to write 3/5 but maybe it'll pay off in future... 
:) df[col] = df1[col] + ((2023 - t1) / (t2 - t1))*(df2[col] - df1[col]) From 45d8db268a394c0ce6ef21d6169df8e0f8790666 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 17 Sep 2024 12:49:28 -0700 Subject: [PATCH 4/7] add PBA50 slr inputs information --- scripts/metrics/metrics_healthy.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/metrics/metrics_healthy.py b/scripts/metrics/metrics_healthy.py index e0c63197a..d23083b90 100644 --- a/scripts/metrics/metrics_healthy.py +++ b/scripts/metrics/metrics_healthy.py @@ -223,6 +223,11 @@ def slr_protection(rtp, modelrun_alias, modelrun_id, modelrun_data, output_path, as a percentage of all households in sea level rise areas and a percentage of all households in sea level rise areas that are EPCs. + To run for PBA50, move the files from "Box\Plan Bay Area 2050+\Performance and Equity\/ + Plan Performance\Equity_Performance_Metrics\PBA50_reproduce_for_QA\slr_metrics_inputs" + to the relevant Plan run outputs folder, since these model output files were generated + post-Plan run to use in these standalone metrics. + Parameters: - rtp (str): RTP2021 or RTP2025. - modelrun_alias (str): Alias for the model run, used for labeling output. @@ -231,7 +236,8 @@ def slr_protection(rtp, modelrun_alias, modelrun_id, modelrun_data, output_path, - output_path (str): File path for saving the output results - append_output (bool): True if appending output; False if writing - Writes metrics_slrProtection.csv to output_path, appending if append_output is True. Columns are: + Writes metrics_healthy1_hazard_resilience_SLR.csv to output_path, appending if append_output is True. 
Columns are: + - modelrun_id - modelrun_alias - hazard - area_alias From a7cb702e77512bc96c43ff24a05626029eca92f4 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 18 Sep 2024 11:08:31 -0700 Subject: [PATCH 5/7] Update RTP2025 greenfield metric --- scripts/metrics/metrics_healthy.py | 67 ++++++++---------------- scripts/metrics/metrics_lu_standalone.py | 3 +- scripts/metrics/metrics_utils.py | 34 ++++++++++-- 3 files changed, 53 insertions(+), 51 deletions(-) diff --git a/scripts/metrics/metrics_healthy.py b/scripts/metrics/metrics_healthy.py index d23083b90..dc437c8c2 100644 --- a/scripts/metrics/metrics_healthy.py +++ b/scripts/metrics/metrics_healthy.py @@ -129,20 +129,19 @@ def non_greenfield_development_share( modelrun_alias: str, modelrun_id: str, modelrun_data: dict, - run_directory_path: pathlib.Path, output_path: pathlib.Path, append_output: bool ): ''' - Calculate and export the share of development that falls within the 2020 urban area footprint - (or is outside the urban area footprint but suitably low-density as to be rural in character). + Calculate and export the share of non-greenfield development in the final model year, + with greenfield development defined as development that falls outside of the 2020 urban area footprint + and has a dwelling unit (or non-residential equivalent) per acre greater than 1. Parameters: - rtp (str): RTP2021 or RTP2025. - modelrun_alias (str): Alias for the model run, used for labeling output. - modelrun_id (str): Identifier for the model run. - modelrun_data (dict): year -> {"parcel" -> parcel DataFrame, "county" -> county DataFrame } - - run_directory_path (Path): The directory path for this model run. - output_path (Path): The directory path to save the output CSV file. - append_output (bool): True if appending output; False if writing. 
''' @@ -156,65 +155,41 @@ def non_greenfield_development_share( # Define a potentially very impactful constant used to convert residential units to non-residential sqft and vice versa SQFT_PER_UNIT = 1750 # close to the weighted average size of developer-model units in a recent BAUS run - # Read in and select new buildings post 2020 - modelrun_name = modelrun_id - # Sometimes the modelrun_id is a whole file path - # Handle both forms of slashes in this field - if '\\' in modelrun_id: - modelrun_name = modelrun_id.split('\\')[-1] - if '/' in modelrun_id: - modelrun_name = modelrun_id.split('/')[-1] - NEW_BUILDINGS_PATH = pathlib.Path(run_directory_path) / f'core_summaries/{modelrun_name}_new_buildings_summary.csv' - logging.info(f' Reading new_buildings_summary from {NEW_BUILDINGS_PATH}...') - new_buildings = pd.read_csv( - NEW_BUILDINGS_PATH, - usecols=['parcel_id', 'year_built', 'building_sqft', 'residential_units'], - dtype={'parcel_id': int} - ) - new_buildings = new_buildings[new_buildings['year_built'] > 2020] - logging.debug(f' {len(new_buildings)} buildings built after 2020') + # get parcel and buildings data for horizon year + year_horizon = sorted(modelrun_data.keys())[-1] + buildings_horizon_year = modelrun_data[year_horizon]["buildings"] + # only look at buildings built after the Plan's initial year + year_initial = sorted(modelrun_data.keys())[0] + buildings_df = buildings_horizon_year.loc[buildings_horizon_year.year_built > year_initial] - # Some residential buildings (from the development pipeline) have no building_sqft); - # convert residential units to sqft equivalent so we can summarize "all development" - new_buildings.loc[new_buildings['building_sqft'] == 0, 'building_sqft'] = \ - new_buildings.loc[new_buildings['building_sqft'] == 0, 'residential_units'] * SQFT_PER_UNIT - - # We are interested in development on any parcel: + # we are interested in development on any parcel: # 1. outside the 2020 urban area footprint AND # 2. 
greater than 1 DU-equivalent per acre in 2050 - parcel_df = modelrun_data[2050]['parcel'].copy() - parcel_df['du_equiv_per_acre'] = (parcel_df['residential_units'] + (parcel_df['non_residential_sqft'] / SQFT_PER_UNIT)) \ - / parcel_df['ACRES'] - dense_greenfield_parcels = parcel_df.loc[ - (parcel_df['du_equiv_per_acre'] > 1.0) & (parcel_df['in_urban_area'] == 0), - 'parcel_id' - ] + buildings_df['du_equiv_per_acre'] = (buildings_df['residential_units_total'] + (buildings_df['non_residential_sqft_total'] / SQFT_PER_UNIT)) /\ + buildings_df['parcel_acres'] + dense_greenfield = buildings_df.loc[(buildings_df['du_equiv_per_acre'] > 1.0) & (buildings_df['in_urban_area'] == 0)] - # Calculate share of "all development" (in terms of building_sqft) that occurred on "dense greenfield parcels" - total_development = new_buildings['building_sqft'].sum() - dense_greenfield_development = new_buildings.loc[ - new_buildings['parcel_id'].isin(dense_greenfield_parcels), - 'building_sqft' - ].sum() - greenfield_development_pct = dense_greenfield_development / total_development + # then calculate the share of denser greenfield development parcel avres as a proportion of all development parcel acres + dense_greenfield_development_share = dense_greenfield.drop_duplicates(['parcel_id'])['parcel_acres'].sum() /\ + buildings_df.drop_duplicates(['parcel_id'])['parcel_acres'].sum() # Add metadata, format, and export to CSV - greenfield_development_df = pd.DataFrame({ + non_greenfield_development_df = pd.DataFrame({ 'modelrun_id': modelrun_id, 'modelrun_alias': modelrun_alias, 'area_alias': 'Regionwide', 'area': 'all', - 'development_in_urban_footprint_pct': 1 - greenfield_development_pct + 'non_greenfield_development_share': 1 - dense_greenfield_development_share }, index=[0]) out_file = pathlib.Path(output_path) / 'metrics_healthy2_development_in_urban_footprint.csv' - greenfield_development_df.to_csv( + non_greenfield_development_df.to_csv( out_file, mode='a' if append_output else 'w', 
header=False if append_output else True, index=False, ) - logging.info(f"{'Appended' if append_output else 'Wrote'} {len(greenfield_development_df)} " \ - + f"line{'s' if len(greenfield_development_df) > 1 else ''} to {out_file}") + logging.info(f"{'Appended' if append_output else 'Wrote'} {len(non_greenfield_development_df)} " \ + + f"line{'s' if len(non_greenfield_development_df) > 1 else ''} to {out_file}") def slr_protection(rtp, modelrun_alias, modelrun_id, modelrun_data, output_path, append_output): diff --git a/scripts/metrics/metrics_lu_standalone.py b/scripts/metrics/metrics_lu_standalone.py index 3d515ed99..2999c1e43 100644 --- a/scripts/metrics/metrics_lu_standalone.py +++ b/scripts/metrics/metrics_lu_standalone.py @@ -230,8 +230,7 @@ def main(): metrics_healthy.urban_park_acres( BOX_DIR, args.rtp, modelrun_alias, modelrun_id, modelrun_data, OUTPUT_PATH, append_output) metrics_healthy.non_greenfield_development_share( - args.rtp, modelrun_alias, modelrun_id, modelrun_data, run_directory_path, - OUTPUT_PATH, append_output) + args.rtp, modelrun_alias, modelrun_id, modelrun_data, OUTPUT_PATH, append_output) metrics_healthy.slr_protection( args.rtp, modelrun_alias, modelrun_id, modelrun_data, OUTPUT_PATH, append_output) diff --git a/scripts/metrics/metrics_utils.py b/scripts/metrics/metrics_utils.py index a854364ef..7e56c0d30 100644 --- a/scripts/metrics/metrics_utils.py +++ b/scripts/metrics/metrics_utils.py @@ -90,9 +90,10 @@ def load_data_for_runs( Returns: - dict with year -> { - "parcel" -> parcel DataFrame, - "county" -> county DataFrame, - "TAZ1454"-> taz DataFrame (necessary for totpop, which is only tabulated for TAZs) + "parcel" -> parcel DataFrame, + "buildings" -> building DataFrame, + "county" -> county DataFrame, + "TAZ1454" -> taz DataFrame (necessary for totpop, which is only tabulated for TAZs) } """ @@ -362,6 +363,7 @@ def load_data_for_runs( modelrun_data[2025] = {} # for later interpolation to 2023 modelrun_data[2050] = {} parcel_pattern 
= "core_summaries/*_parcel_summary_{}.csv" + buildings_pattern = "core_summaries/*_building_summary_{}.csv" slr_parcel_pattern = "hazards_summaries/*_slr_parcel_summary_{}.csv" geo_summary_pattern = "geographic_summaries/*_county_summary_{}.csv" taz1_summary_pattern = "travel_model_summaries/*_taz1_summary_{}.csv" @@ -718,6 +720,32 @@ def load_data_for_runs( modelrun_data[year]['parcel'] = parcel_df + # Load building data for horizon year + horizon_year = sorted(modelrun_data.keys())[-1] + logging.debug("Looking for building summaries matching {}".format(buildings_pattern.format(horizon_year))) + file = next(run_directory_path.glob(buildings_pattern.format(horizon_year))) + logging.debug(f"Found {file}") + buildings_df = pd.read_csv(file) + logging.info(" Read {:,} rows from geography summary {}".format(len(buildings_df), file)) + logging.debug("Head:\n{}".format(buildings_df)) + + # merge parcel information for horizon year onto buildings + parcel_df = modelrun_data[horizon_year]['parcel'] + parcels = parcel_df[['parcel_id', 'residential_units', 'non_residential_sqft', 'ACRES', 'in_urban_area']].\ + rename(columns={"residential_units": "residential_units_total", "non_residential_sqft": "non_residential_sqft_total", + "ACRES": "parcel_acres"}) + buildings_df = pd.merge( + left = buildings_df, + right = parcels, + how = "left", + on = "parcel_id", + validate = "many_to_one" + ) + logging.debug("Head after merge with parcel_df:\n{}".format(buildings_df.head())) + logging.debug("Length after merge with parcel_df:\n{}".format(len(buildings_df))) + + modelrun_data[horizon_year]['buildings'] = buildings_df + # Load county summaries for year in sorted(modelrun_data.keys()): logging.debug("Looking for geographic summaries matching {}".format(geo_summary_pattern.format(year))) From c5e13f6a1e4eb04ac4cc0521631b4d58bb4a0bae Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 18 Sep 2024 13:54:38 -0700 Subject: [PATCH 6/7] add greenfield metric for RTP2021 --- 
scripts/metrics/metrics_healthy.py | 5 ----- scripts/metrics/metrics_utils.py | 11 ++++++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/metrics/metrics_healthy.py b/scripts/metrics/metrics_healthy.py index dc437c8c2..a7dbbbbb1 100644 --- a/scripts/metrics/metrics_healthy.py +++ b/scripts/metrics/metrics_healthy.py @@ -147,11 +147,6 @@ def non_greenfield_development_share( ''' logging.info("Calculating non_greenfield_development_share") - # Guard clause: this metric is implemented for RTP2025 / PBA50+ only - if rtp != 'RTP2025': - logging.info(" RTP2021 is not supported - skipping") - return - # Define a potentially very impactful constant used to convert residential units to non-residential sqft and vice versa SQFT_PER_UNIT = 1750 # close to the weighted average size of developer-model units in a recent BAUS run diff --git a/scripts/metrics/metrics_utils.py b/scripts/metrics/metrics_utils.py index 7e56c0d30..0949682ab 100644 --- a/scripts/metrics/metrics_utils.py +++ b/scripts/metrics/metrics_utils.py @@ -484,7 +484,9 @@ def load_data_for_runs( if len(rtp2021_geography_crosswalk_df) == 0: # pba50_metrics.py called this "parcel_geography_file" - use it to get fbpchcat GEOGRAPHY_CROSSWALK_FILE = METRICS_DIR / "metrics_input_files" / "2021_02_25_parcels_geography.csv" - rtp2021_geography_crosswalk_df = pd.read_csv(GEOGRAPHY_CROSSWALK_FILE, usecols=['PARCEL_ID','fbpchcat','ppa_id','eir_coc_id', 'juris_name_full']) + rtp2021_geography_crosswalk_df = pd.read_csv(GEOGRAPHY_CROSSWALK_FILE, usecols=['PARCEL_ID', 'ACRES', 'fbpchcat','ppa_id','eir_coc_id', 'juris_name_full', 'urbanized']) + # match RTP2025 column name + rtp2021_geography_crosswalk_df.rename(columns={"urbanized": "in_urban_area"}, inplace=True) logging.info(" Read {:,} rows from crosswalk {}".format(len(rtp2021_geography_crosswalk_df), GEOGRAPHY_CROSSWALK_FILE)) logging.debug(" rtp2021_geography_crosswalk_df.head():\n{}".format(rtp2021_geography_crosswalk_df.head())) @@ -516,6 
+518,7 @@ def load_data_for_runs( modelrun_data[2050] = {} parcel_pattern = "*_parcel_data_{}.csv" slr_parcel_pattern = "*_slr_parcel_summary_{}.csv" + buildings_pattern = "*_building_data_{}.csv" geo_summary_pattern = "*_county_summaries_{}.csv" taz1_summary_pattern = "*_taz_summaries_{}.csv" @@ -696,6 +699,8 @@ def load_data_for_runs( # Retain only a subset of columns after merging columns_to_keep = ['parcel_id', 'tract10', 'fbpchcat', 'gg_id', 'tra_id', 'hra_id', 'dis_id', 'ppa_id', 'eir_coc_id','jurisdiction', + # greenfield columns + 'in_urban_area', 'ACRES', 'zone_id', 'county', 'superdistrict', 'hhq1', 'hhq2', 'hhq3', 'hhq4', 'tothh', 'totemp', @@ -731,6 +736,10 @@ def load_data_for_runs( # merge parcel information for horizon year onto buildings parcel_df = modelrun_data[horizon_year]['parcel'] + # if RTP2021 get non_residential_sqft from the buildings table + if rtp=="RTP2021": + parcel_df = parcel_df.merge(buildings_df[['parcel_id', 'non_residential_sqft']].groupby(['parcel_id']).sum(), on='parcel_id', how='left') + # distinguish the column names from the buildings table names (these are parcel totals) parcels = parcel_df[['parcel_id', 'residential_units', 'non_residential_sqft', 'ACRES', 'in_urban_area']].\ rename(columns={"residential_units": "residential_units_total", "non_residential_sqft": "non_residential_sqft_total", "ACRES": "parcel_acres"}) From 0d9fb3a7c6c1c865173b0685d2ed3420da1db49a Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Thu, 19 Sep 2024 13:35:59 -0700 Subject: [PATCH 7/7] text updates --- scripts/metrics/metrics_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/metrics/metrics_utils.py b/scripts/metrics/metrics_utils.py index 0949682ab..597c8ecf5 100644 --- a/scripts/metrics/metrics_utils.py +++ b/scripts/metrics/metrics_utils.py @@ -91,7 +91,7 @@ def load_data_for_runs( Returns: - dict with year -> { "parcel" -> parcel DataFrame, - "buildings" -> building DataFrame, + "buildings" 
-> buildings DataFrame, "county" -> county DataFrame, "TAZ1454" -> taz DataFrame (necessary for totpop, which is only tabulated for TAZs) } @@ -727,12 +727,12 @@ def load_data_for_runs( # Load building data for horizon year horizon_year = sorted(modelrun_data.keys())[-1] - logging.debug("Looking for building summaries matching {}".format(buildings_pattern.format(horizon_year))) + logging.debug("Looking for buildings summary matching {}".format(buildings_pattern.format(horizon_year))) file = next(run_directory_path.glob(buildings_pattern.format(horizon_year))) logging.debug(f"Found {file}") buildings_df = pd.read_csv(file) - logging.info(" Read {:,} rows from geography summary {}".format(len(buildings_df), file)) - logging.debug("Head:\n{}".format(buildings_df)) + logging.info(" Read {:,} rows from buildings summary {}".format(len(buildings_df), file)) + logging.debug("Head:\n{}".format(buildings_df)) # merge parcel information for horizon year onto buildings parcel_df = modelrun_data[horizon_year]['parcel']