Skip to content

Commit

Permalink
[DEV-6907] improved GTAS ETL logging
Browse files Browse the repository at this point in the history
  • Loading branch information
tony-sappe committed Feb 24, 2021
1 parent 8b4f1e9 commit dd50354
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 16 deletions.
2 changes: 1 addition & 1 deletion usaspending_api/common/etl/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from usaspending_api.common.etl import ETLObjectBase
from usaspending_api.common.etl.operations import delete_obsolete_rows, insert_missing_rows, update_changed_rows
from usaspending_api.common.helpers.sql_helpers import execute_dml_sql
from usaspending_api.common.helpers.timing_helpers import ConsoleTimer as Timer
from usaspending_api.common.helpers.timing_helpers import ScriptTimer as Timer


class ETLMixin:
Expand Down
39 changes: 24 additions & 15 deletions usaspending_api/references/management/commands/load_gtas.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,29 +41,35 @@


class Command(mixins.ETLMixin, BaseCommand):
help = "Update GTAS aggregations used as domain data"
help = "Drop and recreate all GTAS reference data"

@transaction.atomic()
def handle(self, *args, **options):
logger.info("Creating broker cursor")
logger.info("Starting ETL script")
self.process_data()
logger.info("GTAS ETL finished successfully!")

@transaction.atomic()
def process_data(self):
broker_cursor = connections["data_broker"].cursor()

logger.info("Running TOTAL_OBLIGATION_SQL")
logger.info("Extracting data from Broker")
broker_cursor.execute(self.broker_fetch_sql())

logger.info("Getting total obligation values from cursor")
total_obligation_values = dictfetchall(broker_cursor)

logger.info("Deleting all existing GTAS total obligation records in website")
GTASSF133Balances.objects.all().delete()
deletes = GTASSF133Balances.objects.all().delete()
logger.info(f"Deleted {deletes[0]:,} records")

logger.info("Inserting GTAS total obligations records into website")
logger.info("Transforming new GTAS records")
total_obligation_objs = [GTASSF133Balances(**values) for values in total_obligation_values]
GTASSF133Balances.objects.bulk_create(total_obligation_objs)

self._execute_dml_sql(self.tas_fk_sql(), "Populating TAS foreign keys")
logger.info("Loading new GTAS records into database")
new_rec_count = len(GTASSF133Balances.objects.bulk_create(total_obligation_objs))
logger.info(f"Loaded: {new_rec_count:,} records")

logger.info("GTAS loader finished successfully!")
load_rec = self._execute_dml_sql(self.tas_fk_sql(), "Populating TAS foreign keys")
logger.info(f"Set {load_rec:,} TAS FKs in GTAS table, {new_rec_count - load_rec:,} NULLs")
logger.info("Committing transaction to database")

def broker_fetch_sql(self):
return f"""
Expand Down Expand Up @@ -110,7 +116,10 @@ def column_statements(self):
return "\n".join(simple_fields + inverted_fields + year_specific_fields)

def tas_fk_sql(self):
return """UPDATE gtas_sf133_balances
SET treasury_account_identifier = tas.treasury_account_identifier
FROM treasury_appropriation_account tas
WHERE tas.tas_rendering_label = gtas_sf133_balances.tas_rendering_label;"""
return """
UPDATE gtas_sf133_balances
SET treasury_account_identifier = tas.treasury_account_identifier
FROM treasury_appropriation_account tas
WHERE
tas.tas_rendering_label = gtas_sf133_balances.tas_rendering_label
AND gtas_sf133_balances.treasury_account_identifier IS DISTINCT FROM tas.treasury_account_identifier"""

0 comments on commit dd50354

Please sign in to comment.