Add sample users to QuickStart (#38)

* gitignore dist/ as in metadataservice PR #28
* Add load of example users
* Ditch utf-8 encode for consistency with other models
DataChefHQ · Jun 30, 2022 · e0068ca · e0068ca
1 parent 1000678
commit e0068ca
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 11 deletions.
diff --git a/databuilder/databuilder/models/user.py b/databuilder/databuilder/models/user.py
@@ -57,18 +57,18 @@ def __init__(self,
                            then we will have a cron job to update the ex-employee nodes based on
                            the case if this timestamp hasn't been updated for two weeks.
         """
-        self.first_name = first_name.encode('utf-8')
-        self.last_name = last_name.encode('utf-8')
-        self.name = name.encode('utf-8')
+        self.first_name = first_name
+        self.last_name = last_name
+        self.name = name
 
-        self.email = email.encode('utf-8')
-        self.github_username = github_username.encode('utf-8')
+        self.email = email
+        self.github_username = github_username
         # todo: team will be a separate node once Amundsen People supports team
-        self.team_name = team_name.encode('utf-8')
-        self.manager_email = manager_email.encode('utf-8')
-        self.employee_type = employee_type.encode('utf-8')
+        self.team_name = team_name
+        self.manager_email = manager_email
+        self.employee_type = employee_type
         # this attr not available in team service, either update team service, update with FE
-        self.slack_id = slack_id.encode('utf-8')
+        self.slack_id = slack_id
         self.is_active = is_active
         self.updated_at = updated_at
 

diff --git a/databuilder/example/sample_data/sample_user.csv b/databuilder/example/sample_data/sample_user.csv
@@ -0,0 +1,4 @@
+email,first_name,last_name,name,github_username,team_name,employee_type,manager_email,slack_id
+[email protected],Roald,Amundsen,"Roald Amundsen",lyft,"Team Amundsen",sailor,"[email protected]",ramundzn
+[email protected],Christopher,Columbus,"Christopher Columbus",ChristopherColumbusFAKE,"Team Amundsen",sailor,"[email protected]",chrisc
+[email protected],Buzz,Aldrin,"Buzz Aldrin",BuzzAldrinFAKE,"Team Amundsen",astronaut,"[email protected]",buzz
diff --git a/databuilder/example/scripts/sample_data_loader.py b/databuilder/example/scripts/sample_data_loader.py
@@ -112,6 +112,42 @@ def load_col_data_from_csv(file_name):
         conn.commit()
 
 
+def load_user_data_from_csv(file_name):
+    conn = create_connection(DB_FILE)
+    if conn:
+        cur = conn.cursor()
+        cur.execute('drop table if exists test_user_metadata')
+        cur.execute('create table if not exists test_user_metadata '
+                    '(email VARCHAR(64) NOT NULL , '
+                    'first_name VARCHAR(64) NOT NULL , '
+                    'last_name VARCHAR(64) NOT NULL , '
+                    'name VARCHAR(64) NOT NULL , '
+                    'github_username VARCHAR(64) NOT NULL , '
+                    'team_name VARCHAR(64) NOT NULL, '
+                    'employee_type VARCHAR(64) NOT NULL,'
+                    'manager_email VARCHAR(64) NOT NULL,'
+                    'slack_id VARCHAR(64) NOT NULL)')
+        file_loc = 'example/sample_data/' + file_name
+        with open(file_loc, 'r') as fin:
+            dr = csv.DictReader(fin)
+            to_db = [(i['email'],
+                      i['first_name'],
+                      i['last_name'],
+                      i['name'],
+                      i['github_username'],
+                      i['team_name'],
+                      i['employee_type'],
+                      i['manager_email'],
+                      i['slack_id']) for i in dr]
+
+        cur.executemany("INSERT INTO test_user_metadata ("
+                        "email, first_name, last_name, name, github_username, "
+                        "team_name, employee_type, "
+                        "manager_email, slack_id ) VALUES "
+                        "(?, ?, ?, ?, ?, ?, ?, ?, ?);", to_db)
+        conn.commit()
+
+
 # todo: Add a second model
 def create_sample_job(table_name, model_name):
     sql = textwrap.dedent("""
@@ -209,6 +245,7 @@ def create_es_publisher_sample_job():
 if __name__ == "__main__":
     load_table_data_from_csv('sample_table.csv')
     load_col_data_from_csv('sample_col.csv')
+    load_user_data_from_csv('sample_user.csv')
     if create_connection(DB_FILE):
         # start table job
         job1 = create_sample_job('test_table_metadata',
@@ -220,6 +257,11 @@ def create_es_publisher_sample_job():
                                  'example.models.test_column_model.TestColumnMetadata')
         job2.launch()
 
+        # start user job
+        job_user = create_sample_job('test_user_metadata',
+                                     'databuilder.models.user.User')
+        job_user.launch()
+
         # start Elasticsearch publish job
-        job3 = create_es_publisher_sample_job()
-        job3.launch()
+        job_es = create_es_publisher_sample_job()
+        job_es.launch()