Initialize hardware store case study

mathesar-foundation · Dec 17, 2024 · 92b41b8 · 92b41b8
1 parent ca59679
commit 92b41b8
Show file tree

Hide file tree

Showing 5 changed files with 431 additions and 0 deletions.
diff --git a/hardware_store/README.md b/hardware_store/README.md
@@ -0,0 +1,82 @@
+# Hardware Store sample data
+
+This sample dataset represents a chain of hardware stores managing their inventory and rentals.
+
+```mermaid
+%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
+
+erDiagram
+%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
+
+    store_locations {
+        BIGINT id PK
+        string name
+        string address
+    }
+
+    customers {
+        BIGINT id PK
+        string first_name
+        string last_name
+        string email
+        string phone
+        string address
+    }
+
+    assets {
+        BIGINT id PK
+        string name
+        string serial_number
+        NUMERIC rental_price
+        NUMERIC sale_price
+        string rental_period
+        string location
+        BIGINT store_id FK
+    }
+
+    transactions {
+        BIGINT id PK
+        BIGINT asset_id FK
+        BIGINT customer_id FK
+        string transaction_type
+        TIMESTAMP transaction_date
+        NUMERIC total_charge
+        string note
+    }
+
+    rentals {
+        BIGINT id PK
+        BIGINT transaction_id FK
+        TIMESTAMP rental_start
+        TIMESTAMP rental_end
+        TIMESTAMP time_out
+        TIMESTAMP time_in
+        INTERVAL rental_time
+    }
+
+%% Relationships
+%%  See: https://mermaid.js.org/syntax/entityRelationshipDiagram.html#relationship-syntax
+    assets }|--|| store_locations : "store_id"
+    transactions ||--|| assets : "asset_id"
+    transactions }|--|| customers : "customer_id"
+    rentals ||--|| transactions : "transaction_id"
+
+```
+
+
+## Loading Data
+
+The generated SQL file, `generated_data.sql`, contains all the COPY commands needed to import the data into your database. Both the data and that file are produced by the `generate_data.py` script, which can be adjusted and re-run to alter the data if needed.
+
+Load the data into a locally-running Mathesar instance like this:
+
+```shell
+# First load the schema and tables
+docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < schema.sql
+# Then the sample data
+docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < generated_data.sql
+```
+
+## Development
+
+The only requirement is to install dependencies with `pip install -r requirements.txt`.
diff --git a/hardware_store/generate_data.py b/hardware_store/generate_data.py
@@ -0,0 +1,104 @@
+import os
+import random
+from faker import Faker
+import faker_commerce
+from datetime import datetime
+
+# Shared Faker instance used by every generator below. No seed is set anywhere
+# in this script, so each run produces different data.
+fake = Faker()
+# Register the faker_commerce provider on the shared instance
+# (generate_assets calls fake.ecommerce_name()).
+fake.add_provider(faker_commerce.Provider)
+
+# Number of rows to generate
+# Each constant is the row count for one table; ids run from 1 to the count.
+NUM_STORES = 5
+NUM_CUSTOMERS = 20
+NUM_ASSETS = 50
+NUM_TRANSACTIONS = 60
+NUM_RENTALS = 30
+
+# Helper function to clean values for COPY
+def clean_value(value):
+    """Render one field for PostgreSQL's text-format COPY input.
+
+    Args:
+        value: Any Python value; ``None`` stands for SQL NULL.
+
+    Returns:
+        ``\\N`` for ``None``; otherwise ``str(value)`` with backslashes
+        doubled and tabs, newlines and carriage returns replaced by a single
+        space, so the field can never be mistaken for a column or row
+        delimiter, an escape sequence, or the NULL marker.
+    """
+    if value is None:
+        return r"\N"  # PostgreSQL NULL
+    text = value if isinstance(value, str) else str(value)
+    # Backslash is COPY's escape character: double it so that data such as a
+    # literal "\N" or a trailing "\" is loaded verbatim instead of being
+    # interpreted (or swallowing the following delimiter).
+    text = text.replace("\\", "\\\\")
+    # Tab separates columns and newline / carriage return end a row; flatten
+    # them to spaces so multi-line values (e.g. addresses) stay on one line.
+    return text.replace("\t", " ").replace("\n", " ").replace("\r", " ")
+
+# Table Data Generation
+def generate_store_locations():
+    """Yield one ``[id, name, address]`` row per store.
+
+    Ids run from 1 to NUM_STORES; the name comes from ``fake.company()`` and
+    the address from ``fake.address()``.
+    """
+    for store_id in range(1, NUM_STORES + 1):
+        store_name = fake.company()
+        store_address = fake.address()
+        yield [store_id, store_name, store_address]
+
+def generate_customers():
+    """Yield one customer row per id from 1 to NUM_CUSTOMERS.
+
+    Each row is ``[id, first_name, last_name, email, phone, address]``, with
+    every field after the id produced by the matching Faker method.
+    """
+    # Listed in output-column order; they are invoked left to right per row.
+    field_makers = (
+        fake.first_name,
+        fake.last_name,
+        fake.email,
+        fake.phone_number,
+        fake.address,
+    )
+    for customer_id in range(1, NUM_CUSTOMERS + 1):
+        yield [customer_id, *(make() for make in field_makers)]
+
+def generate_assets(store_ids):
+    """Yield one asset row per id from 1 to NUM_ASSETS.
+
+    Args:
+        store_ids: Non-empty sequence of store ids; each asset is assigned
+            one of them at random.
+
+    Yields:
+        ``[id, name, serial_number, rental_price, sale_price, rental_period,
+        location, store_id]``. ``sale_price`` is ``None`` for roughly 80% of
+        assets.
+    """
+    periods = ["daily", "weekly", "monthly"]
+    for asset_id in range(1, NUM_ASSETS + 1):
+        period = random.choice(periods)
+        rent = round(random.uniform(5, 100), 2)
+
+        # Only about one asset in five is also for sale; its sale price is
+        # 50-80% of the rental price.
+        sale = None
+        if random.random() < 0.2:
+            sale = round(rent * random.uniform(0.5, 0.8), 2)
+
+        name = fake.ecommerce_name()
+        serial = fake.unique.ean13()  # .unique: no serial number repeats
+        aisle = random.randint(1, 20)
+        shelf = random.randint(1, 10)
+        location = f"Aisle {aisle} - Shelf {shelf}"
+        store = random.choice(store_ids)
+
+        yield [asset_id, name, serial, rent, sale, period, location, store]
+
+def generate_transactions(asset_ids, customer_ids):
+    """Yield one transaction row per id from 1 to NUM_TRANSACTIONS.
+
+    Args:
+        asset_ids: Non-empty sequence of asset ids to pick from at random.
+        customer_ids: Non-empty sequence of customer ids to pick from at random.
+
+    Yields:
+        ``[id, asset_id, customer_id, transaction_type, transaction_date,
+        total_charge, note]``.
+    """
+    kinds = ["Sale", "Rental", "Return"]
+    for txn_id in range(1, NUM_TRANSACTIONS + 1):
+        row = [
+            txn_id,
+            random.choice(asset_ids),
+            random.choice(customer_ids),
+            random.choice(kinds),
+        ]
+        row.append(fake.date_time_this_year())
+        row.append(round(random.uniform(10, 500), 2))
+        row.append(fake.sentence())  # free-text note
+        yield row
+
+def generate_rentals(transaction_ids):
+    """Yield one rental row per id from 1 to NUM_RENTALS.
+
+    Args:
+        transaction_ids: Non-empty sequence of transaction ids; each rental
+            is attached to one chosen at random (repeats are possible).
+
+    Yields:
+        ``[id, transaction_id, start, end, start, end, end - start]``. The
+        start and end datetimes are each emitted twice and the last element
+        is their difference as a ``timedelta``.
+    """
+    for rental_id in range(1, NUM_RENTALS + 1):
+        txn = random.choice(transaction_ids)
+        start = fake.date_time_this_year()
+        end = fake.date_time_between_dates(datetime_start=start)
+        # NOTE(review): the repeated start/end presumably fill the time_out /
+        # time_in columns - confirm against schema.sql.
+        yield [rental_id, txn, start, end, start, end, end - start]
+
+# Generate Data
+# Ids are simply 1..N per table, so foreign keys can be drawn from these
+# lists without materialising the parent rows first.
+store_ids = [*range(1, NUM_STORES + 1)]
+customer_ids = [*range(1, NUM_CUSTOMERS + 1)]
+asset_ids = [*range(1, NUM_ASSETS + 1)]
+transaction_ids = [*range(1, NUM_TRANSACTIONS + 1)]
+
+# Table name -> lazy row generator. Nothing is generated until the mapping is
+# iterated; insertion order is the order in which tables are written out.
+tables = dict(
+    store_locations=generate_store_locations(),
+    customers=generate_customers(),
+    assets=generate_assets(store_ids),
+    transactions=generate_transactions(asset_ids, customer_ids),
+    rentals=generate_rentals(transaction_ids),
+)
+
+# Write to SQL file
+# The file is created in the current working directory, so its location
+# depends on where the script is launched from.
+sql_file = os.path.join(os.getcwd(), "generated_data.sql")
+
+# Encode explicitly as UTF-8 so the output does not depend on the platform's
+# default encoding and non-ASCII Faker output cannot make the write fail.
+with open(sql_file, "w", encoding="utf-8") as f:
+    f.write('SET search_path="Hardware Store";\n\n')
+
+    # One "COPY <table> FROM stdin;" section per table, in the mapping's
+    # insertion order. Each row is a line of tab-separated cleaned fields and
+    # the section ends with the "\." end-of-data marker.
+    for table_name, generator in tables.items():
+        f.write(f"COPY {table_name} FROM stdin;\n")
+        f.writelines(
+            "\t".join(map(clean_value, row)) + "\n" for row in generator
+        )
+        f.write("\\.\n\n")
+
+print(f"SQL file generated: {sql_file}")