Skip to content

Commit

Permalink
Initialize hardware store case study
Browse files Browse the repository at this point in the history
  • Loading branch information
zackkrida committed Dec 17, 2024
1 parent ca59679 commit 92b41b8
Show file tree
Hide file tree
Showing 5 changed files with 431 additions and 0 deletions.
82 changes: 82 additions & 0 deletions hardware_store/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Hardware Store sample data

This sample dataset represents a chain of hardware stores managing their inventory and rentals.

```mermaid
%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
erDiagram
%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
store_locations {
BIGINT id PK
string name
string address
}
customers {
BIGINT id PK
string first_name
string last_name
string email
string phone
string address
}
assets {
BIGINT id PK
string name
string serial_number
NUMERIC rental_price
NUMERIC sale_price
string rental_period
string location
BIGINT store_id FK
}
transactions {
BIGINT id PK
BIGINT asset_id FK
BIGINT customer_id FK
string transaction_type
TIMESTAMP transaction_date
NUMERIC total_charge
string note
}
rentals {
BIGINT id PK
BIGINT transaction_id FK
TIMESTAMP rental_start
TIMESTAMP rental_end
TIMESTAMP time_out
TIMESTAMP time_in
INTERVAL rental_time
}
%% Relationships
%% See: https://mermaid.js.org/syntax/entityRelationshipDiagram.html#relationship-syntax
assets ||--|{ store_locations : "store_id"
transactions ||--|| assets : "asset_id"
transactions ||--|{ customers : "customer_id"
rentals ||--|| transactions : "transaction_id"
```


## Loading Data

The generated SQL file, `generated_data.sql`, contains all the COPY commands needed to import the data into your database. Both the data and the SQL file are produced by `generate_data.py`, which can be adjusted and re-run to alter the data if needed.

Load the data into a locally-running Mathesar instance like this:

```shell
# First load the schema and tables
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < schema.sql
# Then the sample data
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < generated_data.sql
```

## Development

The only requirement is to install dependencies with `pip install -r requirements.txt`.
104 changes: 104 additions & 0 deletions hardware_store/generate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import os
import random
from faker import Faker
import faker_commerce
from datetime import datetime

# Shared Faker instance used by all of the generate_* functions in this file.
# No seed is set here, so each run produces different data.
# The faker_commerce provider is registered on it (presumably the source of
# `ecommerce_name()` used for asset names -- confirm against faker_commerce).
fake = Faker()
fake.add_provider(faker_commerce.Provider)

# Number of rows to generate for each table. Primary keys run from 1 up to
# the corresponding count (inclusive).
NUM_STORES = 5
NUM_CUSTOMERS = 20
NUM_ASSETS = 50
NUM_TRANSACTIONS = 60
NUM_RENTALS = 30

# Helper function to clean values for COPY
def clean_value(value):
    r"""Render one field for PostgreSQL ``COPY ... FROM stdin`` (text format).

    Args:
        value: The Python value for a single column. ``None`` means SQL NULL.

    Returns:
        A string that is safe to place between tab delimiters on one line:

        * ``None`` becomes the NULL marker ``\N``.
        * Strings have backslashes doubled, and tabs, newlines and carriage
          returns flattened to single spaces.
        * Anything else is converted with ``str()``.
    """
    if value is None:
        return r"\N"  # PostgreSQL NULL
    if isinstance(value, str):
        # Backslash is COPY's escape character: left as-is, a literal "\N"
        # would load as NULL and "\." would end the data early. Double it
        # first so the escaping cannot be confused with anything added later.
        escaped = value.replace("\\", "\\\\")
        # Tab is the column delimiter and CR/LF terminate a row, so flatten
        # all three to spaces to keep each value on a single line.
        return escaped.replace("\t", " ").replace("\n", " ").replace("\r", " ")
    return str(value)

# Table Data Generation
def generate_store_locations():
    """Yield one ``[id, name, address]`` row per store, with ids 1..NUM_STORES."""
    for store_id in range(1, NUM_STORES + 1):
        store_name = fake.company()
        store_address = fake.address()
        yield [store_id, store_name, store_address]

def generate_customers():
    """Yield one customer row per id in 1..NUM_CUSTOMERS.

    Each row is ``[id, first_name, last_name, email, phone, address]``, with
    every field after the id produced by the shared Faker instance.
    """
    for customer_id in range(1, NUM_CUSTOMERS + 1):
        first = fake.first_name()
        last = fake.last_name()
        email = fake.email()
        phone = fake.phone_number()
        address = fake.address()
        yield [customer_id, first, last, email, phone, address]

def generate_assets(store_ids):
    """Yield one asset row per id in 1..NUM_ASSETS.

    Args:
        store_ids: Sequence of store ids; each asset is assigned one at random.

    Yields:
        ``[id, name, serial_number, rental_price, sale_price, rental_period,
        location, store_id]``. ``sale_price`` is ``None`` unless the asset
        falls in the roughly 20% that are also offered for sale.
    """
    for asset_id in range(1, NUM_ASSETS + 1):
        period = random.choice(["daily", "weekly", "monthly"])
        rent = round(random.uniform(5, 100), 2)

        # About one asset in five gets a sale price, set at 50-80% of its
        # rental price.
        if random.random() < 0.2:
            sale = round(rent * random.uniform(0.5, 0.8), 2)
        else:
            sale = None

        name = fake.ecommerce_name()
        serial = fake.unique.ean13()
        aisle = random.randint(1, 20)
        shelf = random.randint(1, 10)
        location = f"Aisle {aisle} - Shelf {shelf}"
        store = random.choice(store_ids)

        yield [asset_id, name, serial, rent, sale, period, location, store]

def generate_transactions(asset_ids, customer_ids):
    """Yield one transaction row per id in 1..NUM_TRANSACTIONS.

    Args:
        asset_ids: Sequence of asset ids to pick from at random.
        customer_ids: Sequence of customer ids to pick from at random.

    Yields:
        ``[id, asset_id, customer_id, transaction_type, transaction_date,
        total_charge, note]``.
    """
    for txn_id in range(1, NUM_TRANSACTIONS + 1):
        asset = random.choice(asset_ids)
        customer = random.choice(customer_ids)
        kind = random.choice(["Sale", "Rental", "Return"])
        when = fake.date_time_this_year()
        charge = round(random.uniform(10, 500), 2)
        note = fake.sentence()
        yield [txn_id, asset, customer, kind, when, charge, note]

def generate_rentals(transaction_ids):
    """Yield one rental row per id in 1..NUM_RENTALS.

    Args:
        transaction_ids: Sequence of transaction ids; each rental is attached
            to one chosen at random, so an id may be used more than once.

    Yields:
        ``[id, transaction_id, start, end, start, end, duration]``. The start
        and end timestamps are emitted twice, and ``duration`` is
        ``end - start``.
    """
    for rental_id in range(1, NUM_RENTALS + 1):
        txn = random.choice(transaction_ids)
        start = fake.date_time_this_year()
        end = fake.date_time_between_dates(datetime_start=start)
        duration = end - start
        # The same start/end pair fills both timestamp pairs in the row.
        yield [rental_id, txn, start, end, start, end, duration]

# Id pools (1..N inclusive) that dependent tables draw their foreign keys from.
store_ids = [*range(1, NUM_STORES + 1)]
customer_ids = [*range(1, NUM_CUSTOMERS + 1)]
asset_ids = [*range(1, NUM_ASSETS + 1)]
transaction_ids = [*range(1, NUM_TRANSACTIONS + 1)]

# Table name -> lazy row generator. Insertion order is the load order, so
# referenced tables come before the tables that point at them.
tables = dict(
    store_locations=generate_store_locations(),
    customers=generate_customers(),
    assets=generate_assets(store_ids),
    transactions=generate_transactions(asset_ids, customer_ids),
    rentals=generate_rentals(transaction_ids),
)

# Write to SQL file (created in the current working directory).
sql_file = os.path.join(os.getcwd(), "generated_data.sql")

# Encoding is pinned to UTF-8 so the output does not depend on the platform's
# default locale encoding if generated text contains non-ASCII characters.
with open(sql_file, "w", encoding="utf-8") as f:
    f.write('SET search_path="Hardware Store";\n\n')

    # One COPY section per table, in the insertion order of `tables`.
    for table_name, generator in tables.items():
        f.write(f"COPY {table_name} FROM stdin;\n")
        for row in generator:
            # COPY text format: tab-delimited fields, one row per line.
            cleaned_row = "\t".join(map(clean_value, row))
            f.write(f"{cleaned_row}\n")
        # A line holding only "\." marks the end of the COPY data.
        f.write("\\.\n\n")

print(f"SQL file generated: {sql_file}")
Loading

0 comments on commit 92b41b8

Please sign in to comment.