-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_generator.py
73 lines (63 loc) · 1.25 KB
/
data_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from pymongo import MongoClient
import numpy as np
from tqdm import tqdm
# Handle to the MongoDB server addressed by the loopback URI below.
mongo_local_conn = MongoClient(host='mongodb://127.0.0.1')
# Target collection for the generated orders:
# database "ms_shopping_cart", collection "fake_orders_weighted_2".
mongo_local_coll = mongo_local_conn.ms_shopping_cart.fake_orders_weighted_2
# Product category labels; a category's id is its index in this list.
# With 16 categories, a uniform distribution would give each one a
# probability of 1/16 = .0625.
CATEGORIES = [
    'Trousers', 'T-Shirts', 'Jackets', 'Boots',
    'Shoes', 'Skirts', 'Dresses', 'Accessories',
    'Sweatshirts', 'Backpacks', 'Bags', 'Underwear',
    'Shirts', 'Swimwear', 'High Heels', 'Sportwear',
]
# Selection probability of each category, positionally aligned with
# CATEGORIES (entry k weights category id k). The values sum to 1, as
# required for a probability vector passed to np.random.choice.
CAT_WEIGHTS = [
    0.10,  # Trousers
    0.15,  # T-Shirts
    0.10,  # Jackets
    0.05,  # Boots
    0.10,  # Shoes
    0.01,  # Skirts
    0.01,  # Dresses
    0.05,  # Accessories
    0.10,  # Sweatshirts
    0.12,  # Backpacks
    0.06,  # Bags
    0.08,  # Underwear
    0.02,  # Shirts
    0.01,  # Swimwear
    0.02,  # High Heels
    0.02,  # Sportwear
]
# Probability vector for the per-order size draw over np.arange(1, 11):
# entry k is the probability of drawing the value k + 1. The values
# sum to 1.
NUM_ITEM_WEIGHTS = [
    0.15,  # draw = 1
    0.20,  # draw = 2
    0.26,  # draw = 3
    0.15,  # draw = 4
    0.10,  # draw = 5
    0.05,  # draw = 6
    0.05,  # draw = 7
    0.02,  # draw = 8
    0.01,  # draw = 9
    0.01,  # draw = 10
]
# Generate 100,000 synthetic orders and insert each one into MongoDB.
#
# Every order document has the shape
#   {'order_id': <int>,
#    'items': [{'category_id': <int>, 'category_label': <str>}, ...],
#    'categories': [<int>, ...]}
# where 'categories' repeats the category ids of 'items' in the same order.

# Loop-invariant sample spaces, built once instead of on every draw.
item_count_choices = np.arange(1, 11)          # possible order sizes: 1..10
category_id_choices = np.arange(len(CATEGORIES))

for i in tqdm(range(100000)):
    order = {'items': [], 'categories': [], 'order_id': i}

    # Draw the order size from NUM_ITEM_WEIGHTS. The inner loop must run
    # exactly `num_items` times: the previous `range(1, n)` ran n - 1 times,
    # producing empty orders for a draw of 1 and never reaching 10 items.
    num_items = int(np.random.choice(item_count_choices, p=NUM_ITEM_WEIGHTS))

    for _ in range(num_items):
        # np.random.choice returns a NumPy integer scalar; cast it to a
        # built-in int so the value stored in the document is BSON-encodable.
        # Categories are drawn independently, so one order may contain the
        # same category more than once.
        selected_category = int(
            np.random.choice(category_id_choices, p=CAT_WEIGHTS)
        )
        order['items'].append(
            {
                'category_id': selected_category,
                'category_label': CATEGORIES[selected_category],
            }
        )
        order['categories'].append(selected_category)

    mongo_local_coll.insert_one(order)