-
Notifications
You must be signed in to change notification settings - Fork 0
/
params.yaml
34 lines (29 loc) · 873 Bytes
/
params.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Pipeline parameters, grouped by stage.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm the section boundaries against the code that reads these keys.

# Source data and how it is reshaped before splitting.
dataset:
  url: https://raw.githubusercontent.com/Himanshu-1703/reddit-sentiment-analysis/refs/heads/main/data/reddit.csv
  # Mapping of original column name -> new column name.
  rename_columns:
    clean_comment: text
    category: target
  train_size: 0.75
  target_labels: [neg, neu, pos]

# Output locations for the processed train/test splits.
ingestion:
  processed_train_path: data/processed/train.parquet
  processed_test_path: data/processed/test.parquet

# Each component is described by an import path (`module`), a class `name`,
# the `path` its pickle is stored at, and keyword `params`.
# NOTE(review): presumably `params` is passed to the class constructor - verify.
building:
  vectorizer:
    module: sklearn.feature_extraction.text
    name: TfidfVectorizer
    path: models/vectorizer.pkl
    params:
      max_features: 5000
  model:
    module: sklearn.ensemble
    name: HistGradientBoostingClassifier
    path: models/classifier.pkl
    # Explicit empty mapping (not null): no overrides are set here.
    params: {}

evaluation:
  train_vec_path: models/train_vec_data.pkl

mlflow:
  # For local run: mlflow will store artifacts in this directory
  # For cloud run: specify URL below
  tracking_uri: ./mlruns
  experiment_name: exp1-arv-testing