diff --git a/demo.py b/demo.py deleted file mode 100644 index 8494436..0000000 --- a/demo.py +++ /dev/null @@ -1,43 +0,0 @@ -# %% -%load_ext autoreload -%autoreload 2 - -# %% -import pandas as pd -from transformers import AutoTokenizer, BertForSequenceClassification -from data_processing import build_text_data, load_tabular_data, split_dataset -from transformers import pipeline -import torch -# %% - -# merged_df = load_dataframe() -# path = build_vocab(df=merged_df) -# ds_dict_encoded = split_dataset(pretrained_model="bert") - -# %% -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") -DEVICE = "cpu" -ckp = "bert-base-uncased" -model = BertForSequenceClassification.from_pretrained(ckp, num_labels=2).to(DEVICE) -# %% -tokenizer = AutoTokenizer.from_pretrained(ckp) - -# %% -df = pd.read_csv("output.csv") -df.iloc[0] - -# %% - -# clf = pipeline("text-classification", model=model) -clf = pipeline("text-classification", model=model, tokenizer=tokenizer) -# %% -# for text in df["text"].tolist()[:10]: -# print(clf(text.to(DEVICE))) -# %% -inputs = tokenizer(df["text"].tolist()[:200], padding=True, truncation=True, return_tensors="pt").to(DEVICE) - -outputs = model(**inputs) -# %% -from transformers import AutoModelForSequenceClassification - -# TODO: build a benchmark for sequenceclassification without SFT diff --git a/demo_sft_hpo_deephyper.py b/demo_sft_hpo_deephyper.py deleted file mode 100644 index ef20b8d..0000000 --- a/demo_sft_hpo_deephyper.py +++ /dev/null @@ -1,131 +0,0 @@ -""" Hyperparameter search (HPS) for SFT with DeeyHyper - -* Task: text-classification (binary labels) -* Method: Supervised Fine-tuning -* Dataset: 1000genome -* Pre-trained model: bert-base-uncased - -Ref: -* https://docs.nersc.gov/machinelearning/hpo/ -* https://deephyper.readthedocs.io/en/latest/install/hpc/nersc.html -""" - -import evaluate -import numpy as np -from datasets import load_dataset -from transformers import (AutoModelForSequenceClassification, AutoTokenizer, - BertConfig, DataCollatorWithPadding, Trainer, - TrainingArguments) -from deephyper.problem import HpProblem -from deephyper.search.hps import CBO -from deephyper.evaluator import Evaluator - - -name = "1000genome" -ckp = "bert-base-uncased" - -# load dataset -raw_dataset = load_dataset("csv", - data_files={"train": f"./data/{name}/train.csv", - "validation": f"./data/{name}/validation.csv", - "test": f"./data/{name}/test.csv"}) - -tokenizer = AutoTokenizer.from_pretrained(ckp) -tokenizer_datasets = raw_dataset.map(lambda data: tokenizer(data["text"], truncation=True), batched=True) -data_collator = DataCollatorWithPadding(tokenizer=tokenizer) -tokenizer_datasets = tokenizer_datasets.remove_columns(["text"]) -tokenized_datasets = tokenizer_datasets.rename_column("label", "labels") -tokenized_datasets.set_format(type="torch") - -train_dataset = tokenized_datasets['train'] -eval_dataset = tokenized_datasets['validation'] -acc = evaluate.load("accuracy") - - -def compute_metrics(eval_pred): - predictions, labels = eval_pred - predictions = np.argmax(predictions, axis=1) - return acc.compute(predictions=predictions, references=labels) - - -def run(config): - - trainer_config = {"learning_rate": config.get("learning_rate", 5e-5), - "weight_decay": config.get("weight_decay", 0.0), - "adam_beta1": config.get("adam_beta1", 0.9), - "adam_beta2": config.get("adam_beta2", 0.999), - "adam_epsilon": config.get("adam_epsilon", 1e-8), - "max_grad_norm": config.get("max_grad_norm", 1.0), - "num_train_epochs": config.get("num_train_epochs", 3), 
- "per_device_train_batch_size": config.get("per_device_train_batch_size", 32)} - - # NOTE: not defined in problem - model_config = {"hidden_act": config.get("hidden_act", "gelu"), - "hidden_dropout_prob": config.get("hidden_dropout_prob", 0.1), - "hidden_size": config.get("hidden_size", 768), - "initializer_range": config.get("initializer_range", 0.02), - "intermediate_size": config.get("intermediate_size", 3072), - "layer_norm_eps": config.get("layer_norm_eps", 1e-12), - "max_position_embeddings": config.get("max_position_embeddings", 512), - "num_attention_heads": config.get("num_attention_heads", 12), - "num_hidden_layers": config.get("num_hidden_layers", 12), - "type_vocab_size": config.get("type_vocab_size", 2), - "vocab_size": config.get("vocab_size", 30522)} - # NOTE: add hps in Bert: - # https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertConfig - bert_config = BertConfig(**model_config) - - model = AutoModelForSequenceClassification.from_pretrained(ckp, config=bert_config) - - # NOTE: add hps to training arguments - training_args = TrainingArguments( - output_dir="./models/tmp-sft", - overwrite_output_dir=True, - save_strategy="no", - seed=42, - auto_find_batch_size=True, - **trainer_config - ) - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - # compute_metrics=compute_metrics, # NOTE: remove metrics and it will return eval loss by default - tokenizer=tokenizer, - # model_init=model, - data_collator=data_collator, - ) - trainer.train() - - # Evaluate the model - eval_results = trainer.evaluate() - - # Return the evaluation loss, maximize the -loss - return -eval_results['eval_loss'] - - -problem = HpProblem() -# NOTE: define hyperparameters search space -# trainer parameters -problem.add_hyperparameter((1e-6, 1e-4, "log-uniform"), "learning_rate", default_value=5e-5) -problem.add_hyperparameter((0.0, 1e-4), "weight_decay", default_value=0.0) -problem.add_hyperparameter((0.9, 0.999), "adam_beta1", default_value=0.9) -problem.add_hyperparameter((0.9, 0.999), "adam_beta2", default_value=0.999) -problem.add_hyperparameter((1e-9, 1e-7), "adam_epsilon", default_value=1e-8) -problem.add_hyperparameter((0.5, 1.0), "max_grad_norm", default_value=1.0) -problem.add_hyperparameter((3, 10), "num_train_epochs", default_value=3) -problem.add_hyperparameter([16, 32, 64, 128], "per_device_train_batch_size", default_value=32) -# pretrain config parameters - -# define the evaluator to distribute the computation -# TODO: check the method compatible with NERSC for multi-gpu usage -evaluator = Evaluator.create(run, - method="serial", - method_kwargs={ - "num_workers": 2, - }) - -search = CBO(problem, evaluator) - -results = search.search(max_evals=10) diff --git a/demo_sft_hpo_optuna.py b/demo_sft_hpo_optuna.py deleted file mode 100644 index 624a181..0000000 --- a/demo_sft_hpo_optuna.py +++ /dev/null @@ -1,121 +0,0 @@ -""" Hyperparameter search (HPS) for SFT with Optuna (with `trainer.hyperparameter_search`) - -* Task: text-classification (binary labels) -* Method: Supervised Fine-tuning -* Dataset: 1000genome -* Pre-trained model: bert-base-uncased - -""" - -import pickle - -import evaluate -import numpy as np -from datasets import load_dataset -from transformers import (AutoModelForSequenceClassification, AutoTokenizer, - BertConfig, DataCollatorWithPadding, Trainer, - TrainingArguments) - - -def optuna_hp_space(trial): - # NOTE: define the hyperparameter search space - return { - "learning_rate": 
trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True), - "weight_decay": trial.suggest_float("weight_decay", 0.0, 1e-4), - "adam_beta1": trial.suggest_float("adam_beta1", 0.9, 0.999), - "adam_beta2": trial.suggest_float("adam_beta2", 0.9, 0.999), - "adam_epsilon": trial.suggest_float("adam_epsilon", 1e-9, 1e-7), - "max_grad_norm": trial.suggest_float("max_grad_norm", 0.5, 1.0), - "num_train_epochs": trial.suggest_int("num_train_epochs", 3, 10), - "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [16, 32, 64, 128]), - } - - -name = "1000genome" -ckps = [ - # "albert-base-v2", - # "albert-large-v2", - # "bert-base-cased", - # "bert-base-uncased", - "bert-large-cased", - "bert-large-uncased", - "distilbert-base-cased", - "distilbert-base-uncased", - "roberta-base", - "roberta-large", - "xlnet-base-cased", - "xlnet-large-cased" -] - -# load dataset -raw_dataset = load_dataset("csv", - data_files={"train": f"./data/{name}/train.csv", - "validation": f"./data/{name}/validation.csv", - "test": f"./data/{name}/test.csv"}) - -res = {} -for ckp in ckps: - tokenizer = AutoTokenizer.from_pretrained(ckp) - tokenizer_datasets = raw_dataset.map(lambda data: tokenizer(data["text"], truncation=True), batched=True) - data_collator = DataCollatorWithPadding(tokenizer=tokenizer) - tokenizer_datasets = tokenizer_datasets.remove_columns(["text"]) - tokenized_datasets = tokenizer_datasets.rename_column("label", "labels") - tokenized_datasets.set_format(type="torch") - - # NOTE: add hps in Bert: - # https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertConfig - config = BertConfig() - train_dataset = tokenized_datasets['train'] - eval_dataset = tokenized_datasets['validation'] - acc = evaluate.load("accuracy") - - def compute_metrics(eval_pred): - predictions, labels = eval_pred - predictions = np.argmax(predictions, axis=1) - return acc.compute(predictions=predictions, references=labels) - - # model_init = AutoModelForSequenceClassification.from_pretrained(ckp, config=config) - - def model_init(trial): - return AutoModelForSequenceClassification.from_pretrained( - ckp, - # from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - # cache_dir=model_args.cache_dir, - # revision=model_args.model_revision, - # token=True if model_args.use_auth_token else None, - ignore_mismatched_sizes=True, - ) - - # set hps to training arguments - training_args = TrainingArguments( - output_dir="./models/tmp-sft", - overwrite_output_dir=True, - save_strategy="no", - seed=42, - auto_find_batch_size=True, - ) - - trainer = Trainer( - model=None, - args=training_args, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - # compute_metrics=compute_metrics, # NOTE: remove metrics and it will return eval loss by default - tokenizer=tokenizer, - model_init=model_init, - data_collator=data_collator, - ) - - best_trials = trainer.hyperparameter_search( - direction="minimize", - backend="optuna", - hp_space=optuna_hp_space, - n_trials=20, - # compute_objective=compute_objective, - ) - - print(ckp, best_trials.hyperparameters) - res[ckp] = best_trials.hyperparameters - -pickle.dump(res, open("hps_optuna.pkl", "wb")) diff --git a/demo_v1.py b/demo_v1.py deleted file mode 100644 index 6013633..0000000 --- a/demo_v1.py +++ /dev/null @@ -1,47 +0,0 @@ -# %% - -# %% -import pandas as pd -import torch -from transformers import (AlbertForSequenceClassification, AutoModel, - AutoModelForSequenceClassification, AutoTokenizer, RobertaModel, - 
BertForSequenceClassification, pipeline) - -from data_processing import build_text_data, load_tabular_data, split_dataset -from sklearn.metrics import accuracy_score, classification_report, confusion_matrix -from tqdm import tqdm - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") -DEVICE = "cpu" -print(DEVICE) - -# %% -''' preprocess data ''' -merged_df = load_tabular_data(columns=["wms_delay"]) -build_text_data(df=merged_df) -df = pd.read_csv("output.csv") - -# %% -ckp = "albert-base-v2" -ckp = "bert-base-uncased" -ckp = "roberta-base" -model = AutoModelForSequenceClassification.from_pretrained(ckp, num_labels=2).to(DEVICE) -tokenizer = AutoTokenizer.from_pretrained(ckp) - -# %% - -# %% -clf = pipeline("text-classification", model=model, tokenizer=tokenizer) -torch.cuda.empty_cache() -y_pred = [] -for i in tqdm(range(len(df))): - # tokers = tokenizer([df['text'][i]], padding=True, truncation=True, return_tensors="pt").to(DEVICE) - # outputs = model(**tokers) - y_pred.append(int(clf(df['text'][i])[0]["label"].split("_")[1])) -y_true = df["label"].tolist() -# inputs = tokenizer(df["text"].tolist()[:1000], padding=True, truncation=True, return_tensors="pt").to(DEVICE) -# outputs = model(**inputs) -# outputs.logits.argmax(1) -print(classification_report(y_true, y_pred)) - -# %% diff --git a/run_demo_sft_hpo_deephyper.sh b/run_demo_sft_hpo_deephyper.sh deleted file mode 100755 index 4dbfb1e..0000000 --- a/run_demo_sft_hpo_deephyper.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -#SBATCH --time=03:00:00 -#SBATCH --nodes=1 -#SBATCH --constraint=gpu -#SBATCH --qos=regular -#SBATCH --account=m4144 - -module load conda -conda activate hf -export HF_EVALUATE_OFFLINE=1 - -cd /global/homes/p/papajim/GitHub/poseidon/LLM_AD -python3 /global/homes/p/papajim/GitHub/poseidon/LLM_AD/demo_sft_hpo_deephyper.py - -exit diff --git a/run_demo_sft_hpo_optuna.sh b/run_demo_sft_hpo_optuna.sh deleted file mode 100755 index f2beb5d..0000000 --- a/run_demo_sft_hpo_optuna.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -#SBATCH --time=03:00:00 -#SBATCH --nodes=1 -#SBATCH --constraint=gpu -#SBATCH --qos=regular -#SBATCH --account=m4144 - -module load conda -conda activate hf -export HF_EVALUATE_OFFLINE=1 - -cd /global/homes/p/papajim/GitHub/poseidon/LLM_AD -python3 /global/homes/p/papajim/GitHub/poseidon/LLM_AD/demo_sft_hpo_optuna.py - -exit diff --git a/using_sft.ipynb b/using_sft.ipynb deleted file mode 100644 index e99dd16..0000000 --- a/using_sft.ipynb +++ /dev/null @@ -1,774 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A full supervised fine-tuning using local data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "from datasets import load_dataset\n", - "import pandas as pd\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Split the dataset into train/validation/test" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading and preparing dataset csv/default to /tmp/jinh/huggingface/datasets/csv/default-6e4f0556d6300670/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6a896d19d2cb4b8288d2abe04feaa560", - 
"version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading data files: 0%| | 0/3 [00:00) torch.Size([64, 2])\n" - ] - } - ], - "source": [ - "outputs = model(**batch)\n", - "print(outputs.loss, outputs.logits.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/jinh/miniconda3/envs/hf/lib/python3.11/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "from transformers import AdamW\n", - "\n", - "optimizer = AdamW(model.parameters(), lr=5e-5)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "602\n" - ] - } - ], - "source": [ - "from transformers import get_scheduler\n", - "\n", - "num_epochs = 1\n", - "num_training_steps = num_epochs * len(train_dataloader)\n", - "lr_scheduler = get_scheduler(\n", - " \"linear\",\n", - " optimizer=optimizer,\n", - " num_warmup_steps=0,\n", - " num_training_steps=num_training_steps,\n", - ")\n", - "print(num_training_steps)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cuda\n" - ] - } - ], - "source": [ - "import torch\n", - "\n", - "DEVICE = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n", - "model.to(DEVICE)\n", - "print(DEVICE)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a2f6231e046d4d47a5ce58fd4508ee1b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/602 [00:00), hidden_states=None, attentions=None)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# for this model, it cannot be used for classification directly\n", - "model(**inputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Model heads: Making sense out of numbers\n", - "![image](https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head-dark.svg)\n", - "\n", - "Now, consider the model for `text-classification` task." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "output: \n", - " SequenceClassifierOutput(loss=None, logits=tensor([[ 3.3347, -2.7614],\n", - " [ 3.1755, -2.6341],\n", - " [ 3.1681, -2.6305],\n", - " [ 3.2287, -2.6752]], grad_fn=), hidden_states=None, attentions=None)\n", - "logits: \n", - " tensor([[ 3.3347, -2.7614],\n", - " [ 3.1755, -2.6341],\n", - " [ 3.1681, -2.6305],\n", - " [ 3.2287, -2.6752]], grad_fn=)\n", - "prob. 
: \n", - " tensor([[0.9978, 0.0022],\n", - " [0.9970, 0.0030],\n", - " [0.9970, 0.0030],\n", - " [0.9973, 0.0027]], grad_fn=)\n", - "labels: \n", - " tensor([0, 0, 0, 0])\n", - "true_labels: \n", - " [0, 1, 1, 1]\n" - ] - } - ], - "source": [ - "model = DistilBertForSequenceClassification.from_pretrained(ckp, num_labels=2)\n", - "outputs = model(**inputs)\n", - "print(\"output: \\n\", outputs)\n", - "print(\"logits: \\n\", outputs.logits)\n", - "print(\"prob. : \\n\", torch.nn.functional.softmax(outputs.logits, dim=-1))\n", - "print(\"labels: \\n\", outputs.logits.argmax(dim=-1))\n", - "print(\"true_labels: \\n\", inputs_labels)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DistilBertForSequenceClassification(\n", - " (distilbert): DistilBertModel(\n", - " (embeddings): Embeddings(\n", - " (word_embeddings): Embedding(30522, 768, padding_idx=0)\n", - " (position_embeddings): Embedding(512, 768)\n", - " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " )\n", - " (transformer): Transformer(\n", - " (layer): ModuleList(\n", - " (0-5): 6 x TransformerBlock(\n", - " (attention): MultiHeadSelfAttention(\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (q_lin): Linear(in_features=768, out_features=768, bias=True)\n", - " (k_lin): Linear(in_features=768, out_features=768, bias=True)\n", - " (v_lin): Linear(in_features=768, out_features=768, bias=True)\n", - " (out_lin): Linear(in_features=768, out_features=768, bias=True)\n", - " )\n", - " (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " (ffn): FFN(\n", - " (dropout): Dropout(p=0.1, inplace=False)\n", - " (lin1): Linear(in_features=768, out_features=3072, bias=True)\n", - " (lin2): Linear(in_features=3072, out_features=768, bias=True)\n", - " (activation): GELUActivation()\n", - " )\n", - " (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", - " )\n", - " )\n", - " )\n", - " )\n", - " (pre_classifier): Linear(in_features=768, out_features=768, bias=True)\n", - " (classifier): Linear(in_features=768, out_features=2, bias=True)\n", - " (dropout): Dropout(p=0.2, inplace=False)\n", - ")\n" - ] - } - ], - "source": [ - "print(model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Handling multiple sentences\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Tokens ['w', '##ms', '_', 'delay', 'is', '1', '.', '0', 'queue', '_', 'delay', 'is', '4', '.', '0', 'run', '##time', 'is', '6', '.', '0', 'post', '_', 'script', '_', 'delay', 'is', '5', '.', '0', 'stage', '_', 'in', '_', 'delay', 'is', 'nan', 'stage', '_', 'out', '_', 'delay', 'is', 'nan', 'stage', '_', 'in', '_', 'bytes', 'is', 'nan', 'stage', '_', 'out', '_', 'bytes', 'is', 'nan', 'kicks', '##tar', '##t', '_', 'ex', '##ec', '##utable', '##s', '_', 'cpu', '_', 'time', 'is', '0', '.', '2']\n", - "IDs [1059, 5244, 1035, 8536, 2003, 1015, 1012, 1014, 24240, 1035, 8536, 2003, 1018, 1012, 1014, 2448, 7292, 2003, 1020, 1012, 1014, 2695, 1035, 5896, 1035, 8536, 2003, 1019, 1012, 1014, 2754, 1035, 1999, 1035, 8536, 2003, 16660, 2754, 1035, 2041, 1035, 8536, 2003, 16660, 2754, 1035, 1999, 1035, 27507, 2003, 16660, 2754, 1035, 2041, 1035, 27507, 2003, 16660, 14590, 7559, 2102, 1035, 4654, 8586, 23056, 2015, 1035, 
17368, 1035, 2051, 2003, 1014, 1012, 1016]\n", - "Logits tensor([[ 3.7297, -3.0441]], grad_fn=)\n", - "Prob. tensor([[0.9989, 0.0011]], grad_fn=)\n" - ] - } - ], - "source": [ - "# first sentence\n", - "tokens = tokenizer.tokenize([text_df.loc[0,:][\"text\"]][0])\n", - "print(\"Tokens\", tokens)\n", - "ids = tokenizer.convert_tokens_to_ids(tokens)\n", - "print(\"IDs \",ids)\n", - "output = model(torch.tensor([ids]))\n", - "print(\"Logits\", output.logits)\n", - "prob = torch.nn.functional.softmax(output.logits, dim=-1)\n", - "print(\"Prob. \", prob)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__NOTE__:\n", - "* the tokens are splitted into subwords, or integers\n", - "* logits are the model output\n", - "* probabilities indicates the confidence of the model on the prediction between [0, 1]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'\\ndf = pd.read_csv(\"output.csv\")\\ntorch.cuda.empty_cache()\\ny_pred = []\\nfor i in range(len(df)):\\n # tokers = tokenizer([df[\\'text\\'][i]], padding=True, truncation=True, return_tensors=\"pt\").to(DEVICE)\\n # outputs = model(**tokers)\\n y_pred.append(int(clf(df[\\'text\\'][i])[0][\"label\"].split(\"_\")[1]))\\ny_true = df[\"label\"].tolist()\\n# inputs = tokenizer(df[\"text\"].tolist()[:1000], padding=True, truncation=True, return_tensors=\"pt\").to(DEVICE)\\n# outputs = model(**inputs)\\n# outputs.logits.argmax(1)\\nclassification_report(y_true, y_pred)\\n'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "'''\n", - "df = pd.read_csv(\"output.csv\")\n", - "torch.cuda.empty_cache()\n", - "y_pred = []\n", - "for i in range(len(df)):\n", - " # tokers = tokenizer([df['text'][i]], padding=True, truncation=True, return_tensors=\"pt\").to(DEVICE)\n", - " # outputs = model(**tokers)\n", - " y_pred.append(int(clf(df['text'][i])[0][\"label\"].split(\"_\")[1]))\n", - "y_true = df[\"label\"].tolist()\n", - "# inputs = tokenizer(df[\"text\"].tolist()[:1000], padding=True, truncation=True, return_tensors=\"pt\").to(DEVICE)\n", - "# outputs = model(**inputs)\n", - "# outputs.logits.argmax(1)\n", - "classification_report(y_true, y_pred)\n", - "'''" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### wrapping up: from tokenizer to model" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "pred labels tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", - " 0, 0, 0, 0])\n", - "true labels [0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1]\n" - ] - } - ], - "source": [ - "inputs = tokenizer(text_df['text'].tolist()[:100], padding=True, truncation=True, return_tensors=\"pt\")\n", - "output = model(**inputs)\n", - "print(\"pred labels\", output.logits.argmax(1))\n", - "print(\"true labels\", text_df['label'].tolist()[:100])" - ] - }, - { - 
"cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.metrics import accuracy_score, classification_report\n" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 0 1.00 0.26 0.41 100\n", - " 1 0.00 0.00 0.00 0\n", - "\n", - " accuracy 0.26 100\n", - " macro avg 0.50 0.13 0.21 100\n", - "weighted avg 1.00 0.26 0.41 100\n", - "\n" - ] - } - ], - "source": [ - "rep = classification_report(output.logits.argmax(1).detach().cpu().numpy(), text_df['label'].tolist()[:100])\n", - "print(rep)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__NOTE__:\n", - "* tokenizer and model take all the input is inefficient. `OOM` issue on both CPU and GPU.\n", - "* " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Supervised Fine-Tuning\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from datasets import load_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset csv (/tmp/jinh/huggingface/datasets/csv/default-0a7c04ab8c22fc34/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7fe4a4f05c5349c499ada5f3f54f877f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/3 [00:003\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtokenizer_function\u001b[39m(data):\n\u001b[1;32m 4\u001b[0m \u001b[39mreturn\u001b[39;00m tokenizer(data[\u001b[39m\"\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m\"\u001b[39m], truncation\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m----> 6\u001b[0m tokenized_datasets \u001b[39m=\u001b[39m raw_datasets\u001b[39m.\u001b[39mmap(tokenizer_function, batched\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m 7\u001b[0m data_collator \u001b[39m=\u001b[39m DataCollatorWithPadding(tokenizer\u001b[39m=\u001b[39mtokenizer)\n", - "File \u001b[0;32m~/miniconda3/envs/hf/lib/python3.11/site-packages/datasets/dataset_dict.py:851\u001b[0m, in \u001b[0;36mDatasetDict.map\u001b[0;34m(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_names, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, desc)\u001b[0m\n\u001b[1;32m 848\u001b[0m \u001b[39mif\u001b[39;00m cache_file_names \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 849\u001b[0m cache_file_names \u001b[39m=\u001b[39m {k: \u001b[39mNone\u001b[39;00m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m}\n\u001b[1;32m 850\u001b[0m \u001b[39mreturn\u001b[39;00m DatasetDict(\n\u001b[0;32m--> 851\u001b[0m {\n\u001b[1;32m 852\u001b[0m k: dataset\u001b[39m.\u001b[39mmap(\n\u001b[1;32m 853\u001b[0m function\u001b[39m=\u001b[39mfunction,\n\u001b[1;32m 854\u001b[0m with_indices\u001b[39m=\u001b[39mwith_indices,\n\u001b[1;32m 855\u001b[0m with_rank\u001b[39m=\u001b[39mwith_rank,\n\u001b[1;32m 856\u001b[0m input_columns\u001b[39m=\u001b[39minput_columns,\n\u001b[1;32m 857\u001b[0m 
batched\u001b[39m=\u001b[39mbatched,\n\u001b[1;32m 858\u001b[0m batch_size\u001b[39m=\u001b[39mbatch_size,\n\u001b[1;32m 859\u001b[0m drop_last_batch\u001b[39m=\u001b[39mdrop_last_batch,\n\u001b[1;32m 860\u001b[0m remove_columns\u001b[39m=\u001b[39mremove_columns,\n\u001b[1;32m 861\u001b[0m keep_in_memory\u001b[39m=\u001b[39mkeep_in_memory,\n\u001b[1;32m 862\u001b[0m load_from_cache_file\u001b[39m=\u001b[39mload_from_cache_file,\n\u001b[1;32m 863\u001b[0m cache_file_name\u001b[39m=\u001b[39mcache_file_names[k],\n\u001b[1;32m 864\u001b[0m writer_batch_size\u001b[39m=\u001b[39mwriter_batch_size,\n\u001b[1;32m 865\u001b[0m features\u001b[39m=\u001b[39mfeatures,\n\u001b[1;32m 866\u001b[0m disable_nullable\u001b[39m=\u001b[39mdisable_nullable,\n\u001b[1;32m 867\u001b[0m fn_kwargs\u001b[39m=\u001b[39mfn_kwargs,\n\u001b[1;32m 868\u001b[0m num_proc\u001b[39m=\u001b[39mnum_proc,\n\u001b[1;32m 869\u001b[0m desc\u001b[39m=\u001b[39mdesc,\n\u001b[1;32m 870\u001b[0m )\n\u001b[1;32m 871\u001b[0m \u001b[39mfor\u001b[39;00m k, dataset \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mitems()\n\u001b[1;32m 872\u001b[0m }\n\u001b[1;32m 873\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/hf/lib/python3.11/site-packages/datasets/dataset_dict.py:852\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 848\u001b[0m \u001b[39mif\u001b[39;00m cache_file_names \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 849\u001b[0m cache_file_names \u001b[39m=\u001b[39m {k: \u001b[39mNone\u001b[39;00m \u001b[39mfor\u001b[39;00m k \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m}\n\u001b[1;32m 850\u001b[0m \u001b[39mreturn\u001b[39;00m DatasetDict(\n\u001b[1;32m 851\u001b[0m {\n\u001b[0;32m--> 852\u001b[0m k: dataset\u001b[39m.\u001b[39mmap(\n\u001b[1;32m 853\u001b[0m function\u001b[39m=\u001b[39mfunction,\n\u001b[1;32m 854\u001b[0m with_indices\u001b[39m=\u001b[39mwith_indices,\n\u001b[1;32m 855\u001b[0m with_rank\u001b[39m=\u001b[39mwith_rank,\n\u001b[1;32m 856\u001b[0m input_columns\u001b[39m=\u001b[39minput_columns,\n\u001b[1;32m 857\u001b[0m batched\u001b[39m=\u001b[39mbatched,\n\u001b[1;32m 858\u001b[0m batch_size\u001b[39m=\u001b[39mbatch_size,\n\u001b[1;32m 859\u001b[0m drop_last_batch\u001b[39m=\u001b[39mdrop_last_batch,\n\u001b[1;32m 860\u001b[0m remove_columns\u001b[39m=\u001b[39mremove_columns,\n\u001b[1;32m 861\u001b[0m keep_in_memory\u001b[39m=\u001b[39mkeep_in_memory,\n\u001b[1;32m 862\u001b[0m load_from_cache_file\u001b[39m=\u001b[39mload_from_cache_file,\n\u001b[1;32m 863\u001b[0m cache_file_name\u001b[39m=\u001b[39mcache_file_names[k],\n\u001b[1;32m 864\u001b[0m writer_batch_size\u001b[39m=\u001b[39mwriter_batch_size,\n\u001b[1;32m 865\u001b[0m features\u001b[39m=\u001b[39mfeatures,\n\u001b[1;32m 866\u001b[0m disable_nullable\u001b[39m=\u001b[39mdisable_nullable,\n\u001b[1;32m 867\u001b[0m fn_kwargs\u001b[39m=\u001b[39mfn_kwargs,\n\u001b[1;32m 868\u001b[0m num_proc\u001b[39m=\u001b[39mnum_proc,\n\u001b[1;32m 869\u001b[0m desc\u001b[39m=\u001b[39mdesc,\n\u001b[1;32m 870\u001b[0m )\n\u001b[1;32m 871\u001b[0m \u001b[39mfor\u001b[39;00m k, dataset \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mitems()\n\u001b[1;32m 872\u001b[0m }\n\u001b[1;32m 873\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/hf/lib/python3.11/site-packages/datasets/arrow_dataset.py:580\u001b[0m, in \u001b[0;36mtransmit_tasks..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[39mself\u001b[39m: 
\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m=\u001b[39m kwargs\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mself\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 579\u001b[0m \u001b[39m# apply actual function\u001b[39;00m\n\u001b[0;32m--> 580\u001b[0m out: Union[\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mDatasetDict\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m func(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 581\u001b[0m datasets: List[\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(out\u001b[39m.\u001b[39mvalues()) \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(out, \u001b[39mdict\u001b[39m) \u001b[39melse\u001b[39;00m [out]\n\u001b[1;32m 582\u001b[0m \u001b[39mfor\u001b[39;00m dataset \u001b[39min\u001b[39;00m datasets:\n\u001b[1;32m 583\u001b[0m \u001b[39m# Remove task templates if a column mapping of the template is no longer valid\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/hf/lib/python3.11/site-packages/datasets/arrow_dataset.py:545\u001b[0m, in \u001b[0;36mtransmit_format..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 538\u001b[0m self_format \u001b[39m=\u001b[39m {\n\u001b[1;32m 539\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtype\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_format_type,\n\u001b[1;32m 540\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mformat_kwargs\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_format_kwargs,\n\u001b[1;32m 541\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_format_columns,\n\u001b[1;32m 542\u001b[0m \u001b[39m\"\u001b[39m\u001b[39moutput_all_columns\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_output_all_columns,\n\u001b[1;32m 543\u001b[0m }\n\u001b[1;32m 544\u001b[0m \u001b[39m# apply actual function\u001b[39;00m\n\u001b[0;32m--> 545\u001b[0m out: Union[\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mDatasetDict\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m func(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 546\u001b[0m datasets: List[\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(out\u001b[39m.\u001b[39mvalues()) \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(out, \u001b[39mdict\u001b[39m) \u001b[39melse\u001b[39;00m [out]\n\u001b[1;32m 547\u001b[0m \u001b[39m# re-apply format to the output\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/hf/lib/python3.11/site-packages/datasets/arrow_dataset.py:3087\u001b[0m, in \u001b[0;36mDataset.map\u001b[0;34m(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)\u001b[0m\n\u001b[1;32m 3079\u001b[0m \u001b[39mif\u001b[39;00m transformed_dataset \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 3080\u001b[0m \u001b[39mwith\u001b[39;00m logging\u001b[39m.\u001b[39mtqdm(\n\u001b[1;32m 
3081\u001b[0m disable\u001b[39m=\u001b[39m\u001b[39mnot\u001b[39;00m logging\u001b[39m.\u001b[39mis_progress_bar_enabled(),\n\u001b[1;32m 3082\u001b[0m unit\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m examples\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 3085\u001b[0m desc\u001b[39m=\u001b[39mdesc \u001b[39mor\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mMap\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 3086\u001b[0m ) \u001b[39mas\u001b[39;00m pbar:\n\u001b[0;32m-> 3087\u001b[0m \u001b[39mfor\u001b[39;00m rank, done, content \u001b[39min\u001b[39;00m Dataset\u001b[39m.\u001b[39m_map_single(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mdataset_kwargs):\n\u001b[1;32m 3088\u001b[0m \u001b[39mif\u001b[39;00m done:\n\u001b[1;32m 3089\u001b[0m shards_done \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n", - "File \u001b[0;32m~/miniconda3/envs/hf/lib/python3.11/site-packages/datasets/arrow_dataset.py:3463\u001b[0m, in \u001b[0;36mDataset._map_single\u001b[0;34m(shard, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset)\u001b[0m\n\u001b[1;32m 3459\u001b[0m indices \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\n\u001b[1;32m 3460\u001b[0m \u001b[39mrange\u001b[39m(\u001b[39m*\u001b[39m(\u001b[39mslice\u001b[39m(i, i \u001b[39m+\u001b[39m batch_size)\u001b[39m.\u001b[39mindices(shard\u001b[39m.\u001b[39mnum_rows)))\n\u001b[1;32m 3461\u001b[0m ) \u001b[39m# Something simpler?\u001b[39;00m\n\u001b[1;32m 3462\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3463\u001b[0m batch \u001b[39m=\u001b[39m apply_function_on_filtered_inputs(\n\u001b[1;32m 3464\u001b[0m batch,\n\u001b[1;32m 3465\u001b[0m indices,\n\u001b[1;32m 3466\u001b[0m check_same_num_examples\u001b[39m=\u001b[39m\u001b[39mlen\u001b[39m(shard\u001b[39m.\u001b[39mlist_indexes()) \u001b[39m>\u001b[39m \u001b[39m0\u001b[39m,\n\u001b[1;32m 3467\u001b[0m offset\u001b[39m=\u001b[39moffset,\n\u001b[1;32m 3468\u001b[0m )\n\u001b[1;32m 3469\u001b[0m \u001b[39mexcept\u001b[39;00m NumExamplesMismatchError:\n\u001b[1;32m 3470\u001b[0m \u001b[39mraise\u001b[39;00m DatasetTransformationNotAllowedError(\n\u001b[1;32m 3471\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mUsing `.map` in batched mode on a dataset with attached indexes is allowed only if it doesn\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt create or remove existing examples. 
You can first run `.drop_index() to remove your index and then re-add it.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 3472\u001b[0m ) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/hf/lib/python3.11/site-packages/datasets/arrow_dataset.py:3344\u001b[0m, in \u001b[0;36mDataset._map_single..apply_function_on_filtered_inputs\u001b[0;34m(pa_inputs, indices, check_same_num_examples, offset)\u001b[0m\n\u001b[1;32m 3342\u001b[0m \u001b[39mif\u001b[39;00m with_rank:\n\u001b[1;32m 3343\u001b[0m additional_args \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m (rank,)\n\u001b[0;32m-> 3344\u001b[0m processed_inputs \u001b[39m=\u001b[39m function(\u001b[39m*\u001b[39mfn_args, \u001b[39m*\u001b[39madditional_args, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mfn_kwargs)\n\u001b[1;32m 3345\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(processed_inputs, LazyDict):\n\u001b[1;32m 3346\u001b[0m processed_inputs \u001b[39m=\u001b[39m {\n\u001b[1;32m 3347\u001b[0m k: v \u001b[39mfor\u001b[39;00m k, v \u001b[39min\u001b[39;00m processed_inputs\u001b[39m.\u001b[39mdata\u001b[39m.\u001b[39mitems() \u001b[39mif\u001b[39;00m k \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m processed_inputs\u001b[39m.\u001b[39mkeys_to_format\n\u001b[1;32m 3348\u001b[0m }\n", - "\u001b[1;32m/home/jinh/PoSeiDon/llm_ad/using_transformers.ipynb Cell 36\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 3\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mtokenizer_function\u001b[39m(data):\n\u001b[0;32m----> 4\u001b[0m \u001b[39mreturn\u001b[39;00m tokenizer(data[\u001b[39m\"\u001b[39m\u001b[39mtext\u001b[39m\u001b[39m\"\u001b[39m], truncation\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'tokenizer' is not defined" - ] - } - ], - "source": [ - "# setup tokenizer function\n", - "from transformers import DataCollatorWithPadding\n", - "def tokenizer_function(data):\n", - " return tokenizer(data[\"text\"], truncation=True)\n", - "\n", - "tokenized_datasets = raw_datasets.map(tokenizer_function, batched=True)\n", - "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
[14427/14427 10:50, Epoch 3/3]
Step    Training Loss
  500        0.541000
 1000        0.559700
 1500        0.543100
 2000        0.543000
 2500        0.530200
 3000        0.550100
 3500        0.549800
 4000        0.531500
 4500        0.543900
 5000        0.547900
 5500        0.544300
 6000        0.534000
 6500        0.536400
 7000        0.529500
 7500        0.557400
 8000        0.537500
 8500        0.534000
 9000        0.537900
 9500        0.535200
10000        0.535700
10500        0.527200
11000        0.525100
11500        0.546300
12000        0.540500
12500        0.553300
13000        0.547600
13500        0.518700
14000        0.539300

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "TrainOutput(global_step=14427, training_loss=0.5403152088237468, metrics={'train_runtime': 650.842, 'train_samples_per_second': 177.32, 'train_steps_per_second': 22.167, 'total_flos': 298587208529160.0, 'train_loss': 0.5403152088237468, 'epoch': 3.0})" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# trainer API\n", - "from transformers import TrainingArguments, Trainer\n", - "\n", - "training_args = TrainingArguments(output_dir=\"sft\", \n", - " save_strategy=\"epoch\", overwrite_output_dir=True)\n", - "trainer = Trainer(model, training_args, \n", - " train_dataset=tokenized_datasets[\"train\"], \n", - " eval_dataset=tokenized_datasets[\"validation\"], \n", - " data_collator=data_collator, \n", - " tokenizer=tokenizer)\n", - "\n", - "trainer.train()\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(4809, 2) (4809,)\n" - ] - } - ], - "source": [ - "# NOTE: evaluate on the validation set\n", - "predictions = trainer.predict(tokenized_datasets[\"validation\"])\n", - "print(predictions.predictions.shape, predictions.label_ids.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "preds = np.argmax(predictions.predictions, axis=-1)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "106" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "(preds==0).sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'accuracy': 0.7602412143896861,\n", - " 'f1': 0.8602932267054405,\n", - " 'precision': 0.7548373378694451,\n", - " 'recall': 1.0}" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import evaluate\n", - "\n", - "metric = evaluate.combine([\"accuracy\", \"f1\", \"precision\", \"recall\"])\n", - "metric.compute(predictions=preds, references=predictions.label_ids)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accuracy_score(predictions.label_ids, preds)" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "# wrapping everything together, we can define a `compute_metrics` function and put in trainer\n", - "def compute_metrics(eval_preds):\n", - " metric = evaluate.combine([\"accuracy\", \"f1\", \"precision\", \"recall\"])\n", - " logits, labels = eval_preds\n", - " predictions = np.argmax(logits, axis=-1)\n", - " return metric.compute(predictions=predictions, references=labels)\n", - " # return accuracy_score(labels, predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "

[1803/1803 01:28, Epoch 3/3]
Step    Training Loss
 500         0.000000
1000         0.000000
1500         0.013400

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "TrainOutput(global_step=1803, training_loss=0.0037077239942009204, metrics={'train_runtime': 88.7796, 'train_samples_per_second': 162.47, 'train_steps_per_second': 20.309, 'total_flos': 351620221110624.0, 'train_loss': 0.0037077239942009204, 'epoch': 3.0})" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# update trainer\n", - "trainer = Trainer(\n", - " model,\n", - " training_args,\n", - " train_dataset=tokenized_datasets[\"train\"],\n", - " eval_dataset=tokenized_datasets[\"validation\"],\n", - " data_collator=data_collator,\n", - " tokenizer=tokenizer,\n", - " compute_metrics=compute_metrics,\n", - ")\n", - "trainer.train()" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "ValueError", - "evalue": "too many values to unpack (expected 2)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/jinh/PoSeiDon/llm_ad/using_transformers.ipynb Cell 44\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m predictions \u001b[39m=\u001b[39m trainer\u001b[39m.\u001b[39mpredict(tokenized_datasets[\u001b[39m\"\u001b[39m\u001b[39mtest\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m----> 2\u001b[0m compute_metrics(predictions)\n", - "\u001b[1;32m/home/jinh/PoSeiDon/llm_ad/using_transformers.ipynb Cell 44\u001b[0m line \u001b[0;36m4\n\u001b[1;32m 2\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcompute_metrics\u001b[39m(eval_preds):\n\u001b[1;32m 3\u001b[0m metric \u001b[39m=\u001b[39m evaluate\u001b[39m.\u001b[39mcombine([\u001b[39m\"\u001b[39m\u001b[39maccuracy\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mf1\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mprecision\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mrecall\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m----> 4\u001b[0m logits, labels \u001b[39m=\u001b[39m eval_preds\n\u001b[1;32m 5\u001b[0m predictions \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39margmax(logits, axis\u001b[39m=\u001b[39m\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[39mreturn\u001b[39;00m metric\u001b[39m.\u001b[39mcompute(predictions\u001b[39m=\u001b[39mpredictions, references\u001b[39m=\u001b[39mlabels)\n", - "\u001b[0;31mValueError\u001b[0m: too many values to unpack (expected 2)" - ] - } - ], - "source": [ - "predictions = trainer.predict(tokenized_datasets[\"test\"])\n", - "compute_metrics(predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'test_loss': 3.4706106877280263e-09,\n", - " 'test_accuracy': 1.0,\n", - " 'test_f1': 1.0,\n", - " 'test_precision': 1.0,\n", - " 'test_recall': 1.0,\n", - " 'test_runtime': 58.3066,\n", - " 'test_samples_per_second': 659.788,\n", - " 'test_steps_per_second': 82.478}" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions.metrics\n" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "Number of parameters: 66,955,010\n" - ] - } - ], - "source": [ - "num_params = sum(p.numel() for p in model.parameters())\n", - "print(\"Number of parameters: {:,}\".format(num_params))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "hf", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -}