From d7238df6e45c681e46456ef22a04e8cb6147b9d3 Mon Sep 17 00:00:00 2001
From: Ivan Zhou
Date: Mon, 29 Jan 2024 02:54:04 +0000
Subject: [PATCH] update debug

---
NOTE(review): this patch was recovered from a copy in which every line
break had been collapsed to a single space. The line structure below was
rebuilt to agree with the hunk header (7 old lines, 16 new lines) and the
diffstat (13 insertions, 4 deletions). The indentation inside the hunk
(2 spaces per nesting level) is an assumption; confirm it against blob
cfe607af9 before applying. The From: header carries no e-mail address.

 config/data/rpv1_debug.yaml | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/config/data/rpv1_debug.yaml b/config/data/rpv1_debug.yaml
index 79676af54..cfe607af9 100644
--- a/config/data/rpv1_debug.yaml
+++ b/config/data/rpv1_debug.yaml
@@ -1,7 +1,16 @@
 cache_dir: gs://levanter-data/tokenized/redpajama_v1_llama_mixture
 rows_per_chunk: 4096
 tokenizer: "meta-llama/Llama-2-7b-hf"
-train_urls:
-  - gs://levanter-data/dev/redpajama/wikipedia/wiki.jsonl
-validation_urls:
-  - https://data.together.xyz/redpajama-data-1T/v1.0.0/c4/c4-train.01023-of-01024.jsonl
+configs:
+  # StackExchange:
+  #   train_urls:
+  #     - gs://levanter-data/dev/redpajama/stackexchange/stackexchange.jsonl
+  wikipedia:
+    train_urls:
+      - gs://levanter-data/dev/redpajama/wikipedia/wiki.jsonl
+    validation_urls:
+      - https://data.together.xyz/redpajama-data-1T/v1.0.0/c4/c4-train.01023-of-01024.jsonl
+train_weights:
+  # StackExchange:
+  wikipedia: 24
+stop_strategy: all_exhausted