Skip to content

Commit

Permalink
forgot to run precommit
Browse files (browse the repository at this point in the history)
  • Loading branch information
ahmeda14960 committed Nov 20, 2024
1 parent 41b43c7 commit 4eb4281
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/levanter/data/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,7 @@ def preprocess_chat_example(batch, tokenizer: PreTrainedTokenizerBase, should_ap
"""
Preprocess chat examples to match the format of preprocess_supervised_example.
Returns a dict with input_ids and sources_len like the supervised case.
Args:
batch: List of dicts with input/output pairs
tokenizer: HuggingFace tokenizer
Expand All @@ -954,7 +954,7 @@ def preprocess_chat_example(batch, tokenizer: PreTrainedTokenizerBase, should_ap
full_examples = [f"{s}{t}" for s, t in zip(sources, targets)]
examples_tokenized = tokenizer(full_examples, padding=False, truncation=True)

- # Get source lengths to mask loss appropriately
+ # Get source lengths to mask loss appropriately
source_lens = [len(s) for s in sources_tokenized["input_ids"]]

return {
Expand Down

0 comments on commit 4eb4281

Please sign in to comment.