Update local changes made during the NeurIPS experiments #53

Open
wants to merge 1 commit into base: main

execution/executors.py (65 changes: 63 additions & 2 deletions)

@@ -172,7 +172,7 @@ def process_output(self, output: str, tokenizer_eos_token: str) -> str:
             # for llama-based model
             return output.lstrip().split("\n\n")[0].split(";")[0].strip()
         else:
-            return output.lstrip().split(tokenizer_eos_token)[0].split("\n\n")[0].split(";")[0].strip()
+            return output.lstrip().split(tokenizer_eos_token)[0].split("\n\n")[0].split(";")[0].strip().replace("-- ", "")
 
     @overrides
     def exec_result_eq(self, program_dict_1: Dict[str, Any], program_dict_2: Dict[str, Any]) -> bool:
@@ -246,7 +246,7 @@ def gold_program_len(self, example: Dict[str, Any]) -> int:
 
     @overrides
     def process_output(self, output: str, tokenizer_eos_token: str) -> str:
-        return output.split("### Task End ###")[0] # NOTE: we can't strip because python code need to maintain the indentation
+        return output.lstrip().split("### Task End ###")[0] # NOTE: we can't strip because python code need to maintain the indentation
 
     @overrides
     def exec_result_eq(self, program_dict_1: Dict[str, Any], program_dict_2: Dict[str, Any]) -> bool:
@@ -339,6 +339,67 @@ def exec_result_eq(self, program_dict_1: Dict[str, Any], program_dict_2: Dict[str, Any]) -> bool:
         except Exception:
             return False
 
+    @classmethod
+    def real_exec_program(cls, program: str, example: Dict[str, Any]) -> Tuple[int, Union[str, List, Dict]]:
+        result = execute(program, output_locals=True)
+
+        if result["result"] == "passed":
+            if "answer" in result["locals"]:
+                try:
+                    executed_answer = float(result["locals"]["answer"]["str_value"])
+                except Exception:
+                    executed_answer = result["locals"]["answer"]["str_value"]
+                exec_match = executed_answer == example["answer"]
+
+                # the executed_answer needs to be a state dict
+                state_dict = dict()
+                for k, v in result["locals"].items():
+                    state_dict[k] = v["str_value"]
+                executed_answer = state_dict
+            else:
+                executed_answer = "ERROR: no answer variable"
+                exec_match = -1
+        else:
+            executed_answer = "ERROR: program failed to execute"
+            exec_match = -1
+
+        return exec_match, executed_answer
+
+class MathExecutorGPT(BaseExecutor):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    @overrides
+    def cache_key_func(self, program: str, example: Dict[str, Any]) -> str:
+        return example["question"] + " | " + program
+
+    @overrides
+    def program_len(self, program: str) -> int:
+        return python_program_len(program)
+
+    @overrides
+    def gold_program_len(self, example: Dict[str, Any]) -> int:
+        return 0
+
+    @overrides
+    def process_output(self, output: str, tokenizer_eos_token: str) -> str:
+        return output.lstrip().split(tokenizer_eos_token)[0].strip()
+
+    @overrides
+    def exec_result_eq(self, program_dict_1: Dict[str, Any], program_dict_2: Dict[str, Any]) -> bool:
+        if isinstance(program_dict_1['exec_result'], str) or isinstance(program_dict_2['exec_result'], str):
+            return False
+        else:
+            try:
+                str_match = program_dict_1['exec_result']["answer"] == program_dict_2['exec_result']["answer"]
+                if str_match:
+                    return True
+                else:
+                    numeric_match = abs(float(program_dict_1['exec_result']["answer"]) - float(program_dict_2['exec_result']["answer"])) < 1e-6
+                    return numeric_match
+            except Exception:
+                return False
+
     @classmethod
     def real_exec_program(cls, program: str, example: Dict[str, Any]) -> Tuple[int, Union[str, List, Dict]]:
         result = execute(program, output_locals=True)
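Note: the new MathExecutorGPT.exec_result_eq treats two execution results as equal when their "answer" fields match as strings or agree numerically within 1e-6, and never matches failed runs (which are stored as error strings). A standalone sketch of that comparison with hypothetical exec_result values, for illustration only and not part of this diff:

# Illustration of the tolerance-based comparison added above (hypothetical inputs).
def answers_match(exec_result_1, exec_result_2) -> bool:
    # failed executions are stored as error strings and never match
    if isinstance(exec_result_1, str) or isinstance(exec_result_2, str):
        return False
    try:
        # exact string match first, then a numeric match within 1e-6
        if exec_result_1["answer"] == exec_result_2["answer"]:
            return True
        return abs(float(exec_result_1["answer"]) - float(exec_result_2["answer"])) < 1e-6
    except Exception:
        return False

print(answers_match({"answer": "3.0"}, {"answer": "3.0000000001"}))          # True
print(answers_match({"answer": "yes"}, {"answer": "no"}))                    # False
print(answers_match("ERROR: program failed to execute", {"answer": "3.0"}))  # False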
finetuning/lightning_modules/models/openai_model.py (2 changes: 1 addition & 1 deletion)

@@ -141,7 +141,7 @@ def __init__(self,
                  ) -> None:
         SUPPORTED_OPENAI_MODELS = ["code-davinci-002", "code-cushman-002",
                                    "code-cushman-001", "code-davinci-001",
-                                   "gpt-3.5-turbo", "text-davinci-003", "text-davinci-002","gpt-4"]
+                                   "gpt-3.5-turbo", "text-davinci-003", "text-davinci-002","gpt-4", "gpt-4-0314", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-4-0613"]
         assert engine in SUPPORTED_OPENAI_MODELS, f"OpenAIModel only supports {SUPPORTED_OPENAI_MODELS}"
 
         self.engine = engine
finetuning/lightning_modules/models/seq2seq_model.py (2 changes: 1 addition & 1 deletion)

@@ -156,7 +156,7 @@ def generate_and_post_process(self,
             temp = temperature
 
         # https://github.com/THUDM/ChatGLM-6B/issues/31
-        if "santacoder" in self.transformer_model_name or "gpt-neox-20b" in self.transformer_model_name or "replit" in self.transformer_model_name:
+        if "santacoder" in self.transformer_model_name or "gpt-neox-20b" in self.transformer_model_name or "replit" in self.transformer_model_name or "Llama-2" in self.transformer_model_name or "mpt" in self.transformer_model_name:
            use_sample = False
 
        generation_results = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, do_sample=use_sample,
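Note: this extends an existing workaround (see the linked ChatGLM-6B issue) that disables sampling for certain model families, now covering Llama-2 and MPT checkpoints as well. A standalone sketch of the same guard; the helper name and example model strings below are illustrative, not from the repo:

# Illustrative guard mirroring the condition above: for these model families,
# do_sample is forced to False and generation is effectively greedy.
GREEDY_ONLY_MARKERS = ("santacoder", "gpt-neox-20b", "replit", "Llama-2", "mpt")

def should_force_greedy(transformer_model_name: str) -> bool:
    return any(marker in transformer_model_name for marker in GREEDY_ONLY_MARKERS)

print(should_force_greedy("meta-llama/Llama-2-13b-hf"))   # True
print(should_force_greedy("Salesforce/codegen-6B-mono"))  # False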
finetuning/lightning_modules/models/seq2seq_model_util.py (40 changes: 25 additions & 15 deletions)

@@ -65,7 +65,7 @@ def get_model(model_name: str,
                                                          gradient_checkpointing=gradient_ckpt, use_cache=not gradient_ckpt)
             if len(additional_special_tokens) > 0:
                 model.resize_token_embeddings(len(tokenizer))
-    elif model_name in ["EleutherAI/gpt-neox-20b", "EleutherAI/pythia-1.4b-deduped", "EleutherAI/pythia-6.9b-deduped", "EleutherAI/pythia-12b-deduped", "databricks/dolly-v2-7b", "databricks/dolly-v2-12b"]:
+    elif model_name in ["EleutherAI/gpt-neox-20b", "databricks/dolly-v2-7b", "databricks/dolly-v2-12b"] or "EleutherAI/pythia-" in model_name:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         tokenizer.pad_token = tokenizer.eos_token
 
@@ -91,16 +91,17 @@ def get_model(model_name: str,
                                                          # gradient_checkpointing=gradient_ckpt,
                                                          use_cache=not gradient_ckpt,
                                                          **additional_init_args)
-    elif model_name.startswith("Salesforce/codegen-"):
+    elif model_name.startswith("Salesforce/codegen") or model_name.startswith("Salesforce/xgen"):
         tokenizer = AutoTokenizer.from_pretrained(model_name,
-                                                  additional_special_tokens=additional_special_tokens)
+                                                  additional_special_tokens=additional_special_tokens,
+                                                  trust_remote_code=True)
         tokenizer.pad_token = tokenizer.eos_token
 
         if not tokenizer_only:
             model = AutoModelForCausalLM.from_pretrained(model_name,
                                                          pad_token_id=tokenizer.eos_token_id,
                                                          torch_dtype=torch.float16,
-                                                         # device_map="auto",
+                                                         trust_remote_code=True,
                                                          use_cache=True)
     elif model_name.startswith("bigscience/bloom-"):
         tokenizer = AutoTokenizer.from_pretrained(model_name,
@@ -145,17 +145,26 @@ def get_model(model_name: str,
 
         if not tokenizer_only:
             model = BartForSequenceClassification.from_pretrained(model_name, num_labels=2)
-    elif "llama" in model_name.lower() or "alpaca" in model_name.lower():
+    elif "llama" in model_name.lower() or "alpaca" in model_name.lower() or "vicuna" in model_name.lower() or "lemur" in model_name.lower():
         tokenizer = LlamaTokenizer.from_pretrained(model_name,
                                                    additional_special_tokens=additional_special_tokens)
         tokenizer.pad_token = tokenizer.eos_token
 
         if not tokenizer_only:
-            model = LlamaForCausalLM.from_pretrained(model_name,
-                                                     pad_token_id=tokenizer.eos_token_id,
-                                                     torch_dtype=torch.float16)
-            if len(additional_special_tokens) > 0:
-                model.resize_token_embeddings(len(tokenizer))
+            if "30" in model_name or "34" in model_name:
+                model = LlamaForCausalLM.from_pretrained(model_name,
+                                                         pad_token_id=tokenizer.eos_token_id,
+                                                         load_in_8bit=True)
+            elif "65" in model_name or "70" in model_name or "lemur" in model_name.lower():
+                model = LlamaForCausalLM.from_pretrained(model_name,
+                                                         pad_token_id=tokenizer.eos_token_id,
+                                                         load_in_4bit=True,
+                                                         device_map="auto")
+            else:
+                model = LlamaForCausalLM.from_pretrained(model_name,
+                                                         pad_token_id=tokenizer.eos_token_id)
+            # if len(additional_special_tokens) > 0:
+            #     model.resize_token_embeddings(len(tokenizer))
     elif model_name == "bigcode/santacoder":
         tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                   additional_special_tokens=additional_special_tokens)
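Note: the Llama branch above now picks a quantization mode from the checkpoint name, roughly by parameter count. A standalone sketch of that dispatch, assuming transformers with accelerate and bitsandbytes installed; the load_llama helper name is made up for illustration:

# Rough sketch of the size-based loading above; the helper name is illustrative.
from transformers import LlamaForCausalLM, LlamaTokenizer

def load_llama(model_name: str):
    tokenizer = LlamaTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    if "30" in model_name or "34" in model_name:
        # ~30B/34B checkpoints: 8-bit weights to reduce GPU memory
        model = LlamaForCausalLM.from_pretrained(model_name,
                                                 pad_token_id=tokenizer.eos_token_id,
                                                 load_in_8bit=True)
    elif "65" in model_name or "70" in model_name or "lemur" in model_name.lower():
        # ~65B/70B checkpoints: 4-bit weights, sharded automatically across devices
        model = LlamaForCausalLM.from_pretrained(model_name,
                                                 pad_token_id=tokenizer.eos_token_id,
                                                 load_in_4bit=True,
                                                 device_map="auto")
    else:
        # smaller checkpoints load without quantization
        model = LlamaForCausalLM.from_pretrained(model_name,
                                                 pad_token_id=tokenizer.eos_token_id)
    return tokenizer, model

One caveat worth noting: the substring checks on "30", "34", "65", and "70" also match any checkpoint name that happens to contain those digits, so they rely on the naming conventions of the checkpoints actually used.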
@@ -169,7 +179,7 @@ def get_model(model_name: str,
                                                          )
             if len(additional_special_tokens) > 0:
                 model.resize_token_embeddings(len(tokenizer))
-    elif model_name in ["bigcode/starcoder", "HuggingFaceH4/starchat-alpha"]:
+    elif model_name in ["bigcode/starcoder", "HuggingFaceH4/starchat-alpha", "bigcode/starcoderplus", "WizardLM/WizardCoder-15B-V1.0"]:
         tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                   additional_special_tokens=additional_special_tokens)
         tokenizer.pad_token = tokenizer.eos_token
@@ -181,7 +191,7 @@ def get_model(model_name: str,
                                                          trust_remote_code=True)
             if len(additional_special_tokens) > 0:
                 model.resize_token_embeddings(len(tokenizer))
-    elif model_name == "replit/replit-code-v1-3b":
+    elif model_name == "replit/replit-code-v1-3b" or "mpt" in model_name:
         tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                   additional_special_tokens=additional_special_tokens,
                                                   trust_remote_code=True)
@@ -190,7 +200,7 @@ def get_model(model_name: str,
         if not tokenizer_only:
             model = AutoModelForCausalLM.from_pretrained(model_name,
                                                          pad_token_id=tokenizer.eos_token_id,
-                                                         torch_dtype=torch.float16,
+                                                         load_in_8bit=True if "30" in model_name else False,
                                                          trust_remote_code=True)
             if len(additional_special_tokens) > 0:
                 model.resize_token_embeddings(len(tokenizer))
@@ -201,11 +211,11 @@ def get_model(model_name: str,
         tokenizer.pad_token = tokenizer.eos_token
 
         # to accomandate the length of openai models and the prompt
-        if engine in ["code-davinci-002", "gpt-4"]:
+        if engine in ["code-davinci-002"] or engine.startswith("gpt-4"):
             model_length = 8001
         elif engine in ["code-cushman-001", "code-cushman-002"]:
             model_length = 1024
-        elif engine in ["text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"]:
+        elif engine in ["text-davinci-002", "text-davinci-003"] or engine.startswith("gpt-3.5-turbo"):
             model_length = 4096
 
         tokenizer.model_max_length = model_length
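Note: the last hunk widens the context-length lookup to cover dated engine snapshots via prefix matching. A standalone sketch of the mapping, for illustration only; in the repo the chosen value is also written to tokenizer.model_max_length:

# Illustrative mapping from engine name to the context length used for prompt budgeting.
def max_context_length(engine: str) -> int:
    if engine in ["code-davinci-002"] or engine.startswith("gpt-4"):
        return 8001
    elif engine in ["code-cushman-001", "code-cushman-002"]:
        return 1024
    elif engine in ["text-davinci-002", "text-davinci-003"] or engine.startswith("gpt-3.5-turbo"):
        return 4096
    raise ValueError(f"unsupported engine: {engine}")

print(max_context_length("gpt-4-0613"))          # 8001
print(max_context_length("gpt-3.5-turbo-0301"))  # 4096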