Commit

fixes to run chatgpt.json prompt dataset in python
goliaro committed Jan 8, 2024
1 parent 4f61b9f commit 29735f2
Showing 6 changed files with 12 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -17,3 +17,5 @@ python/flexflow/core/legion_cffi_header.py
 /inference/tokenizer/*
 /inference/prompt/*
 /inference/output/*
+
+/tests/inference/python_test_configs/*.json
3 changes: 2 additions & 1 deletion .gitignore
@@ -186,4 +186,5 @@ gpt_tokenizer
 # pip version
 python/flexflow/version.txt
 
-inference_tensors
+inference_tensors
+tests/inference/python_test_configs/*.json
2 changes: 1 addition & 1 deletion python/flexflow/core/flexflow_cffi.py
@@ -56,7 +56,7 @@ def get_c_name(name):
     if name is None:
         return ffi.NULL
     else:
-        return ffi.new("char[]", name.encode("ascii"))
+        return ffi.new("char[]", name.encode("utf-8"))
 
 
 def get_datatype_size(datatype):
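The encoding switch above is what lets prompts containing non-ASCII characters (as a chatgpt.json-style dataset may include) pass through the C API without raising. A minimal sketch of the difference, using a made-up prompt string rather than one from the dataset:

    # Illustrative only: the prompt string below is hypothetical, not from chatgpt.json.
    prompt = "Explain the word café"

    try:
        prompt.encode("ascii")        # raises UnicodeEncodeError on any non-ASCII character
    except UnicodeEncodeError as err:
        print("ascii encoding failed:", err)

    data = prompt.encode("utf-8")     # succeeds for ordinary text
    print(data)                       # b'Explain the word caf\xc3\xa9'
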
6 changes: 5 additions & 1 deletion src/c/flexflow_c.cc
@@ -1596,7 +1596,11 @@ flexflow_generation_result_t
   GenerationResult result = handle->generate(prompts, max_seq_length);
   DEBUG_PRINT(
       "[Model] generate %p %s %i", handle, text_str.c_str(), max_seq_length);
-  assert(result.output_tokens.size() <= max_seq_length);
+  // If the prompt exceeds max seq len, check that we return the prompt with no
+  // additional token. Otherwise, check that the output does not exceed the max
+  // sequence length.
+  assert(result.output_tokens.size() <= max_seq_length ||
+         result.output_tokens.size() == result.input_tokens.size());
   output_length_and_tokens[0] = result.output_tokens.size();
   std::copy(result.output_tokens.begin(),
             result.output_tokens.end(),
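The relaxed assertion admits one extra case: when the prompt is already longer than max_seq_length, the result is the prompt echoed back with no additional tokens, so the output length equals the input length instead of staying under the limit. A small Python sketch of that invariant, with hypothetical token lists:

    # Hypothetical token lists; this mirrors only the length check, not generation itself.
    def check_generation_length(input_tokens, output_tokens, max_seq_length):
        assert (
            len(output_tokens) <= max_seq_length
            or len(output_tokens) == len(input_tokens)
        ), "output exceeds max_seq_length and is not just the echoed prompt"

    # Normal case: the output stays within the limit.
    check_generation_length(list(range(10)), list(range(128)), max_seq_length=256)
    # Over-long prompt: returned as-is, with no additional tokens.
    check_generation_length(list(range(300)), list(range(300)), max_seq_length=256)
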
1 change: 0 additions & 1 deletion src/runtime/model.cu
@@ -175,7 +175,6 @@ FFHandler
   } else {
     handle.batch_config_metadata = nullptr;
   }
-
 
   // checkCUDA(cudaMalloc(&handle.workSpace, handle.workSpaceSize));
 #ifdef FF_USE_NCCL
3 changes: 2 additions & 1 deletion tests/inference/python_inference_tests.sh
@@ -6,11 +6,12 @@ set -e
 cd "${BASH_SOURCE[0]%/*}"
 
 # Generate test configs
+rm -rf python_test_configs/*.json
 python python_test_configs/generate_configs.py
 
 # Run all tests
 # Loop through .json files in the ./python_test_configs dir
-for file in ./python_test_configs/*.json; do
+for file in ./python_test_configs/*"llama"*.json; do
     # Check filename prefix
     if [[ $file == *"incr_dec"* ]]; then
        script="../../inference/python/incr_decoding.py"
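With the narrowed glob, stale configs are removed up front and only the LLaMA configs produced by generate_configs.py are picked up on this run; other generated configs are skipped. A rough Python equivalent of the new filter, assuming the same directory layout:

    # Rough Python equivalent of the bash glob; paths assume the same layout as the test script.
    from pathlib import Path

    for config in sorted(Path("./python_test_configs").glob("*llama*.json")):
        if "incr_dec" in config.name:
            script = "../../inference/python/incr_decoding.py"
        else:
            # Other config names map to a different test script (not shown in this hunk).
            script = None
        print(config, "->", script)
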
