Commit

fixes to run chatgpt.json prompt dataset in python
goliaro committed Jan 8, 2024
1 parent 4f61b9f commit 29735f2
Showing 6 changed files with 12 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -17,3 +17,5 @@ python/flexflow/core/legion_cffi_header.py
 /inference/tokenizer/*
 /inference/prompt/*
 /inference/output/*
+
+/tests/inference/python_test_configs/*.json
3 changes: 2 additions & 1 deletion .gitignore
@@ -186,4 +186,5 @@ gpt_tokenizer
 # pip version
 python/flexflow/version.txt
 
-inference_tensors
+inference_tensors
+tests/inference/python_test_configs/*.json
2 changes: 1 addition & 1 deletion python/flexflow/core/flexflow_cffi.py
@@ -56,7 +56,7 @@ def get_c_name(name):
     if name is None:
         return ffi.NULL
     else:
-        return ffi.new("char[]", name.encode("ascii"))
+        return ffi.new("char[]", name.encode("utf-8"))
 
 
 def get_datatype_size(datatype):
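The encoding switch above is what lets prompts containing non-ASCII characters (as a chatgpt.json-style dataset may include) pass through the C API without raising. A minimal sketch of the difference, using a made-up prompt string rather than one from the dataset:

    # Illustrative only: the prompt string below is hypothetical, not from chatgpt.json.
    prompt = "Explain the word café"

    try:
        prompt.encode("ascii")        # raises UnicodeEncodeError on any non-ASCII character
    except UnicodeEncodeError as err:
        print("ascii encoding failed:", err)

    data = prompt.encode("utf-8")     # succeeds for ordinary text
    print(data)                       # b'Explain the word caf\xc3\xa9'
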
6 changes: 5 additions & 1 deletion src/c/flexflow_c.cc
@@ -1596,7 +1596,11 @@ flexflow_generation_result_t
   GenerationResult result = handle->generate(prompts, max_seq_length);
   DEBUG_PRINT(
       "[Model] generate %p %s %i", handle, text_str.c_str(), max_seq_length);
-  assert(result.output_tokens.size() <= max_seq_length);
+  // If the prompt exceeds max seq len, check that we return the prompt with no
+  // additional token. Otherwise, check that the output does not exceed the max
+  // sequence length.
+  assert(result.output_tokens.size() <= max_seq_length ||
+         result.output_tokens.size() == result.input_tokens.size());
   output_length_and_tokens[0] = result.output_tokens.size();
   std::copy(result.output_tokens.begin(),
             result.output_tokens.end(),
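The relaxed assertion admits one extra case: when the prompt is already longer than max_seq_length, the result is the prompt echoed back with no additional tokens, so the output length equals the input length instead of staying under the limit. A small Python sketch of that invariant, with hypothetical token lists:

    # Hypothetical token lists; this mirrors only the length check, not generation itself.
    def check_generation_length(input_tokens, output_tokens, max_seq_length):
        assert (
            len(output_tokens) <= max_seq_length
            or len(output_tokens) == len(input_tokens)
        ), "output exceeds max_seq_length and is not just the echoed prompt"

    # Normal case: the output stays within the limit.
    check_generation_length(list(range(10)), list(range(128)), max_seq_length=256)
    # Over-long prompt: returned as-is, with no additional tokens.
    check_generation_length(list(range(300)), list(range(300)), max_seq_length=256)
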
1 change: 0 additions & 1 deletion src/runtime/model.cu
@@ -175,7 +175,6 @@ FFHandler
   } else {
     handle.batch_config_metadata = nullptr;
   }
-
 
   // checkCUDA(cudaMalloc(&handle.workSpace, handle.workSpaceSize));
 #ifdef FF_USE_NCCL
3 changes: 2 additions & 1 deletion tests/inference/python_inference_tests.sh
@@ -6,11 +6,12 @@ set -e
 cd "${BASH_SOURCE[0]%/*}"
 
 # Generate test configs
+rm -rf python_test_configs/*.json
 python python_test_configs/generate_configs.py
 
 # Run all tests
 # Loop through .json files in the ./python_test_configs dir
-for file in ./python_test_configs/*.json; do
+for file in ./python_test_configs/*"llama"*.json; do
     # Check filename prefix
     if [[ $file == *"incr_dec"* ]]; then
        script="../../inference/python/incr_decoding.py"
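With the narrowed glob, stale configs are removed up front and only the LLaMA configs produced by generate_configs.py are picked up on this run; other generated configs are skipped. A rough Python equivalent of the new filter, assuming the same directory layout:

    # Rough Python equivalent of the bash glob; paths assume the same layout as the test script.
    from pathlib import Path

    for config in sorted(Path("./python_test_configs").glob("*llama*.json")):
        if "incr_dec" in config.name:
            script = "../../inference/python/incr_decoding.py"
        else:
            # Other config names map to a different test script (not shown in this hunk).
            script = None
        print(config, "->", script)
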
