From 2407720c21bb3ab8f4ebb1b46ddfde1309cf85a3 Mon Sep 17 00:00:00 2001 From: Braelyn Boynton Date: Fri, 23 Feb 2024 12:39:37 -0800 Subject: [PATCH 1/3] pass in openai api key --- jaiqu/helpers.py | 14 +++++++------- jaiqu/jaiqu.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/jaiqu/helpers.py b/jaiqu/helpers.py index 723280c..c12a4a7 100644 --- a/jaiqu/helpers.py +++ b/jaiqu/helpers.py @@ -11,7 +11,7 @@ def to_key(response: str) -> str | None: return key -def identify_key(key, value, input_schema, key_hints=None) -> tuple[Optional[str], str]: +def identify_key(key, value, input_schema, api_key=None, key_hints=None) -> tuple[Optional[str], str]: """Identify if a key is present in a schema. This function uses the OpenAI API to generate a response.""" system_message = """You are a perfect system designed to validate and extract data from JSON files. @@ -57,17 +57,17 @@ def identify_key(key, value, input_schema, key_hints=None) -> tuple[Optional[str "content": f"Is `{key}` of type `{value}` present in the desired schema?:\n\n {input_schema}" }] - reasoning_response = OpenAI().chat.completions.create(messages=messages, + reasoning_response = OpenAI(api_key).chat.completions.create(messages=messages, model="gpt-4", # logit_bias={2575: 100, 4139: 100}, # max_tokens=1 ) completion = str(reasoning_response.choices[0].message.content) - return (to_key(completion), completion) + return to_key(completion), completion -def create_jq_string(input_schema, key, value) -> str: +def create_jq_string(input_schema, key, value, api_key) -> str: messages: list[ChatCompletionMessageParam] = [{ "role": "system", "content": f"""You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```. @@ -89,11 +89,11 @@ def create_jq_string(input_schema, key, value) -> str: "content": f"Write jq to extract the key `{key}`of type `{value['type']}`" }] - response = OpenAI().chat.completions.create(messages=messages, model="gpt-4-0125-preview") + response = OpenAI(api_key).chat.completions.create(messages=messages, model="gpt-4-0125-preview") return str(response.choices[0].message.content) -def repair_query(query, error, input_schema): +def repair_query(query, error, input_schema, api_key): messages: list[ChatCompletionMessageParam] = [{ "role": "system", "content": "You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```." @@ -107,7 +107,7 @@ def repair_query(query, error, input_schema): Error: {error} Schema: {input_schema}"""}] - response = OpenAI().chat.completions.create(messages=messages, + response = OpenAI(api_key).chat.completions.create(messages=messages, model="gpt-4-0125-preview") return str(response.choices[0].message.content) diff --git a/jaiqu/jaiqu.py b/jaiqu/jaiqu.py index 8ffce3b..570b6e4 100644 --- a/jaiqu/jaiqu.py +++ b/jaiqu/jaiqu.py @@ -6,7 +6,7 @@ from .helpers import identify_key, create_jq_string, repair_query, dict_to_jq_filter -def validate_schema(input_json: dict, output_schema: dict, key_hints=None) -> tuple[dict, bool]: +def validate_schema(input_json: dict, output_schema: dict, open_ai_api_key: str, key_hints=None) -> tuple[dict, bool]: """Validates whether the required data in the output json schema is present in the input json.""" """The input and output json should already be parsed into a dictionary""" results = {} @@ -14,7 +14,7 @@ def validate_schema(input_json: dict, output_schema: dict, key_hints=None) -> tu with tqdm(total=len(output_schema['properties']), desc="Validating schema") as pbar: for key, value in output_schema['properties'].items(): pbar.set_postfix_str(f"Key: {key}", refresh=True) - response_key, response_reasoning = identify_key(key, value, input_json, key_hints) + response_key, response_reasoning = identify_key(key, value, input_json, open_ai_api_key, key_hints) if response_key is not None: results[key] = {"identified": True, "key": response_key, @@ -35,7 +35,7 @@ def validate_schema(input_json: dict, output_schema: dict, key_hints=None) -> tu return results, valid -def translate_schema(input_json, output_schema, key_hints=None, max_retries=10) -> str: +def translate_schema(input_json, output_schema, open_ai_key, key_hints=None, max_retries=10) -> str: """Translates the output schema into a jq filter to extract the required data from the input json.""" schema_properties, is_valid = validate_schema(input_json, output_schema, key_hints) @@ -50,7 +50,7 @@ def translate_schema(input_json, output_schema, key_hints=None, max_retries=10) with tqdm(total=len(filtered_schema), desc="Translating schema") as pbar, tqdm(total=max_retries, desc="Retry attempts") as pbar_retries: for key, value in filtered_schema.items(): pbar.set_postfix_str(f"Key: {key}", refresh=True) - jq_string = create_jq_string(input_json, key, value) + jq_string = create_jq_string(input_json, key, value, open_ai_key) if jq_string == "None": # If the response is empty, skip the key pbar.update(1) From 26129b785ef58782495b606f2b032bb791a94978 Mon Sep 17 00:00:00 2001 From: Braelyn Boynton Date: Fri, 23 Feb 2024 12:51:08 -0800 Subject: [PATCH 2/3] key optional --- jaiqu/jaiqu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jaiqu/jaiqu.py b/jaiqu/jaiqu.py index 570b6e4..d6bcf64 100644 --- a/jaiqu/jaiqu.py +++ b/jaiqu/jaiqu.py @@ -6,7 +6,7 @@ from .helpers import identify_key, create_jq_string, repair_query, dict_to_jq_filter -def validate_schema(input_json: dict, output_schema: dict, open_ai_api_key: str, key_hints=None) -> tuple[dict, bool]: +def validate_schema(input_json: dict, output_schema: dict, open_ai_api_key: str | None = None, key_hints=None) -> tuple[dict, bool]: """Validates whether the required data in the output json schema is present in the input json.""" """The input and output json should already be parsed into a dictionary""" results = {} From d270f709aa69f9c9c6285cf5f58d7402ac7491d0 Mon Sep 17 00:00:00 2001 From: Braelyn Boynton Date: Fri, 23 Feb 2024 13:19:31 -0800 Subject: [PATCH 3/3] key naming --- jaiqu/helpers.py | 12 ++++++------ jaiqu/jaiqu.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/jaiqu/helpers.py b/jaiqu/helpers.py index 0905f78..fcd720c 100644 --- a/jaiqu/helpers.py +++ b/jaiqu/helpers.py @@ -11,7 +11,7 @@ def to_key(response: str) -> Union[str, None]: return key -def identify_key(key, value, input_schema, api_key=None, key_hints=None) -> tuple[Optional[str], str]: +def identify_key(key, value, input_schema, openai_api_key=None, key_hints=None) -> tuple[Optional[str], str]: """Identify if a key is present in a schema. This function uses the OpenAI API to generate a response.""" system_message = """You are a perfect system designed to validate and extract data from JSON files. @@ -57,7 +57,7 @@ def identify_key(key, value, input_schema, api_key=None, key_hints=None) -> tupl "content": f"Is `{key}` of type `{value}` present in the desired schema?:\n\n {input_schema}" }] - reasoning_response = OpenAI(api_key).chat.completions.create(messages=messages, + reasoning_response = OpenAI(openai_api_key).chat.completions.create(messages=messages, model="gpt-4", # logit_bias={2575: 100, 4139: 100}, # max_tokens=1 @@ -67,7 +67,7 @@ def identify_key(key, value, input_schema, api_key=None, key_hints=None) -> tupl return to_key(completion), completion -def create_jq_string(input_schema, key, value, api_key) -> str: +def create_jq_string(input_schema, key, value, openai_api_key) -> str: messages: list[ChatCompletionMessageParam] = [{ "role": "system", "content": f"""You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```. @@ -89,11 +89,11 @@ def create_jq_string(input_schema, key, value, api_key) -> str: "content": f"Write jq to extract the key `{key}`of type `{value['type']}`" }] - response = OpenAI(api_key).chat.completions.create(messages=messages, model="gpt-4-0125-preview") + response = OpenAI(openai_api_key).chat.completions.create(messages=messages, model="gpt-4-0125-preview") return str(response.choices[0].message.content) -def repair_query(query, error, input_schema, api_key): +def repair_query(query, error, input_schema, openai_api_key): messages: list[ChatCompletionMessageParam] = [{ "role": "system", "content": "You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```." @@ -107,7 +107,7 @@ def repair_query(query, error, input_schema, api_key): Error: {error} Schema: {input_schema}"""}] - response = OpenAI(api_key).chat.completions.create(messages=messages, + response = OpenAI(openai_api_key).chat.completions.create(messages=messages, model="gpt-4-0125-preview") return str(response.choices[0].message.content) diff --git a/jaiqu/jaiqu.py b/jaiqu/jaiqu.py index d6bcf64..64154d6 100644 --- a/jaiqu/jaiqu.py +++ b/jaiqu/jaiqu.py @@ -6,7 +6,7 @@ from .helpers import identify_key, create_jq_string, repair_query, dict_to_jq_filter -def validate_schema(input_json: dict, output_schema: dict, open_ai_api_key: str | None = None, key_hints=None) -> tuple[dict, bool]: +def validate_schema(input_json: dict, output_schema: dict, openai_api_key: str | None = None, key_hints=None) -> tuple[dict, bool]: """Validates whether the required data in the output json schema is present in the input json.""" """The input and output json should already be parsed into a dictionary""" results = {} @@ -14,7 +14,7 @@ def validate_schema(input_json: dict, output_schema: dict, open_ai_api_key: str with tqdm(total=len(output_schema['properties']), desc="Validating schema") as pbar: for key, value in output_schema['properties'].items(): pbar.set_postfix_str(f"Key: {key}", refresh=True) - response_key, response_reasoning = identify_key(key, value, input_json, open_ai_api_key, key_hints) + response_key, response_reasoning = identify_key(key, value, input_json, openai_api_key, key_hints) if response_key is not None: results[key] = {"identified": True, "key": response_key, @@ -35,7 +35,7 @@ def validate_schema(input_json: dict, output_schema: dict, open_ai_api_key: str return results, valid -def translate_schema(input_json, output_schema, open_ai_key, key_hints=None, max_retries=10) -> str: +def translate_schema(input_json, output_schema, openai_api_key: str | None = None, key_hints=None, max_retries=10) -> str: """Translates the output schema into a jq filter to extract the required data from the input json.""" schema_properties, is_valid = validate_schema(input_json, output_schema, key_hints) @@ -50,7 +50,7 @@ def translate_schema(input_json, output_schema, open_ai_key, key_hints=None, max with tqdm(total=len(filtered_schema), desc="Translating schema") as pbar, tqdm(total=max_retries, desc="Retry attempts") as pbar_retries: for key, value in filtered_schema.items(): pbar.set_postfix_str(f"Key: {key}", refresh=True) - jq_string = create_jq_string(input_json, key, value, open_ai_key) + jq_string = create_jq_string(input_json, key, value, openai_api_key) if jq_string == "None": # If the response is empty, skip the key pbar.update(1) @@ -64,7 +64,7 @@ def translate_schema(input_json, output_schema, open_ai_key, key_hints=None, max except Exception as e: tries += 1 pbar_retries.update(1) - jq_string = repair_query(jq_string, str(e), input_json) + jq_string = repair_query(jq_string, str(e), input_json, openai_api_key) if tries >= max_retries: raise RuntimeError( f"Failed to create a valid jq filter for key '{key}' after {max_retries} retries.") @@ -87,6 +87,6 @@ def translate_schema(input_json, output_schema, open_ai_key, key_hints=None, max pbar_validation.update(1) if tries >= max_retries: raise RuntimeError(f"Failed to validate the jq filter after {max_retries} retries.") - complete_filter = repair_query(complete_filter, str(e), input_json) + complete_filter = repair_query(complete_filter, str(e), input_json, openai_api_key) pbar.close() return complete_filter