From 4a422766d74ebb5b33267afe4e4a5b5d48ef4ac6 Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Mon, 9 Dec 2024 18:56:50 +0800 Subject: [PATCH] optimize auto completion trigger (#1359) --- skyvern/constants.py | 1 + .../skyvern/auto-completion-potential-answers.j2 | 2 +- .../prompts/skyvern/parse-input-or-select-context.j2 | 2 +- skyvern/webeye/actions/handler.py | 11 +++++++++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/skyvern/constants.py b/skyvern/constants.py index 54319b388..a523ce9e4 100644 --- a/skyvern/constants.py +++ b/skyvern/constants.py @@ -15,6 +15,7 @@ SAVE_DOWNLOADED_FILES_TIMEOUT = 180 GET_DOWNLOADED_FILES_TIMEOUT = 30 NAVIGATION_MAX_RETRY_TIME = 5 +AUTO_COMPLETION_POTENTIAL_VALUES_COUNT = 5 # reserved fields for navigation payload SPECIAL_FIELD_VERIFICATION_CODE = "verification_code" diff --git a/skyvern/forge/prompts/skyvern/auto-completion-potential-answers.j2 b/skyvern/forge/prompts/skyvern/auto-completion-potential-answers.j2 index 5e6b7a187..a93900759 100644 --- a/skyvern/forge/prompts/skyvern/auto-completion-potential-answers.j2 +++ b/skyvern/forge/prompts/skyvern/auto-completion-potential-answers.j2 @@ -1,5 +1,5 @@ You're doing an auto completion input action on HTML page. The current filled value doesn't match any option. -Based on the context, current value, user goal and user details, give ten most potential values with the same meaning as the current value. +Based on the context, current value, user goal and user details, give {{ potential_value_count }} most potential values with the same meaning as the current value. You can provide values like: - Subset or superset meaning from the current value - Summarized from the current value diff --git a/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 b/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 index f5419088c..919fcc413 100644 --- a/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 +++ b/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 @@ -8,7 +8,7 @@ Reply in the following JSON format: "field": str, // Which field is this action intended to fill out? "is_required": bool, // True if this is a required field, otherwise false. "is_search_bar": bool, // True if the element to take the action is a search bar, otherwise false. - "is_location_input": bool, // True if the element is asking user to input where he lives, otherwise false. For example, it is asking for location, or address, or other similar information. Output False if it only requires ZIP code. + "is_location_input": bool, // True if the element is asking user to input where he lives, otherwise false. For example, it is asking for location, or address, or other similar information. Output False if it only requires ZIP code or postal code. } Existing reasoning context: diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index a58cc7ef7..70226da39 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -14,7 +14,12 @@ from pydantic import BaseModel from skyvern.config import settings -from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT, REPO_ROOT_DIR, SKYVERN_ID_ATTR +from skyvern.constants import ( + AUTO_COMPLETION_POTENTIAL_VALUES_COUNT, + BROWSER_DOWNLOAD_TIMEOUT, + REPO_ROOT_DIR, + SKYVERN_ID_ATTR, +) from skyvern.exceptions import ( EmptySelect, ErrEmptyTweakValue, @@ -1597,6 +1602,7 @@ async def input_or_auto_complete_input( prompt = prompt_engine.load_prompt( "auto-completion-potential-answers", + potential_value_count=AUTO_COMPLETION_POTENTIAL_VALUES_COUNT, field_information=input_or_select_context.field, current_value=current_value, navigation_goal=task.navigation_goal, @@ -1604,10 +1610,11 @@ async def input_or_auto_complete_input( ) LOG.info( - "Ask LLM to give 10 potential values based on the current value", + "Ask LLM to give potential values based on the current value", current_value=current_value, step_id=step.step_id, task_id=task.task_id, + potential_value_count=AUTO_COMPLETION_POTENTIAL_VALUES_COUNT, ) json_respone = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, step=step) values: list[dict] = json_respone.get("potential_values", [])