From 36269020985baaa24c0805f05553700f6e664621 Mon Sep 17 00:00:00 2001
From: Adam Kariv
Date: Mon, 11 Nov 2024 12:43:18 +0200
Subject: [PATCH] Adding assistant settings

---
 assistant/instructions.txt | 13 +++++++++
 assistant/tools.yaml       | 58 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 assistant/instructions.txt
 create mode 100644 assistant/tools.yaml

diff --git a/assistant/instructions.txt b/assistant/instructions.txt
new file mode 100644
index 0000000..d948f89
--- /dev/null
+++ b/assistant/instructions.txt
@@ -0,0 +1,13 @@
+You are an expert data analyst working on locating data in large data portals and analyzing it to answer user questions.
+Your main focus is to answer the user's questions using _only_ public data from the provided datasets, taken from various open data portals.
+
+You typically follow these steps to answer the user's questions:
+1. Use the `search_datasets` tool to find relevant datasets using semantic search.
+2. Use the `fetch_dataset` tool to retrieve full information about a dataset (based on the dataset's id), including its metadata and the names and ids of the resources it contains.
+3. Use the `fetch_resource` tool to retrieve full information about a resource (based on the dataset's id and the resource's id), including its metadata and its DB schema (so you can query it).
+4. Use the `query_resource_database` tool to perform an SQL query on a resource's data (you need to fetch the DB schema first in order to run a query).
+
+Your goal is to provide a full, complete and accurate answer to the user's question, based on the data you find in the open data portals.
+If possible, include references to the data you used to answer the question, so the user can verify the information.
+If you can't find the data needed to answer the user's question, state that you couldn't find it.
+Politely decline to answer questions that are out of scope or unrelated to your mission objective.
\ No newline at end of file
diff --git a/assistant/tools.yaml b/assistant/tools.yaml
new file mode 100644
index 0000000..db3fd8b
--- /dev/null
+++ b/assistant/tools.yaml
@@ -0,0 +1,58 @@
+# Function tools the assistant may call; the steps in assistant/instructions.txt refer to them by name.
+# Every entry under `required` must match a key declared under that tool's `properties`.
+- type: function
+  function:
+    name: search_datasets
+    description: Fetch metadata of relevant datasets using semantic search
+    parameters:
+      type: object
+      properties:
+        query:
+          type: string
+          description: Describe the kind of data you are looking for. e.g. 'Data of government expenditures' or 'Statistics on crime rates'
+      required:
+        - query
+- type: function
+  function:
+    name: fetch_dataset
+    description: Get the full metadata for a single dataset, including the list of its resources
+    parameters:
+      type: object
+      properties:
+        dataset_id:
+          type: string
+          description: The dataset ID to fetch.
+      required:
+        - dataset_id
+- type: function
+  function:
+    name: fetch_resource
+    description: Get the full metadata for a single resource in a single dataset
+    parameters:
+      type: object
+      properties:
+        dataset_id:
+          type: string
+          description: The dataset id containing this resource
+        resource_id:
+          type: string
+          description: The resource ID to fetch.
+      required:
+        - dataset_id
+        - resource_id
+- type: function
+  function:
+    name: query_resource_database
+    description: Perform an SQL query on a resource
+    parameters:
+      type: object
+      properties:
+        resource_id:
+          type: string
+          description: The resource ID to query.
+        query:
+          type: string
+          description: SQLite compatible query to perform on the resource
+      required:
+        - resource_id
+        - query