Merge branch 'staging' of github.com:georgia-tech-db/eva into staging

georgia-tech-db · Sep 19, 2023 · 3cac03c · 3cac03c
2 parents 922859d + ea6bfc6
commit 3cac03c
Show file tree

Hide file tree

Showing 39 changed files with 887 additions and 326 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,43 @@
+# Publishes a release from master once the "Sync Staging and Master"
+# workflow has finished for the staging branch.
+name: Release
+
+on:
+  workflow_run:
+    workflows: [Sync Staging and Master]
+    types:
+      - completed
+    branches:
+      - staging
+
+jobs:
+  release:
+    # `completed` also fires for failed and cancelled upstream runs, so only
+    # release when the triggering workflow actually succeeded.
+    # NOTE(review): the sync workflow also ends successfully when it skips the
+    # sync for a 'dev' revision -- confirm releaser.py tolerates that case.
+    if: ${{ github.event.workflow_run.conclusion == 'success' }}
+    runs-on: ubuntu-latest
+    steps:
+      # Release from master rather than from the default checkout ref.
+      - uses: actions/checkout@v4
+        with:
+          ref: master
+      # Install the package with its dev extras into a local virtualenv.
+      - name: Install env.
+        run: |
+          python -m venv test_evadb
+          source test_evadb/bin/activate
+          pip install --upgrade pip
+          pip install ".[dev]"
+      # Credentials come from repository secrets; the unquoted heredoc lets
+      # the shell expand them into ~/.pypirc.
+      - name: Create pypirc.
+        env:
+          PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          cat > ~/.pypirc <<EOL
+          [distutils]
+          index-servers =
+            pypi
+          [pypi]
+          repository = https://upload.pypi.org/legacy/
+          username = ${PYPI_USERNAME}
+          password = ${PYPI_PASSWORD}
+          EOL
+      # Run the release script inside the virtualenv created above, with the
+      # workflow token exposed to it as GITHUB_KEY.
+      - name: Release.
+        env:
+          GITHUB_KEY: ${{ github.token }}
+        run: |
+          source test_evadb/bin/activate
+          python script/releasing/releaser.py -n minor -u
diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml
@@ -0,0 +1,30 @@
+# Mirrors staging onto master: runs when a push to staging touches
+# evadb/version.py, and force-pushes master to the pushed commit unless the
+# revision string contains 'dev'.
+name: Sync Staging and Master
+
+on:
+  push:
+    branches:
+      - staging
+    paths:
+      - evadb/version.py
+
+jobs:
+  sync:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # Install the package with its dev extras into a virtualenv so that
+      # evadb.version can be imported by the next step.
+      - name: Install env.
+        run: |
+          python -m venv test_evadb
+          source test_evadb/bin/activate
+          pip install --upgrade pip
+          pip install ".[dev]"
+      # Prints _REVISION, or 'skip' when it contains 'dev'. On 'skip' nothing
+      # is pushed; otherwise master is hard-reset to the commit that triggered
+      # this run and force-pushed to origin.
+      - name: Check version and sync.
+        run: |
+          source test_evadb/bin/activate
+          cmd=$(python -c "from evadb.version import _REVISION; print(_REVISION if 'dev' not in _REVISION else 'skip')")
+          if [[ "$cmd" != "skip" ]]; then
+            git pull
+            git checkout master
+            git reset --hard ${{ github.sha }}
+            git push -f origin master
+          fi
diff --git a/apps/privategpt/ingest.py b/apps/privategpt/ingest.py
@@ -23,38 +23,37 @@ def load_data(source_folder_path: str):
     cursor = evadb.connect(path).cursor()
 
     # Drop function if it already exists
-    cursor.drop_function("embedding").execute()
-
+    cursor.query("DROP FUNCTION IF EXISTS embedding;").execute()
     # Create function from Python file
     # This function is a sentence feature extractor
-    embedding_udf = cursor.create_function(
-        udf_name="embedding",
-        if_not_exists=True,
-        impl_path=f"{path}/udfs/sentence_feature_extractor.py",
-    )
-    embedding_udf.execute()
+    text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS embedding
+            IMPL  '{path}/functions/sentence_feature_extractor.py';
+            """
+    print(text_feat_function_query)
+    cursor.query(text_feat_function_query).execute()
 
     print("🧹 Dropping existing tables in EvaDB")
-    cursor.drop_table("data_table").execute()
-    cursor.drop_table("embedding_table").execute()
+    cursor.query("DROP TABLE IF EXISTS data_table;").execute()
+    cursor.query("DROP TABLE IF EXISTS embedding_table;").execute()
 
     print("📄 Loading PDFs into EvaDB")
-    cursor.load(
-        file_regex=f"{source_folder_path}/*.pdf", format="PDF", table_name="data_table"
-    ).execute()
+    text_load_query = f"""LOAD PDF '{source_folder_path}/*.pdf' INTO data_table;"""
+    print(text_load_query)
+    cursor.query(text_load_query).execute()
 
     print("🤖 Extracting Feature Embeddings. This may take some time ...")
     cursor.query(
         "CREATE TABLE IF NOT EXISTS embedding_table AS SELECT embedding(data), data FROM data_table;"
     ).execute()
 
     print("🔍 Building FAISS Index ...")
-    cursor.create_vector_index(
-        index_name="embedding_index",
-        table_name="embedding_table",
-        expr="features",
-        using="FAISS",
-    )
+    cursor.query(
+        """
+        CREATE INDEX embedding_index
+        ON embedding_table (features)
+        USING FAISS;
+    """
+    ).execute()
 
 
 def main():

diff --git a/apps/privategpt/privateGPT.py b/apps/privategpt/privateGPT.py
@@ -23,13 +23,15 @@
 
 
 def query(question):
-    context_docs = (
-        cursor.table("embedding_table")
-        .order(f"Similarity(embedding('{question}'), features)")
-        .limit(3)
-        .select("data")
-        .df()
-    )
+    context_docs = cursor.query(
+        f"""
+        SELECT data
+        FROM embedding_table
+        ORDER BY Similarity(embedding('{question}'), features)
+        ASC LIMIT 3;
+    """
+    ).df()
+
     # Merge all context information.
     context = "; \n".join(context_docs["embedding_table.data"])
 
@@ -51,8 +53,10 @@ def query(question):
     print("\n>> Context: ")
     print(context)
 
+
 print(
-    "🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before running this file."
+    "🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before"
+    " running this file."
 )
 
 ## Take input of queries from user in a loop

diff --git a/apps/story_qa/evadb_qa.py b/apps/story_qa/evadb_qa.py
@@ -12,19 +12,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 from time import perf_counter
 
 from gpt4all import GPT4All
 from unidecode import unidecode
-from util import download_story, read_text_line, try_execute
+from util import download_story, read_text_line
 
 import evadb
 
 
-def ask_question(path):
+def ask_question(story_path: str):
     # Initialize early to exclude download time.
     llm = GPT4All("ggml-gpt4all-j-v1.3-groovy")
 
+    path = os.path.dirname(evadb.__file__)
     cursor = evadb.connect().cursor()
 
     story_table = "TablePPText"
@@ -35,17 +37,17 @@ def ask_question(path):
     t_i = 0
 
     timestamps[t_i] = perf_counter()
-    print("Setup UDF")
+    print("Setup Function")
 
-    Text_feat_udf_query = """CREATE UDF IF NOT EXISTS SentenceFeatureExtractor
-            IMPL  'evadb/udfs/sentence_feature_extractor.py';
+    Text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
+            IMPL  '{path}/functions/sentence_feature_extractor.py';
             """
 
-    cursor.query("DROP UDF IF EXISTS SentenceFeatureExtractor;").execute()
-    cursor.query(Text_feat_udf_query).execute()
+    cursor.query("DROP FUNCTION IF EXISTS SentenceFeatureExtractor;").execute()
+    cursor.query(Text_feat_function_query).execute()
 
-    try_execute(cursor, f"DROP TABLE IF EXISTS {story_table};")
-    try_execute(cursor, f"DROP TABLE IF EXISTS {story_feat_table};")
+    cursor.query(f"DROP TABLE IF EXISTS {story_table};").execute()
+    cursor.query(f"DROP TABLE IF EXISTS {story_feat_table};").execute()
 
     t_i = t_i + 1
     timestamps[t_i] = perf_counter()
@@ -56,7 +58,7 @@ def ask_question(path):
     cursor.query(f"CREATE TABLE {story_table} (id INTEGER, data TEXT(1000));").execute()
 
     # Insert text chunk by chunk.
-    for i, text in enumerate(read_text_line(path)):
+    for i, text in enumerate(read_text_line(story_path)):
         print("text: --" + text + "--")
         ascii_text = unidecode(text)
         cursor.query(
@@ -84,7 +86,7 @@ def ask_question(path):
 
     # Create search index on extracted features.
     cursor.query(
-        f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING FAISS;"
+        f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING" " FAISS;"
     ).execute()
 
     t_i = t_i + 1
@@ -139,9 +141,9 @@ def ask_question(path):
 
 
 def main():
-    path = download_story()
+    story_path = download_story()
 
-    ask_question(path)
+    ask_question(story_path)
 
 
 if __name__ == "__main__":

diff --git a/apps/youtube_channel_qa/README.md b/apps/youtube_channel_qa/README.md
@@ -28,6 +28,6 @@ pip install -r requirements.txt
 ## Usage
 Run script: 
 ```bat
-python multi_youtube_video_qa.py
+python youtube_channel_qa.py
 ```