Skip to content

Commit

Permalink
Merge branch 'staging' of github.com:georgia-tech-db/eva into staging
Browse files Browse the repository at this point in the history
  • Loading branch information
gaurav274 committed Sep 19, 2023
2 parents 922859d + ea6bfc6 commit 3cac03c
Show file tree
Hide file tree
Showing 39 changed files with 887 additions and 326 deletions.
43 changes: 43 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Release workflow: after the "Sync Staging and Master" workflow finishes for
# the staging branch, check out master, install the package with its dev
# extras, write PyPI credentials to ~/.pypirc and run the release script.
name: Release

on:
  workflow_run:
    workflows: [Sync Staging and Master]
    types:
      - completed
    branches:
      - staging

jobs:
  release:
    # `completed` also fires when the triggering run failed or was cancelled;
    # only release when the sync workflow actually succeeded.
    # NOTE(review): the sync workflow also concludes successfully when it
    # skips a "dev" revision -- confirm the release script tolerates that.
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: master
      - name: Install env.
        run: |
          python -m venv test_evadb
          source test_evadb/bin/activate
          pip install --upgrade pip
          pip install ".[dev]"
      - name: Create pypirc.
        env:
          PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
          PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          # The unquoted heredoc delimiter lets the shell expand the
          # credential variables supplied through the step environment.
          # The value under `index-servers =` must stay indented so it is
          # parsed as a continuation line.
          cat > ~/.pypirc <<EOL
          [distutils]
          index-servers =
              pypi
          [pypi]
          repository = https://upload.pypi.org/legacy/
          username = ${PYPI_USERNAME}
          password = ${PYPI_PASSWORD}
          EOL
      - name: Release.
        env:
          GITHUB_KEY: ${{ github.token }}
        run: |
          source test_evadb/bin/activate
          python script/releasing/releaser.py -n minor -u
30 changes: 30 additions & 0 deletions .github/workflows/sync.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Sync workflow: when evadb/version.py changes on staging and the revision
# string does not contain "dev", force master to the pushed commit.
name: Sync Staging and Master

on:
  push:
    branches: [staging]
    paths: [evadb/version.py]

jobs:
  sync:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install env.
        run: |
          python -m venv test_evadb
          source test_evadb/bin/activate
          pip install --upgrade pip
          pip install ".[dev]"
      - name: Check version and sync.
        run: |
          source test_evadb/bin/activate
          # Yields the revision string, or the literal "skip" for dev revisions.
          revision=$(python -c "from evadb.version import _REVISION; print(_REVISION if 'dev' not in _REVISION else 'skip')")
          if [[ "$revision" == "skip" ]]; then
            exit 0
          fi
          git pull
          git checkout master
          # Point master at the commit that triggered this run, then publish it.
          git reset --hard ${{ github.sha }}
          git push -f origin master
37 changes: 18 additions & 19 deletions apps/privategpt/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,37 @@ def load_data(source_folder_path: str):
cursor = evadb.connect(path).cursor()

# Drop function if it already exists
cursor.drop_function("embedding").execute()

cursor.query("DROP FUNCTION IF EXISTS embedding;").execute()
# Create function from Python file
# This function is a sentence feature extractor
embedding_udf = cursor.create_function(
udf_name="embedding",
if_not_exists=True,
impl_path=f"{path}/udfs/sentence_feature_extractor.py",
)
embedding_udf.execute()
text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS embedding
IMPL '{path}/functions/sentence_feature_extractor.py';
"""
print(text_feat_function_query)
cursor.query(text_feat_function_query).execute()

print("🧹 Dropping existing tables in EvaDB")
cursor.drop_table("data_table").execute()
cursor.drop_table("embedding_table").execute()
cursor.query("DROP TABLE IF EXISTS data_table;").execute()
cursor.query("DROP TABLE IF EXISTS embedding_table;").execute()

print("📄 Loading PDFs into EvaDB")
cursor.load(
file_regex=f"{source_folder_path}/*.pdf", format="PDF", table_name="data_table"
).execute()
text_load_query = f"""LOAD PDF '{source_folder_path}/*.pdf' INTO data_table;"""
print(text_load_query)
cursor.query(text_load_query).execute()

print("🤖 Extracting Feature Embeddings. This may take some time ...")
cursor.query(
"CREATE TABLE IF NOT EXISTS embedding_table AS SELECT embedding(data), data FROM data_table;"
).execute()

print("🔍 Building FAISS Index ...")
cursor.create_vector_index(
index_name="embedding_index",
table_name="embedding_table",
expr="features",
using="FAISS",
)
cursor.query(
"""
CREATE INDEX embedding_index
ON embedding_table (features)
USING FAISS;
"""
).execute()


def main():
Expand Down
20 changes: 12 additions & 8 deletions apps/privategpt/privateGPT.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@


def query(question):
context_docs = (
cursor.table("embedding_table")
.order(f"Similarity(embedding('{question}'), features)")
.limit(3)
.select("data")
.df()
)
context_docs = cursor.query(
f"""
SELECT data
FROM embedding_table
ORDER BY Similarity(embedding('{question}'), features)
ASC LIMIT 3;
"""
).df()

# Merge all context information.
context = "; \n".join(context_docs["embedding_table.data"])

Expand All @@ -51,8 +53,10 @@ def query(question):
print("\n>> Context: ")
print(context)


print(
"🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before running this file."
"🔮 Welcome to EvaDB! Don't forget to run `python ingest.py` before"
" running this file."
)

## Take input of queries from user in a loop
Expand Down
28 changes: 15 additions & 13 deletions apps/story_qa/evadb_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from time import perf_counter

from gpt4all import GPT4All
from unidecode import unidecode
from util import download_story, read_text_line, try_execute
from util import download_story, read_text_line

import evadb


def ask_question(path):
def ask_question(story_path: str):
# Initialize early to exclude download time.
llm = GPT4All("ggml-gpt4all-j-v1.3-groovy")

path = os.path.dirname(evadb.__file__)
cursor = evadb.connect().cursor()

story_table = "TablePPText"
Expand All @@ -35,17 +37,17 @@ def ask_question(path):
t_i = 0

timestamps[t_i] = perf_counter()
print("Setup UDF")
print("Setup Function")

Text_feat_udf_query = """CREATE UDF IF NOT EXISTS SentenceFeatureExtractor
IMPL 'evadb/udfs/sentence_feature_extractor.py';
Text_feat_function_query = f"""CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
IMPL '{path}/functions/sentence_feature_extractor.py';
"""

cursor.query("DROP UDF IF EXISTS SentenceFeatureExtractor;").execute()
cursor.query(Text_feat_udf_query).execute()
cursor.query("DROP FUNCTION IF EXISTS SentenceFeatureExtractor;").execute()
cursor.query(Text_feat_function_query).execute()

try_execute(cursor, f"DROP TABLE IF EXISTS {story_table};")
try_execute(cursor, f"DROP TABLE IF EXISTS {story_feat_table};")
cursor.query(f"DROP TABLE IF EXISTS {story_table};").execute()
cursor.query(f"DROP TABLE IF EXISTS {story_feat_table};").execute()

t_i = t_i + 1
timestamps[t_i] = perf_counter()
Expand All @@ -56,7 +58,7 @@ def ask_question(path):
cursor.query(f"CREATE TABLE {story_table} (id INTEGER, data TEXT(1000));").execute()

# Insert text chunk by chunk.
for i, text in enumerate(read_text_line(path)):
for i, text in enumerate(read_text_line(story_path)):
print("text: --" + text + "--")
ascii_text = unidecode(text)
cursor.query(
Expand Down Expand Up @@ -84,7 +86,7 @@ def ask_question(path):

# Create search index on extracted features.
cursor.query(
f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING FAISS;"
f"CREATE INDEX {index_table} ON {story_feat_table} (features) USING" " FAISS;"
).execute()

t_i = t_i + 1
Expand Down Expand Up @@ -139,9 +141,9 @@ def ask_question(path):


def main():
path = download_story()
story_path = download_story()

ask_question(path)
ask_question(story_path)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion apps/youtube_channel_qa/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ pip install -r requirements.txt
## Usage
Run script:
```bat
python multi_youtube_video_qa.py
python youtube_channel_qa.py
```

Loading

0 comments on commit 3cac03c

Please sign in to comment.