Skip to content

Commit

Permalink
Merge pull request #4 from sugarforever/feature/multi-files-knowledge…
Browse files Browse the repository at this point in the history
…base

multi-files knowledgebase support
  • Loading branch information
sugarforever authored Mar 5, 2024
2 parents 6049193 + 013f81e commit c6787d4
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 37 deletions.
21 changes: 13 additions & 8 deletions pages/knowledgebases/index.vue
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,21 @@ const validate = (state) => {
return errors
}
const selectedFile = ref(null);
const selectedFiles = ref([]);
// Change handler for the file input: remembers what the user picked so that
// onSubmit can append the files to the upload form data.
// NOTE(review): this text comes from a diff view where removed and added lines are
// interleaved; the `selectedFile` assignment and one of the console.log calls are
// presumably the removed side — confirm against the real file.
const onFileChange = async (e) => {
selectedFile.value = e.target.files[0];
console.log(e.target.files);
// Stored as-is (not copied into an array); onSubmit wraps it in Array.from before iterating.
selectedFiles.value = e.target.files;
console.log('changed');
};
const loading = ref(false);
const onSubmit = async () => {
loading.value = true;
const formData = new FormData();
formData.append("file", selectedFile.value);
Array.from(selectedFiles.value).forEach((file, index) => {
console.log(`Index ${index}`, file);
formData.append(`file_${index}`, file);
});
formData.append("name", state.name);
formData.append("description", state.description);
formData.append("embedding", state.embedding);
Expand All @@ -38,7 +43,7 @@ const onSubmit = async () => {
});
loading.value = false;
state.selectedFile = null;
state.selectedFiles = [];
refresh();
}
Expand All @@ -52,8 +57,8 @@ const columns = [{
key: 'name',
label: 'Name'
}, {
key: 'filename',
label: 'File Name'
key: 'files',
label: 'Files'
}, {
key: 'description',
label: 'Description'
Expand All @@ -67,7 +72,7 @@ const knowlegeBases = computed(() => {
return {
id: knowledgebase.id,
name: knowledgebase.name,
filename: knowledgebase.filename,
files: knowledgebase.files.map((file) => file.url).join(','),
description: knowledgebase.description,
embedding: knowledgebase.embedding,
}
Expand All @@ -93,7 +98,7 @@ const knowlegeBases = computed(() => {
</UFormGroup>

<UFormGroup label="File as Knowledge Base" name="file">
<UInput type="file" size="sm" v-model="state.selectedFile" @change="onFileChange" />
<UInput multiple type="file" size="sm" v-model="state.selectedFile" @change="onFileChange" />
</UFormGroup>

<UButton type="submit" :loading="loading">
Expand Down
30 changes: 30 additions & 0 deletions prisma/migrations/20240305202320_knowlegebase_files/migration.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Warnings:
- You are about to drop the column `filename` on the `KnowledgeBase` table. All the data in the column will be lost.
*/
-- CreateTable
-- "KnowledgeBaseFile" holds one row per file ("url") belonging to a knowledge base.
-- The foreign key uses ON DELETE RESTRICT, so (with foreign-key enforcement on) a
-- "KnowledgeBase" row cannot be deleted while file rows still reference it.
CREATE TABLE "KnowledgeBaseFile" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"url" TEXT NOT NULL,
"knowledgeBaseId" INTEGER NOT NULL,
CONSTRAINT "KnowledgeBaseFile_knowledgeBaseId_fkey" FOREIGN KEY ("knowledgeBaseId") REFERENCES "KnowledgeBase" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
);

-- RedefineTables
-- Rebuilds "KnowledgeBase" without the "filename" column: create "new_KnowledgeBase",
-- copy every remaining column across, drop the old table, then rename the new one.
-- Existing "filename" values are not copied anywhere and are lost.
-- Foreign-key enforcement is switched off for the duration of the rebuild.
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_KnowledgeBase" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"name" TEXT NOT NULL,
"embedding" TEXT,
"description" TEXT,
"created" DATETIME,
"updated" DATETIME
);
INSERT INTO "new_KnowledgeBase" ("created", "description", "embedding", "id", "name", "updated") SELECT "created", "description", "embedding", "id", "name", "updated" FROM "KnowledgeBase";
DROP TABLE "KnowledgeBase";
ALTER TABLE "new_KnowledgeBase" RENAME TO "KnowledgeBase";
-- Unique index on "name" for the rebuilt table.
CREATE UNIQUE INDEX "KnowledgeBase_name_key" ON "KnowledgeBase"("name");
-- Report any foreign-key violations left by the rebuild, then re-enable enforcement.
PRAGMA foreign_key_check;
PRAGMA foreign_keys=ON;
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- RedefineTables
-- Rebuilds "KnowledgeBaseFile" so that its foreign key to "KnowledgeBase" is declared
-- ON DELETE CASCADE: deleting a knowledge base then also deletes its file rows.
-- Steps: create "new_KnowledgeBaseFile", copy all rows, drop the old table, rename.
-- Foreign-key enforcement is switched off for the duration of the rebuild.
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_KnowledgeBaseFile" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"url" TEXT NOT NULL,
"knowledgeBaseId" INTEGER NOT NULL,
CONSTRAINT "KnowledgeBaseFile_knowledgeBaseId_fkey" FOREIGN KEY ("knowledgeBaseId") REFERENCES "KnowledgeBase" ("id") ON DELETE CASCADE ON UPDATE CASCADE
);
-- All three columns are copied, so existing ids and parent links are preserved.
INSERT INTO "new_KnowledgeBaseFile" ("id", "knowledgeBaseId", "url") SELECT "id", "knowledgeBaseId", "url" FROM "KnowledgeBaseFile";
DROP TABLE "KnowledgeBaseFile";
ALTER TABLE "new_KnowledgeBaseFile" RENAME TO "KnowledgeBaseFile";
-- Report any foreign-key violations left by the rebuild, then re-enable enforcement.
PRAGMA foreign_key_check;
PRAGMA foreign_keys=ON;
9 changes: 8 additions & 1 deletion prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@ generator client {
// A named knowledge base. `name` is unique; `files` lists the KnowledgeBaseFile rows
// linked through the "Files" relation.
// NOTE(review): in this diff view `filename String` appears to be the removed line (the
// accompanying migration drops that column) — confirm it is absent from the real schema.
model KnowledgeBase {
id Int @id @default(autoincrement())
name String @unique
filename String
embedding String?
description String?
created DateTime?
updated DateTime?
files KnowledgeBaseFile[] @relation("Files")
}

// A single file attached to a KnowledgeBase via the "Files" relation.
model KnowledgeBaseFile {
id Int @id @default(autoincrement())
// The POST handler writes the uploaded file's `filename` here, so despite the
// column name this may hold a plain file name rather than a URL.
url String
// onDelete: Cascade — deleting the parent KnowledgeBase also deletes its file rows.
knowledgeBase KnowledgeBase @relation("Files", fields: [knowledgeBaseId], references: [id], onDelete: Cascade)
knowledgeBaseId Int
}
6 changes: 5 additions & 1 deletion server/api/knowledgebases/index.get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ import { PrismaClient, type KnowledgeBase } from '@prisma/client';
const listKnowledgeBases = async (): Promise<KnowledgeBase[] | null> => {
const prisma = new PrismaClient();
try {
return await prisma.knowledgeBase.findMany();
return await prisma.knowledgeBase.findMany({
include: {
files: true
}
});
} catch (error) {
console.error("Error fetching knowledge bases: ", error);
return null;
Expand Down
63 changes: 37 additions & 26 deletions server/api/knowledgebases/index.post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,30 @@ import { PrismaClient } from '@prisma/client';
// Ingests one uploaded file into a Chroma collection: writes `file.data` to
// `tmp/<file.filename>`, loads it with PDFLoader, splits the documents
// (chunkSize 1000, chunkOverlap 200), embeds them with OllamaEmbeddings using the
// `embedding` model name, and stores the splits in the collection `collectionName`.
//
// NOTE(review): this text comes from a diff view with removed and added lines
// interleaved, which is why several statements appear twice and the braces do not
// balance as shown — confirm the real structure (in particular whether the
// try/catch still wraps the body) against the actual file.
// NOTE(review): `file.filename` is used directly in the path; if it comes from a
// client upload it could contain path separators — confirm it is sanitised upstream.
// NOTE(review): nothing in this function removes the temporary file afterwards.
const ingestDocument = async (file, collectionName, embedding) => {
const tmp_file_path = `tmp/${file.filename}`;

try {
const status = await writeFile(tmp_file_path, file.data)
console.log(`Writing data to file ${tmp_file_path}: ${status}`);
const status = await writeFile(tmp_file_path, file.data)
console.log(`Writing data to file ${tmp_file_path}: ${status}`);

const loader = new PDFLoader(tmp_file_path);
const docs = await loader.load();
const loader = new PDFLoader(tmp_file_path);
const docs = await loader.load();

const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });
const splits = await textSplitter.splitDocuments(docs);
const embeddings = new OllamaEmbeddings({
model: embedding,
baseUrl: "http://localhost:11434",
});
await Chroma.fromDocuments(splits, embeddings, {
collectionName: collectionName,
url: "http://localhost:8000"
});
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });
const splits = await textSplitter.splitDocuments(docs);
const embeddings = new OllamaEmbeddings({
model: embedding,
baseUrl: "http://localhost:11434",
});

// Shared Chroma connection settings for both the "update" and "create" paths below.
const dbConfig = {
collectionName: collectionName,
url: "http://localhost:8000"
};
// Append to the collection when a handle is returned, otherwise create it from the splits.
// NOTE(review): verify that fromExistingCollection can actually yield a falsy value for a
// missing collection; if it always returns an instance the else branch never runs.
const existingCollection = await Chroma.fromExistingCollection(embeddings, dbConfig);
if (existingCollection) {
await existingCollection.addDocuments(splits);
console.log(`Chroma collection ${collectionName} updated`);
} else {
await Chroma.fromDocuments(splits, embeddings, dbConfig);
console.log(`Chroma collection ${collectionName} created`);
} catch (err) {
// Errors are only logged here, not rethrown, so the caller cannot tell that ingestion failed.
console.error(err)
}
}

Expand All @@ -37,13 +40,12 @@ export default defineEventHandler(async (event) => {

const knowledgeBase: { [key: string]: string | Date } = {};
const decoder = new TextDecoder("utf-8");
let uploadedFile = null;
const uploadedFiles = [];
items?.forEach((item) => {
const { name, data, filename } = item;
if (name) {
if (name === "file") {
uploadedFile = item;
knowledgeBase["filename"] = filename;
if (name.startsWith("file_")) {
uploadedFiles.push(item);
}
if (["name", "description", "embedding"].includes(name)) {
knowledgeBase[name] = decoder.decode(data);
Expand All @@ -56,13 +58,22 @@ export default defineEventHandler(async (event) => {
const affected = await prisma.knowledgeBase.create({
data: knowledgeBase
});
console.log(`Created knowledge base ${knowledgeBase.name} for ${knowledgeBase.filename}: ${affected}`);
console.log(`Created knowledge base ${knowledgeBase.name}: ${affected}`);

if (uploadedFile) {
await ingestDocument(uploadedFile, `collection_${affected.id}`, affected.embedding);
}
if (uploadedFiles.length > 0) {
for (const uploadedFile of uploadedFiles) {
await ingestDocument(uploadedFile, `collection_${affected.id}`, affected.embedding);

const createdKnowledgeBaseFile = await prisma.knowledgeBaseFile.create({
data: {
url: uploadedFile.filename,
knowledgeBaseId: affected.id
}
});

console.log(knowledgeBase);
console.log(createdKnowledgeBaseFile);
}
}

return {
status: "success"
Expand Down
2 changes: 1 addition & 1 deletion vercel.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"functions": {
"server/api/**/*.ts": {
"maxDuration": 60
"maxDuration": 10
}
}
}

0 comments on commit c6787d4

Please sign in to comment.