Skip to content

Commit

Permalink
ai search proof of concept
Browse files Browse the repository at this point in the history
  • Loading branch information
lassejaco committed Apr 27, 2024
1 parent 17ef30c commit 47fc8ee
Show file tree
Hide file tree
Showing 7 changed files with 1,420 additions and 2 deletions.
4 changes: 3 additions & 1 deletion devcon/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"lint": "next lint",
"scripts:db-cleanup": "ts-node src/scripts/db-cleanup",
"scripts:pretalx-import": "ts-node src/scripts/pretalx-import",
"scripts:twitter-import": "ts-node src/scripts/twitter-import"
"scripts:twitter-import": "ts-node src/scripts/twitter-import",
"ai": "ts-node src/pages/api/ai/open-ai"
},
"dependencies": {
"@elastic/elasticsearch": "^8.1.0",
Expand Down Expand Up @@ -51,6 +52,7 @@
"next-pwa": "^5.5.4",
"node-fetch": "2",
"nodemailer": "^6.7.2",
"openai": "^4.38.5",
"panzoom": "^9.4.3",
"qrcode.react": "^1.0.1",
"query-string": "^7.1.0",
Expand Down
Empty file.
77 changes: 77 additions & 0 deletions devcon/src/pages/api/ai/load-content.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import fs from 'fs';
import path from 'path';
import matter from 'gray-matter'; // For parsing front matter in MDX files

// Function to read and parse MDX files from a folder
// Read every MDX file in `relativeFolderPath` (resolved against this module),
// extract the frontmatter sections of each, and dump the aggregate to
// sections.json next to this module (input for the OpenAI embedding step).
// Fire-and-forget: errors are logged, nothing is returned.
function loadAndParseMDXFiles(relativeFolderPath: string): void {
  const folderPath = path.resolve(__dirname, relativeFolderPath);

  fs.readdir(folderPath, (err, files) => {
    if (err) {
      console.error('Error reading folder:', err);
      return;
    }

    const sectionsByFile: Record<string, string[]> = {};
    let pending = files.length; // files whose read has not completed yet

    // Write the aggregated result exactly once, after every file has been
    // handled. Bug fix: the original wrote sections.json inside each file's
    // read callback, so it was rewritten once per file, raced concurrent
    // writes, and could persist a partial snapshot (reads complete in
    // arbitrary order).
    const writeIfDone = () => {
      pending -= 1;
      if (pending > 0) return;

      const outputPath = path.join(__dirname, 'sections.json');
      fs.writeFile(outputPath, JSON.stringify(sectionsByFile, null, 2), (writeErr) => {
        if (writeErr) {
          console.error('Error writing file:', writeErr);
          return;
        }
        console.log('Sections written to sections.json');
      });
    };

    if (files.length === 0) {
      // Empty folder: still emit an (empty) sections.json for consistency.
      pending = 1;
      writeIfDone();
      return;
    }

    files.forEach((file) => {
      const filePath = path.join(folderPath, file);

      fs.readFile(filePath, 'utf8', (readErr, data) => {
        if (readErr) {
          // Log and keep going so one unreadable file does not block output.
          console.error('Error reading file:', readErr);
          writeIfDone();
          return;
        }

        // Parse MDX frontmatter; its leaf values become embedding sections.
        const { data: frontMatter } = matter(data);
        sectionsByFile[file] = extractSections(frontMatter);

        writeIfDone();
      });
    });
  });
}

// Function to extract sections from frontmatter data
function extractSections(frontMatter: any): string[] {
const sections: string[] = [];

// Recursive function to extract fields from the frontmatter data
function extractFields(obj: any): void {
Object.keys(obj).forEach((key) => {
const value = obj[key];

// Check if the value is an object and not an array or a primitive value
if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
extractFields(value);
} else {
// Add the value to the sections array
sections.push(value);
}
});
}

extractFields(frontMatter);

return sections;
}


// Entry point that kicks off the MDX -> sections.json extraction.
// NOTE(review): this lives under pages/api but takes no (req, res) and never
// sends a response, so as a Next.js API route the request would hang —
// presumably it is meant to be invoked manually/via script; confirm usage.
export default () => {
  // Call the function with the folder path containing MDX files
  return loadAndParseMDXFiles('../../../../cms/pages');
}
176 changes: 176 additions & 0 deletions devcon/src/pages/api/ai/open-ai.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import OpenAI from "openai";
import fs from 'fs';
import path from 'path'
// import LoadContent from './load-content';
require('dotenv').config()

// LoadContent();

import sections from './sections.json';

// Shared OpenAI client for this module; OPEN_AI_KEY is loaded from .env by
// the dotenv call above (undefined key will fail at request time).
const openai = new OpenAI({
  apiKey: process.env.OPEN_AI_KEY
});

// Load the pre-computed embeddings ({ embedding, text } entries) from
// openai_embeddings.json next to this module. Throws if the file is missing
// or is not valid JSON.
function loadEmbeddings() {
  const embeddingsFile = path.resolve(__dirname, 'openai_embeddings.json');
  const raw = fs.readFileSync(embeddingsFile, 'utf8');
  return JSON.parse(raw);
}

/**
 * Cosine similarity between two equal-length vectors: the dot product
 * divided by the product of the vector magnitudes.
 *
 * @param vecA The first vector.
 * @param vecB The second vector.
 * @returns The cosine similarity (1 = same direction, 0 = orthogonal).
 */
function cosineSimilarity(vecA: number[], vecB: number[]): number {
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;

  // Single pass; accumulation order per component matches the reduce-based
  // original, so floating-point results are identical.
  for (let i = 0; i < vecA.length; i++) {
    dot += vecA[i] * vecB[i];
    sumSqA += vecA[i] * vecA[i];
    sumSqB += vecB[i] * vecB[i];
  }

  return dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
}


// // Function to find the most relevant section
// async function findMostRelevantSection(query: any) {
// const embeddings = loadEmbeddings();
// const queryEmbedding = await createOpenAIEmbedding(query);

// let highestSimilarity = -1;
// let mostRelevantSection = null;

// embeddings.forEach((section: any, index: any) => {
// const similarity = cosineSimilarity(queryEmbedding, section.embedding); // Directly use `section.embedding` since it's an array
// if (similarity > highestSimilarity) {
// highestSimilarity = similarity;
// mostRelevantSection = section; // `Section ${index + 1}`; // You may want to replace this with a more descriptive identifier
// }
// });

// return mostRelevantSection;
// }

// Request a single embedding vector for `text` from the OpenAI embeddings
// API (text-embedding-3-small, float encoding) and return it.
async function createOpenAIEmbedding(text: any) {
  const result = await openai.embeddings.create({
    encoding_format: "float",
    input: text,
    model: "text-embedding-3-small",
  });

  const [first] = result.data;
  return first.embedding;
}

// Function to create OpenAI embeddings
// async function createOpenAIEmbeddings(sections: string): Promise<void> {
// try {
// const embedding = await openai.embeddings.create({
// model: "text-embedding-3-small",
// input: sections,
// encoding_format: "float",
// });

// fs.writeFileSync(path.resolve(__dirname, 'openai_embeddings.json'), JSON.stringify(embedding));

// console.log('OpenAI Embeddings:', embedding);
// } catch (error) {
// console.error('Error creating OpenAI embeddings:', error);
// }
// }

/**
 * Ask the chat completions API to answer `query` using `relevantText` as
 * grounding context. Returns the first completion choice.
 *
 * @param relevantText Context retrieved via embedding similarity.
 * @param query The user's question.
 */
async function generateResponseUsingCompletionsAPI(relevantText: string, query: string) {
  const prompt = `Based on the following information: "${relevantText}", how would you answer the question: "${query}"?`;
  const completion = await openai.chat.completions.create({
    model: "gpt-3.5-turbo",
    messages: [
      {"role": "system", "content": "You are a helpful assistant."},
      // Bug fix: the original sent a hard-coded placeholder question
      // ("Who won the world series in 2020?") and never used `prompt`,
      // so the retrieved context and the real query were ignored.
      {"role": "user", "content": prompt}
    ],
    max_tokens: 150,
    temperature: 0.5
  });

  return completion.choices[0];
}

// Module-level helper API: embed the site content, look up the section most
// relevant to a query, and generate a grounded chat completion.
const api = (() => {
  const _interface = {
    /**
     * Embed every formatted content section and persist the
     * { embedding, text } pairs to openai_embeddings.json next to this module.
     */
    createEmbeddingsFromContent: async () => {
      const formattedSections = Object.entries(sections).map(([key, value]) => {
        return `Page ${key}: ${value}`;
      })

      try {
        const allPromises = formattedSections.map(async (section) => {
          const embedding = await createOpenAIEmbedding(section);

          return {
            embedding: embedding,
            text: section
          }
        })

        const results = await Promise.allSettled(allPromises);

        // Bug fix: persist only fulfilled results. The original mapped every
        // settled entry to `.value` (silenced with @ts-ignore), so any failed
        // embedding request wrote `undefined` into the JSON file, corrupting
        // later similarity lookups.
        const fulfilled = results
          .filter(
            (r): r is PromiseFulfilledResult<{ embedding: number[]; text: string }> =>
              r.status === 'fulfilled'
          )
          .map((r) => r.value);

        fs.writeFileSync(path.resolve(__dirname, 'openai_embeddings.json'), JSON.stringify(fulfilled));
      } catch (error) {
        console.error('Error creating OpenAI embeddings:', error);
      }
    },
    /**
     * Return the stored section text whose embedding is most cosine-similar
     * to the embedding of `query`. Returns '' when no embeddings are stored.
     */
    getRelevantTextByQuery: async (query: string) => {
      const embeddings = loadEmbeddings();
      const queryEmbedding = await createOpenAIEmbedding(query);

      let highestSimilarity = -1;
      let mostRelevantSection = '';

      embeddings.forEach((section: any) => {
        const similarity = cosineSimilarity(queryEmbedding, section.embedding);

        if (similarity > highestSimilarity) {
          highestSimilarity = similarity;
          mostRelevantSection = section.text;
        }
      });

      return mostRelevantSection;
    },
    /**
     * Generate a chat completion grounded in `relevantText` for `query` and
     * return the first choice.
     * NOTE(review): the grounded prompt is sent as the *system* message with
     * no user message — unusual for the chat API; confirm this is intended.
     */
    generateResponseUsingCompletionsAPI: async (relevantText: string, query: string) => {
      const prompt = `Based on the following information: "${relevantText}", how would you answer the question: "${query}"?`;
      const completion = await openai.chat.completions.create({
        model: "gpt-3.5-turbo",
        messages: [
          {"role": "system", "content": prompt},
        ]
      })

      return completion.choices[0];
    }
  }

  return _interface
})();

// api.createEmbeddingsFromContent();

// Demo entry point: answer a sample query against the embedded content.
const main = async () => {
  const query = 'Where was Devcon 0 held?';
  // Compare embedding of query with each section, return most similar
  const mostRelevantSection = await api.getRelevantTextByQuery(query)
  // Take result of most relevant section and generate response
  const relevantText = await api.generateResponseUsingCompletionsAPI(mostRelevantSection, query)

  console.log('The query was: ', query);
  // NOTE(review): `relevantText` is a completion *choice* object, not plain
  // text — logged as-is to preserve existing output; consider
  // `.message.content` if a string is wanted.
  console.log('The answer was: ', relevantText);

}

// Bug fix: the original bare `main();` produced an unhandled promise
// rejection whenever the embeddings file was missing or an API call failed.
main().catch((error) => console.error('AI search demo failed:', error));

// https://cookbook.openai.com/examples/question_answering_using_embeddings
Loading

0 comments on commit 47fc8ee

Please sign in to comment.