diff --git a/db/apply.sh b/db/apply.sh index 39e94c0..c4350dd 100755 --- a/db/apply.sh +++ b/db/apply.sh @@ -6,4 +6,4 @@ atlas schema apply \ -u "postgres://postgres:password@ai:9001/postgres?sslmode=disable" \ --to file://schema.sql \ - --dev-url "docker://postgres/15/test" \ \ No newline at end of file + --dev-url "docker://postgres/15/test" \ No newline at end of file diff --git a/db/schema.sql b/db/schema.sql index 4aeff0c..4e59850 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -23,9 +23,9 @@ CREATE TABLE articles( id serial PRIMARY KEY, created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, - title varchar(255), - slug varchar(255), - body text, + title varchar(255), -- REMOVE after translation + slug varchar(255), -- REMOVE after translation + body text, -- REMOVE after translation sveriges_radio_title varchar(255) NOT NULL, sveriges_radio_link varchar(255) NOT NULL, transcribed_text text, @@ -33,12 +33,30 @@ CREATE TABLE articles( image_url text, -- deprecated image_prompt text, -- deprecated image_is_ai_generated boolean DEFAULT TRUE, -- deprecated - audio_url text, + audio_url text, -- REMOVE after translation is_related_to_sweden boolean, + is_published boolean DEFAULT FALSE, -- REMOVE after translation + is_published_on_social_media boolean DEFAULT FALSE, -- REMOVE after translation + category text, -- REMOVE after translation + page_views integer DEFAULT 0 -- REMOVE after translation +); + +CREATE TABLE article_translations( + id serial PRIMARY KEY, + created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP, + article_id integer REFERENCES articles(id) ON DELETE CASCADE, + "language" text NOT NULL, + title varchar(255), + slug varchar(255), + body text, + audio_url text, is_published boolean DEFAULT FALSE, + social_media_hook text, is_published_on_social_media boolean DEFAULT FALSE, category text, - page_views integer DEFAULT 0 + page_views integer DEFAULT 0, + UNIQUE (article_id, "language") ); CREATE TABLE article_social_media_hooks( diff --git a/web/src/components/ArticleSummaryLarge.tsx b/web/src/components/ArticleSummaryLarge.tsx index 3a36a44..2f31411 100644 --- a/web/src/components/ArticleSummaryLarge.tsx +++ b/web/src/components/ArticleSummaryLarge.tsx @@ -22,7 +22,7 @@ export function ArticleSummaryLarge({ article }: { article: any }) { key={article.id} className={`flex space-x-4 md:col-span-1 col-span-2 border-b-1 border-gray-200 my-0 py-0 `} > - +

diff --git a/web/src/components/Footer.tsx b/web/src/components/Footer.tsx index 82d63f1..82417ee 100644 --- a/web/src/components/Footer.tsx +++ b/web/src/components/Footer.tsx @@ -3,6 +3,7 @@ import Link from 'next/link'; const navigation = { main: [ { name: 'About', href: '/about' }, + { name: 'Articles', href: '/articles' }, // { name: 'Blog', href: '#' }, // { name: 'Jobs', href: '#' }, // { name: 'Press', href: '#' }, diff --git a/web/src/components/Layout.tsx b/web/src/components/Layout.tsx index 2b5e7b4..d4cbafc 100644 --- a/web/src/components/Layout.tsx +++ b/web/src/components/Layout.tsx @@ -5,6 +5,8 @@ import { format } from 'date-fns'; import Link from 'next/link'; import { Footer } from './Footer'; import { UserButton, useAuth } from '@clerk/nextjs'; +import { useRouter } from 'next/router'; +import { languages } from '@/utils/helpers'; function TopBanner() { return ( @@ -23,20 +25,22 @@ function TopBanner() { } export function Layout({ children }: { children: React.ReactNode }) { - const now = new Date(); - const currentDay = format(now, 'EEEE'); - const currentDate = format(now, 'MMM d, yyyy'); + const router = useRouter(); + + const lang = (router.query.lang as string) ?? 'en'; const { isSignedIn } = useAuth(); + const dir = lang && languages[lang].rtl ? 'rtl' : 'ltr'; + return ( <> {/* */} -
+
-
- +
+
Logo
@@ -48,20 +52,11 @@ export function Layout({ children }: { children: React.ReactNode }) {

- Swedish News in English + {languages[lang].slogan}

-
-
- -
-
-
{currentDay}
-
{currentDate}
-
-
+
+
+ +
+ + {/*
+ +
+
+
{currentDay}
+
{currentDate}
+
*/} +
diff --git a/web/src/middleware.ts b/web/src/middleware.ts index 377e820..fe348f7 100644 --- a/web/src/middleware.ts +++ b/web/src/middleware.ts @@ -3,8 +3,11 @@ import { authMiddleware } from '@clerk/nextjs'; export default authMiddleware({ publicRoutes: [ '/', + '/:lang', + '/:lang/:slug', '/nyheter', '/nyheter/:slug', + '/nyheter/:lang/:slug', '/about', '/api/og-image/:slug', ], diff --git a/web/src/pages/nyheter/[slug].tsx b/web/src/pages/[lang]/[slug].tsx similarity index 87% rename from web/src/pages/nyheter/[slug].tsx rename to web/src/pages/[lang]/[slug].tsx index c2ae9e1..daabd3a 100644 --- a/web/src/pages/nyheter/[slug].tsx +++ b/web/src/pages/[lang]/[slug].tsx @@ -11,32 +11,36 @@ import Link from 'next/link'; const ReactPlayer = dynamic(() => import('react-player/lazy'), { ssr: false }); interface IParams extends ParsedUrlQuery { + lang: string; slug: string; } export async function getServerSideProps({ params }: { params: IParams }) { - const { slug } = params; + const { lang, slug } = params; const article = await db - .selectFrom('articles') - .innerJoin('articleImages', 'articles.articleImageId', 'articleImages.id') + .selectFrom('articleTranslations as at') + .innerJoin('articles as a', 'a.id', 'at.articleId') + .innerJoin('articleImages as ai', 'a.articleImageId', 'ai.id') .select([ - 'articles.id', - 'articles.createdAt', - 'articles.updatedAt', - 'articles.title', - 'articles.body', - 'articles.slug', - 'articles.sverigesRadioLink', - 'articles.sverigesRadioTitle', - 'articles.audioUrl', - 'articleImages.imageUrl', - 'articleImages.imageIsAiGenerated', - 'articleImages.creditInfo', - 'articleImages.imagePrompt', + 'at.id', + 'at.slug', + 'at.title', + 'at.body', + 'at.category', + 'at.audioUrl', + 'a.createdAt', + 'a.updatedAt', + 'a.sverigesRadioLink', + 'a.sverigesRadioTitle', + 'ai.imageUrl', + 'ai.imageIsAiGenerated', + 'ai.creditInfo', + 'ai.imagePrompt', ]) - .where('slug', '=', slug) - .where('isPublished', '=', true) + .where('at.slug', '=', slug) + .where('at.language', '=', lang as string) + .where('at.isPublished', '=', true) .executeTakeFirst(); if (!article) { @@ -46,7 +50,7 @@ export async function getServerSideProps({ params }: { params: IParams }) { } await db - .updateTable('articles') + .updateTable('articleTranslations') .set((eb) => ({ pageViews: eb.bxp('pageViews', '+', 1), })) diff --git a/web/src/pages/[lang]/index.tsx b/web/src/pages/[lang]/index.tsx new file mode 100644 index 0000000..953c55c --- /dev/null +++ b/web/src/pages/[lang]/index.tsx @@ -0,0 +1,142 @@ +import { ArticleSummaryLarge } from '@/components/ArticleSummaryLarge'; +import { ArticleSummarySmall } from '@/components/ArticleSummarySmall'; +import { MainContainer } from '@/components/MainContainer'; +import { db } from '@/utils/db'; +import type { InferGetServerSidePropsType } from 'next'; +import { type ParsedUrlQuery } from 'querystring'; +import Link from 'next/link'; + +interface IParams extends ParsedUrlQuery { + lang: string; +} + +export async function getServerSideProps({ params }: { params: IParams }) { + const { lang } = params; + + if (lang === 'en') { + return { + redirect: { + destination: '/', + permanent: true, + }, + }; + } + + const articles = await db + .selectFrom('articleTranslations as at') + .innerJoin('articles as a', 'a.id', 'at.articleId') + .innerJoin('articleImages as ai', 'a.articleImageId', 'ai.id') + .select([ + 'a.id', + 'a.createdAt', + 'at.title', + 'at.slug', + 'at.body', + 'at.category', + 'at.language', + 'ai.imageUrl', + ]) + .where('at.title', 'is not', null) + .where('at.isPublished', '=', true) + .where('at.language', '=', lang) + .orderBy('a.createdAt', 'desc') + .limit(25) + .execute(); + + let today = new Date(); // get the current date + let sevenDaysAgo = new Date(today); // create a copy of the current date + + sevenDaysAgo.setDate(today.getDate() - 7); // subtract 7 days + + const popularArticles = await db + .selectFrom('articles') + .innerJoin('articleImages', 'articles.articleImageId', 'articleImages.id') + .select([ + 'articles.id', + 'articles.createdAt', + 'articles.title', + 'articles.slug', + 'articles.body', + 'articles.category', + 'articleImages.imageUrl', + ]) + .where('title', 'is not', null) + .where('isPublished', '=', true) + .where('articles.createdAt', '>', sevenDaysAgo) + .orderBy('pageViews', 'desc') + .limit(8) + .execute(); + + return { + props: { + articles, + popularArticles, + lang, + }, + }; +} + +const Page = ( + props: InferGetServerSidePropsType, +) => { + // get first three articles + // const firstThreeArticles = props.articles.slice(0, 3); + + const { articles, popularArticles, lang } = props; + + return ( + + {/* */} +
+
+ {articles.map((article, i) => { + if (i % 5 === 0) { + return ( + + ); + } else { + return ( + + ); + } + })} +
+
+
Most Read
+
+ {popularArticles.map((article) => { + return ( + +
+
+
+ {article.title} +
+
+

+ {article.category} +

+
+
+ + ); + })} +
+
+
+ + ); +}; + +export default Page; diff --git a/web/src/pages/index.tsx b/web/src/pages/index.tsx index 23f7fcb..7f2d7b0 100644 --- a/web/src/pages/index.tsx +++ b/web/src/pages/index.tsx @@ -7,23 +7,27 @@ import Link from 'next/link'; export const getServerSideProps = async () => { const articles = await db - .selectFrom('articles') - .innerJoin('articleImages', 'articles.articleImageId', 'articleImages.id') + .selectFrom('articleTranslations as at') + .innerJoin('articles as a', 'a.id', 'at.articleId') + .innerJoin('articleImages as ai', 'a.articleImageId', 'ai.id') .select([ - 'articles.id', - 'articles.createdAt', - 'articles.title', - 'articles.slug', - 'articles.body', - 'articles.category', - 'articleImages.imageUrl', + 'a.id', + 'a.createdAt', + 'at.title', + 'at.slug', + 'at.body', + 'at.category', + 'ai.imageUrl', ]) - .where('title', 'is not', null) - .where('isPublished', '=', true) - .orderBy('createdAt', 'desc') + .where('at.title', 'is not', null) + .where('at.isPublished', '=', true) + .where('at.language', '=', 'en') + .orderBy('a.createdAt', 'desc') .limit(25) .execute(); + console.log({ articles }); + let today = new Date(); // get the current date let sevenDaysAgo = new Date(today); // create a copy of the current date @@ -48,8 +52,6 @@ export const getServerSideProps = async () => { .limit(8) .execute(); - console.log(articles); - return { props: { articles, @@ -153,7 +155,7 @@ const Page = ( {popularArticles.map((article) => { return ( diff --git a/web/src/utils/helpers.ts b/web/src/utils/helpers.ts index 4a55789..8674c23 100644 --- a/web/src/utils/helpers.ts +++ b/web/src/utils/helpers.ts @@ -26,3 +26,59 @@ export function isAllowedAdminUserId(userId: string) { 'user_2UNVeD1ZI8CTqInfzVfzrfYL0K7', ].includes(userId); } + +interface LanguageProps { + name: string; + slogan: string; + rtl?: boolean; +} + +export const languages: Record = { + en: { + name: 'English', + slogan: 'Swedish news in English', + }, + fi: { + name: 'Finnish', + slogan: 'Ruotsin uutisia suomeksi', + }, + ar: { + name: 'Arabic', + slogan: 'الأخبار السويدية باللغة العربية', + rtl: true, + }, + ru: { + name: 'Russian', + slogan: 'Шведские новости на русском языке', + }, + uk: { + name: 'Ukrainian', + slogan: 'Шведські новини українською', + }, + ckb: { + name: 'Kurdish (Sorani)', + slogan: 'هەواڵی سویدی بە زمانی کوردی', + rtl: true, + }, + fa: { + name: 'Persian', + slogan: 'اخبار سوئدی به زبان فارسی', + rtl: true, + }, + so: { + name: 'Somali', + slogan: 'Wararka Swedishka ee Soomaaliga', + }, + es: { + name: 'Spanish', + slogan: 'Noticias suecas en español', + }, + de: { + name: 'German', + slogan: 'Schwedische Nachrichten auf Deutsch', + }, + fr: { + name: 'French', + slogan: 'Actualités suédoises en français', + }, +}; diff --git a/web/src/utils/kysely-types.d.ts b/web/src/utils/kysely-types.d.ts index 6f10703..5034e6a 100644 --- a/web/src/utils/kysely-types.d.ts +++ b/web/src/utils/kysely-types.d.ts @@ -47,8 +47,26 @@ export interface ArticleSocialMediaHooks { hook: string | null; } +export interface ArticleTranslations { + id: Generated; + createdAt: Generated; + updatedAt: Generated; + articleId: number | null; + language: string; + title: string | null; + slug: string | null; + body: string | null; + audioUrl: string | null; + isPublished: Generated; + socialMediaHook: string | null; + isPublishedOnSocialMedia: Generated; + category: string | null; + pageViews: Generated; +} + export interface DB { articleImages: ArticleImages; articles: Articles; articleSocialMediaHooks: ArticleSocialMediaHooks; + articleTranslations: ArticleTranslations; } diff --git a/web/src/utils/types.ts b/web/src/utils/types.ts index 48aa493..99d07be 100644 --- a/web/src/utils/types.ts +++ b/web/src/utils/types.ts @@ -8,6 +8,7 @@ export interface Article { body: string | null; imageUrl: string | null; category: string | null; + language: string; } export const articleSchema = z.object({ diff --git a/worker/.env-example b/worker/.env-example index eae8cdc..a7cb2eb 100644 --- a/worker/.env-example +++ b/worker/.env-example @@ -1,15 +1,26 @@ -DATABASE_URL=postgres://elitan:nI2UokMqE8Wa@ep-fragrant-dew-970345.eu-central-1.aws.neon.tech/srai?sslmode=require +DATABASE_URL=postgres://postgres:password@ai:9001/postgres?sslmode=disable +PGPASSWORD=password -OPENAI_API_KEY=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 +OPENAI_API_KEY=sk-asdasd -UNSPLASH_ACCESS_KEY=yV0XXXgICEYng7Iq2WwzFMTrJKCZWTocwLE1tEx52qo +UNSPLASH_ACCESS_KEY= -SPACES_KEY=DO00RTMYRXCGK4YTGZ7D -SPACES_SECRET=4tK6E+zya4pGfrQbDZ6YqyHPxtPNMWu5qyvq4W3InfI +SPACES_KEY= +SPACES_SECRET= -ELEVEN_LABS_API_KEY=a8c0215aee9efcce0fa314b01933aced +ELEVEN_LABS_API_KEY= -DISCORD_WEBHOOK_ID=1137739749147295855 -DISCORD_WEBHOOK_TOKEN=3T_54nNr9KJM3hHvGqRGLJkW6RZFy4yBDOUvPTpOtG4-YgiqTWR22jRPmTbCeaNGHyDX +DISCORD_WEBHOOK_ID= +DISCORD_WEBHOOK_TOKEN= -STABLE_DIFFUSION_TEXT2IMG_ENDPOINT=http://100.101.51.53:7860/sdapi/v1/txt2img \ No newline at end of file +STABLE_DIFFUSION_TEXT2IMG_ENDPOINT= + +TWITTER_APP_KEY= +TWITTER_APP_SECRET= +TWITTER_ACCESS_TOKEN= +TWITTER_ACCESS_SECRET= + + +FACEBOOK_ACCESS_TOKEN= + +GOOGLE_API_TRANSLATION_KEY= \ No newline at end of file diff --git a/worker/src/1-get-sr-data.ts b/worker/src/1-get-sr-data.ts index 98a2bad..657788b 100644 --- a/worker/src/1-get-sr-data.ts +++ b/worker/src/1-get-sr-data.ts @@ -29,6 +29,11 @@ const baseUrl = 'https://sverigesradio.se'; continue; } + if (sverigesRadioTitle.includes('Godmorgon världen')) { + console.log('Skipping Godmorgon världen'); + continue; + } + const sverigesRadioLinkResponse = await axios.get(sverigesRadioLink); let articleContent = cheerio.load(sverigesRadioLinkResponse.data); diff --git a/worker/src/2-transcribe.ts b/worker/src/2-transcribe.ts index 3cbb4cb..d86c092 100644 --- a/worker/src/2-transcribe.ts +++ b/worker/src/2-transcribe.ts @@ -1,6 +1,7 @@ import * as child_process from 'child_process'; import { db, pool } from './utils/db'; import 'dotenv/config'; +import { logOnce } from 'kysely'; /** * Executes a shell command and return it as a Promise. @@ -62,6 +63,29 @@ function runCommand(cmd: string, timeout = 5000): Promise { continue; } + // get duration + try { + const res = await runCommand( + `ffprobe -i "/tmp/whisper/raw.mp4" -show_entries format=duration -v quiet -of csv="p=0"`, + ); + + const length = parseInt(res, 10); + + if (length > 300) { + console.log(`Episode is longer than 5 minutes, skipping`); + continue; + } + + if (length === 60) { + console.log(`Episode is exactly 60 seconds, skipping`); + continue; + } + } catch (error) { + console.error(`Unable do get duration of audio file - ${error}`); + // await db.deleteFrom('articles').where('id', '=', article.id).execute(); + continue; + } + console.log(`Encoding episode...`); await runCommand( 'ffmpeg -y -i /tmp/whisper/raw.mp4 -ar 16000 /tmp/whisper/converted.wav', diff --git a/worker/src/3-openai.ts b/worker/src/3-openai.ts index 144064c..516d66c 100644 --- a/worker/src/3-openai.ts +++ b/worker/src/3-openai.ts @@ -1,7 +1,13 @@ import slugify from 'slugify'; import { db } from './utils/db'; -import { generateArticle, textIsRelatedToSweden } from './utils/openai'; +import { + generateArticle, + generateTranslation, + textIsRelatedToSweden, +} from './utils/openai'; +import { LanguageEnum } from './utils/kysely-types'; +import { translate } from './utils/helpers'; (async () => { const articlesToRefine = await db @@ -36,7 +42,6 @@ import { generateArticle, textIsRelatedToSweden } from './utils/openai'; .updateTable('articles') .set({ isRelatedToSweden, - isPublished: true, }) .where('id', '=', article.id) .execute(); @@ -63,11 +68,18 @@ import { generateArticle, textIsRelatedToSweden } from './utils/openai'; body, category, imagePrompt, - socialMediaHook1, - socialMediaHook2, - socialMediaHook3, + socialMediaHook, } = generatedArticle; + // update main article with the image prompt + await db + .updateTable('articles') + .set({ + imagePrompt, + }) + .where('id', '=', article.id) + .execute(); + console.log('replace optional quotes in the title'); // remove optional quotes in the beginnning and end of the title @@ -80,37 +92,70 @@ import { generateArticle, textIsRelatedToSweden } from './utils/openai'; strict: true, }); - console.log('insert the article'); + console.log('insert the article translations'); await db - .updateTable('articles') - .set({ + .insertInto('articleTranslations') + .values({ + articleId: article.id, + language: 'en', title, slug, body, category, - imagePrompt, + socialMediaHook: socialMediaHook, + isPublished: true, }) - .where('id', '=', article.id) - .executeTakeFirst(); + .execute(); - console.log('insert the hooks'); - await db - .insertInto('articleSocialMediaHooks') - .values([ - { - articleId: article.id, - hook: socialMediaHook1, - }, - { - articleId: article.id, - hook: socialMediaHook2, - }, - { + const languages = [ + 'fi', + 'ar', + 'ckb', + 'so', + 'ru', + 'uk', + 'fa', + 'es', + 'de', + 'fr', + ]; + for (const language of languages) { + console.log(`generate article in ${language}`); + + const headlineTranslated = await translate({ + from: 'en', + to: language, + text: title, + }); + + const bodyTranslated = await translate({ + from: 'en', + to: language, + text: body, + }); + + const categoryTranslated = await translate({ + from: 'en', + to: language, + text: category, + }); + + await db + .insertInto('articleTranslations') + .values({ articleId: article.id, - hook: socialMediaHook3, - }, - ]) - .execute(); + language: language as LanguageEnum, + title: headlineTranslated, + slug: slugify(headlineTranslated, { + lower: true, + strict: true, + }), + body: bodyTranslated, + category: categoryTranslated, + isPublished: true, + }) + .execute(); + } } console.log('done'); diff --git a/worker/src/4-stable-diffusion.ts b/worker/src/4-stable-diffusion.ts index e2dcad2..a660165 100644 --- a/worker/src/4-stable-diffusion.ts +++ b/worker/src/4-stable-diffusion.ts @@ -46,7 +46,7 @@ const s3Client = new S3({ restore_faces: true, width: 800, height: 500, - batch_size: 4, + batch_size: 1, }); const response = await fetch(url, { diff --git a/worker/src/6-embeddings.ts b/worker/src/6-embeddings.ts new file mode 100644 index 0000000..1059f2d --- /dev/null +++ b/worker/src/6-embeddings.ts @@ -0,0 +1,8 @@ +async function main() { + // get all articles that does not have embeddings + // get the article's english article + // generate embeddings + // store the embeddings on the articles table +} + +main(); diff --git a/worker/src/playground.ts b/worker/src/playground.ts index 0aa1586..d4d90ed 100644 --- a/worker/src/playground.ts +++ b/worker/src/playground.ts @@ -1,18 +1,14 @@ import 'dotenv/config'; -import { postToFacebook } from './utils/helpers'; -import { twitterClient } from './utils/twitter'; -(async () => { - // const r = await postToFacebook( - // 'test123 hejhjh', - // 'https://google.se/teetetest', - // ); +import { translate } from './utils/helpers'; +import slugify from 'slugify'; - const title = 'test title'; - const linkToArticle = `https://nyheter.sh/nyheter/123123`; - const post = `${title}\n\n${linkToArticle}`; +async function main() { + const headline = 'ارتفاع الكرونا السويدية: التأثير على التضخم في المستقبل؟'; - const r = await twitterClient.v2.tweet(post); + const slug = slugify(headline, { strict: false, lower: true }); - console.log({ r }); -})(); + console.log({ slug }); +} + +main(); diff --git a/worker/src/utils/env.ts b/worker/src/utils/env.ts new file mode 100644 index 0000000..95ab842 --- /dev/null +++ b/worker/src/utils/env.ts @@ -0,0 +1,10 @@ +import { z } from 'zod'; +import 'dotenv/config'; + +const envSchema = z.object({ + GOOGLE_API_TRANSLATION_KEY: z.string(), +}); + +const env = envSchema.parse(process.env); + +export { env }; diff --git a/worker/src/utils/helpers.ts b/worker/src/utils/helpers.ts index d6cd9fe..2d9a1bf 100644 --- a/worker/src/utils/helpers.ts +++ b/worker/src/utils/helpers.ts @@ -1,4 +1,5 @@ import * as child_process from 'child_process'; +import { env } from './env'; export function getFirstTwoSentences(text: string): string { const sentences = text.match(/[^.!?]+[.!?]+/g) || []; @@ -58,3 +59,89 @@ export async function postToFacebook( return await response.json(); } + +export function getLanguageFromTwoLetters(language: string): string { + const languageMap: Record = { + fi: 'Finnish', + ar: 'Arabic', + ru: 'Russian', + uk: 'Ukrainian', + kur: 'Kurdish (Sorani)', + fa: 'Persian', + so: 'Somali', + es: 'Spanish', + de: 'German', + fr: 'French', + }; + + if (!languageMap[language]) { + throw new Error('Language not found'); + } + + return languageMap[language]; +} + +function escapeNewLines(str: string) { + return str.replace(/\n/g, '\\n'); +} + +function fixOpenAiNewLineResponse(str: string) { + return str + .split('"') + .map((chunk, index) => { + // Only replace \n inside the JSON string values, which are in every other index after splitting by " + if (index % 2 === 1) { + return escapeNewLines(chunk); + } else { + return chunk; + } + }) + .join('"'); +} + +export function parseOpenAiJson(str: string) { + return JSON.parse(fixOpenAiNewLineResponse(str)); +} + +interface TranslateParmas { + text: string; + from: string; + to: string; +} + +export async function translate(params: TranslateParmas) { + const { text, from, to } = params; + + const url = `https://translation.googleapis.com/language/translate/v2`; + const request = { + q: text, + source: from, + target: to, + format: 'text', + }; + + // Run request + const response = await fetch(`${url}?key=${env.GOOGLE_API_TRANSLATION_KEY}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(request), + }); + + const jsonResponse = await response.json(); + + console.log({ jsonResponse }); + + if (!jsonResponse.data || !jsonResponse.data.translations) { + throw new Error('Translation failed'); + } + + if (jsonResponse.data.translations.length === 0) { + throw new Error('Translation failed'); + } + + console.log(jsonResponse.data.translations); + + return jsonResponse.data.translations[0].translatedText; +} diff --git a/worker/src/utils/kysely-types.d.ts b/worker/src/utils/kysely-types.d.ts index 6f10703..18bc7c4 100644 --- a/worker/src/utils/kysely-types.d.ts +++ b/worker/src/utils/kysely-types.d.ts @@ -4,6 +4,8 @@ export type Generated = T extends ColumnType ? ColumnType : ColumnType; +export type LanguageEnum = "ar" | "en" | "fa" | "fi" | "kur" | "ru" | "so" | "uk"; + export type Timestamp = ColumnType; export interface ArticleImages { @@ -47,8 +49,26 @@ export interface ArticleSocialMediaHooks { hook: string | null; } +export interface ArticleTranslations { + id: Generated; + createdAt: Generated; + updatedAt: Generated; + articleId: number | null; + language: LanguageEnum; + title: string | null; + slug: string | null; + body: string | null; + audioUrl: string | null; + isPublished: Generated; + socialMediaHook: string | null; + isPublishedOnSocialMedia: Generated; + category: string | null; + pageViews: Generated; +} + export interface DB { articleImages: ArticleImages; articles: Articles; articleSocialMediaHooks: ArticleSocialMediaHooks; + articleTranslations: ArticleTranslations; } diff --git a/worker/src/utils/openai.ts b/worker/src/utils/openai.ts index d209bdc..8b60a70 100644 --- a/worker/src/utils/openai.ts +++ b/worker/src/utils/openai.ts @@ -2,7 +2,11 @@ import { OpenAI } from 'openai'; import { z } from 'zod'; import 'dotenv/config'; -import { removeLastSentence } from './helpers'; +import { + getLanguageFromTwoLetters, + parseOpenAiJson, + removeLastSentence, +} from './helpers'; const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, @@ -45,30 +49,43 @@ export const FUNCTIONS = { type: 'string', description: `Description of an image to be associated with the news article. Make the description detailed. Don't make the image about a specific person. Try to be as objective as possible.`, }, - socialMediaHook1: { + socialMediaHook: { type: 'string', description: `A short engaging facebook post with a hook for the article. The hook should start with an emoji followed by a space. No other emojis should be used.`, }, - socialMediaHook2: { - type: 'string', - description: `An engaging facebook post with a hook for the article. The hook should start with an emoji followed by a space. No other emojis should be used.`, - }, - socialMediaHook3: { - type: 'string', - description: `An engaging facebook post with a hook for the article. The hook should start with an emoji followed by a space. No other emojis should be used.`, - }, }, required: [ 'body', 'headline', 'category', 'imagePrompt', - 'socialMediaHook1', - 'socialMediaHook2', - 'socialMediaHook3', + 'socialMediaHook', ], }, }, + getTranslation: { + name: 'getTranslation', + description: + 'Translate a news article. Be very accurate in your translation.', + parameters: { + type: 'object', + properties: { + headline: { + type: 'string', + description: `The translated headline`, + }, + category: { + type: 'string', + description: `The translated category`, + }, + body: { + type: 'string', + description: `The translated article`, + }, + }, + required: ['body', 'headline', 'category'], + }, + }, bestArticleToPublish: { name: 'bestArticleToPublish', description: @@ -93,6 +110,7 @@ export const FUNCTIONS = { export const GPT_PROMPT_JOURNALIST = `You are a journalist who writes independent news articles. The news articles you write follow journalistic standards and are informative and engaging for the reader.`; export const GPT_PROMPT_ASSISTANT = `You are a helpful assistant`; +export const GPT_PROMPT_TRANSLATOR = `You are an expert translator`; export async function textIsRelatedToSweden(text: string): Promise { const bodyContent = `INFORMATION:\n${text}\nEND OF INFORMATION.\nHelp me with classifying the information above. Is the information related to Sweden or not?`; @@ -119,7 +137,7 @@ export async function textIsRelatedToSweden(text: string): Promise { const body = openAiBodyResponse.choices[0].message?.function_call?.arguments; - const bodyObject = JSON.parse(body as string); + const bodyObject = parseOpenAiJson(body as string); return bodyObject.isRelatedToSweden; } @@ -158,56 +176,85 @@ export async function generateArticle(transcribedText: string) { console.log(openAiBodyResponse.choices[0].message); console.log(jsonString); - const sanitizedJsonString = jsonString.replace(/\t/g, '\\t'); + const resJson = parseOpenAiJson(jsonString); - const resJson = JSON.parse(sanitizedJsonString); + console.log({ resJson }); const articleResponseSchema = z.object({ body: z.string(), headline: z.string(), category: z.string(), imagePrompt: z.string(), - socialMediaHook1: z.string(), - socialMediaHook2: z.string(), - socialMediaHook3: z.string(), + socialMediaHook: z.string(), }); return articleResponseSchema.parse(resJson); } -export async function bestArticleToPublish( - content: any, -): Promise<{ articleId: number; socialMediaHook: string }> { - const bodyContent = `INFORMATION:\n${content}\nEND OF INFORMATION.\nHelp me decide what news article to publish based on the title, body and social media hook. I want you to pick the news article that has the best potential to engage users on social media.`; +type GenerateTranslation = { + headline: string; + body: string; + category: string; + language: string; +}; + +export async function generateTranslation({ + headline, + body, + category, + language, +}: GenerateTranslation) { + const bodyContent = `I require you to translate some text for me. Translate the following news article from English to ${getLanguageFromTwoLetters( + language, + )}. Be very accurate in your translation. + +HEADLINE +${headline} +END OF HEADLINE + +CATEGORY +${category} +END OF CATEGORY + +ARTICLE: +${body} +END OF ARTICLE`; const openAiBodyResponse = await openai.chat.completions.create({ messages: [ { role: 'system', - content: GPT_PROMPT_ASSISTANT, + content: GPT_PROMPT_TRANSLATOR, }, { role: 'user', content: bodyContent, }, ], - functions: [FUNCTIONS.bestArticleToPublish], + functions: [FUNCTIONS.getTranslation], function_call: { - name: FUNCTIONS.bestArticleToPublish.name, + name: FUNCTIONS.getTranslation.name, }, model: 'gpt-3.5-turbo', temperature: 0.7, - max_tokens: 1200, + max_tokens: 1800, }); - const body = openAiBodyResponse.choices[0].message?.function_call?.arguments; + const jsonString = openAiBodyResponse.choices[0].message?.function_call + ?.arguments as string; - const resJson = JSON.parse(body as string); + console.log(openAiBodyResponse.choices[0].message); + console.log(jsonString); - const responseSchema = z.object({ - articleId: z.number(), - socialMediaHook: z.string(), + // const sanitizedJsonString = jsonString.replace(/\t/g, '\\t'); + + const resJson = parseOpenAiJson(jsonString); + + const translationResponseSchema = z.object({ + headline: z.string(), + category: z.string(), + body: z.string(), }); - return responseSchema.parse(resJson); + return translationResponseSchema.parse(resJson); }