From b6a5a364161e5446359e2da8354f8e4ec3dda15c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9=20Malzieu?= Date: Wed, 27 Mar 2024 18:35:48 +0100 Subject: [PATCH] Add a way to pass a meta tag max bytes size to proxy (#22) --- .../server/fixtures/frame-with-big-tag.html | 10 ++++++++++ packages/server/src/handlers.ts | 18 +++++++++-------- packages/server/src/parser.test.ts | 18 ++++++++++++++++- packages/server/src/parser.ts | 7 ++++++- packages/server/src/utils.ts | 20 +++++++++++++++++++ 5 files changed, 63 insertions(+), 10 deletions(-) create mode 100644 packages/server/fixtures/frame-with-big-tag.html diff --git a/packages/server/fixtures/frame-with-big-tag.html b/packages/server/fixtures/frame-with-big-tag.html new file mode 100644 index 0000000..664ec8f --- /dev/null +++ b/packages/server/fixtures/frame-with-big-tag.html @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/packages/server/src/handlers.ts b/packages/server/src/handlers.ts index 33ac9e5..963063e 100644 --- a/packages/server/src/handlers.ts +++ b/packages/server/src/handlers.ts @@ -1,9 +1,9 @@ import type { GetMetadataResponse, PostRedirectResponse } from '@open-frames/proxy-types'; -import { CORS_HEADERS } from './constants.js'; +import { CORS_HEADERS, TAG_PREFIXES } from './constants.js'; import { ErrorResponse } from './errors.js'; import { extractMetaTags, getFrameInfo } from './parser.js'; -import { getMimeType, getProxySafeMediaHeaders, getUrl, metaTagsToObject } from './utils.js'; +import { getMaxMetaTagSize, getMimeType, getProxySafeMediaHeaders, getUrl, metaTagsToObject } from './utils.js'; export async function handleGet(req: Request) { const url = getUrl(req); @@ -11,7 +11,8 @@ export async function handleGet(req: Request) { if (!url) { return new Response('Missing url query param', { status: 400 }); } - const { data, headersToForward } = await downloadAndExtract(url); + const maxMetaTagSize = getMaxMetaTagSize(req); + const { data, headersToForward } = await downloadAndExtract(url, 
maxMetaTagSize); const res: GetMetadataResponse = { url, extractedTags: metaTagsToObject(data), @@ -34,7 +35,8 @@ export async function handlePost(req: Request) { if (!url) { return new Response('Missing url query param', { status: 400, headers: CORS_HEADERS }); } - const data = await postAndExtract(url, body); + const maxMetaTagSize = getMaxMetaTagSize(req); + const data = await postAndExtract(url, body, maxMetaTagSize); const res: GetMetadataResponse = { url, @@ -95,7 +97,7 @@ export async function handleMedia(req: Request) { }); } -export async function postAndExtract(url: string, body: unknown) { +export async function postAndExtract(url: string, body: unknown, maxMetaTagSize?: number | undefined) { const signal = AbortSignal.timeout(10000); const response = await fetch(url, { method: 'POST', @@ -112,10 +114,10 @@ export async function postAndExtract(url: string, body: unknown) { } const text = await response.text(); - return extractMetaTags(text); + return extractMetaTags(text, TAG_PREFIXES, maxMetaTagSize); } -export async function downloadAndExtract(url: string) { +export async function downloadAndExtract(url: string, maxMetaTagSize?: number | undefined) { const signal = AbortSignal.timeout(10000); const response = await fetch(url, { redirect: 'follow', signal }); // TODO: Better error handling @@ -127,7 +129,7 @@ export async function downloadAndExtract(url: string) { // TODO: Stream response until you see and then stop const text = await response.text(); - return { data: extractMetaTags(text), headersToForward }; + return { data: extractMetaTags(text, TAG_PREFIXES, maxMetaTagSize), headersToForward }; } export async function findRedirect(url: string, body: unknown): Promise { diff --git a/packages/server/src/parser.test.ts b/packages/server/src/parser.test.ts index c77e752..78429ce 100644 --- a/packages/server/src/parser.test.ts +++ b/packages/server/src/parser.test.ts @@ -30,6 +30,7 @@ async function serveHtml(port: number) { const testCases = [ { file: 
'github.html', + maxMetaTagSize: undefined, expectedTags: { 'og:title': 'oven-sh/bun: Incredibly fast JavaScript runtime, bundler, test runner, and package manager – all in one', 'og:image': 'https://opengraph.githubassets.com/14c49397fbfdc07e07d589d265396ddb65eda364617f14d1976937a842bb0983/oven-sh/bun', @@ -39,6 +40,7 @@ const testCases = [ }, { file: 'ogp.html', + maxMetaTagSize: undefined, expectedTags: { 'og:title': 'Open Graph protocol', 'og:image': 'https://ogp.me/logo.png', @@ -49,6 +51,7 @@ const testCases = [ }, { file: 'minimal-frame.html', + maxMetaTagSize: undefined, expectedTags: { 'fc:frame': EXPECTED_FRAME_FARCASTER_VERSION, 'fc:frame:image': EXPECTED_FRAME_IMAGE, @@ -66,6 +69,7 @@ const testCases = [ }, { file: 'minimal-open-frame.html', + maxMetaTagSize: undefined, expectedTags: { 'of:accepts:xmtp': '1', 'of:image': EXPECTED_FRAME_IMAGE, @@ -84,6 +88,7 @@ const testCases = [ }, { file: 'mixed-frame.html', + maxMetaTagSize: undefined, expectedTags: { 'fc:frame': EXPECTED_FRAME_FARCASTER_VERSION, 'fc:frame:image': `fc-${EXPECTED_FRAME_IMAGE}`, @@ -106,6 +111,7 @@ const testCases = [ }, { file: 'frame-with-all-fields.html', + maxMetaTagSize: undefined, expectedTags: { 'of:version': EXPECTED_FRAME_VERSION, 'of:accepts:xmtp': EXPECTED_FRAME_XMTP_VERSION, @@ -164,6 +170,16 @@ const testCases = [ }, }, }, + { + file: 'frame-with-big-tag.html', + maxMetaTagSize: 1024, + expectedTags: { + // no image tag because image is 2kb + 'fc:frame': EXPECTED_FRAME_FARCASTER_VERSION, + 'fc:frame:post_url': EXPECTED_FRAME_POST_URL, + }, + // since image tag is not valid, no frame info + }, ] as const; describe('metadata parsing', () => { @@ -180,7 +196,7 @@ describe('metadata parsing', () => { for (const testCase of testCases) { test(`can extract tags from ${testCase.file}`, async () => { - const { data: metaTags } = await downloadAndExtract(`http://localhost:${PORT}/${testCase.file}`); + const { data: metaTags } = await 
downloadAndExtract(`http://localhost:${PORT}/${testCase.file}`, testCase.maxMetaTagSize); const extractedTags = metaTagsToObject(metaTags); for (const [key, value] of Object.entries(testCase.expectedTags)) { diff --git a/packages/server/src/parser.ts b/packages/server/src/parser.ts index 8c4cb4b..bfe36a9 100644 --- a/packages/server/src/parser.ts +++ b/packages/server/src/parser.ts @@ -3,10 +3,11 @@ import { load } from 'cheerio'; import { ALLOWED_ACTIONS, FRAMES_PREFIXES, TAG_PREFIXES } from './constants.js'; import type { DeepPartial } from './types.js'; +import { getStrByteSize } from './utils.js'; type MetaTag = [string, string]; -export function extractMetaTags(html: string, tagPrefixes = TAG_PREFIXES) { +export function extractMetaTags(html: string, tagPrefixes = TAG_PREFIXES, maxMetaTagSize?: number | undefined) { const $ = load(html); const metaTags = $('meta'); const metaTagsArray = Array.from(metaTags); @@ -26,6 +27,10 @@ export function extractMetaTags(html: string, tagPrefixes = TAG_PREFIXES) { return acc; } + if (maxMetaTagSize && getStrByteSize(content) > maxMetaTagSize) { // when a limit is set, skip tags whose content is larger than it in bytes + return acc; + } + acc.push([property, content]); return acc; diff --git a/packages/server/src/utils.ts b/packages/server/src/utils.ts index 4865e19..916befa 100644 --- a/packages/server/src/utils.ts +++ b/packages/server/src/utils.ts @@ -10,6 +10,18 @@ export function getUrl(req: Request) { return url; } +export function getMaxMetaTagSize(req: Request) { // optional max-meta-tag-bytes query param: number when valid, undefined when absent, 400 ErrorResponse when not a positive integer + const maxMetaTagSize = new URL(req.url).searchParams.get('max-meta-tag-bytes'); + if (maxMetaTagSize) { + if (isPositiveInteger(maxMetaTagSize)) { + return Number(maxMetaTagSize); + } else { + throw new ErrorResponse('Could not parse max-meta-tag-bytes query param', 400); + } + } + return undefined; +} + export function 
getRequestPath(req: Request): string { return new URL(req.url).pathname; } @@ -49,3 +61,11 @@ export function metaTagsToObject(tags: [string, string][]): Record, ); } + +export function getStrByteSize(str: 
string): number { // byte size of str as reported by a Blob holding it (UTF-8 encoded) + return new Blob([str]).size; +} + +export function isPositiveInteger(str: string): boolean { // true when Number(str) is an integer greater than 0 + return Number.isInteger(Number(str)) && Number(str) > 0; +}