From b6a5a364161e5446359e2da8354f8e4ec3dda15c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?No=C3=A9=20Malzieu?=
Date: Wed, 27 Mar 2024 18:35:48 +0100
Subject: [PATCH] Add a way to pass a meta tag max bytes size to proxy (#22)
---
.../server/fixtures/frame-with-big-tag.html | 10 ++++++++++
packages/server/src/handlers.ts | 18 +++++++++--------
packages/server/src/parser.test.ts | 18 ++++++++++++++++-
packages/server/src/parser.ts | 7 ++++++-
packages/server/src/utils.ts | 20 +++++++++++++++++++
5 files changed, 63 insertions(+), 10 deletions(-)
create mode 100644 packages/server/fixtures/frame-with-big-tag.html
diff --git a/packages/server/fixtures/frame-with-big-tag.html b/packages/server/fixtures/frame-with-big-tag.html
new file mode 100644
index 0000000..664ec8f
--- /dev/null
+++ b/packages/server/fixtures/frame-with-big-tag.html
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/packages/server/src/handlers.ts b/packages/server/src/handlers.ts
index 33ac9e5..963063e 100644
--- a/packages/server/src/handlers.ts
+++ b/packages/server/src/handlers.ts
@@ -1,9 +1,9 @@
import type { GetMetadataResponse, PostRedirectResponse } from '@open-frames/proxy-types';
-import { CORS_HEADERS } from './constants.js';
+import { CORS_HEADERS, TAG_PREFIXES } from './constants.js';
import { ErrorResponse } from './errors.js';
import { extractMetaTags, getFrameInfo } from './parser.js';
-import { getMimeType, getProxySafeMediaHeaders, getUrl, metaTagsToObject } from './utils.js';
+import { getMaxMetaTagSize, getMimeType, getProxySafeMediaHeaders, getUrl, metaTagsToObject } from './utils.js';
export async function handleGet(req: Request) {
const url = getUrl(req);
@@ -11,7 +11,8 @@ export async function handleGet(req: Request) {
if (!url) {
return new Response('Missing url query param', { status: 400 });
}
- const { data, headersToForward } = await downloadAndExtract(url);
+ const maxMetaTagSize = getMaxMetaTagSize(req);
+ const { data, headersToForward } = await downloadAndExtract(url, maxMetaTagSize);
const res: GetMetadataResponse = {
url,
extractedTags: metaTagsToObject(data),
@@ -34,7 +35,8 @@ export async function handlePost(req: Request) {
if (!url) {
return new Response('Missing url query param', { status: 400, headers: CORS_HEADERS });
}
- const data = await postAndExtract(url, body);
+ const maxMetaTagSize = getMaxMetaTagSize(req);
+ const data = await postAndExtract(url, body, maxMetaTagSize);
const res: GetMetadataResponse = {
url,
@@ -95,7 +97,7 @@ export async function handleMedia(req: Request) {
});
}
-export async function postAndExtract(url: string, body: unknown) {
+export async function postAndExtract(url: string, body: unknown, maxMetaTagSize: number | undefined) {
const signal = AbortSignal.timeout(10000);
const response = await fetch(url, {
method: 'POST',
@@ -112,10 +114,10 @@ export async function postAndExtract(url: string, body: unknown) {
}
const text = await response.text();
- return extractMetaTags(text);
+ return extractMetaTags(text, TAG_PREFIXES, maxMetaTagSize);
}
-export async function downloadAndExtract(url: string) {
+export async function downloadAndExtract(url: string, maxMetaTagSize?: number | undefined) {
const signal = AbortSignal.timeout(10000);
const response = await fetch(url, { redirect: 'follow', signal });
// TODO: Better error handling
@@ -127,7 +129,7 @@ export async function downloadAndExtract(url: string) {
// TODO: Stream response until you see </head> and then stop
const text = await response.text();
- return { data: extractMetaTags(text), headersToForward };
+ return { data: extractMetaTags(text, TAG_PREFIXES, maxMetaTagSize), headersToForward };
}
export async function findRedirect(url: string, body: unknown): Promise {
diff --git a/packages/server/src/parser.test.ts b/packages/server/src/parser.test.ts
index c77e752..78429ce 100644
--- a/packages/server/src/parser.test.ts
+++ b/packages/server/src/parser.test.ts
@@ -30,6 +30,7 @@ async function serveHtml(port: number) {
const testCases = [
{
file: 'github.html',
+ maxMetaTagSize: undefined,
expectedTags: {
'og:title': 'oven-sh/bun: Incredibly fast JavaScript runtime, bundler, test runner, and package manager – all in one',
'og:image': 'https://opengraph.githubassets.com/14c49397fbfdc07e07d589d265396ddb65eda364617f14d1976937a842bb0983/oven-sh/bun',
@@ -39,6 +40,7 @@ const testCases = [
},
{
file: 'ogp.html',
+ maxMetaTagSize: undefined,
expectedTags: {
'og:title': 'Open Graph protocol',
'og:image': 'https://ogp.me/logo.png',
@@ -49,6 +51,7 @@ const testCases = [
},
{
file: 'minimal-frame.html',
+ maxMetaTagSize: undefined,
expectedTags: {
'fc:frame': EXPECTED_FRAME_FARCASTER_VERSION,
'fc:frame:image': EXPECTED_FRAME_IMAGE,
@@ -66,6 +69,7 @@ const testCases = [
},
{
file: 'minimal-open-frame.html',
+ maxMetaTagSize: undefined,
expectedTags: {
'of:accepts:xmtp': '1',
'of:image': EXPECTED_FRAME_IMAGE,
@@ -84,6 +88,7 @@ const testCases = [
},
{
file: 'mixed-frame.html',
+ maxMetaTagSize: undefined,
expectedTags: {
'fc:frame': EXPECTED_FRAME_FARCASTER_VERSION,
'fc:frame:image': `fc-${EXPECTED_FRAME_IMAGE}`,
@@ -106,6 +111,7 @@ const testCases = [
},
{
file: 'frame-with-all-fields.html',
+ maxMetaTagSize: undefined,
expectedTags: {
'of:version': EXPECTED_FRAME_VERSION,
'of:accepts:xmtp': EXPECTED_FRAME_XMTP_VERSION,
@@ -164,6 +170,16 @@ const testCases = [
},
},
},
+ {
+ file: 'frame-with-big-tag.html',
+ maxMetaTagSize: 1024,
+ expectedTags: {
+ // no image tag because image is 2kb
+ 'fc:frame': EXPECTED_FRAME_FARCASTER_VERSION,
+ 'fc:frame:post_url': EXPECTED_FRAME_POST_URL,
+ },
+ // since image tag is not valid, no frame info
+ },
] as const;
describe('metadata parsing', () => {
@@ -180,7 +196,7 @@ describe('metadata parsing', () => {
for (const testCase of testCases) {
test(`can extract tags from ${testCase.file}`, async () => {
- const { data: metaTags } = await downloadAndExtract(`http://localhost:${PORT}/${testCase.file}`);
+ const { data: metaTags } = await downloadAndExtract(`http://localhost:${PORT}/${testCase.file}`, testCase.maxMetaTagSize);
const extractedTags = metaTagsToObject(metaTags);
for (const [key, value] of Object.entries(testCase.expectedTags)) {
diff --git a/packages/server/src/parser.ts b/packages/server/src/parser.ts
index 8c4cb4b..bfe36a9 100644
--- a/packages/server/src/parser.ts
+++ b/packages/server/src/parser.ts
@@ -3,10 +3,11 @@ import { load } from 'cheerio';
import { ALLOWED_ACTIONS, FRAMES_PREFIXES, TAG_PREFIXES } from './constants.js';
import type { DeepPartial } from './types.js';
+import { getStrByteSize } from './utils.js';
type MetaTag = [string, string];
-export function extractMetaTags(html: string, tagPrefixes = TAG_PREFIXES) {
+export function extractMetaTags(html: string, tagPrefixes = TAG_PREFIXES, maxMetaTagSize: number | undefined) {
const $ = load(html);
const metaTags = $('meta');
const metaTagsArray = Array.from(metaTags);
@@ -26,6 +27,10 @@ export function extractMetaTags(html: string, tagPrefixes = TAG_PREFIXES) {
return acc;
}
+ if (maxMetaTagSize && getStrByteSize(content) > maxMetaTagSize) {
+ return acc;
+ }
+
acc.push([property, content]);
return acc;
diff --git a/packages/server/src/utils.ts b/packages/server/src/utils.ts
index 4865e19..916befa 100644
--- a/packages/server/src/utils.ts
+++ b/packages/server/src/utils.ts
@@ -10,6 +10,18 @@ export function getUrl(req: Request) {
return url;
}
+/**
+ * Reads the optional `max-meta-tag-bytes` query parameter from the request URL.
+ *
+ * @param req - Incoming request whose URL may carry the parameter.
+ * @returns The parameter converted with `Number`, or `undefined` when the
+ * parameter is missing or has an empty value.
+ * @throws ErrorResponse (constructed with 400) when the parameter is present
+ * but `isPositiveInteger` rejects it.
+ */
+export function getMaxMetaTagSize(req: Request): number | undefined {
+  const maxMetaTagSize = new URL(req.url).searchParams.get('max-meta-tag-bytes');
+  // `get` yields null when the param is absent; an empty string is falsy too,
+  // so both cases mean "no limit requested".
+  if (!maxMetaTagSize) {
+    return undefined;
+  }
+  if (!isPositiveInteger(maxMetaTagSize)) {
+    // The message names the parameter exactly as it is spelled in the query string.
+    throw new ErrorResponse('Could not parse max-meta-tag-bytes query param', 400);
+  }
+  return Number(maxMetaTagSize);
+}
+
+/**
+ * Returns the `pathname` component of the request's URL.
+ *
+ * @param req - Request whose `url` is parsed with `new URL`.
+ * @returns The parsed URL's `pathname`.
+ */
export function getRequestPath(req: Request): string {
return new URL(req.url).pathname;
}
@@ -49,3 +61,11 @@ export function metaTagsToObject(tags: [string, string][]): Record,
);
}
+
+/**
+ * Measures how many bytes a string occupies once encoded as UTF-8.
+ *
+ * @param str - Text to measure.
+ * @returns The UTF-8 byte length; this can exceed `str.length` for non-ASCII text.
+ */
+export function getStrByteSize(str: string): number {
+  const utf8Bytes = new TextEncoder().encode(str);
+  return utf8Bytes.byteLength;
+}
+
+/**
+ * Tells whether a string converts to a whole number greater than zero.
+ *
+ * Conversion goes through `Number(str)`, so every spelling `Number` accepts
+ * (for example `'1e3'` or `'0x10'`) qualifies, not only plain decimal digits.
+ *
+ * @param str - Raw text to check.
+ * @returns `true` when the converted value is an integer above zero.
+ */
+export function isPositiveInteger(str: string): boolean {
+  const parsed = Number(str);
+  // NaN fails the `> 0` comparison and Infinity fails `isInteger`.
+  return parsed > 0 && Number.isInteger(parsed);
+}