Skip to content

Commit

Permalink
Remove excessive whitespace before sending to ai
Browse files Browse the repository at this point in the history
  • Loading branch information
elie222 committed Dec 21, 2024
1 parent c037166 commit b7729a6
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 9 deletions.
5 changes: 1 addition & 4 deletions apps/web/app/(app)/automation/ReportMistake.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,7 @@ function ImproveRules({
onClick={async () => {
setChecking(true);

const result = await testAiAction({
messageId: message.id,
threadId: message.threadId,
});
const result = await testAiAction({ messageId: message.id });
if (isActionError(result)) {
toastError({
title: "There was an error testing the email",
Expand Down
2 changes: 1 addition & 1 deletion apps/web/app/(app)/automation/TestRules.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ export function TestResultDisplay({
<AlertDescription className="mt-2 space-y-4">
{isAIRule(result.rule) && (
<div className="text-sm">
<span className="font-medium">Rule Instructions: </span>
<span className="font-medium">AI Instructions: </span>
{result.rule.instructions.substring(0, MAX_LENGTH)}
{result.rule.instructions.length >= MAX_LENGTH && "..."}
</div>
Expand Down
1 change: 1 addition & 0 deletions apps/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
"@types/react": "18.3.12",
"@types/react-dom": "18.3.1",
"autoprefixer": "10.4.20",
"cross-env": "^7.0.3",
"dotenv": "^16.4.7",
"jiti": "^2.4.1",
"jsdom": "^25.0.1",
Expand Down
4 changes: 2 additions & 2 deletions apps/web/utils/ai/choose-rule/ai-choose-rule.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ ${
}
`;

const prompt = `This email was received for processing. Select a rule to apply to it.
const prompt = `An email was received for processing. Select a rule to apply to it.
<outputFormat>
Respond with a JSON object with the following fields:
"reason" - the reason you chose that rule. Keep it short.
"reason" - the reason you chose that rule. Keep it concise.
"rule" - the number of the rule you want to apply
</outputFormat>
Expand Down
4 changes: 2 additions & 2 deletions apps/web/utils/ai/choose-rule/stringify-email.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { truncate } from "@/utils/string";
import { removeExcessiveWhitespace, truncate } from "@/utils/string";

export type EmailForLLM = {
from: string;
Expand All @@ -14,7 +14,7 @@ export function stringifyEmail(email: EmailForLLM, maxLength: number) {
email.replyTo && `<replyTo>${email.replyTo}</replyTo>`,
email.cc && `<cc>${email.cc}</cc>`,
`<subject>${email.subject}</subject>`,
`<body>${truncate(email.content, maxLength)}</body>`,
`<body>${truncate(removeExcessiveWhitespace(email.content), maxLength)}</body>`,
];

return emailParts.filter(Boolean).join("\n");
Expand Down
66 changes: 66 additions & 0 deletions apps/web/utils/string.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { describe, it, expect } from "vitest";
import { removeExcessiveWhitespace, truncate } from "./string";

// Run with:
// pnpm test utils/string.test.ts

// Unit tests for the helpers in ./string.
// Inputs below deliberately contain runs of several spaces / invisible
// characters so that each case really exercises the behaviour named in its title.
describe("string utils", () => {
  describe("truncate", () => {
    it("should truncate strings longer than specified length", () => {
      expect(truncate("hello world", 5)).toBe("hello...");
    });

    it("should not truncate strings shorter than specified length", () => {
      expect(truncate("hello", 10)).toBe("hello");
    });

    // Boundary: truncate only cuts when the string is strictly longer than `length`.
    it("should not truncate strings exactly at the specified length", () => {
      expect(truncate("hello", 5)).toBe("hello");
    });
  });

  describe("removeExcessiveWhitespace", () => {
    it("should collapse multiple spaces into single space", () => {
      expect(removeExcessiveWhitespace("hello    world")).toBe("hello world");
    });

    it("should preserve single newlines", () => {
      expect(removeExcessiveWhitespace("hello\nworld")).toBe("hello\nworld");
    });

    it("should collapse multiple newlines into double newlines", () => {
      expect(removeExcessiveWhitespace("hello\n\n\n\nworld")).toBe(
        "hello\n\nworld",
      );
    });

    // Invisible characters are substituted with a space, not deleted outright.
    it("should replace zero-width spaces with a space", () => {
      expect(removeExcessiveWhitespace("hello\u200Bworld")).toBe("hello world");
    });

    it("should normalize CRLF and CR line breaks to newlines", () => {
      expect(removeExcessiveWhitespace("hello\r\nworld\rtest")).toBe(
        "hello\nworld\ntest",
      );
    });

    it("should handle complex cases with multiple types of whitespace", () => {
      const input =
        "hello   world\n\n\n\n   next    line\u200B\u200B   test";
      expect(removeExcessiveWhitespace(input)).toBe(
        "hello world\n\nnext line test",
      );
    });

    it("should trim leading and trailing whitespace", () => {
      expect(removeExcessiveWhitespace("   hello world   ")).toBe(
        "hello world",
      );
    });

    it("should handle empty string", () => {
      expect(removeExcessiveWhitespace("")).toBe("");
    });

    it("should handle string with only whitespace", () => {
      expect(removeExcessiveWhitespace("   \n\n   \u200B   ")).toBe("");
    });

    it("should handle soft hyphens and other special characters", () => {
      const input = "hello\u00ADworld\u034Ftest\u200B\u200Cspace";
      expect(removeExcessiveWhitespace(input)).toBe("hello world test space");
    });

    it("should handle mixed special characters and whitespace", () => {
      const input = "hello\u00AD   world\n\n\u034F\n\u200B   test";
      expect(removeExcessiveWhitespace(input)).toBe("hello world\n\ntest");
    });
  });
});
22 changes: 22 additions & 0 deletions apps/web/utils/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,28 @@ export function truncate(str: string, length: number) {
return str.length > length ? `${str.slice(0, length)}...` : str;
}

/**
 * Normalizes the whitespace of `str`.
 *
 * The text is run through a fixed sequence of regex passes and then trimmed:
 * invisible characters become spaces, line breaks become `\n`, runs of three
 * or more newlines shrink to a blank line, and horizontal whitespace is
 * collapsed to single spaces (and dropped entirely next to a newline).
 *
 * @param str - The text to clean up.
 * @returns The cleaned text with no leading or trailing whitespace.
 */
export function removeExcessiveWhitespace(str: string) {
  // Order matters: every pass operates on the output of the previous one.
  const passes: ReadonlyArray<readonly [RegExp, string]> = [
    // Zero-width characters, direction marks, BOM, Hangul filler, soft hyphen
    // and the combining grapheme joiner are swapped for a plain space.
    // They are spelled out as alternatives (not a character class) to avoid
    // combining-character issues.
    [/\u200B|\u200C|\u200D|\u200E|\u200F|\uFEFF|\u3164|\u00AD|\u034F/g, " "],
    // Windows (\r\n) and lone \r line endings become \n.
    [/\r\n|\r/g, "\n"],
    // Three or more newlines, possibly separated by other whitespace,
    // are reduced to exactly two.
    [/\n\s*\n\s*\n+/g, "\n\n"],
    // Strip horizontal whitespace hugging either side of a newline;
    // the newlines themselves (including doubled ones) are kept.
    [/[^\S\n]*\n[^\S\n]*/g, "\n"],
    // Any remaining run of non-newline whitespace becomes one space.
    [/[^\S\n]+/g, " "],
  ];

  let cleaned = str;
  for (const [pattern, replacement] of passes) {
    cleaned = cleaned.replace(pattern, replacement);
  }

  // Finally drop whatever whitespace is left at either end.
  return cleaned.trim();
}

export function generalizeSubject(subject = "") {
// replace numbers to make subject more generic
// also removes [], () ,and words that start with #
Expand Down
12 changes: 12 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b7729a6

Please sign in to comment.