Skip to content

Commit

Permalink
Merge branch 'main' into jarred/experiment-fragmentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Jarred-Sumner authored Dec 26, 2024
2 parents f5e874e + 145a7fd commit 5c476d6
Show file tree
Hide file tree
Showing 57 changed files with 3,791 additions and 4,027 deletions.
62 changes: 62 additions & 0 deletions bench/snippets/zlib.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { bench, run } from "../runner.mjs";
import zlib from "node:zlib";
import { promisify } from "node:util";

// Promise-returning wrappers around the callback-based zlib APIs.
const deflate = promisify(zlib.deflate);
const inflate = promisify(zlib.inflate);

// Benchmark payloads: the same ASCII phrase repeated 1x, 1024x and 10240x.
const short = "Hello World!";
const long = "Hello World!".repeat(1024);
const veryLong = "Hello World!".repeat(10240);

const shortBuf = Buffer.from(short);
const longBuf = Buffer.from(long);
const veryLongBuf = Buffer.from(veryLong);
const inputs = [shortBuf, longBuf, veryLongBuf];

// Pre-compress every payload (level 6) so the inflate benchmarks measure
// decompression only. `compressed[i]` corresponds to `inputs[i]`.
const compressed = await Promise.all(inputs.map(input => deflate(input, { level: 6 })));

// Compact size labels such as "12B" / "12KB". The "B" suffix is appended by
// hand for every benchmark: a `unit` option has no effect on Intl.NumberFormat
// unless `style: "unit"` is also set, so the formatter itself prints no unit.
const compactNumber = new Intl.NumberFormat("en-US", { notation: "compact" });
const sizeLabel = byteLength => `${compactNumber.format(byteLength)}B`;

// Compression tests: each payload at levels 1 and 6, registered in
// payload-major order (short 1, short 6, long 1, long 6, ...).
for (const input of inputs) {
  for (const level of [1, 6]) {
    bench(`deflate ${sizeLabel(input.length)} (level ${level})`, async () => {
      await deflate(input, { level });
    });
  }
}

// Decompression tests: inflate the pre-compressed form of each payload.
inputs.forEach((input, index) => {
  bench(`inflate ${sizeLabel(input.length)}`, async () => {
    await inflate(compressed[index]);
  });
});

await run();
2 changes: 1 addition & 1 deletion cmake/targets/BuildBun.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ endif()

if(WIN32)
target_link_options(${bun} PUBLIC
/STACK:0x1200000,0x100000
/STACK:0x1200000,0x200000
/errorlimit:0
)
if(RELEASE)
Expand Down
172 changes: 0 additions & 172 deletions misctools/gen-unicode-table.js

This file was deleted.

108 changes: 108 additions & 0 deletions misctools/gen-unicode-table.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import { Generator, Context } from "./unicode-generator";

// Membership sets for O(1) code point lookups while generating the tables.

// ES5 identifier start: the union of the five "Letter" general categories
// as listed by the Unicode 3.0.0 data package.
const idStartES5Set = new Set<number>();
for (const letterCodePoints of [
  require("@unicode/unicode-3.0.0/General_Category/Uppercase_Letter/code-points"),
  require("@unicode/unicode-3.0.0/General_Category/Lowercase_Letter/code-points"),
  require("@unicode/unicode-3.0.0/General_Category/Titlecase_Letter/code-points"),
  require("@unicode/unicode-3.0.0/General_Category/Modifier_Letter/code-points"),
  require("@unicode/unicode-3.0.0/General_Category/Other_Letter/code-points"),
]) {
  for (const cp of letterCodePoints) {
    idStartES5Set.add(cp);
  }
}

// ES5 identifier continue: everything in the start set plus marks, decimal
// digits and connector punctuation (also from Unicode 3.0.0).
const idContinueES5Set = new Set<number>(idStartES5Set);
for (const extraCodePoints of [
  require("@unicode/unicode-3.0.0/General_Category/Nonspacing_Mark/code-points"),
  require("@unicode/unicode-3.0.0/General_Category/Spacing_Mark/code-points"),
  require("@unicode/unicode-3.0.0/General_Category/Decimal_Number/code-points"),
  require("@unicode/unicode-3.0.0/General_Category/Connector_Punctuation/code-points"),
]) {
  for (const cp of extraCodePoints) {
    idContinueES5Set.add(cp);
  }
}

// ESNext identifiers use the ID_Start / ID_Continue binary properties from
// the Unicode 15.1.0 data package directly.
const idStartESNextSet = new Set<number>(require("@unicode/unicode-15.1.0/Binary_Property/ID_Start/code-points"));
const idContinueESNextSet = new Set<number>(require("@unicode/unicode-15.1.0/Binary_Property/ID_Continue/code-points"));

// Code points (U+30FB and U+FF65) that are deliberately treated as NOT being
// identifier-continue characters even though the ID_Continue data lists them.
const ID_Continue_mistake = new Set<number>([0x30fb, 0xff65]);

/**
 * Packs a flat list of bit flags into 64-bit words.
 *
 * Element `i` of `bits` becomes bit `i % 64` (least-significant bit first) of
 * word `Math.floor(i / 64)`. Any truthy element sets its bit. A final partial
 * group is padded with zero bits.
 *
 * @param bits Flags to pack; truthy means "set".
 * @returns One bigint per group of up to 64 flags (empty input gives `[]`).
 */
function bitsToU64Array(bits: number[]): bigint[] {
  const WORD_BITS = 64;
  const wordCount = Math.ceil(bits.length / WORD_BITS);

  return Array.from({ length: wordCount }, (_unused, wordIndex) => {
    const start = wordIndex * WORD_BITS;
    return bits
      .slice(start, start + WORD_BITS)
      .reduce<bigint>((word, bit, offset) => (bit ? word | (1n << BigInt(offset)) : word), 0n);
  });
}

/**
 * Produces the Zig source for one membership predicate and its backing table.
 *
 * The emitted code declares `pub fn <name>(cp: u21) bool`, which indexes
 * `<table>.stage1` by the high bits of the code point (`cp >> 8`), adds the
 * low byte, and tests the resulting bit position in the `u64` words of
 * `<table>.stage2`.
 *
 * @param table Name of the generated Zig struct holding `stage1` / `stage2`.
 * @param name Name of the generated Zig predicate function.
 * @param checkFn Returns whether a code point belongs to the set.
 * @returns The generated Zig source text.
 */
async function generateTable(table: string, name: string, checkFn: (cp: number) => boolean) {
  // Adapter handed to the project's Generator: per-code-point value lookup
  // plus an equality test for those values.
  const lookup: Context<boolean> = {
    get(cp: number) {
      return checkFn(cp);
    },
    eql(a: boolean, b: boolean) {
      return a === b;
    },
  };

  const { stage1, stage2 } = await new Generator(lookup).generate();

  // Render both stages up front so the template below stays readable.
  const stage1Items = stage1.join(",");
  const stage2Items = bitsToU64Array(stage2)
    .map(word => word.toString())
    .join(",");

  return `
pub fn ${name}(cp: u21) bool {
if (cp > 0x10FFFF) return false;
const high = cp >> 8;
const low = cp & 0xFF;
const stage2_idx = ${table}.stage1[high];
const bit_pos = stage2_idx + low;
const u64_idx = bit_pos >> 6;
const bit_idx = @as(u6, @intCast(bit_pos & 63));
return (${table}.stage2[u64_idx] & (@as(u64, 1) << bit_idx)) != 0;
}
const ${table} = struct {
pub const stage1 = [_]u16{${stage1Items}};
pub const stage2 = [_]u64{${stage2Items}};
};
`;
}

/**
 * Generates the four identifier lookup tables (ES5 / ESNext, start /
 * continue) and prints them to stdout as a single auto-generated source file.
 */
async function main() {
  // One entry per emitted predicate: [function name, table name, membership test].
  const specs: Array<[string, string, (cp: number) => boolean]> = [
    ["isIDStartES5", "idStartES5", cp => idStartES5Set.has(cp)],
    ["isIDContinueES5", "idContinueES5", cp => idContinueES5Set.has(cp)],
    ["isIDStartESNext", "idStartESNext", cp => idStartESNextSet.has(cp)],
    [
      "isIDContinueESNext",
      "idContinueESNext",
      // ESNext continue excludes the code points listed in ID_Continue_mistake.
      cp => idContinueESNextSet.has(cp) && !ID_Continue_mistake.has(cp),
    ],
  ];

  // Render one table and prefix it with its doc comment.
  const renderSection = async ([name, table, check]: [string, string, (cp: number) => boolean]) => {
    const code = await generateTable(table, name, check);
    return `
/// ${name} checks if a codepoint is valid in the ${name} category
${code}`;
  };

  // All tables are started together; Promise.all keeps the output in spec order.
  const sections = await Promise.all(specs.map(renderSection));
  const body = sections.join("\n\n");

  console.log(`/// This file is auto-generated. Do not edit.
${body}`);
}

main();
Loading

0 comments on commit 5c476d6

Please sign in to comment.