Skip to content

Commit

Permalink
chore(bench): add medium case
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 4, 2023
1 parent 1837e08 commit 7d36cce
Show file tree
Hide file tree
Showing 10 changed files with 5,090 additions and 5,070 deletions.
18 changes: 9 additions & 9 deletions bench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ Linux
Test url: `https://choosealicense.com` (small)
32 pages

| `libraries` | `speed` |
| :-------------------------------- | :-------------------- |
| **`spider-rs: crawl 10 samples`** | `76ms`(✅ **1.00x**) |
| **`crawlee: crawl 10 samples`** | `1s` (✅ **1.00x**) |
| `libraries` | `speed` |
| :-------------------------------- | :------------------- |
| **`spider-rs: crawl 10 samples`** | `76ms`(✅ **1.00x**) |
| **`crawlee: crawl 10 samples`** | `1s` (✅ **1.00x**) |

Test url: `https://rsseau.fr` (medium)
211 pages

| `libraries` | `speed` |
| :-------------------------------- | :------------------- |
| **`spider-rs: crawl 10 samples`** | `0.5s` (✅ **1.00x**) |
| **`crawlee: crawl 10 samples`** | `72s` (✅ **1.00x**) |
| `libraries` | `speed` |
| :-------------------------------- | :-------------------- |
| **`spider-rs: crawl 10 samples`** | `0.5s` (✅ **1.00x**) |
| **`crawlee: crawl 10 samples`** | `72s` (✅ **1.00x**) |

```sh
----------------------
Expand All @@ -47,4 +47,4 @@ Test url: `https://rsseau.fr` (medium)
| **`spider-rs: crawl 10 samples`** | `2.5s` (✅ **1.00x**) |
| **`crawlee: crawl 10 samples`** | `75s` (✅ **1.00x**) |

The performance gap widens as the website gets larger and when throttling is needed. Linux benchmarks are about 10x faster than macOS for spider-rs.
The performance gap widens as the website gets larger and when throttling is needed. Linux benchmarks are about 10x faster than macOS for spider-rs.
7 changes: 7 additions & 0 deletions bench/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,10 @@ export const iterations = process.env.BENCH_COUNT
: 20;

/** Default crawl target for the benchmarks; listed in bench/README.md as the "small" site (32 pages). */
export const TEST_URL = "https://choosealicense.com";
/** Crawl target for the medium case; listed in bench/README.md as the "medium" site (211 pages). */
export const TEST_URL_MEDIUM = "https://rsseau.fr";

/**
 * Size labels for benchmark cases.
 *
 * The string value is what the bench cases interpolate into the reported
 * metric name (e.g. `... - OPS/S [MEDIUM:PAGE]`), so each member's value is
 * identical to its key.
 */
export enum BenchSizes {
  /** Label for the small crawl target. */
  SMALL = "SMALL",
  /** Label for the medium crawl target. */
  MEDIUM = "MEDIUM",
  /** Label for a large crawl target. */
  LARGE = "LARGE",
}
26 changes: 13 additions & 13 deletions bench/case/crawlee.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
import { CheerioCrawler } from 'crawlee';
import { TEST_URL, iterations } from "../base"
import { CheerioCrawler } from "crawlee";
import { TEST_URL, iterations } from "../base";

export async function bench(url = TEST_URL, size = "SMALL") {
const crawler = new CheerioCrawler({
async requestHandler({ enqueueLinks }) {
await enqueueLinks();
},
});

export async function bench() {
const crawler = new CheerioCrawler({
async requestHandler({ enqueueLinks, request }) {
await enqueueLinks();
}
});

let duration = 0;

const run = async () => {
const startTime = performance.now();
await crawler.run([TEST_URL]);
await crawler.run([url]);
duration += performance.now() - startTime;
};

const bm = async (cb: () => Promise<void>, i = 0) => {
await cb();
if (i < iterations) {
Expand All @@ -28,10 +28,10 @@ export async function bench() {
console.log(
JSON.stringify([
{
name: "crawlee - OPS/S [SMALL:PAGE]",
name: `crawlee - OPS/S [${size}:PAGE]`,
unit: "OPS/S",
value: 1000 / (duration / iterations),
},
]),
);
}
}
14 changes: 7 additions & 7 deletions bench/case/spider.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { Website, NPage } from "../../index.js";
import { TEST_URL, iterations } from "../base"
import { Website } from "../../index.js";
import { TEST_URL, iterations } from "../base";

export async function bench() {
const website = new Website(TEST_URL);
export async function bench(url = TEST_URL, size = "SMALL") {
const website = new Website(url);

let duration = 0;

Expand All @@ -20,14 +20,14 @@ export async function bench() {
};

await bm(run);

console.log(
JSON.stringify([
{
name: "@spider-rs/spider-rs - OPS/S [SMALL:PAGE]",
name: `@spider-rs/spider-rs - OPS/S [${size}:PAGE]`,
unit: "OPS/S",
value: 1000 / (duration / iterations),
},
]),
);
}
}
6 changes: 5 additions & 1 deletion bench/compare.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import { bench } from "./case/spider"
import { TEST_URL_MEDIUM, BenchSizes } from "./base";
import { bench } from "./case/spider";

// Run the cases strictly one after another. `bench` is async, so calling it
// without `await` would start the medium crawl while the small one is still
// being timed, and the two runs would compete for CPU and network.
(async () => {
  // small
  await bench();
  // medium
  await bench(TEST_URL_MEDIUM, BenchSizes.MEDIUM);
})();
6 changes: 5 additions & 1 deletion bench/crawlee.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import { bench } from "./case/crawlee"
import { TEST_URL_MEDIUM, BenchSizes } from "./base";
import { bench } from "./case/crawlee";

// Run the cases strictly one after another. `bench` is async, so calling it
// without `await` would start the medium crawl while the small one is still
// being timed, and the two runs would compete for CPU and network.
(async () => {
  // small
  await bench();
  // medium
  await bench(TEST_URL_MEDIUM, BenchSizes.MEDIUM);
})();
13 changes: 8 additions & 5 deletions bench/oss.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import { bench } from "./case/spider"
import { bench as benchCrawlee } from "./case/crawlee"
import { bench } from "./case/spider";
import { bench as benchCrawlee } from "./case/crawlee";
import { TEST_URL_MEDIUM, BenchSizes } from "./base";

(async () => {
await bench();
await benchCrawlee();
})()
await bench();
await bench(TEST_URL_MEDIUM, BenchSizes.MEDIUM);
await benchCrawlee();
await benchCrawlee(TEST_URL_MEDIUM, BenchSizes.MEDIUM);
})();
Loading

0 comments on commit 7d36cce

Please sign in to comment.