diff --git a/README.md b/README.md index 00f322a..a66ee44 100644 --- a/README.md +++ b/README.md @@ -103,12 +103,14 @@ Use the crawl shortcut to get the page content and url. ```ts import { crawl } from "@spider-rs/spider-rs"; -const { links, pages } = new crawl("https://rsseau.fr"); +const { links, pages } = await crawl("https://rsseau.fr"); console.log(pages); ``` ## Benchmarks +Spider is about 1,000x (small websites), 10,000x (medium websites), and 100,000x (production-grade websites) faster than the popular crawlee library, even with the Node port performance hits. + ```sh ---------------------- mac Apple M1 Max @@ -125,7 +127,7 @@ Test url: `https://choosealicense.com` (small) | | `libraries` | | :-------------------------------- | :-------------------- | | **`spider-rs: crawl 10 samples`** | `286ms`(✅ **1.00x**) | -| **`crawlee: crawl 10 samples`** | `1s` (✅ **1.00x**) | +| **`crawlee: crawl 10 samples`** | `1.7s` (✅ **1.00x**) | Test url: `https://rsseau.fr` (medium) diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index ceb1649..f87525d 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -18,3 +18,7 @@ - [Crawl](./crawl.md) - [Scrape](./scrape.md) - [Cron Job](./cron-job.md) + +# Benchmarks + +- [Compare](./benchmarks.md) diff --git a/book/src/benchmarks.md b/book/src/benchmarks.md new file mode 100644 index 0000000..e91859d --- /dev/null +++ b/book/src/benchmarks.md @@ -0,0 +1,36 @@ +# Benchmarks + +The speed of the Spider-RS port compared to other tools. + +Spider is about 1,000x (small websites), 10,000x (medium websites), and 100,000x (production-grade websites) faster than the popular crawlee library, even with the Node port performance hits. 
+ +```sh +---------------------- +mac Apple M1 Max +10-core CPU +64 GB of RAM memory +1 TB of SSD disk space +----------------------- +``` + +Test url: `https://choosealicense.com` (small) + +32 pages + +| | `libraries` | +| :-------------------------------- | :-------------------- | +| **`spider-rs: crawl 10 samples`** | `286ms`(✅ **1.00x**) | +| **`crawlee: crawl 10 samples`** | `1.7s` (✅ **1.00x**) | + +Test url: `https://rsseau.fr` (medium) + +211 pages + +| | `libraries` | +| :-------------------------------- | :-------------------- | +| **`spider-rs: crawl 10 samples`** | `2.5s` (✅ **1.00x**) | +| **`crawlee: crawl 10 samples`** | `75s` (✅ **1.00x**) | + +The performance advantage grows with the size of the website and when throttling is needed. + +Linux benchmarks are about 10x faster than macOS for spider-rs. \ No newline at end of file diff --git a/book/src/simple.md b/book/src/simple.md index 4732501..1bb7411 100644 --- a/book/src/simple.md +++ b/book/src/simple.md @@ -36,6 +36,7 @@ const onPageEvent = (err: Error | null, value: NPage) => { links.push(value); }; +// params in order: event, background, and headless chrome await website.crawl(onPageEvent); console.log(website.getLinks()); ``` diff --git a/npm/android-arm-eabi/package.json b/npm/android-arm-eabi/package.json index 08d3bd6..99ccb19 100644 --- a/npm/android-arm-eabi/package.json +++ b/npm/android-arm-eabi/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-android-arm-eabi", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "android" diff --git a/npm/android-arm64/package.json b/npm/android-arm64/package.json index 21bc1a1..8dad63a 100644 --- a/npm/android-arm64/package.json +++ b/npm/android-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-android-arm64", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "android" diff --git a/npm/darwin-arm64/package.json 
b/npm/darwin-arm64/package.json index 70c8d2d..62254a7 100644 --- a/npm/darwin-arm64/package.json +++ b/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-darwin-arm64", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "darwin" diff --git a/npm/darwin-universal/package.json b/npm/darwin-universal/package.json index fd0617e..0d037f1 100644 --- a/npm/darwin-universal/package.json +++ b/npm/darwin-universal/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-darwin-universal", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "darwin" diff --git a/npm/darwin-x64/package.json b/npm/darwin-x64/package.json index 8f75e95..9ff77c9 100644 --- a/npm/darwin-x64/package.json +++ b/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-darwin-x64", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "darwin" diff --git a/npm/freebsd-x64/package.json b/npm/freebsd-x64/package.json index 1f714af..ceaf32f 100644 --- a/npm/freebsd-x64/package.json +++ b/npm/freebsd-x64/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-freebsd-x64", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "freebsd" diff --git a/npm/linux-arm-gnueabihf/package.json b/npm/linux-arm-gnueabihf/package.json index 80661cc..ade68e1 100644 --- a/npm/linux-arm-gnueabihf/package.json +++ b/npm/linux-arm-gnueabihf/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-linux-arm-gnueabihf", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "linux" diff --git a/npm/linux-arm64-gnu/package.json b/npm/linux-arm64-gnu/package.json index 83e05ba..b81740a 100644 --- a/npm/linux-arm64-gnu/package.json +++ 
b/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-linux-arm64-gnu", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "linux" diff --git a/npm/linux-arm64-musl/package.json b/npm/linux-arm64-musl/package.json index 137e40d..b59ab6e 100644 --- a/npm/linux-arm64-musl/package.json +++ b/npm/linux-arm64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-linux-arm64-musl", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "linux" diff --git a/npm/linux-x64-gnu/package.json b/npm/linux-x64-gnu/package.json index 3135e59..e7105af 100644 --- a/npm/linux-x64-gnu/package.json +++ b/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-linux-x64-gnu", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "linux" diff --git a/npm/linux-x64-musl/package.json b/npm/linux-x64-musl/package.json index a4fba8e..a4f1fbe 100644 --- a/npm/linux-x64-musl/package.json +++ b/npm/linux-x64-musl/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-linux-x64-musl", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "linux" diff --git a/npm/win32-arm64-msvc/package.json b/npm/win32-arm64-msvc/package.json index cad1ff2..417425f 100644 --- a/npm/win32-arm64-msvc/package.json +++ b/npm/win32-arm64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-win32-arm64-msvc", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "win32" diff --git a/npm/win32-ia32-msvc/package.json b/npm/win32-ia32-msvc/package.json index e6d0eab..42f1f71 100644 --- a/npm/win32-ia32-msvc/package.json +++ b/npm/win32-ia32-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-win32-ia32-msvc", - "version": 
"0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "win32" diff --git a/npm/win32-x64-msvc/package.json b/npm/win32-x64-msvc/package.json index 2e0d0b9..9887c91 100644 --- a/npm/win32-x64-msvc/package.json +++ b/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs-win32-x64-msvc", - "version": "0.0.20", + "version": "0.0.21", "repository": "https://github.com/spider-rs/spider-nodejs", "os": [ "win32" diff --git a/package.json b/package.json index faaf157..4924d7c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@spider-rs/spider-rs", - "version": "0.0.20", + "version": "0.0.21", "main": "index.js", "types": "index.d.ts", "napi": {