From 1837e08bd90351d5cf558832c87d7bea967244c0 Mon Sep 17 00:00:00 2001 From: j-mendez Date: Sun, 3 Dec 2023 19:57:14 -0500 Subject: [PATCH] chore(book): add github url --- .github/workflows/bench.yml | 2 +- bench/README.md | 6 +++--- book/book.toml | 6 +++++- book/src/README.md | 12 +++++++++++- book/src/benchmarks.md | 28 ++++++++++++++++++++++------ 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index d634c1f..be08ebe 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -34,7 +34,7 @@ jobs: run: corepack enable && corepack prepare yarn@stable --activate - name: Install Deps - run: yarn --no-immutable && yarn build && cd bench npm i + run: yarn --no-immutable && yarn build && cd bench && npm i - name: Run Bench @spider-rs/spider-rs run: yarn bench diff --git a/bench/README.md b/bench/README.md index 6d76f5f..01212bd 100644 --- a/bench/README.md +++ b/bench/README.md @@ -13,14 +13,14 @@ Test url: `https://choosealicense.com` (small) | `libraries` | `speed` | | :-------------------------------- | :-------------------- | | **`spider-rs: crawl 10 samples`** | `76ms`(✅ **1.00x**) | -| **`crawlee: crawl 10 samples`** | `1.6s` (✅ **1.00x**) | +| **`crawlee: crawl 10 samples`** | `1s` (✅ **1.00x**) | Test url: `https://rsseau.fr` (medium) 211 pages | `libraries` | `speed` | | :-------------------------------- | :------------------- | -| **`spider-rs: crawl 10 samples`** | `1s` (✅ **1.00x**) | +| **`spider-rs: crawl 10 samples`** | `0.5s` (✅ **1.00x**) | | **`crawlee: crawl 10 samples`** | `72s` (✅ **1.00x**) | ```sh @@ -47,4 +47,4 @@ Test url: `https://rsseau.fr` (medium) | **`spider-rs: crawl 10 samples`** | `2.5s` (✅ **1.00x**) | | **`crawlee: crawl 10 samples`** | `75s` (✅ **1.00x**) | -The performance scales the larger the website and if throttling is needed. Linux benchmarks are about 10x faster than macOS for spider-rs. +The performance scales the larger the website and if throttling is needed. Linux benchmarks are about 10x faster than macOS for spider-rs. \ No newline at end of file diff --git a/book/book.toml b/book/book.toml index c237585..6712ee3 100644 --- a/book/book.toml +++ b/book/book.toml @@ -1,6 +1,10 @@ [book] -authors = ["j-mendez"] +authors = ["Jeff Mendez"] language = "en" multilingual = false src = "src" title = "spider-rs" + +[output.html] +git-repository-url = "https://github.com/spider-rs/spider-nodejs/tree/main/book" +edit-url-template = "https://github.com/spider-rs/spider-nodejs/edit/main/book/{path}" diff --git a/book/src/README.md b/book/src/README.md index a7d85a3..f504bed 100644 --- a/book/src/README.md +++ b/book/src/README.md @@ -1,6 +1,16 @@ # Introduction -Spider-RS is the fastest web crawler and indexer written in Rust ported to Node.js. +`Spider-RS` is the fastest web crawler and indexer written in Rust ported to Node.js. + +- Concurrent +- Streaming +- Decentralization +- Headless Chrome Rendering +- HTTP Proxies +- Cron Jobs +- Subscriptions +- Blacklisting and Budgeting Depth +- Written in Rust for speed, safety, and simplicity Spider powers some big tools and helps bring the crawling aspect to almost no downtime with the correct setup, view the [spider](https://github.com/spider-rs/spider) project to learn more. diff --git a/book/src/benchmarks.md b/book/src/benchmarks.md index 00d1112..01212bd 100644 --- a/book/src/benchmarks.md +++ b/book/src/benchmarks.md @@ -1,15 +1,33 @@ # Benchmarks -The speed of Spider-RS ported compared to other tools. +```sh +Linux +8-core CPU +32 GB of RAM memory +----------------------- +``` -Spider is about 1,000x (small websites) 10,000x (medium websites), and 100,000x (production grade websites) times faster than the popular crawlee library even with the node port performance hits. +Test url: `https://choosealicense.com` (small) +32 pages + +| `libraries` | `speed` | +| :-------------------------------- | :-------------------- | +| **`spider-rs: crawl 10 samples`** | `76ms`(✅ **1.00x**) | +| **`crawlee: crawl 10 samples`** | `1s` (✅ **1.00x**) | + +Test url: `https://rsseau.fr` (medium) +211 pages + +| `libraries` | `speed` | +| :-------------------------------- | :------------------- | +| **`spider-rs: crawl 10 samples`** | `0.5s` (✅ **1.00x**) | +| **`crawlee: crawl 10 samples`** | `72s` (✅ **1.00x**) | ```sh ---------------------- mac Apple M1 Max 10-core CPU 64 GB of RAM memory -1 TB of SSD disk space ----------------------- ``` @@ -29,6 +47,4 @@ Test url: `https://rsseau.fr` (medium) | **`spider-rs: crawl 10 samples`** | `2.5s` (✅ **1.00x**) | | **`crawlee: crawl 10 samples`** | `75s` (✅ **1.00x**) | -The performance scales the larger the website and if throttling is needed. - -Linux benchmarks are about 10x faster than macOS for spider-rs. +The performance scales the larger the website and if throttling is needed. Linux benchmarks are about 10x faster than macOS for spider-rs. \ No newline at end of file