From 0179995d083749c0e5a1925283d30523a5f41806 Mon Sep 17 00:00:00 2001
From: j-mendez
Date: Tue, 28 Nov 2023 12:34:08 -0500
Subject: [PATCH] chore(docs): add full website config examples

---
 Cargo.toml           |   2 +-
 book/src/SUMMARY.md  |   5 ++-
 book/src/crawl.md    |  46 +++++++++++++++++++
 book/src/cron-job.md |   7 +--
 book/src/website.md  | 117 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 6 deletions(-)
 create mode 100644 book/src/crawl.md

diff --git a/Cargo.toml b/Cargo.toml
index 16b03e5..1b43ab0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@ compact_str = "0.7.1"
 napi = { version = "2.14.1", default-features = false, features = ["napi4", "async", "tokio_rt"] }
 napi-derive = "2.14.2"
 num_cpus = "1.16.0"
-spider = { version = "1.50.8", features = ["napi", "budget", "cron", "regex", "cookies"] }
+spider = { version = "1.50.8", features = ["napi", "budget", "cron", "regex", "cookies", "socks"] }

 [target.x86_64-unknown-linux-gnu.dependencies]
 openssl-sys = { version = "0.9.96", features = ["vendored"] }
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
index 90e6514..0feda28 100644
--- a/book/src/SUMMARY.md
+++ b/book/src/SUMMARY.md
@@ -4,13 +4,14 @@

 # User Guide

-- [Getting Started](./getting-started.md)
+- [Getting started](./getting-started.md)
 - [A simple example](./simple.md)

 # Config

 - [Website](./website.md)

-# Features
+# Usage

+- [Crawl](./crawl.md)
 - [Cron Job](./cron-job.md)
diff --git a/book/src/crawl.md b/book/src/crawl.md
new file mode 100644
index 0000000..7b8d8f4
--- /dev/null
+++ b/book/src/crawl.md
@@ -0,0 +1,46 @@
+# Crawl
+
+Crawl a website concurrently.
+
+```ts
+import { Website } from "@spider-rs/spider-rs";
+
+// pass in the website url
+const website = new Website("https://rsseau.fr");
+
+await website.crawl();
+console.log(website.getLinks());
+```
+
+## Async Event
+
+You can pass an async function as the first param to the crawl function to stream page updates in realtime.
+
+```ts
+import { Website } from "@spider-rs/spider-rs";
+
+const website = new Website("https://rsseau.fr");
+
+const onPageEvent = (err, value) => {
+  console.log(value);
+};
+
+await website.crawl(onPageEvent);
+```
+
+## Background
+
+You can run the crawl in the background and still receive events by setting the second param to `true`.
+
+```ts
+import { Website } from "@spider-rs/spider-rs";
+
+const website = new Website("https://rsseau.fr");
+
+const onPageEvent = (err, value) => {
+  console.log(value);
+};
+
+await website.crawl(onPageEvent, true);
+// this returns instantly since the crawl runs in the background
+```
\ No newline at end of file
diff --git a/book/src/cron-job.md b/book/src/cron-job.md
index 746485a..68f172d 100644
--- a/book/src/cron-job.md
+++ b/book/src/cron-job.md
@@ -3,14 +3,15 @@
 Use a cron job that can run any time of day to gather website data.

 ```ts
-import { Website, type NPage } from "@spider-rs/spider-rs";
+import { Website } from "@spider-rs/spider-rs";

 const website = new Website("https://choosealicense.com")
   .withCron("1/5 * * * * *")
   .build();

-const onPageEvent = (err: Error | null, value: NPage) => {
-  links.push(value);
+// get the pages of the website streamed as the cron runs.
+const onPageEvent = (err, value) => {
+  console.log(value);
 };

 const handle = await website.runCron(onPageEvent);
diff --git a/book/src/website.md b/book/src/website.md
index 8f700e2..59cc98e 100644
--- a/book/src/website.md
+++ b/book/src/website.md
@@ -49,3 +49,120 @@
 ```

 View the [cron](./cron-job.md) section for details how to use the cron.
+
+### Budget
+
+Add a crawl budget that prevents crawling more than a set number of pages.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withBudget({
+    "*": 1,
+  })
+  .build();
+```
+
+### Subdomains
+
+Include subdomains in the crawl.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withSubdomains(true)
+  .build();
+```
+
+### TLD
+
+Include TLDs in the crawl.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withTlds(true)
+  .build();
+```
+
+### External Domains
+
+Add external domains to include in the crawl.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withExternalDomains(["https://www.myotherdomain.com"])
+  .build();
+```
+
+### Proxy
+
+Use a proxy to crawl a website.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withProxies(["https://www.myproxy.com"])
+  .build();
+```
+
+### Delays
+
+Add delays between page requests.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withDelays(200)
+  .build();
+```
+
+### User-Agent
+
+Use a custom User-Agent.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withUserAgent("mybot/v1")
+  .build();
+```
+
+### Request Timeout
+
+Add a request timeout per page in milliseconds. The example below uses 30 seconds.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withRequestTimeout(30000)
+  .build();
+```
+
+### Respect Robots
+
+Respect the robots.txt file.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withRespectRobotsTxt(true)
+  .build();
+```
+
+### Http2 Prior Knowledge
+
+Use HTTP/2 prior knowledge to connect if you know the server supports it.
+
+```ts
+const website = new Website("https://choosealicense.com")
+  .withHttp2PriorKnowledge(true)
+  .build();
+```
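+
+### Chaining Config
+
+The config methods can be chained before calling `build`. A sketch combining several of the options above, with illustrative values:
+
+```ts
+// illustrative values; adjust per site
+const website = new Website("https://choosealicense.com")
+  .withBudget({ "*": 100 })
+  .withSubdomains(true)
+  .withUserAgent("mybot/v1")
+  .withRequestTimeout(30000)
+  .withRespectRobotsTxt(true)
+  .build();
+```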