deploy: 43bff3c
j-mendez committed Jul 20, 2024
1 parent 0ac8272 commit 380cc63
Showing 11 changed files with 296 additions and 356 deletions.
54 changes: 27 additions & 27 deletions crawl.html
@@ -181,74 +181,74 @@ <h1 class="menu-title">spider-rs</h1>
<main>
<h1 id="crawl"><a class="header" href="#crawl">Crawl</a></h1>
<p>Crawl a website concurrently.</p>
<pre><code class="language-ts">import { Website } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

// pass in the website url
const website = new Website("https://rsseau.fr");
const website = new Website('https://rsseau.fr')

-await website.crawl();
+await website.crawl()

// [ "https://rsseau.fr/blog", ...]
-console.log(website.getLinks());
+console.log(website.getLinks())
</code></pre>
<h2 id="async-event"><a class="header" href="#async-event">Async Event</a></h2>
<p>You can pass an async function as the first param to the crawl function to stream updates in real time.</p>
<pre><code class="language-ts">import { Website } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website("https://rsseau.fr");
const website = new Website('https://rsseau.fr')

const onPageEvent = (err, value) =&gt; {
-  console.log(value);
-};
+  console.log(value)
+}

-await website.crawl(onPageEvent);
+await website.crawl(onPageEvent)
</code></pre>
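<p>The callback appears to follow the Node error-first convention, so a handler can guard the error slot before using the page value. A minimal sketch:</p>
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website('https://rsseau.fr')

const onPageEvent = (err, value) =&gt; {
  if (err) {
    // surface failures instead of logging an empty value
    console.error(err)
    return
  }
  console.log(value)
}

await website.crawl(onPageEvent)
</code></pre>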
<h2 id="background"><a class="header" href="#background">Background</a></h2>
<p>You can run the request in the background and receive events with the second param set to <code>true</code>.</p>
<pre><code class="language-ts">import { Website } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website("https://rsseau.fr");
const website = new Website('https://rsseau.fr')

const onPageEvent = (err, value) =&gt; {
-  console.log(value);
-};
+  console.log(value)
+}

-await website.crawl(onPageEvent, true);
+await website.crawl(onPageEvent, true)
// this will run instantly as the crawl is in the background
</code></pre>
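<p>Because the call resolves as soon as the crawl is dispatched, keep the process alive while events stream in. A minimal sketch of the timing:</p>
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website('https://rsseau.fr')

const onPageEvent = (err, value) =&gt; {
  console.log(value)
}

await website.crawl(onPageEvent, true)
// this logs before the pages finish streaming in
console.log('crawl dispatched')
</code></pre>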
<h2 id="subscriptions"><a class="header" href="#subscriptions">Subscriptions</a></h2>
<p>You can set up many subscriptions to run events when a crawl happens.</p>
<pre><code class="language-ts">import { Website } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website("https://rsseau.fr");
const website = new Website('https://rsseau.fr')

const onPageEvent = (err, value) =&gt; {
-  console.log(value);
-};
+  console.log(value)
+}

-const subscriptionID = website.subscribe(onPageEvent);
+const subscriptionID = website.subscribe(onPageEvent)

-await website.crawl();
+await website.crawl()

-website.unsubscribe(subscriptionID);
+website.unsubscribe(subscriptionID)
// remove the listener once you are done with the crawl
</code></pre>
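<p>Several subscriptions can be active at once; a sketch, assuming each one receives every page event independently:</p>
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website('https://rsseau.fr')

let total = 0
const logger = (err, value) =&gt; console.log(value)
const counter = (err, value) =&gt; {
  total += 1
}

const loggerID = website.subscribe(logger)
const counterID = website.subscribe(counter)

await website.crawl()

website.unsubscribe(loggerID)
website.unsubscribe(counterID)
console.log(total)
</code></pre>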
<h2 id="headless-chrome"><a class="header" href="#headless-chrome">Headless Chrome</a></h2>
<p>Headless Chrome rendering can be done by setting the third param in <code>crawl</code> or <code>scrape</code> to <code>true</code>.
If the <code>CHROME_URL</code> env variable is set, it will attempt to connect to Chrome running remotely, launching Chrome locally as a fallback. Using a remote connection with <code>CHROME_URL</code> can drastically speed up runs.</p>
<pre><code class="language-ts">import { Website } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website("https://rsseau.fr");
const website = new Website('https://rsseau.fr')

const onPageEvent = (err, value) =&gt; {
-  console.log(value);
-};
+  console.log(value)
+}

// all params are optional. The third param determines headless rendering.
-await website.crawl(onPageEvent, false, true);
+await website.crawl(onPageEvent, false, true)
// make sure to call unsubscribe when finished, or the instance is kept alive while events are set up.
-website.unsubscribe();
+website.unsubscribe()
</code></pre>
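<p>To reuse a remote instance, set <code>CHROME_URL</code> before crawling. A sketch; the endpoint below is illustrative, point it at your own remote Chrome:</p>
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

// illustrative endpoint - replace with your remote Chrome debugger url
process.env.CHROME_URL = 'http://localhost:9222'

const website = new Website('https://rsseau.fr')

// no event callback, foreground run, headless rendering enabled
await website.crawl(undefined, false, true)
console.log(website.getLinks())
</code></pre>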

</main>
12 changes: 5 additions & 7 deletions cron-job.html
@@ -181,18 +181,16 @@ <h1 class="menu-title">spider-rs</h1>
<main>
<h1 id="cron-jobs"><a class="header" href="#cron-jobs">Cron Jobs</a></h1>
<p>Use a cron job that can run any time of day to gather website data.</p>
<pre><code class="language-ts">import { Website } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website("https://choosealicense.com")
.withCron("1/5 * * * * *")
.build();
const website = new Website('https://choosealicense.com').withCron('1/5 * * * * *').build()

// stream the pages of the website when the cron runs.
const onPageEvent = (err, value) =&gt; {
-  console.log(value);
-};
+  console.log(value)
+}

-const handle = await website.runCron(onPageEvent);
+const handle = await website.runCron(onPageEvent)
</code></pre>
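<p><code>1/5 * * * * *</code> is a six-field expression with a leading seconds field, so the job fires every 5 seconds. A daily run at 02:00 would look like this (the schedule is illustrative):</p>
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website('https://choosealicense.com').withCron('0 0 2 * * *').build()

const onPageEvent = (err, value) =&gt; {
  console.log(value)
}

const handle = await website.runCron(onPageEvent)
</code></pre>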

</main>
8 changes: 4 additions & 4 deletions index.html
@@ -193,13 +193,13 @@ <h1 id="introduction"><a class="header" href="#introduction">Introduction</a></h
<li>Written in <a href="https://www.rust-lang.org/">Rust</a> for speed, safety, and simplicity</li>
</ul>
<p>Spider powers some big tools and, with the correct setup, keeps crawling running with almost no downtime. View the <a href="https://github.com/spider-rs/spider">spider</a> project to learn more.</p>
<pre><code class="language-ts">import { Website } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Website } from '@spider-rs/spider-rs'

const website = new Website("https://choosealicense.com");
const website = new Website('https://choosealicense.com')

-await website.crawl();
+await website.crawl()

-console.log(website.getLinks());
+console.log(website.getLinks())
</code></pre>

</main>
30 changes: 15 additions & 15 deletions page.html
@@ -185,31 +185,31 @@ <h2 id="new-page"><a class="header" href="#new-page">New Page</a></h2>
<p>Get a new page with content.</p>
<p>The first param is the URL, the second sets whether subdomains should be included, and the third whether TLDs should be included in links.</p>
<p>Calling <code>page.fetch</code> is needed to get the content.</p>
<pre><code class="language-ts">import { Page } from "@spider-rs/spider-rs";
<pre><code class="language-ts">import { Page } from '@spider-rs/spider-rs'

const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
const page = new Page('https://choosealicense.com', false, false)
await page.fetch()
</code></pre>
<h2 id="page-links"><a class="header" href="#page-links">Page Links</a></h2>
<p>Get all the links related to a page.</p>
<pre><code class="language-ts">const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
const links = await page.getLinks();
console.log(links);
<pre><code class="language-ts">const page = new Page('https://choosealicense.com', false, false)
await page.fetch()
const links = await page.getLinks()
console.log(links)
</code></pre>
<h2 id="page-html"><a class="header" href="#page-html">Page Html</a></h2>
<p>Get the HTML markup for the page.</p>
<pre><code class="language-ts">const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
const html = page.getHtml();
console.log(html);
<pre><code class="language-ts">const page = new Page('https://choosealicense.com', false, false)
await page.fetch()
const html = page.getHtml()
console.log(html)
</code></pre>
<h2 id="page-bytes"><a class="header" href="#page-bytes">Page Bytes</a></h2>
<p>Get the raw bytes of a page, e.g. to store the file in a database.</p>
<pre><code class="language-ts">const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
const bytes = page.getBytes();
console.log(bytes);
<pre><code class="language-ts">const page = new Page('https://choosealicense.com', false, false)
await page.fetch()
const bytes = page.getBytes()
console.log(bytes)
</code></pre>
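<p>A sketch that persists the bytes to a local file instead of a database, assuming <code>getBytes</code> returns data accepted by <code>Buffer.from</code>:</p>
<pre><code class="language-ts">import { Page } from '@spider-rs/spider-rs'
import { writeFile } from 'node:fs/promises'

const page = new Page('https://choosealicense.com', false, false)
await page.fetch()

// store the raw response body on disk
await writeFile('choosealicense.html', Buffer.from(page.getBytes()))
</code></pre>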

</main>
