chore(book): add page docs
j-mendez committed Dec 27, 2023
1 parent b93f9c0 commit d653219
Showing 3 changed files with 74 additions and 0 deletions.
1 change: 1 addition & 0 deletions book/src/SUMMARY.md
@@ -10,5 +10,6 @@
# Configuration

- [Website](./website.md)
- [Page](./page.md)
- [Environment](./env.md)

72 changes: 72 additions & 0 deletions book/src/page.md
@@ -0,0 +1,72 @@
# Page

A single page on a website, useful if you just need the root URL.

## New Page

Get a new page with content.

The first parameter is the URL, followed by whether subdomains should be included, and finally whether to include TLDs in links.

Calling `page.fetch` is needed to get the content.

```python
import asyncio
from spider_rs import Page

async def main():
    page = Page("https://choosealicense.com")
    await page.fetch()

asyncio.run(main())
```

## Page Links

Get all the links found on a page.

```python
import asyncio
from spider_rs import Page

async def main():
    page = Page("https://choosealicense.com")
    await page.fetch()
    links = page.get_links()
    print(links)

asyncio.run(main())
```

## Page Html

Get the HTML markup for the page.

```python
import asyncio
from spider_rs import Page

async def main():
    page = Page("https://choosealicense.com")
    await page.fetch()
    html = page.get_html()
    print(html)

asyncio.run(main())
```

## Page Bytes

Get the raw bytes of a page, for example to store the file in a database.

```python
import asyncio
from spider_rs import Page

async def main():
    page = Page("https://choosealicense.com")
    await page.fetch()
    page_bytes = page.get_bytes()
    print(page_bytes)

asyncio.run(main())
```
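The bytes returned above can be persisted directly as a BLOB. A minimal sketch using Python's built-in `sqlite3`; the placeholder payload stands in for what a real `page.get_bytes()` call would return:

```python
import sqlite3

# Placeholder payload standing in for page.get_bytes() after a fetch
page_url = "https://choosealicense.com"
page_bytes = b"<html><body>placeholder</body></html>"

# Store the raw bytes as a BLOB keyed by URL
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE pages (url TEXT PRIMARY KEY, body BLOB)")
conn.execute("INSERT INTO pages (url, body) VALUES (?, ?)", (page_url, page_bytes))
conn.commit()

# Read it back to confirm a byte-for-byte round trip
stored = conn.execute(
    "SELECT body FROM pages WHERE url = ?", (page_url,)
).fetchone()[0]
assert stored == page_bytes
```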
1 change: 1 addition & 0 deletions book/src/website.md
@@ -272,6 +272,7 @@ class Subscription:
async def main():
    website = Website("https://choosealicense.com")
    website.crawl(Subscription())
    # sleep for 2 seconds, then stop the crawl
    website.stop()

asyncio.run(main())
