From 08afc318334f631af0c2112bd5ce6cdc3fa996db Mon Sep 17 00:00:00 2001 From: j-mendez Date: Thu, 5 Dec 2024 17:10:21 -0500 Subject: [PATCH] feat(clients): add exponential backoff --- book/src/README.md | 3 +- book/src/rust/getting-started.md | 2 +- cli/Cargo.lock | 28 +++- cli/Cargo.toml | 8 +- javascript/package-lock.json | 52 ++++---- javascript/package.json | 13 +- javascript/src/client.ts | 36 ++++-- python/requirements.txt | 3 +- python/setup.py | 2 +- python/spider/async_spider.py | 9 +- python/spider/spider.py | 14 +- rust/Cargo.lock | 214 +++++++++++++++++-------------- rust/Cargo.toml | 3 +- rust/src/lib.rs | 175 ++++++++++++++++++++----- 14 files changed, 375 insertions(+), 187 deletions(-) diff --git a/book/src/README.md b/book/src/README.md index c96983a..2c3c997 100644 --- a/book/src/README.md +++ b/book/src/README.md @@ -9,4 +9,5 @@ - Cron Jobs - Subscriptions - AI Scraping and Event Driven Actions -- Blacklisting and Budgeting Depth \ No newline at end of file +- Blacklisting and Budgeting Depth +- Exponential Backoff \ No newline at end of file diff --git a/book/src/rust/getting-started.md b/book/src/rust/getting-started.md index 00e9ab5..37d40ba 100644 --- a/book/src/rust/getting-started.md +++ b/book/src/rust/getting-started.md @@ -285,4 +285,4 @@ spider.crawl_url(url, Some(crawler_params), true, "application/jsonl", None::"] description = "The Spider Cloud CLI for web crawling and scraping" @@ -11,10 +11,10 @@ categories = ["web-programming"] include = ["src/*", "../../LICENSE", "README.md"] [dependencies] -clap = { version = "4.5.13", features = ["derive"]} +clap = { version = "4", features = ["derive"]} reqwest = { version = "0.12", features = ["json", "stream"] } tokio = { version = "1", features = ["rt-multi-thread", "macros"] } spider-client = { path = "../rust", version = "0.1" } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" +serde = { version = "1", features = ["derive"] } +serde_json = "1" keyring = { version = "3", features = ["apple-native", "windows-native", "sync-secret-service"] } diff --git a/javascript/package-lock.json b/javascript/package-lock.json index d3a3106..db85649 100644 --- a/javascript/package-lock.json +++ b/javascript/package-lock.json @@ -1,18 +1,21 @@ { "name": "@spider-cloud/spider-client", - "version": "0.1.23", + "version": "0.1.24", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@spider-cloud/spider-client", - "version": "0.1.23", + "version": "0.1.24", "license": "MIT", + "dependencies": { + "exponential-backoff": "^3.1.1" + }, "devDependencies": { - "@types/node": "22.7.5", - "dotenv": "^16.4.5", - "tsx": "^4.19.1", - "typescript": "5.6.3" + "@types/node": "22.10.1", + "dotenv": "^16.4.7", + "tsx": "^4.19.2", + "typescript": "5.7.2" } }, "node_modules/@esbuild/aix-ppc64": { @@ -400,18 +403,18 @@ } }, "node_modules/@types/node": { - "version": "22.7.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.5.tgz", - "integrity": "sha512-jML7s2NAzMWc//QSJ1a3prpk78cOPchGvXJsC3C6R6PSMoooztvRVQEz89gmBTBY1SPMaqo5teB4uNHPdetShQ==", + "version": "22.10.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", + "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", "dev": true, "dependencies": { - "undici-types": "~6.19.2" + "undici-types": "~6.20.0" } }, "node_modules/dotenv": { - "version": "16.4.5", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", - "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "version": "16.4.7", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz", + "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==", "dev": true, "engines": { "node": ">=12" @@ -459,6 +462,11 @@ "@esbuild/win32-x64": "0.23.1" } }, + "node_modules/exponential-backoff": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.1.tgz", + "integrity": "sha512-dX7e/LHVJ6W3DE1MHWi9S1EYzDESENfLrYohG2G++ovZrYOkm4Knwa0mc1cn84xJOR4KEU0WSchhLbd0UklbHw==" + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -495,9 +503,9 @@ } }, "node_modules/tsx": { - "version": "4.19.1", - "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.1.tgz", - "integrity": "sha512-0flMz1lh74BR4wOvBjuh9olbnwqCPc35OOlfyzHba0Dc+QNUeWX/Gq2YTbnwcWPO3BMd8fkzRVrHcsR+a7z7rA==", + "version": "4.19.2", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.2.tgz", + "integrity": "sha512-pOUl6Vo2LUq/bSa8S5q7b91cgNSjctn9ugq/+Mvow99qW6x/UZYwzxy/3NmqoT66eHYfCVvFvACC58UBPFf28g==", "dev": true, "dependencies": { "esbuild": "~0.23.0", @@ -514,9 +522,9 @@ } }, "node_modules/typescript": { - "version": "5.6.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz", - "integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==", + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", + "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", "dev": true, "bin": { "tsc": "bin/tsc", @@ -527,9 +535,9 @@ } }, "node_modules/undici-types": { - "version": "6.19.8", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", - "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", "dev": true } } diff --git a/javascript/package.json b/javascript/package.json index a53ff16..bc3e02a 100644 --- a/javascript/package.json +++ b/javascript/package.json @@ -1,6 +1,6 @@ { "name": "@spider-cloud/spider-client", - "version": "0.1.23", + "version": "0.1.24", "description": "Isomorphic Javascript SDK for Spider Cloud services", "scripts": { "test": "node --import tsx --test __tests__/*test.ts", @@ -23,9 +23,12 @@ "author": "Jeff Mendez", "license": "MIT", "devDependencies": { - "@types/node": "22.7.5", - "dotenv": "^16.4.5", - "tsx": "^4.19.1", - "typescript": "5.6.3" + "@types/node": "22.10.1", + "dotenv": "^16.4.7", + "tsx": "^4.19.2", + "typescript": "5.7.2" + }, + "dependencies": { + "exponential-backoff": "^3.1.1" } } diff --git a/javascript/src/client.ts b/javascript/src/client.ts index ae23112..f7edf50 100644 --- a/javascript/src/client.ts +++ b/javascript/src/client.ts @@ -10,6 +10,7 @@ import { } from "./config"; import { version } from "../package.json"; import { streamReader } from "./utils/stream-reader"; +import { backOff } from "exponential-backoff"; /** * Generic params for core request. @@ -56,12 +57,15 @@ export class Spider { jsonl?: boolean ) { const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders; - const response = await fetch( - `${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, + const response = await backOff( + () => + fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, { + method: "POST", + headers: headers, + body: JSON.stringify(data), + }), { - method: "POST", - headers: headers, - body: JSON.stringify(data), + numOfAttempts: 5, } ); @@ -82,11 +86,14 @@ export class Spider { */ private async _apiGet(endpoint: string) { const headers = this.prepareHeaders; - const response = await fetch( - `${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, + const response = await backOff( + () => + fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, { + method: "GET", + headers: headers, + }), { - method: "GET", - headers: headers, + numOfAttempts: 5, } ); @@ -104,11 +111,14 @@ export class Spider { */ private async _apiDelete(endpoint: string) { const headers = this.prepareHeaders; - const response = await fetch( - `${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, + const response = await backOff( + () => + fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, { + method: "DELETE", + headers, + }), { - method: "DELETE", - headers, + numOfAttempts: 5, } ); diff --git a/python/requirements.txt b/python/requirements.txt index 766fec4..f42c08e 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -3,4 +3,5 @@ pytest-asyncio python-dotenv aiohttp python-dotenv -ijson \ No newline at end of file +ijson +tenacity \ No newline at end of file diff --git a/python/setup.py b/python/setup.py index d200e55..98df8df 100644 --- a/python/setup.py +++ b/python/setup.py @@ -14,7 +14,7 @@ def read_file(fname): author_email="jeff@spider.cloud", description="Python SDK for Spider Cloud API", packages=find_packages(), - install_requires=["requests", "ijson"], + install_requires=["requests", "ijson", "tenacity", "aiohttp"], long_description=read_file("README.md"), long_description_content_type="text/markdown", classifiers=[ diff --git a/python/spider/async_spider.py b/python/spider/async_spider.py index e8e31ba..aa8181c 100644 --- a/python/spider/async_spider.py +++ b/python/spider/async_spider.py @@ -1,8 +1,5 @@ -import os -import json -import logging +import os, tenacity, json, aiohttp, logging from typing import Optional, Dict, Any, AsyncIterator, Callable -import aiohttp from aiohttp import ClientSession, ClientResponse from types import TracebackType from typing import Type @@ -35,6 +32,10 @@ async def __aexit__( if self.session: await self.session.close() + @tenacity.retry( + wait=tenacity.wait_exponential(multiplier=1, min=1, max=60), + stop=tenacity.stop_after_attempt(5) + ) async def _request( self, method: str, diff --git a/python/spider/spider.py b/python/spider/spider.py index 26fd241..4d50cc6 100644 --- a/python/spider/spider.py +++ b/python/spider/spider.py @@ -1,4 +1,4 @@ -import os, requests, logging, ijson +import os, requests, logging, ijson, tenacity from typing import Optional, Dict from spider.spider_types import RequestParamsDict, JsonCallback, QueryRequest @@ -15,6 +15,10 @@ def __init__(self, api_key: Optional[str] = None): if self.api_key is None: raise ValueError("No API key provided") + @tenacity.retry( + wait=tenacity.wait_exponential(multiplier=1, min=1, max=60), + stop=tenacity.stop_after_attempt(5) + ) def api_post( self, endpoint: str, @@ -41,6 +45,10 @@ def api_post( else: self._handle_error(response, f"post to {endpoint}") + @tenacity.retry( + wait=tenacity.wait_exponential(multiplier=1, min=1, max=60), + stop=tenacity.stop_after_attempt(5) + ) def api_get( self, endpoint: str, @@ -67,6 +75,10 @@ def api_get( else: self._handle_error(response, f"get from {endpoint}") + @tenacity.retry( + wait=tenacity.wait_exponential(multiplier=1, min=1, max=60), + stop=tenacity.stop_after_attempt(5) + ) def api_delete( self, endpoint: str, diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 13a91e8..039240e 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -29,6 +29,17 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "backon" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5289ec98f68f28dd809fd601059e6aa908bb8f6108620930828283d4ee23d7" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -64,15 +75,15 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytes" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "cc" -version = "1.1.36" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baee610e9452a8f6f0a1b6194ec09ff9e2d85dea54432acdae41aa0761c95d70" +checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" dependencies = [ "shlex", ] @@ -133,19 +144,19 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "fastrand" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "fnv" @@ -255,11 +266,23 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", "bytes", @@ -276,21 +299,15 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" - -[[package]] -name = "hermit-abi" -version = "0.3.9" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -328,9 +345,9 @@ checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "hyper" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" dependencies = [ "bytes", "futures-channel", @@ -539,9 +556,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", "hashbrown", @@ -555,16 +572,17 @@ checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -576,9 +594,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.161" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "linux-raw-sys" @@ -588,9 +606,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "litemap" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] name = "log" @@ -621,11 +639,10 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi", "libc", "wasi", "windows-sys 0.52.0", @@ -733,9 +750,9 @@ checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -817,9 +834,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustix" -version = "0.38.39" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags", "errno", @@ -830,9 +847,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.16" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "once_cell", "rustls-pki-types", @@ -875,9 +892,9 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] @@ -897,9 +914,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -907,18 +924,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.214" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -927,9 +944,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -972,9 +989,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -982,8 +999,9 @@ dependencies = [ [[package]] name = "spider-client" -version = "0.1.23" +version = "0.1.24" dependencies = [ + "backon", "dotenv", "lazy_static", "reqwest", @@ -1013,9 +1031,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.87" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -1024,9 +1042,9 @@ dependencies = [ [[package]] name = "sync_wrapper" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" dependencies = [ "futures-core", ] @@ -1065,9 +1083,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -1088,9 +1106,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.41.0" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", @@ -1125,12 +1143,11 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ "rustls", - "rustls-pki-types", "tokio", ] @@ -1147,9 +1164,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -1166,9 +1183,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-core", @@ -1176,9 +1193,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] @@ -1191,9 +1208,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "untrusted" @@ -1203,9 +1220,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.3" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -1247,9 +1264,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c" dependencies = [ "cfg-if", "once_cell", @@ -1258,9 +1275,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd" dependencies = [ "bumpalo", "log", @@ -1273,21 +1290,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.45" +version = "0.4.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +checksum = "9dfaf8f50e5f293737ee323940c7d8b08a66a95a419223d9f41610ca08b0833d" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1295,9 +1313,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" dependencies = [ "proc-macro2", "quote", @@ -1308,9 +1326,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49" [[package]] name = "wasm-streams" @@ -1327,9 +1345,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "a98bc3c33f0fe7e59ad7cd041b89034fa82a7c2d4365ca538dda6cdaf513863c" dependencies = [ "js-sys", "wasm-bindgen", @@ -1461,9 +1479,9 @@ checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" [[package]] name = "yoke" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" dependencies = [ "serde", "stable_deref_trait", @@ -1473,9 +1491,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", @@ -1485,18 +1503,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index d89c2cb..10774a5 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider-client" -version = "0.1.23" +version = "0.1.24" edition = "2021" authors = [ "j-mendez "] description = "Spider Cloud client" @@ -17,6 +17,7 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros"] } serde = { version = "1", features = ["derive"] } serde_json = { version = "1" } tokio-stream = "0.1" +backon = { version = "1", features = ["tokio-sleep"] } [dev-dependencies] dotenv = "0.15.0" diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 3a7062d..a1b1f5a 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -61,6 +61,8 @@ //! - `utils`: Utility functions used by the Spider client. //! +use backon::ExponentialBuilder; +use backon::Retryable; use reqwest::Client; use reqwest::{Error, Response}; use serde::{Deserialize, Serialize}; @@ -429,7 +431,9 @@ pub struct TransformParams { #[derive(Serialize, Deserialize, Debug, Clone)] pub struct DataParam { + /// The HTML resource. pub html: String, + /// The website url. pub url: Option, } @@ -437,9 +441,12 @@ pub struct DataParam { #[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "lowercase")] pub enum RequestType { - #[default] + /// Default HTTP request Http, + /// Chrome browser rendering Chrome, + #[default] + /// Smart mode defaulting to HTTP and using Chrome when needed. SmartMode, } @@ -529,10 +536,10 @@ impl Spider { /// # Returns /// /// The response from the API. - async fn api_post( + async fn api_post_base( &self, endpoint: &str, - data: impl Serialize + std::fmt::Debug, + data: impl Serialize + Sized + std::fmt::Debug, content_type: &str, ) -> Result { let url: String = format!("{API_URL}/{}", endpoint); @@ -550,6 +557,41 @@ impl Spider { .await } + /// Sends a POST request to the API. + /// + /// # Arguments + /// + /// * `endpoint` - The API endpoint. + /// * `data` - The request data as a HashMap. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API. + async fn api_post( + &self, + endpoint: &str, + data: impl Serialize + std::fmt::Debug + Clone + Send + Sync, + content_type: &str, + ) -> Result { + let fetch = || async { + self.api_post_base(endpoint, data.clone(), content_type) + .await + }; + + fetch + .retry(ExponentialBuilder::default().with_max_times(5)) + .when(|err: &reqwest::Error| { + if let Some(status) = err.status() { + status.is_server_error() + } else { + err.is_timeout() + } + }) + .await + } + /// Sends a GET request to the API. /// /// # Arguments @@ -559,7 +601,7 @@ impl Spider { /// # Returns /// /// The response from the API as a JSON value. - async fn api_get( + async fn api_get_base( &self, endpoint: &str, query_params: Option<&T>, @@ -580,45 +622,36 @@ impl Spider { res.json().await } - /// Scrapes a URL. + + /// Sends a GET request to the API. /// /// # Arguments /// - /// * `url` - The URL to scrape. - /// * `params` - Optional request parameters. - /// * `stream` - Whether streaming is enabled. - /// * `content_type` - The content type of the request. + /// * `endpoint` - The API endpoint. /// /// # Returns /// /// The response from the API as a JSON value. - pub async fn scrape_url( + async fn api_get( &self, - url: &str, - params: Option, - content_type: &str, + endpoint: &str, + query_params: Option<&T>, ) -> Result { - let mut data = HashMap::new(); - - data.insert( - "url".to_string(), - serde_json::Value::String(url.to_string()), - ); - data.insert("limit".to_string(), serde_json::Value::Number(1.into())); - - if let Ok(params) = serde_json::to_value(params) { - match params.as_object() { - Some(ref p) => { - let params_collect = p.iter().map(|(k, v)| (k.to_string(), v.clone())); + let fetch = || async { + self.api_get_base(endpoint, query_params.clone()) + .await + }; - data.extend(params_collect); + fetch + .retry(ExponentialBuilder::default().with_max_times(5)) + .when(|err: &reqwest::Error| { + if let Some(status) = err.status() { + status.is_server_error() + } else { + err.is_timeout() } - _ => (), - } - } - - let res = self.api_post("crawl", data, content_type).await?; - res.json().await + }) + .await } /// Sends a DELETE request to the API. @@ -633,7 +666,7 @@ impl Spider { /// # Returns /// /// The response from the API. - async fn api_delete( + async fn api_delete_base( &self, endpoint: &str, params: Option>, @@ -658,6 +691,82 @@ impl Spider { request_builder.send().await } + /// Sends a DELETE request to the API. + /// + /// # Arguments + /// + /// * `endpoint` - The API endpoint. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API. + async fn api_delete( + &self, + endpoint: &str, + params: Option>, + ) -> Result { + let fetch = || async { + self.api_delete_base(endpoint, params.clone()) + .await + }; + + fetch + .retry(ExponentialBuilder::default().with_max_times(5)) + .when(|err: &reqwest::Error| { + if let Some(status) = err.status() { + status.is_server_error() + } else { + err.is_timeout() + } + }) + .await + } + + + /// Scrapes a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to scrape. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. + pub async fn scrape_url( + &self, + url: &str, + params: Option, + content_type: &str, + ) -> Result { + let mut data = HashMap::new(); + + data.insert( + "url".to_string(), + serde_json::Value::String(url.to_string()), + ); + data.insert("limit".to_string(), serde_json::Value::Number(1.into())); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + let params_collect = p.iter().map(|(k, v)| (k.to_string(), v.clone())); + + data.extend(params_collect); + } + _ => (), + } + } + + let res = self.api_post("crawl", data, content_type).await?; + res.json().await + } + /// Crawls a URL. /// /// # Arguments