Skip to content

Commit

Permalink
feat(clients): add exponential backoff
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 5, 2024
1 parent 12fd0f9 commit 08afc31
Show file tree
Hide file tree
Showing 14 changed files with 375 additions and 187 deletions.
3 changes: 2 additions & 1 deletion book/src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
- Cron Jobs
- Subscriptions
- AI Scraping and Event Driven Actions
- Blacklisting and Budgeting Depth
- Blacklisting and Budgeting Depth
- Exponential Backoff
2 changes: 1 addition & 1 deletion book/src/rust/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,4 +285,4 @@ spider.crawl_url(url, Some(crawler_params), true, "application/jsonl", None::<fn

## Error Handling

The SDK handles errors returned by the Spider Cloud API and raises appropriate exceptions. If an error occurs during a request, it will be propagated to the caller with a descriptive error message.
The SDK handles errors returned by the Spider Cloud API and raises appropriate exceptions. If an error occurs during a request, it will be propagated to the caller with a descriptive error message. By default, requests use exponential backoff to retry as needed.
28 changes: 26 additions & 2 deletions cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider-cloud-cli"
version = "0.1.23"
version = "0.1.24"
edition = "2021"
authors = [ "j-mendez <[email protected]>"]
description = "The Spider Cloud CLI for web crawling and scraping"
Expand All @@ -11,10 +11,10 @@ categories = ["web-programming"]
include = ["src/*", "../../LICENSE", "README.md"]

[dependencies]
clap = { version = "4.5.13", features = ["derive"]}
clap = { version = "4", features = ["derive"]}
reqwest = { version = "0.12", features = ["json", "stream"] }
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
spider-client = { path = "../rust", version = "0.1" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
keyring = { version = "3", features = ["apple-native", "windows-native", "sync-secret-service"] }
52 changes: 30 additions & 22 deletions javascript/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 8 additions & 5 deletions javascript/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@spider-cloud/spider-client",
"version": "0.1.23",
"version": "0.1.24",
"description": "Isomorphic Javascript SDK for Spider Cloud services",
"scripts": {
"test": "node --import tsx --test __tests__/*test.ts",
Expand All @@ -23,9 +23,12 @@
"author": "Jeff Mendez<[email protected]>",
"license": "MIT",
"devDependencies": {
"@types/node": "22.7.5",
"dotenv": "^16.4.5",
"tsx": "^4.19.1",
"typescript": "5.6.3"
"@types/node": "22.10.1",
"dotenv": "^16.4.7",
"tsx": "^4.19.2",
"typescript": "5.7.2"
},
"dependencies": {
"exponential-backoff": "^3.1.1"
}
}
36 changes: 23 additions & 13 deletions javascript/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
} from "./config";
import { version } from "../package.json";
import { streamReader } from "./utils/stream-reader";
import { backOff } from "exponential-backoff";

/**
* Generic params for core request.
Expand Down Expand Up @@ -56,12 +57,15 @@ export class Spider {
jsonl?: boolean
) {
const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
const response = await fetch(
`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`,
const response = await backOff(
() =>
fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, {
method: "POST",
headers: headers,
body: JSON.stringify(data),
}),
{
method: "POST",
headers: headers,
body: JSON.stringify(data),
numOfAttempts: 5,
}
);

Expand All @@ -82,11 +86,14 @@ export class Spider {
*/
private async _apiGet(endpoint: string) {
const headers = this.prepareHeaders;
const response = await fetch(
`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`,
const response = await backOff(
() =>
fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, {
method: "GET",
headers: headers,
}),
{
method: "GET",
headers: headers,
numOfAttempts: 5,
}
);

Expand All @@ -104,11 +111,14 @@ export class Spider {
*/
private async _apiDelete(endpoint: string) {
const headers = this.prepareHeaders;
const response = await fetch(
`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`,
const response = await backOff(
() =>
fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, {
method: "DELETE",
headers,
}),
{
method: "DELETE",
headers,
numOfAttempts: 5,
}
);

Expand Down
3 changes: 2 additions & 1 deletion python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ pytest-asyncio
python-dotenv
aiohttp
python-dotenv
ijson
ijson
tenacity
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def read_file(fname):
author_email="[email protected]",
description="Python SDK for Spider Cloud API",
packages=find_packages(),
install_requires=["requests", "ijson"],
install_requires=["requests", "ijson", "tenacity", "aiohttp"],
long_description=read_file("README.md"),
long_description_content_type="text/markdown",
classifiers=[
Expand Down
9 changes: 5 additions & 4 deletions python/spider/async_spider.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import os
import json
import logging
import os, tenacity, json, aiohttp, logging
from typing import Optional, Dict, Any, AsyncIterator, Callable
import aiohttp
from aiohttp import ClientSession, ClientResponse
from types import TracebackType
from typing import Type
Expand Down Expand Up @@ -35,6 +32,10 @@ async def __aexit__(
if self.session:
await self.session.close()

@tenacity.retry(
wait=tenacity.wait_exponential(multiplier=1, min=1, max=60),
stop=tenacity.stop_after_attempt(5)
)
async def _request(
self,
method: str,
Expand Down
14 changes: 13 additions & 1 deletion python/spider/spider.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os, requests, logging, ijson
import os, requests, logging, ijson, tenacity
from typing import Optional, Dict
from spider.spider_types import RequestParamsDict, JsonCallback, QueryRequest

Expand All @@ -15,6 +15,10 @@ def __init__(self, api_key: Optional[str] = None):
if self.api_key is None:
raise ValueError("No API key provided")

@tenacity.retry(
wait=tenacity.wait_exponential(multiplier=1, min=1, max=60),
stop=tenacity.stop_after_attempt(5)
)
def api_post(
self,
endpoint: str,
Expand All @@ -41,6 +45,10 @@ def api_post(
else:
self._handle_error(response, f"post to {endpoint}")

@tenacity.retry(
wait=tenacity.wait_exponential(multiplier=1, min=1, max=60),
stop=tenacity.stop_after_attempt(5)
)
def api_get(
self,
endpoint: str,
Expand All @@ -67,6 +75,10 @@ def api_get(
else:
self._handle_error(response, f"get from {endpoint}")

@tenacity.retry(
wait=tenacity.wait_exponential(multiplier=1, min=1, max=60),
stop=tenacity.stop_after_attempt(5)
)
def api_delete(
self,
endpoint: str,
Expand Down
Loading

0 comments on commit 08afc31

Please sign in to comment.