diff --git a/javascript/package-lock.json b/javascript/package-lock.json index 8c68b8a..af520b5 100644 --- a/javascript/package-lock.json +++ b/javascript/package-lock.json @@ -1,12 +1,12 @@ { "name": "@spider-cloud/spider-client", - "version": "0.0.48", + "version": "0.0.49", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@spider-cloud/spider-client", - "version": "0.0.48", + "version": "0.0.49", "license": "MIT", "devDependencies": { "@supabase/supabase-js": "^2.44.2", diff --git a/javascript/package.json b/javascript/package.json index 32618c9..215179b 100644 --- a/javascript/package.json +++ b/javascript/package.json @@ -1,6 +1,6 @@ { "name": "@spider-cloud/spider-client", - "version": "0.0.48", + "version": "0.0.49", "description": "A Javascript SDK for Spider Cloud services", "scripts": { "test": "node --import tsx --test __tests__/*test.ts", diff --git a/javascript/src/config.ts b/javascript/src/config.ts index 88bdce7..068bb54 100644 --- a/javascript/src/config.ts +++ b/javascript/src/config.ts @@ -87,6 +87,16 @@ export interface SpiderParams { */ budget?: Budget; + /** + * The blacklist routes to ignore. This can be a Regex string pattern. + */ + black_list?: string[]; + + /** + * The whitelist routes to only crawl. This can be a Regex string pattern and used with black_listing. + */ + white_list?: string[]; + /** * The locale to be used during the crawl. */ @@ -230,6 +240,6 @@ export type SpiderCoreResponse = { status?: number; // The website url. url?: string; -} +}; -export type ChunkCallbackFunction = (data: SpiderCoreResponse) => void +export type ChunkCallbackFunction = (data: SpiderCoreResponse) => void; diff --git a/python/setup.py b/python/setup.py index 9f1dc2f..15a320f 100644 --- a/python/setup.py +++ b/python/setup.py @@ -8,7 +8,7 @@ def read_file(fname): setup( name="spider-client", - version="0.0.48", + version="0.0.49", url="https://github.com/spider-rs/spider-clients/tree/main/python", author="Spider", author_email="jeff@a11ywatch.com", diff --git a/python/spider/spider.py b/python/spider/spider.py index 36f3cd3..f2f5add 100644 --- a/python/spider/spider.py +++ b/python/spider/spider.py @@ -377,7 +377,7 @@ def _prepare_headers(self, content_type: str = "application/json"): return { "Content-Type": content_type, "Authorization": f"Bearer {self.api_key}", - "User-Agent": f"Spider-Client/0.0.48", + "User-Agent": f"Spider-Client/0.0.49", } def _post_request(self, url: str, data, headers, stream=False): diff --git a/python/spider/spider_types.py b/python/spider/spider_types.py index 31eb51c..7d5d9af 100644 --- a/python/spider/spider_types.py +++ b/python/spider/spider_types.py @@ -35,6 +35,12 @@ class RequestParamsDict(TypedDict, total=False): # The budget for various resources. budget: Optional[Dict[str, int]] + # The blacklist routes to ignore. This can be a Regex string pattern. + black_list: Optional[List[str]] + + # The whitelist routes to only crawl. This can be a Regex string pattern and used with black_listing. + white_list: Optional[List[str]] + # The locale to be used during the crawl. locale: Optional[str] @@ -114,4 +120,4 @@ class RequestParamsDict(TypedDict, total=False): chunking_alg: Optional[ChunkingAlgDict] -JsonCallback = Callable[[dict], None] \ No newline at end of file +JsonCallback = Callable[[dict], None]