feat(streaming): add js client crawl streaming callback
j-mendez committed Jul 4, 2024
1 parent 1cb005c commit e6a28ec
Showing 14 changed files with 693 additions and 3,839 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/nodejs.yml
@@ -32,3 +32,8 @@ jobs:

      - run: npm test
        working-directory: ./javascript
        env:
          SPIDER_API_KEY: ${{ secrets.SPIDER_API_KEY }}
          SPIDER_EMAIL: ${{ secrets.SPIDER_EMAIL }}
          SPIDER_PASSWORD: ${{ secrets.SPIDER_PASSWORD }}
          SUPABASE_AUTO_REFRESH_TOKEN: "false"
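
The updated tests load these same values through `dotenv`, so the suite runs in CI and locally alike; a minimal sketch of the lookup:

```ts
import "dotenv/config";

// In CI these come from the workflow's `env` block above;
// locally, dotenv reads them from a .env file.
const apiKey = process.env.SPIDER_API_KEY;
```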
35 changes: 29 additions & 6 deletions javascript/README.md
@@ -59,6 +59,34 @@ app
});
```

A real-world example that crawls a website and streams each page to a callback:

```javascript
import { Spider } from "@spider-cloud/spider-client";

// Initialize the SDK with your API key
const app = new Spider({ apiKey: "YOUR_API_KEY" });

// The target URL
const url = "https://spider.cloud";

// Crawl a website
const crawlParams = {
limit: 5,
store_data: false,
metadata: true,
request: "http",
};

const stream = true;

const streamCallback = (data) => {
console.log(data["url"]);
};

app.crawlUrl(url, crawlParams, stream, streamCallback);
```
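
When streaming is enabled, each page is passed to the callback as it arrives, and `crawlUrl` resolves with `undefined` instead of returning the collected results (the streaming test added in this commit asserts exactly this).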

### Data Operations

The Spider client can interact with specific data tables to create, retrieve, and delete data.
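
As a minimal sketch, retrieval mirrors the `getData` call exercised by this commit's tests (the `websites` table name and the `data` property on the response both come from those tests):

```ts
import { Spider } from "@spider-cloud/spider-client";

const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });

// Fetch one record from the "websites" table; the rows are
// exposed on the response's `data` property.
const { data } = await spiderClient.getData("websites", { limit: 1 });
console.log(data);
```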
@@ -124,7 +152,6 @@ spider

You can use [Supabase](https://supabase.com/docs/reference/javascript) to directly connect to instances and write your own logic. First, you need to install `@supabase/supabase-js` since this package does not include the dependency by default. This keeps the bundle size small and allows for lazy imports of the client.


```ts
const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });
```
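
Signing in through the attached client then follows the Supabase test added in this commit; a minimal sketch:

```ts
// Lazily create the Supabase client on the Spider instance.
await spiderClient.init_supabase();

// After init, the client is available as `spiderClient.supabase`.
const auth = await spiderClient.supabase?.auth.signInWithPassword({
  email: process.env.SPIDER_EMAIL || "",
  password: process.env.SPIDER_PASSWORD || "",
});
```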

@@ -149,8 +176,4 @@ Contributions are always welcome! Feel free to open an issue or submit a pull request.

## License

The Spider Cloud JavaScript SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT).
105 changes: 63 additions & 42 deletions javascript/__tests__/spiderwebai.test.ts
@@ -1,63 +1,84 @@
-import { describe, test, expect, jest } from "@jest/globals";
+import { describe, test } from "node:test";
+import assert from "node:assert";
 import { Spider } from "../src";
 
-jest.setTimeout(1000 * 60);
+import "dotenv/config";
 
 describe("Spider JS SDK", () => {
   test("should throw error if API key is not provided", () => {
-    expect(() => new Spider({ apiKey: undefined })).toThrow(
-      "No API key provided"
-    );
+    if (!process.env.SPIDER_API_KEY) {
+      assert.throws(() => new Spider({ apiKey: null }));
+    } else {
+      assert.doesNotThrow(() => new Spider({ apiKey: null }));
+    }
   });
-  test("should scrape url with data", async () => {
-    await import("dotenv/config");
 
-    if (process.env.SPIDER_API_KEY) {
-      const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });
-      const spiderData = await spiderClient.scrapeUrl("https://spider.cloud", {
-        store_data: true,
-      });
+  test("should crawl url with data", async () => {
+    const spiderClient = new Spider();
+    const spiderData = await spiderClient.crawlUrl("https://spider.cloud", {
+      store_data: true,
+      limit: 2,
+    });
 
-      expect(Array.isArray(spiderData));
-    }
+    assert(Array.isArray(spiderData));
+    assert(spiderData && spiderData.length === 2);
+  });
+
+  test("should crawl url streaming with data", async () => {
+    const stream = true;
+
+    const spiderClient = new Spider();
+    const spiderData = await spiderClient.crawlUrl(
+      "https://spider.cloud",
+      {
+        store_data: true,
+        limit: 4,
+      },
+      stream,
+      (data) => {
+        assert(data["url"]);
+      }
+    );
+
+    assert(typeof spiderData === "undefined");
   });
-  test("should get data from the api", async () => {
-    await import("dotenv/config");
 
-    if (process.env.SPIDER_API_KEY) {
-      const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });
-      const spiderData = await spiderClient.getData("websites", { limit: 1 });
+  test("should scrape url with data", async () => {
+    const spiderClient = new Spider();
+    const spiderData = await spiderClient.scrapeUrl("https://spider.cloud", {
+      store_data: true,
+    });
 
-      expect(Array.isArray(spiderData));
-    }
+    assert(Array.isArray(spiderData));
   });
-  test("should download data from the api", async () => {
-    await import("dotenv/config");
 
-    if (process.env.SPIDER_API_KEY) {
-      const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });
-      const spiderData = await spiderClient.createSignedUrl("spider.cloud", {
-        limit: 1,
-        page: 0,
-      });
+  test("should get data from the api", async () => {
+    const spiderClient = new Spider();
+    const { data } = await spiderClient.getData("websites", { limit: 1 });
 
-      expect(spiderData);
-    }
+    assert(Array.isArray(data));
   });
 
-  test("should connect with supabase", async () => {
-    await import("dotenv/config");
+  // test.skip("should download data from the api", async () => {
+  //   await import("dotenv/config");
 
-    if (process.env.SPIDER_API_KEY) {
-      const spiderClient = new Spider({ apiKey: process.env.SPIDER_API_KEY });
-      await spiderClient.init_supabase();
+  //   const spiderClient = new Spider();
+  //   const spiderData = await spiderClient.createSignedUrl("spider.cloud", {
+  //     limit: 1,
+  //     page: 0,
+  //   });
 
-      const auth = await spiderClient.supabase?.auth.signInWithPassword({
-        email: process.env.SPIDER_EMAIL || "",
-        password: process.env.SPIDER_PASSWORD || "",
-      });
+  //   assert(spiderData);
+  // });
 
-      expect(auth);
-    }
+  test("should connect with supabase", async () => {
+    const spiderClient = new Spider();
+    await spiderClient.init_supabase();
+
+    const auth = await spiderClient.supabase?.auth.signInWithPassword({
+      email: process.env.SPIDER_EMAIL || "",
+      password: process.env.SPIDER_PASSWORD || "",
+    });
+
+    assert(auth);
   });
 });
