diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..71d6de0 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,52 @@ +name: Rust CI + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + rust-version: [stable, beta, nightly] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Rust ${{ matrix.rust-version }} + uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust-version }} + profile: minimal + override: true + + - name: Cache cargo registry + uses: actions/cache@v3 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-registry- + + - name: Cache cargo index + uses: actions/cache@v3 + with: + path: ~/.cargo/git + key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-git- + + - name: Check toolchain + run: rustc --version + + - name: Build + run: cargo build --verbose + working-directory: ./rust + + - name: Run tests + run: cargo test --verbose --lib + working-directory: ./rust diff --git a/README.md b/README.md index 9515f47..e81ab9f 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,10 @@ Leverage the power of Spider in your Python applications. Navigate to our [Pytho Integrate Spider effortlessly into your Javascript projects. Visit our [Javascript client library directory](./javascript/) to explore how you can utilize Spider in Node.js or browser environments. Enhance your web scraping capabilities, improve data collection strategies, and unlock new possibilities with our cutting-edge technology. +## Rust (WIP) + +Integrate Spider effortlessly into your Rust projects. Visit our [Rust client library directory](./rust/) to explore how you can utilize Spider in your applications. 
Enhance your web scraping capabilities, improve data collection strategies, and unlock new possibilities with our cutting-edge technology. + --- ### Features diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 index 0000000..ed82ec6 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,1255 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "backtrace" +version = "0.3.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bumpalo" +version = "3.16.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "cc" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "dotenv" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" + +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" + +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + 
"equivalent", + "hashbrown", +] + +[[package]] +name = "ipnet" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mime" +version = "0.3.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "openssl" +version = "0.10.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + 
"openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "winreg", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "security-framework" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +dependencies = [ + "bitflags 2.6.0", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spider-client" +version = "0.1.1" +dependencies = [ + "dotenv", + "lazy_static", + "reqwest", + "serde", + "serde_json", + "tokio", + "tokio-stream", +] + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "tinyvec" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6b6a2fb3a985e99cebfaefa9faa3024743da73304ca1c683a36429613d3d22" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + 
"tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "wasm-streams" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..502b281 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "spider-client" +version = "0.1.1" +edition = "2021" +authors = [ "j-mendez "] +description = "Spider Cloud client" +license = "MIT" +readme = "README.md" +repository = "https://github.com/spider-rs/spider-clients" +keywords = ["crawler", "web-crawler", "spider", "web-indexer", "site-map-generator"] +categories = ["web-programming"] +include = ["src/*", "../../LICENSE", "README.md"] + +[dependencies] +reqwest = { version = "0.11", features = ["json", "stream"] } +tokio = { version = "1", features = ["full"] } +serde = { version = "1", features = ["derive"] } +serde_json = { version = "1" } +tokio-stream = "0.1.15" + +[dev-dependencies] +dotenv = "0.15.0" +lazy_static = "1.5.0" diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 0000000..aee28a2 --- /dev/null +++ b/rust/README.md @@ -0,0 +1,300 @@ +# Spider Cloud Rust SDK + +The Spider Cloud Rust SDK offers a toolkit for straightforward website scraping, crawling at scale, and other utilities like extracting links 
and taking screenshots, enabling you to collect data formatted for compatibility with language models (LLMs). It features a user-friendly interface for seamless integration with the Spider Cloud API. + +-- +Current WIP +-- + +## Installation + +To use the Spider Cloud Rust SDK, include the following in your `Cargo.toml`: + +```toml +[dependencies] +spider-client = "0.1" +``` + +## Usage + +1. Get an API key from [spider.cloud](https://spider.cloud) +2. Set the API key as an environment variable named `SPIDER_API_KEY` or pass it as an argument when creating an instance of the `Spider` struct. + +Here's an example of how to use the SDK: + +```rust +use serde_json::json; +use std::env; + +#[tokio::main] +async fn main() { + // Set the API key as an environment variable + env::set_var("SPIDER_API_KEY", "your_api_key"); + + // Initialize the Spider with your API key + let spider = Spider::new(None).expect("API key must be provided"); + + let url = "https://spider.cloud"; + + // Scrape a single URL + let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL"); + + println!("Scraped Data: {:?}", scraped_data); + + // Crawl a website + let crawler_params = RequestParams { + limit: Some(1), + proxy_enabled: Some(true), + store_data: Some(false), + metadata: Some(false), + request: Some(RequestType::Http), + ..Default::default() + }; + + let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL"); + + println!("Crawl Result: {:?}", crawl_result); +} +``` + +### Scraping a URL + +To scrape data from a single URL: + +```rust +let url = "https://example.com"; +let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL"); +``` + +### Crawling a Website + +To automate crawling a website: + +```rust +let url = "https://example.com"; +let crawl_params = RequestParams { + limit: Some(200), + 
request: Some(RequestType::Smart), + ..Default::default() +}; +let crawl_result = spider.crawl_url(url, Some(crawl_params), false, "application/json", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL"); +``` + +#### Crawl Streaming + +Stream the crawl results in chunks and process each chunk with a callback: + +```rust +fn handle_json(json_obj: serde_json::Value) { + println!("Received chunk: {:?}", json_obj); +} + +let url = "https://example.com"; +let crawl_params = RequestParams { + limit: Some(200), + store_data: Some(false), + ..Default::default() +}; + +spider.crawl_url( + url, + Some(crawl_params), + true, + "application/json", + Some(handle_json) +).await.expect("Failed to crawl the URL"); +``` + +### Search + +Perform a search for websites to crawl or gather search results: + +```rust +let query = "a sports website"; +let crawl_params = RequestParams { + request: Some(RequestType::Smart), + search_limit: Some(5), + limit: Some(5), + fetch_page_content: Some(true), + ..Default::default() +}; +let crawl_result = spider.search(query, Some(crawl_params), false, "application/json").await.expect("Failed to perform search"); +``` + +### Retrieving Links from a URL(s) + +Extract all links from a specified URL: + +```rust +let url = "https://example.com"; +let links = spider.links(url, None, false, "application/json").await.expect("Failed to retrieve links from URL"); +``` + +### Transform + +Transform HTML to markdown or text lightning fast: + +```rust +let data = vec![json!({"html": "

<html><body><h1>Hello world</h1></body></html>

"})]; +let params = RequestParams { + readability: Some(false), + return_format: Some(ReturnFormat::Markdown), + ..Default::default() +}; +let result = spider.transform(data, Some(params), false, "application/json").await.expect("Failed to transform HTML to markdown"); +println!("Transformed Data: {:?}", result); +``` + +### Taking Screenshots of a URL(s) + +Capture a screenshot of a given URL: + +```rust +let url = "https://example.com"; +let screenshot = spider.screenshot(url, None, false, "application/json").await.expect("Failed to take screenshot of URL"); +``` + +### Extracting Contact Information + +Extract contact details from a specified URL: + +```rust +let url = "https://example.com"; +let contacts = spider.extract_contacts(url, None, false, "application/json").await.expect("Failed to extract contacts from URL"); +println!("Extracted Contacts: {:?}", contacts); +``` + +### Labeling Data from a URL(s) + +Label the data extracted from a particular URL: + +```rust +let url = "https://example.com"; +let labeled_data = spider.label(url, None, false, "application/json").await.expect("Failed to label data from URL"); +println!("Labeled Data: {:?}", labeled_data); +``` + +### Checking Crawl State + +You can check the crawl state of a specific URL: + +```rust +let url = "https://example.com"; +let state = spider.get_crawl_state(url, None, false, "application/json").await.expect("Failed to get crawl state for URL"); +println!("Crawl State: {:?}", state); +``` + +### Downloading Files + +You can download the results of the website: + +```rust +let url = "https://example.com"; +let options = hashmap!{ + "page" => 0, + "limit" => 100, + "expiresIn" => 3600 // Optional, add if needed +}; +let response = spider.create_signed_url(Some(url), Some(options)).await.expect("Failed to create signed URL"); +println!("Download URL: {:?}", response); +``` + +### Checking Available Credits + +You can check the remaining credits on your account: + +```rust +let credits = 
spider.get_credits().await.expect("Failed to get credits"); +println!("Remaining Credits: {:?}", credits); +``` + +### Data Operations + +The Spider client can now interact with specific data tables to create, retrieve, and delete data. + +#### Retrieve Data from a Table + +To fetch data from a specified table by applying query parameters: + +```rust +let table_name = "pages"; +let query_params = RequestParams { + limit: Some(20), + ..Default::default() +}; +let response = spider.data_get(table_name, Some(query_params)).await.expect("Failed to retrieve data from table"); +println!("Data from table: {:?}", response); +``` + +#### Delete Data from a Table + +To delete data from a specified table based on certain conditions: + +```rust +let table_name = "websites"; +let delete_params = RequestParams { + domain: Some("www.example.com".to_string()), + ..Default::default() +}; +let response = spider.data_delete(table_name, Some(delete_params)).await.expect("Failed to delete data from table"); +println!("Delete Response: {:?}", response); +``` + +## Streaming + +If you need to use streaming, set the `stream` parameter to `true` and provide a callback function: + +```rust +fn handle_json(json_obj: serde_json::Value) { + println!("Received chunk: {:?}", json_obj); +} + +let url = "https://example.com"; +let crawler_params = RequestParams { + limit: Some(1), + proxy_enabled: Some(true), + store_data: Some(false), + metadata: Some(false), + request: Some(RequestType::Http), + ..Default::default() +}; + +spider.links(url, Some(crawler_params), true, "application/json").await.expect("Failed to retrieve links from URL"); +``` + +## Content-Type + +The following Content-type headers are supported using the `content_type` parameter: + +- `application/json` +- `text/csv` +- `application/xml` +- `application/jsonl` + +```rust +let url = "https://example.com"; + +let crawler_params = RequestParams { + limit: Some(1), + proxy_enabled: Some(true), + store_data: Some(false), + metadata: 
Some(false), + request: Some(RequestType::Http), + ..Default::default() +}; + +// Stream JSON lines back to the client +spider.crawl_url(url, Some(crawler_params), true, "application/jsonl", None::<fn(serde_json::Value)>).await.expect("Failed to crawl the URL"); +``` + +## Error Handling + +The SDK reports errors returned by the Spider Cloud API through `Result` values rather than exceptions. If an error occurs during a request, it is propagated to the caller as an `Err` with a descriptive error message. + +## Contributing + +Contributions to the Spider Cloud Rust SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository. + +## License + +The Spider Cloud Rust SDK is open-source and released under the [MIT License](https://opensource.org/licenses/MIT). diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 0000000..d6ac4fd --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,915 @@ +//! The `spider-client` module provides the primary interface and +//! functionalities for the Spider web crawler library, which is +//! designed for rapid and efficient crawling of web pages to gather +//! links using isolated contexts. +//! +//! ### Features +//! +//! - **Multi-threaded Crawling:** Spider can utilize multiple +//! threads to parallelize the crawling process, drastically +//! improving performance and allowing the ability to gather +//! millions of pages in a short time. +//! +//! - **Configurable:** The library provides various options to +//! configure the crawling behavior, such as setting the depth +//! of crawling, user-agent strings, delays between requests, +//! and more. +//! +//! - **Link Gathering:** The primary objective of Spider is to +//! gather and manage links from the web pages it crawls, +//! compiling them into a structured format for further use. +//! +//! ### Examples +//! +//! Basic usage of the Spider client might look like this: +//! +//! ```rust +//! 
use spider_client::{Spider, RequestType, RequestParams}; +//! use tokio; +//! +//! # #[ignore] +//! #[tokio::main] +//! async fn main() { +//! let spider = Spider::new(Some("myspiderapikey".into())).expect("API key must be provided"); +//! +//! let url = "https://spider.cloud"; +//! +//! // Scrape a single URL +//! let scraped_data = spider.scrape_url(url, None, "application/json").await.expect("Failed to scrape the URL"); +//! +//! println!("Scraped Data: {:?}", scraped_data); +//! +//! // Crawl a website +//! let crawler_params = RequestParams { +//! limit: Some(1), +//! proxy_enabled: Some(true), +//! store_data: Some(false), +//! metadata: Some(false), +//! request: Some(RequestType::Http), +//! ..Default::default() +//! }; +//! +//! let crawl_result = spider.crawl_url(url, Some(crawler_params), false, "application/json", None::).await.expect("Failed to crawl the URL"); +//! +//! println!("Crawl Result: {:?}", crawl_result); +//! } +//! ``` +//! +//! ### Modules +//! +//! - `config`: Contains the configuration options for the Spider client. +//! - `utils`: Utility functions used by the Spider client. +//! + +use reqwest::Client; +use reqwest::{Error, Response}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use tokio_stream::StreamExt; + +/// Structure representing the Chunking algorithm dictionary. +#[derive(Debug, Deserialize, Serialize)] +pub struct ChunkingAlgDict { + /// The chunking algorithm to use, defined as a specific type. + r#type: ChunkingType, + /// The amount to chunk by. + value: i32, +} + +/// Enum representing different types of Chunking. +#[derive(Default, Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum ChunkingType { + #[default] + /// By the word count. + ByWords, + /// By the line count. + ByLines, + /// By the char length. + ByCharacterLength, + /// By sentence. + BySentence, +} + +/// Structure representing request parameters. 
+#[derive(Debug, Default, Deserialize, Serialize)] +pub struct RequestParams { + /// The URL to be crawled. + pub url: Option, + /// The type of request to be made. + pub request: Option, + /// The maximum number of pages the crawler should visit. + pub limit: Option, + /// The format in which the result should be returned. + pub return_format: Option, + /// Specifies whether to only visit the top-level domain. + pub tld: Option, + /// The depth of the crawl. + pub depth: Option, + /// Specifies whether the request should be cached. + pub cache: Option, + /// The budget for various resources. + pub budget: Option>, + /// The blacklist routes to ignore. This can be a Regex string pattern. + pub black_list: Option>, + /// The whitelist routes to only crawl. This can be a Regex string pattern and used with black_listing. + pub white_list: Option>, + /// The locale to be used during the crawl. + pub locale: Option, + /// The cookies to be set for the request, formatted as a single string. + pub cookies: Option, + /// Specifies whether to use stealth techniques to avoid detection. + pub stealth: Option, + /// The headers to be used for the request. + pub headers: Option>, + /// Specifies whether anti-bot measures should be used. + pub anti_bot: Option, + /// Specifies whether to include metadata in the response. + pub metadata: Option, + /// The dimensions of the viewport. + pub viewport: Option>, + /// The encoding to be used for the request. + pub encoding: Option, + /// Specifies whether to include subdomains in the crawl. + pub subdomains: Option, + /// The user agent string to be used for the request. + pub user_agent: Option, + /// Specifies whether the response data should be stored. + pub store_data: Option, + /// Configuration settings for GPT (general purpose texture mappings). + pub gpt_config: Option>, + /// Specifies whether to use fingerprinting protection. + pub fingerprint: Option, + /// Specifies whether to perform the request without using storage. 
+ pub storageless: Option, + /// Specifies whether readability optimizations should be applied. + pub readability: Option, + /// Specifies whether to use a proxy for the request. + pub proxy_enabled: Option, + /// Specifies whether to respect the site's robots.txt file. + pub respect_robots: Option, + /// CSS selector to be used to filter the content. + pub query_selector: Option, + /// Specifies whether to load all resources of the crawl target. + pub full_resources: Option, + /// Specifies whether to use the sitemap links. + pub sitemap: Option, + /// Get page insights to determine information like request duration, accessibility, and other web vitals. Requires the `metadata` parameter to be set to `true`. + pub page_insights: Option, + /// Returns the OpenAI embeddings for the title and description. Other values, such as keywords, may also be included. Requires the `metadata` parameter to be set to `true`. + pub return_embeddings: Option, + /// The timeout for the request, in milliseconds. + pub request_timeout: Option, + /// Specifies whether to run the request in the background. + pub run_in_background: Option, + /// Specifies whether to skip configuration checks. + pub skip_config_checks: Option, + /// The chunking algorithm to use. + pub chunking_alg: Option, +} + +/// The structure representing request parameters for a search request. +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct SearchRequestParams { + /// The base request parameters. + #[serde(flatten, skip)] + pub base: RequestParams, + /// The search query string. + pub search: String, + /// The limit amount of URLs to fetch or crawl from the search results. + pub search_limit: Option, + /// Fetch all the content of the websites by performing crawls. + pub fetch_page_content: Option, + /// The country code to use for the search. It's a two-letter country code (e.g., 'us' for the United States). + pub country: Option, + /// The location from where you want the search to originate. 
+ pub location: Option, + /// The language to use for the search. It's a two-letter language code (e.g., 'en' for English). + pub language: Option, + /// The maximum number of results to return for the search. + pub num: Option, +} + +/// Enum representing different types of Requests. +#[derive(Default, Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum RequestType { + #[default] + Http, + Chrome, + Smart, +} + +/// Enum representing different return formats. +#[derive(Default, Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum ReturnFormat { + #[default] + Raw, + Markdown, + Commonmark, + Html2text, + Text, + Bytes, +} + +/// Represents a Spider with API key and HTTP client. +#[derive(Debug)] +pub struct Spider { + /// The Spider API key. + api_key: String, + /// The Spider Client to re-use. + client: Client, +} + +impl Spider { + /// Creates a new instance of Spider. + /// + /// # Arguments + /// + /// * `api_key` - An optional API key. + /// + /// # Returns + /// + /// A new instance of Spider or an error string if no API key is provided. + pub fn new(api_key: Option) -> Result { + let api_key = api_key.or_else(|| std::env::var("SPIDER_API_KEY").ok()); + + match api_key { + Some(key) => Ok(Self { + api_key: key, + client: Client::new(), + }), + None => Err("No API key provided"), + } + } + + /// Sends a POST request to the API. + /// + /// # Arguments + /// + /// * `endpoint` - The API endpoint. + /// * `data` - The request data as a HashMap. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API. 
+ async fn api_post( + &self, + endpoint: &str, + data: impl Serialize, + content_type: &str, + ) -> Result { + let url: String = format!("https://api.spider.cloud/{}", endpoint); + self.client + .post(&url) + .header( + "User-Agent", + format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")), + ) + .header("Content-Type", content_type) + .header("Authorization", format!("Bearer {}", self.api_key)) + .json(&data) + .send() + .await + } + + /// Sends a GET request to the API. + /// + /// # Arguments + /// + /// * `endpoint` - The API endpoint. + /// + /// # Returns + /// + /// The response from the API as a JSON value. + async fn api_get(&self, endpoint: &str) -> Result { + let url = format!("https://api.spider.cloud/{}", endpoint); + let res = self + .client + .get(&url) + .header( + "User-Agent", + format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")), + ) + .header("Content-Type", "application/json") + .header("Authorization", format!("Bearer {}", self.api_key)) + .send() + .await?; + res.json().await + } + + /// Scrapes a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to scrape. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. 
+ pub async fn scrape_url( + &self, + url: &str, + params: Option, + content_type: &str, + ) -> Result { + let mut data = HashMap::new(); + + data.insert( + "url".to_string(), + serde_json::Value::String(url.to_string()), + ); + data.insert("limit".to_string(), serde_json::Value::Number(1.into())); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + let params_collect = p.iter().map(|(k, v)| (k.to_string(), v.clone())); + + data.extend(params_collect); + } + _ => (), + } + } + + let res = self.api_post("crawl", data, content_type).await?; + res.json().await + } + + /// Sends a DELETE request to the API. + /// + /// # Arguments + /// + /// * `endpoint` - The API endpoint. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API. + async fn api_delete( + &self, + endpoint: &str, + params: Option>, + ) -> Result { + let url = format!("https://api.spider.cloud/v1/{}", endpoint); + let request_builder = self + .client + .delete(&url) + .header( + "User-Agent", + format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")), + ) + .header("Content-Type", "application/json") + .header("Authorization", format!("Bearer {}", self.api_key)); + + let request_builder = if let Some(params) = params { + request_builder.json(¶ms) + } else { + request_builder + }; + + request_builder.send().await + } + + /// Crawls a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to crawl. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// * `callback` - Optional callback function to handle each streamed chunk. + /// + /// # Returns + /// + /// The response from the API as a JSON value. 
+ pub async fn crawl_url( + &self, + url: &str, + params: Option, + stream: bool, + content_type: &str, + callback: Option, + ) -> Result { + let mut data = HashMap::new(); + data.insert("url".into(), serde_json::Value::String(url.to_string())); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone()))); + } + _ => (), + } + } + + let res = self.api_post("crawl", data, content_type).await?; + + if stream { + if let Some(callback) = callback { + let stream = res.bytes_stream(); + tokio::pin!(stream); + + while let Some(item) = stream.next().await { + match item { + Ok(chunk) => match serde_json::from_slice(&chunk) { + Ok(json_obj) => { + callback(json_obj); + } + _ => (), + }, + Err(e) => { + eprintln!("Error in streaming response: {}", e); + } + } + } + Ok(serde_json::Value::Null) + } else { + Ok(serde_json::Value::Null) + } + } else { + res.json().await + } + } + + /// Fetches links from a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to fetch links from. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. + pub async fn links( + &self, + url: &str, + params: Option, + stream: bool, + content_type: &str, + ) -> Result { + let mut data = HashMap::new(); + data.insert("url".into(), serde_json::Value::String(url.to_string())); + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone()))); + } + _ => (), + } + } + + let res = self.api_post("links", data, content_type).await?; + res.json().await + } + + /// Takes a screenshot of a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to take a screenshot of. + /// * `params` - Optional request parameters. 
+ /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. + pub async fn screenshot( + &self, + url: &str, + params: Option, + stream: bool, + content_type: &str, + ) -> Result { + let mut data = HashMap::new(); + data.insert("url".into(), serde_json::Value::String(url.to_string())); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone()))); + } + _ => (), + } + } + + let res = self.api_post("screenshot", data, content_type).await?; + res.json().await + } + + /// Searches for a query. + /// + /// # Arguments + /// + /// * `q` - The query to search for. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. + pub async fn search( + &self, + q: &str, + params: Option, + stream: bool, + content_type: &str, + ) -> Result { + let body = match params { + Some(mut params) => { + params.search = q.to_string(); + params + } + _ => { + let mut params = SearchRequestParams::default(); + params.search = q.to_string(); + params + } + }; + + let res = self.api_post("search", body, content_type).await?; + + res.json().await + } + + /// Transforms data. + /// + /// # Arguments + /// + /// * `data` - The data to transform. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. 
+ pub async fn transform( + &self, + data: Vec>, + params: Option, + stream: bool, + content_type: &str, + ) -> Result { + let mut payload = HashMap::new(); + + payload.insert("data".into(), serde_json::to_value(data).unwrap()); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone()))); + } + _ => (), + } + } + + let res = self.api_post("transform", payload, content_type).await?; + + res.json().await + } + + /// Extracts contacts from a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to extract contacts from. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. + pub async fn extract_contacts( + &self, + url: &str, + params: Option, + stream: bool, + content_type: &str, + ) -> Result { + let mut data = HashMap::new(); + + data.insert("url".into(), serde_json::to_value(url).unwrap()); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone()))); + } + _ => (), + } + } + + let res = self + .api_post("pipeline/extract-contacts", data, content_type) + .await?; + res.json().await + } + + /// Labels data from a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to label data from. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. + /// + /// # Returns + /// + /// The response from the API as a JSON value. 
+ pub async fn label( + &self, + url: &str, + params: Option, + stream: bool, + content_type: &str, + ) -> Result { + let mut data = HashMap::new(); + data.insert("url".into(), serde_json::Value::String(url.to_string())); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + data.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone()))); + } + _ => (), + } + } + + let res = self.api_post("pipeline/label", data, content_type).await?; + res.json().await + } + + /// Creates a signed URL. + /// + /// # Arguments + /// + /// * `domain` - Optional domain. + /// * `options` - Optional options. + /// * `stream` - Whether streaming is enabled. + /// + /// # Returns + /// + /// The response from the API. + pub async fn create_signed_url( + &self, + domain: Option<&str>, + options: Option>, + ) -> Result { + let mut params = HashMap::new(); + + if let Some(domain) = domain { + params.insert("domain".to_string(), domain.to_string()); + } + + if let Some(options) = options { + for (key, value) in options { + params.insert(key.to_string(), value.to_string()); + } + } + + let url = format!("https://api.spider.cloud/v1/data/storage"); + let request = self + .client + .get(&url) + .header( + "User-Agent", + format!("Spider-Client/{}", env!("CARGO_PKG_VERSION")), + ) + .header("Content-Type", "application/octet-stream") + .header("Authorization", format!("Bearer {}", self.api_key)) + .query(¶ms); + + let res = request.send().await?; + + Ok(res) + } + + /// Gets the crawl state of a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to get the crawl state of. + /// * `params` - Optional request parameters. + /// * `stream` - Whether streaming is enabled. + /// * `content_type` - The content type of the request. 
+ /// + /// # Returns + /// + pub async fn get_crawl_state( + &self, + url: &str, + params: Option, + stream: bool, + content_type: &str, + ) -> Result { + let mut payload = HashMap::new(); + payload.insert("url".into(), serde_json::Value::String(url.to_string())); + payload.insert( + "contentType".into(), + serde_json::Value::String(content_type.to_string()), + ); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + payload.extend(p.iter().map(|(k, v)| (k.to_string(), v.clone()))); + } + _ => (), + } + } + + let res = self + .api_post("data/crawl_state", payload, content_type) + .await?; + res.json().await + } + + pub async fn get_credits(&self) -> Result { + self.api_get("data/credits").await + } + + pub async fn data_post( + &self, + table: &str, + data: Option, + ) -> Result { + let res = self + .api_post(&format!("data/{}", table), data, "application/json") + .await?; + res.json().await + } + + pub async fn data_get( + &self, + table: &str, + params: Option, + ) -> Result { + let mut payload = HashMap::new(); + + if let Some(params) = params { + let params = serde_json::to_value(params).unwrap(); + payload.extend( + params + .as_object() + .unwrap() + .iter() + .map(|(k, v)| (k.as_str(), v.clone())), + ); + } + + let res = self.api_get(&format!("data/{}", table)).await?; + Ok(res) + } + + pub async fn data_delete( + &self, + table: &str, + params: Option, + ) -> Result { + let mut payload = HashMap::new(); + + if let Ok(params) = serde_json::to_value(params) { + match params.as_object() { + Some(ref p) => { + let params_collect = p.iter().map(|(k, v)| (k.to_string(), v.clone())); + + payload.extend(params_collect); + } + _ => (), + } + } + + let res = self + .api_delete(&format!("data/{}", table), Some(payload)) + .await?; + res.json().await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use dotenv::dotenv; + use lazy_static::lazy_static; + + lazy_static! 
{ + static ref SPIDER_CLIENT: Spider = { + dotenv().ok(); + Spider::new(None).unwrap() + }; + } + + #[tokio::test] + async fn test_scrape_url() { + let response = SPIDER_CLIENT + .scrape_url("https://example.com", None, "application/json") + .await; + assert!(response.is_ok()); + } + + #[tokio::test] + async fn test_crawl_url() { + let response = SPIDER_CLIENT + .crawl_url( + "https://example.com", + None, + false, + "application/json", + None::, + ) + .await; + assert!(response.is_ok()); + } + + #[tokio::test] + async fn test_links() { + let response = SPIDER_CLIENT + .links("https://example.com", None, false, "application/json") + .await; + assert!(response.is_ok()); + } + + #[tokio::test] + async fn test_screenshot() { + let response = SPIDER_CLIENT + .screenshot("https://example.com", None, false, "application/json") + .await; + assert!(response.is_ok()); + } + + // #[tokio::test] + // async fn test_search() { + // let mut params = SearchRequestParams::default(); + // params.search_limit = Some(1); + // params.num = Some(1); + + // let response = SPIDER_CLIENT + // .search("a sports website", Some(params), false, "application/json") + // .await; + // assert!(response.is_ok()); + // } + + #[tokio::test] + async fn test_transform() { + let data = vec![HashMap::new()]; + let response = SPIDER_CLIENT + .transform(data, None, false, "application/json") + .await; + assert!(response.is_ok()); + } + + #[tokio::test] + async fn test_extract_contacts() { + let response = SPIDER_CLIENT + .extract_contacts("https://example.com", None, false, "application/json") + .await; + assert!(response.is_ok()); + } + + #[tokio::test] + async fn test_label() { + let response = SPIDER_CLIENT + .label("https://example.com", None, false, "application/json") + .await; + assert!(response.is_ok()); + } + + #[tokio::test] + async fn test_create_signed_url() { + let response = SPIDER_CLIENT + .create_signed_url(Some("example.com"), None) + .await; + assert!(response.is_ok()); + } + + 
#[tokio::test] + async fn test_get_crawl_state() { + let response = SPIDER_CLIENT + .get_crawl_state("https://example.com", None, false, "application/json") + .await; + assert!(response.is_ok()); + } + + #[tokio::test] + async fn test_get_credits() { + let response = SPIDER_CLIENT.get_credits().await; + assert!(response.is_ok()); + } +}