From 3b462613a95cecde92765a206e992d9930b0f55a Mon Sep 17 00:00:00 2001 From: j-mendez Date: Fri, 15 Sep 2023 18:10:02 -0400 Subject: [PATCH] chore(glob): fix link callback #136 --- Cargo.lock | 8 ++++---- examples/Cargo.toml | 4 ++-- spider/Cargo.toml | 2 +- spider/README.md | 12 ++++++------ spider/src/website.rs | 6 +++--- spider_cli/Cargo.toml | 4 ++-- spider_cli/README.md | 2 +- spider_worker/Cargo.toml | 4 ++-- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f61372a1e..31b00d2df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3550,7 +3550,7 @@ dependencies = [ [[package]] name = "spider" -version = "1.42.1" +version = "1.42.2" dependencies = [ "ahash", "bytes", @@ -3586,7 +3586,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "1.42.1" +version = "1.42.2" dependencies = [ "clap 3.2.25", "env_logger 0.9.3", @@ -3598,7 +3598,7 @@ dependencies = [ [[package]] name = "spider_examples" -version = "1.42.1" +version = "1.42.2" dependencies = [ "convert_case", "env_logger 0.9.3", @@ -3619,7 +3619,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "1.42.1" +version = "1.42.2" dependencies = [ "env_logger 0.10.0", "lazy_static", diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 13a073b93..4225165ff 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_examples" -version = "1.42.1" +version = "1.42.2" authors = ["madeindjs ", "j-mendez "] description = "Multithreaded web crawler written in Rust." repository = "https://github.com/spider-rs/spider" @@ -22,7 +22,7 @@ htr = "0.5.27" flexbuffers = "2.0.0" [dependencies.spider] -version = "1.42.1" +version = "1.42.2" path = "../spider" features = ["serde"] diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 9f4fb8ae2..b66d43ca6 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "1.42.1" +version = "1.42.2" authors = ["madeindjs ", "j-mendez "] description = "The fastest web crawler written in Rust." repository = "https://github.com/spider-rs/spider" diff --git a/spider/README.md b/spider/README.md index 60fe86892..6f7029e7a 100644 --- a/spider/README.md +++ b/spider/README.md @@ -16,7 +16,7 @@ This is a basic async example crawling a web page, add spider to your `Cargo.tom ```toml [dependencies] -spider = "1.42.1" +spider = "1.42.2" ``` And then the code: @@ -88,7 +88,7 @@ We have a couple optional feature flags. Regex blacklisting, jemaloc backend, gl ```toml [dependencies] -spider = { version = "1.42.1", features = ["regex", "ua_generator"] } +spider = { version = "1.42.2", features = ["regex", "ua_generator"] } ``` 1. `ua_generator`: Enables auto generating a random real User-Agent. @@ -116,7 +116,7 @@ Move processing to a worker, drastically increases performance even if worker is ```toml [dependencies] -spider = { version = "1.42.1", features = ["decentralized"] } +spider = { version = "1.42.2", features = ["decentralized"] } ``` ```sh @@ -137,7 +137,7 @@ Use the subscribe method to get a broadcast channel. ```toml [dependencies] -spider = { version = "1.42.1", features = ["sync"] } +spider = { version = "1.42.2", features = ["sync"] } ``` ```rust,no_run @@ -167,7 +167,7 @@ Allow regex for blacklisting routes ```toml [dependencies] -spider = { version = "1.42.1", features = ["regex"] } +spider = { version = "1.42.2", features = ["regex"] } ``` ```rust,no_run @@ -194,7 +194,7 @@ If you are performing large workloads you may need to control the crawler by ena ```toml [dependencies] -spider = { version = "1.42.1", features = ["control"] } +spider = { version = "1.42.2", features = ["control"] } ``` ```rust diff --git a/spider/src/website.rs b/spider/src/website.rs index 7c8a703c4..9905a0cc9 100644 --- a/spider/src/website.rs +++ b/spider/src/website.rs @@ -854,11 +854,11 @@ impl Website { if !page.is_empty() { let u = page.get_url().into(); let link_result = match self.on_link_find_callback { - Some(cb) => cb(u), - _ => u, + Some(cb) => cb(u, None), + _ => (link, None), }; - self.links_visited.insert(link_result); + self.links_visited.insert(link_result.0); let page_links = HashSet::from(page.links(&base).await); links.extend(page_links); diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index d58046a68..b47b10544 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "1.42.1" +version = "1.42.2" authors = ["madeindjs ", "j-mendez "] description = "The fastest web crawler CLI written in Rust." repository = "https://github.com/spider-rs/spider" @@ -26,7 +26,7 @@ quote = "1.0.18" failure_derive = "0.1.8" [dependencies.spider] -version = "1.42.1" +version = "1.42.2" path = "../spider" [[bin]] diff --git a/spider_cli/README.md b/spider_cli/README.md index da5c3d31a..c8d08a3de 100644 --- a/spider_cli/README.md +++ b/spider_cli/README.md @@ -40,7 +40,7 @@ spider --domain http://localhost:3000 download -t _temp_spider_downloads ``` ```sh -spider_cli 1.42.1 +spider_cli 1.42.2 madeindjs , j-mendez The fastest web crawler CLI written in Rust. diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 45d82d5f4..aeed4c9b7 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "1.42.1" +version = "1.42.2" authors = ["madeindjs ", "j-mendez "] description = "The fastest web crawler CLI written in Rust." repository = "https://github.com/spider-rs/spider" @@ -22,7 +22,7 @@ lazy_static = "1.4.0" env_logger = "0.10.0" [dependencies.spider] -version = "1.42.1" +version = "1.42.2" path = "../spider" features = ["serde", "flexbuffers"]