Commit 7c60530

chore(chrome): fix remote page closing
j-mendez committed Sep 12, 2023
1 parent a306858 commit 7c60530
Showing 9 changed files with 27 additions and 22 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions examples/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "spider_examples"
-version = "1.40.10"
+version = "1.40.11"
authors = ["madeindjs <[email protected]>", "j-mendez <[email protected]>"]
description = "Multithreaded web crawler written in Rust."
repository = "https://github.com/spider-rs/spider"
@@ -22,7 +22,7 @@ htr = "0.5.27"
flexbuffers = "2.0.0"

[dependencies.spider]
-version = "1.40.10"
+version = "1.40.11"
path = "../spider"
features = ["serde"]

2 changes: 1 addition & 1 deletion spider/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "spider"
-version = "1.40.10"
+version = "1.40.11"
authors = ["madeindjs <[email protected]>", "j-mendez <[email protected]>"]
description = "The fastest web crawler written in Rust."
repository = "https://github.com/spider-rs/spider"
12 changes: 6 additions & 6 deletions spider/README.md
@@ -16,7 +16,7 @@ This is a basic async example crawling a web page, add spider to your `Cargo.toml`:

```toml
[dependencies]
-spider = "1.40.10"
+spider = "1.40.11"
```

And then the code:
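
The example body itself is collapsed in this diff. For context, a minimal sketch of the kind of basic crawl the README shows at this point (an approximation, not the file's verbatim contents; the target URL is illustrative):

```rust
extern crate spider;

use spider::tokio;
use spider::website::Website;

#[tokio::main]
async fn main() {
    let mut website = Website::new("https://choosealicense.com");

    // Crawl the site, then walk every link that was discovered.
    website.crawl().await;

    for link in website.get_links() {
        println!("- {:?}", link.as_ref());
    }
}
```
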
@@ -87,7 +87,7 @@ We have a couple optional feature flags. Regex blacklisting, jemaloc backend, gl

```toml
[dependencies]
-spider = { version = "1.40.10", features = ["regex", "ua_generator"] }
+spider = { version = "1.40.11", features = ["regex", "ua_generator"] }
```

1. `ua_generator`: Enables auto generating a random real User-Agent.
@@ -114,7 +114,7 @@ Move processing to a worker, drastically increases performance even if worker is

```toml
[dependencies]
-spider = { version = "1.40.10", features = ["decentralized"] }
+spider = { version = "1.40.11", features = ["decentralized"] }
```

```sh
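# Collapsed in this diff — a sketch of the worker setup this section
# describes. The port, URL, and example name below are illustrative
# assumptions, not verbatim README contents.

# Install and start the worker (on another machine in production):
cargo install spider_worker
RUST_LOG=info SPIDER_WORKER_PORT=3030 spider_worker

# Point the crawler at the worker and run with the decentralized feature:
SPIDER_WORKER=http://127.0.0.1:3030 cargo run --example example --features decentralized
```
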
@@ -135,7 +135,7 @@ Use the subscribe method to get a broadcast channel.

```toml
[dependencies]
-spider = { version = "1.40.10", features = ["sync"] }
+spider = { version = "1.40.11", features = ["sync"] }
```

```rust,no_run
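// Collapsed in this diff — a minimal sketch of subscribing to pages while a
// crawl runs; the channel capacity (16) is an illustrative assumption.
extern crate spider;

use spider::tokio;
use spider::website::Website;

#[tokio::main]
async fn main() {
    let mut website = Website::new("https://choosealicense.com");
    let mut rx = website.subscribe(16).unwrap();

    // Receive pages on a separate task as they are crawled.
    tokio::spawn(async move {
        while let Ok(page) = rx.recv().await {
            println!("{}", page.get_url());
        }
    });

    website.crawl().await;
}
```
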
@@ -165,7 +165,7 @@ Allow regex for blacklisting routes

```toml
[dependencies]
-spider = { version = "1.40.10", features = ["regex"] }
+spider = { version = "1.40.11", features = ["regex"] }
```

```rust,no_run
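// Collapsed in this diff — a sketch of blacklisting a route when the regex
// feature is enabled. The `blacklist_url` configuration field and its exact
// type are assumptions based on this crate's Configuration.
extern crate spider;

use spider::tokio;
use spider::website::Website;

#[tokio::main]
async fn main() {
    let mut website = Website::new("https://choosealicense.com");

    // With the regex feature, blacklist entries are treated as patterns.
    website
        .configuration
        .blacklist_url
        .insert(Default::default())
        .push("/licenses/".into());

    website.crawl().await;
}
```
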
@@ -192,7 +192,7 @@ If you are performing large workloads you may need to control the crawler by ena

```toml
[dependencies]
-spider = { version = "1.40.10", features = ["control"] }
+spider = { version = "1.40.11", features = ["control"] }
```

```rust
3 changes: 2 additions & 1 deletion spider/src/features/chrome.rs
@@ -81,7 +81,8 @@ pub async fn launch_browser(
let (browser, mut handler) = match std::env::var("CHROME_URL") {
Ok(v) => Browser::connect(&v).await,
_ => Browser::launch(get_browser_config(&proxies).unwrap()).await,
-}.unwrap();
+}
+.unwrap();

// spawn a new task that continuously polls the handler
let handle = task::spawn(async move {
10 changes: 7 additions & 3 deletions spider/src/website.rs
@@ -875,6 +875,7 @@ impl Website {
let (mut browser, browser_handle) = launch_browser(&self.configuration.proxies).await;

let new_page = browser.new_page("about:blank").await.unwrap();

if cfg!(feature = "chrome_stealth") {
let _ = new_page.enable_stealth_mode(&if self.configuration.user_agent.is_some() {
&self.configuration.user_agent.as_ref().unwrap().as_str()
@@ -887,7 +888,7 @@ impl Website {
client,
unsafe { selectors.unwrap_unchecked() },
self.channel.clone(),
-new_page,
+new_page.clone(),
));

let mut links: HashSet<CaseInsensitiveString> = self
@@ -983,6 +984,8 @@ impl Website {
if !std::env::var("CHROME_URL").is_ok() {
let _ = browser.close().await;
let _ = browser_handle.await;
+} else {
+let _ = new_page.close().await;
}
}
}
@@ -1342,8 +1345,7 @@ impl Website {
""
});
}
-let page = Arc::new(new_page);
-
+let page = Arc::new(new_page.clone());
// crawl while links exists
loop {
let stream =
@@ -1437,6 +1439,8 @@

if !std::env::var("CHROME_URL").is_ok() {
let _ = browser.close().await;
+} else {
+let _ = new_page.close().await;
}
}
}
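
Taken together, the website.rs hunks are the substance of the commit: the page handle is now cloned so one copy is still available at teardown, and teardown depends on whether Chrome was launched locally or reached over `CHROME_URL`. A condensed sketch of that rule, using the chromiumoxide types spider builds on (the helper function is illustrative, not part of the diff):

```rust
use chromiumoxide::{Browser, Page};

// Hypothetical helper condensing the teardown logic from the diff above.
async fn close_chrome(mut browser: Browser, new_page: Page) {
    if std::env::var("CHROME_URL").is_err() {
        // Locally launched browser: closing it tears down all of its pages.
        let _ = browser.close().await;
    } else {
        // Remote browser shared through CHROME_URL: close only the page this
        // crawl created, leaving the browser running for other clients.
        let _ = new_page.close().await;
    }
}
```
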
4 changes: 2 additions & 2 deletions spider_cli/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "spider_cli"
-version = "1.40.10"
+version = "1.40.11"
authors = ["madeindjs <[email protected]>", "j-mendez <[email protected]>"]
description = "The fastest web crawler CLI written in Rust."
repository = "https://github.com/spider-rs/spider"
@@ -26,7 +26,7 @@ quote = "1.0.18"
failure_derive = "0.1.8"

[dependencies.spider]
-version = "1.40.10"
+version = "1.40.11"
path = "../spider"

[[bin]]
2 changes: 1 addition & 1 deletion spider_cli/README.md
@@ -40,7 +40,7 @@ spider --domain http://localhost:3000 download -t _temp_spider_downloads
```

```sh
-spider_cli 1.40.10
+spider_cli 1.40.11
madeindjs <[email protected]>, j-mendez <[email protected]>
The fastest web crawler CLI written in Rust.

4 changes: 2 additions & 2 deletions spider_worker/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "spider_worker"
-version = "1.40.10"
+version = "1.40.11"
authors = ["madeindjs <[email protected]>", "j-mendez <[email protected]>"]
description = "The fastest web crawler CLI written in Rust."
repository = "https://github.com/spider-rs/spider"
@@ -22,7 +22,7 @@ lazy_static = "1.4.0"
env_logger = "0.10.0"

[dependencies.spider]
-version = "1.40.10"
+version = "1.40.11"
path = "../spider"
features = ["serde", "flexbuffers"]

