Skip to content

Commit

Permalink
added some major cli updates
Browse files Browse the repository at this point in the history
  • Loading branch information
Nikolai Schimke authored and Nikolai Schimke committed Feb 1, 2024
1 parent 0b5240c commit 5355946
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 17 deletions.
93 changes: 82 additions & 11 deletions src/bin/sportshub.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::ops::Sub;

use anyhow::Error;
use clap::{Parser, Subcommand};
use diesel::sqlite::Sqlite;
Expand All @@ -24,33 +26,102 @@ struct Cli {

#[derive(Subcommand)]
enum Commands {
#[clap(about = "Parse the website for updated data")]
Parse {
/// The number of tabs to open for checking stream links
tabs: usize,
#[clap(about = "Parse the website for updated games")]
Data {
#[command(subcommand)]
data_command: Option<DataCommands>,
},
#[clap(about = "Run the web server")]
Server {
Serve {
/// port to run the server on
/// (default: 3000)
/// usage: sportshub serve -p 5173
#[clap(short, long, default_value = "3000")]
port: u16,

/// Whether to run the server in silent mode
/// (default: false)
/// usage: sportshub serve -s
#[clap(short, long)]
silent: bool,
},
}

// Subcommands available under `sportshub data`.
//
// The `headless` options use `ArgAction::Set` so that an explicit value
// (`-H false`) is accepted. For a plain `bool` field clap's derive picks
// `ArgAction::SetTrue`, which takes no value; combined with a default of
// "true" that made it impossible to ever turn headless mode off.
// `num_args = 0..=1` together with `default_missing_value` keeps a bare `-H`
// working as before (it still means "true").
#[derive(Subcommand, Clone)]
enum DataCommands {
    #[clap(about = "Scrape the website for updated games")]
    Scrape {
        /// Whether to run the browser in headless mode
        /// (default: true)
        /// usage: sportshub data scrape -H false
        #[clap(
            short = 'H',
            long = "headless",
            default_value = "true",
            action = clap::ArgAction::Set,
            num_args = 0..=1,
            default_missing_value = "true"
        )]
        headless: bool,
    },
    #[clap(about = "Update the database with latest games links")]
    Update {
        /// How many tabs to use for scraping
        /// (default: 10)
        /// usage: sportshub data update -t 20
        #[clap(short = 't', long = "tabs", default_value = "10")]
        tabs: u8,

        /// Whether to run the browser in headless mode
        /// (default: true)
        /// usage: sportshub data update -H false
        /// usage: sportshub data update -H false -t 20
        #[clap(
            short = 'H',
            long = "headless",
            default_value = "true",
            action = clap::ArgAction::Set,
            num_args = 0..=1,
            default_missing_value = "true"
        )]
        headless: bool,
    },
    #[clap(about = "Get the info about the current database")]
    Info {},
    #[clap(about = "Delete all past streams")]
    Clear {},
}

#[rocket::main]
async fn main() {
let mut conn = db::helpers::establish_connection().unwrap();
run_migrations(&mut conn).unwrap();

let cli = Cli::parse();

match cli.command {
Some(Commands::Parse { tabs }) => {
run_migrations(&mut conn).unwrap();
scrape_utils::start_scraping(tabs).unwrap();
Some(Commands::Data { data_command }) => {
match data_command {
Some(DataCommands::Scrape { headless }) => {
scrape_utils::scrape_events(headless).unwrap();
}
Some(DataCommands::Update { tabs, headless }) => {
scrape_utils::update_streams(tabs as usize, headless).unwrap();
}
Some(DataCommands::Info {}) => {
let streams = db::helpers::get_streams(&mut conn).unwrap();
println!("Total events: {}", streams.len());
let empty_streams = db::helpers::get_empty_streams(&mut conn).unwrap();
println!("Empty events: {}", empty_streams.len());
let linked_streams = db::helpers::get_linked_streams(&mut conn).unwrap();
println!("Linked events: {}", linked_streams.len());

let mut total_links = 0;

for stream in linked_streams {
if stream.stream_link != "" {
total_links += stream.stream_link.split(",").count();
}
}

println!("Total streams: {}", total_links);
}
Some(DataCommands::Clear {}) => {
db::helpers::delete_all_streams(&mut conn).unwrap();
}
None => {
println!("use sportshub data -h for help");
}
}
}
Some(Commands::Server { port }) => {
run_migrations(&mut conn).unwrap();
web_server_utils::run(port).await.unwrap();
Some(Commands::Serve { port, silent }) => {
web_server_utils::run(port, silent).await.unwrap();
}
None => {
println!("use sportshub -h for help");
Expand Down
43 changes: 41 additions & 2 deletions src/db/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
use std::time::{Duration, Instant};

use diesel::{prelude::*, RunQueryDsl};
use diesel::{dsl::*, prelude::*, RunQueryDsl};
use serde::{Deserialize, Serialize};

use super::{
models::{Stream, StreamNew},
Expand Down Expand Up @@ -39,7 +40,6 @@ pub fn get_streams_by_id(conn: &mut SqliteConnection, search_id: i32) -> Result<
}

pub fn delete_all_past_streams(conn: &mut SqliteConnection) -> Result<usize, anyhow::Error> {
println!("Deleting all 3+ hour past streams...");
Ok(
diesel::delete(stream.filter(start_time.le(chrono::Utc::now().naive_utc() - Duration::from_secs(3 * 60 * 60))))
.execute(conn)?,
Expand Down Expand Up @@ -82,3 +82,42 @@ pub fn get_streams_by_either_team(
)
.load::<Stream>(conn)?)
}

/// A league name together with the country stored alongside it.
///
/// Field order is significant: the derived ordering compares `league` first
/// and `country` second, and serde emits the fields in this order.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct LeagueWithCountry {
    /// Name of the league.
    pub league: String,
    /// Country the league is listed under.
    pub country: String,
}

/// Returns every distinct `(league, country)` pair found in the `stream`
/// table, ordered by league name.
///
/// # Errors
///
/// Returns an error if the query fails to load.
pub fn get_unique_leagues_with_country(conn: &mut SqliteConnection) -> Result<Vec<LeagueWithCountry>, anyhow::Error> {
    let mut rows = stream
        .select((schema::stream::league, schema::stream::country))
        .distinct()
        .load::<(String, String)>(conn)?;

    // Stable sort on the league only, so pairs sharing a league keep the
    // relative order in which the database returned them.
    rows.sort_by(|a, b| a.0.cmp(&b.0));

    // The query already selects DISTINCT pairs, so no second de-duplication
    // pass is needed; each row maps straight onto one result entry.
    Ok(rows
        .into_iter()
        .map(|(league_name, country_name)| LeagueWithCountry {
            league: league_name,
            country: country_name,
        })
        .collect())
}

/// Loads every row of the `stream` table whose `stream_link` is not the
/// empty string.
///
/// # Errors
///
/// Returns an error if the query fails to load.
pub fn get_active_games(conn: &mut SqliteConnection) -> Result<Vec<Stream>, anyhow::Error> {
    let has_link = schema::stream::stream_link.ne("");
    let games = stream.filter(has_link).load::<Stream>(conn)?;
    Ok(games)
}

/// Deletes every row of the `stream` table (no filter is applied) and
/// returns the count reported by the delete statement.
///
/// # Errors
///
/// Returns an error if the delete statement fails to execute.
pub fn delete_all_streams(conn: &mut SqliteConnection) -> Result<usize, anyhow::Error> {
    let deleted = diesel::delete(stream).execute(conn)?;
    Ok(deleted)
}
63 changes: 61 additions & 2 deletions src/scrape_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ use crate::{
};


pub fn start_scraping(open_tabs: usize) -> Result<(), anyhow::Error> {
pub fn start_scraping(open_tabs: usize, headless: bool) -> Result<(), anyhow::Error> {
// realised we didnt need adblocker when headless
let browser = Browser::new({
headless_chrome::LaunchOptions {
headless: false,
headless,
sandbox: true,
ignore_certificate_errors: true,
..Default::default()
Expand Down Expand Up @@ -58,6 +58,35 @@ pub fn start_scraping(open_tabs: usize) -> Result<(), anyhow::Error> {
Ok(())
}

/// Launches a browser and runs `check_all_links` over the database using
/// `open_tabs` tabs, then closes every tab the browser still has open.
///
/// `headless` is forwarded to the browser launch options.
///
/// # Errors
///
/// Returns an error if the browser cannot be launched, the database
/// connection cannot be established, link checking fails, or a tab fails to
/// close.
pub fn update_streams(open_tabs: usize, headless: bool) -> Result<(), anyhow::Error> {
    // An adblocker turned out to be unnecessary when running headless.
    let launch_options = headless_chrome::LaunchOptions {
        headless,
        sandbox: true,
        ignore_certificate_errors: true,
        ..Default::default()
    };
    let browser = Browser::new(launch_options)?;

    let mut conn = db::helpers::establish_connection()?;

    // All links in the database that have no stream links yet are checked
    // in parallel. An 8 GB M1 MacBook Air handles 10 tabs relatively easily
    // and needs ~27 seconds to scan everything.
    // TODO: use a shared queue instead of splitting the work up front.
    check_all_links(&browser, &mut conn, open_tabs)?;

    // Close all tabs explicitly; otherwise an error is shown when the
    // program finishes.
    browser
        .get_tabs()
        .lock()
        .unwrap()
        .iter()
        .try_for_each(|open_tab| open_tab.close(true).map(|_| ()))?;

    Ok(())
}

/// This function scrapes all the games from the home page and saves them to database.
/// It takes roughly 1 second to scrape ~500 games.
Expand Down Expand Up @@ -278,3 +307,33 @@ pub fn check_link(tab: &mut Arc<Tab>, conn: &mut SqliteConnection, link: &str) -

Ok(())
}


/// Launches a browser, opens a single tab and calls `today_games` once for
/// every entry in `sports::SPORTS`, then closes every tab the browser still
/// has open.
///
/// `headless` is forwarded to the browser launch options.
///
/// # Errors
///
/// Returns an error if the browser or tab cannot be created, the database
/// connection cannot be established, scraping a sport fails, or a tab fails
/// to close.
pub fn scrape_events(headless: bool) -> Result<(), anyhow::Error> {
    // An adblocker turned out to be unnecessary when running headless.
    let launch_options = headless_chrome::LaunchOptions {
        headless,
        sandbox: true,
        ignore_certificate_errors: true,
        ..Default::default()
    };
    let browser = Browser::new(launch_options)?;

    let mut conn = db::helpers::establish_connection()?;

    let tab = browser.new_tab()?;

    for current_sport in sports::SPORTS.iter() {
        today_games(&tab, &mut conn, current_sport)?;
    }

    // Close all tabs explicitly; otherwise an error is shown when the
    // program finishes. The lock is released as soon as the loop is done.
    {
        let tabs_guard = browser.get_tabs().lock().unwrap();
        for open_tab in tabs_guard.iter() {
            open_tab.close(true)?;
        }
    }

    Ok(())
}
18 changes: 16 additions & 2 deletions src/web_server_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use db::models::Stream;
use rocket::{get, routes, serde::json::Json, Rocket};

use crate::db;
use crate::db::{self, helpers::LeagueWithCountry};

#[get("/")]
async fn get_all_streams() -> Json<Vec<Stream>> {
Expand Down Expand Up @@ -62,9 +62,22 @@ async fn get_streams_by_either_team(team: &str) -> Json<Vec<Stream>> {
Json(streams)
}

pub async fn run(port: u16) -> anyhow::Result<()> {
/// Handler for `GET /leagues`: responds with the JSON list of unique
/// league/country pairs from the database.
///
/// # Panics
///
/// Panics if the database connection cannot be established or the query
/// fails.
#[get("/leagues")]
async fn info_get_leagues() -> Json<Vec<LeagueWithCountry>> {
    let mut connection = db::helpers::establish_connection().unwrap();

    Json(db::helpers::get_unique_leagues_with_country(&mut connection).unwrap())
}

pub async fn run(port: u16, silent: bool) -> anyhow::Result<()> {
Rocket::custom(rocket::Config {
port,
log_level: if silent {
rocket::config::LogLevel::Off
} else {
rocket::config::LogLevel::Normal
},
..Default::default()
})
.mount(
Expand All @@ -79,6 +92,7 @@ pub async fn run(port: u16) -> anyhow::Result<()> {
get_streams_by_either_team,
],
)
.mount("/info", routes![info_get_leagues])
.launch()
.await?;

Expand Down

0 comments on commit 5355946

Please sign in to comment.