Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restructure Modules and Fix font loading #3

Merged
merged 8 commits into from
Dec 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ tower-http = { version = "0.4.3", features = ["trace", "cors"], optional = true
tower = { version = "0.4.13", optional = true }
tracing = { version = "0.1.40", optional = true }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"], optional = true }
accept-header = { version = "0.2.3", optional = true}
mime = { version = "0.3.17", optional = true }
regex = { version = "1.10.2", optional = true }

clap = { version = "4.4.11", features = ["derive"] }
image = "0.24.7"
reqwest = { version = "0.11.22", features = ["stream"] }
resvg = "0.37.0"
strum = { version = "0.25.0", features = ["derive"] }
tokio = { version = "1.35.0", features = ["full"] }
image = "0.24.7"
resvg = "0.37.0"
thiserror = "1.0.51"
tl = "0.7.7"
tokio = { version = "1.35.0", features = ["full"] }
url = "2.5.0"
webp = "0.2.6"
accept-header = { version = "0.2.3", optional = true}
mime = { version = "0.3.17", optional = true }
regex = { version = "1.10.2", optional = true }
lazy_static = "1.4.0"
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ favicon-rover serve --help # show help information

Example: `http://localhost:3000/example.com?size=24`

### Fonts

The fallback image generation will attempt to query and load a "sans-serif" font. It will load your system fonts if available as well as any fonts
in the current directory (`pwd`) when favicon-rover is started.

### CORS

By default, any origin is allowed to make a request to this API. To lock it down, use the `--origin` command line option to specify any number of origins. If an origin starts and ends with `/`, it will be treated as a regular expression. For example, `favicon-rover serve -o http://example1.com -o /\.example2\.com$/` will accept any request from "http://example1.com" or from a subdomain of "example2.com".
Expand Down
104 changes: 104 additions & 0 deletions src/favicon_image/fetch/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//! Methods for fetching a favicon image from a url and interpreting its format

mod scrape;

use reqwest::{
header::{CONTENT_TYPE, USER_AGENT},
Client,
};
use std::io;
use thiserror::Error;
use url::Url;

use scrape::{scrape_link_tags, ScrapeError};
pub const BOT_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36";

/// Errors that can occur while fetching and decoding a favicon
#[derive(Error, Debug)]
pub enum FetchFaviconError {
/// An error reported by the link-tag scraper, forwarded unchanged
#[error(transparent)]
Scrape(#[from] ScrapeError),

/// An HTTP client error from `reqwest`, forwarded unchanged
#[error(transparent)]
Network(#[from] reqwest::Error),

/// A tokio task could not be joined (see `tokio::task::JoinError`)
#[error(transparent)]
TokioError(#[from] tokio::task::JoinError),

/// The `image` crate reported an error for the downloaded data
#[error("Failed to decode image: {0}")]
ImageError(#[from] image::ImageError),

/// The provided URL is not valid; only compiled with the `server` feature
#[cfg(feature = "server")]
#[error("Provided URL is not a valid url")]
InvalidUrl,

/// The image format could not be determined, or the decoder rejected the data
#[error("Cannot decode the image type")]
CannotDecode,
}

/// Fetch the favicon for a given url
impl super::FaviconImage {
pub async fn fetch_for_url(
client: &Client,
target_url: &Url,
size: u32,
) -> Result<Self, FetchFaviconError> {
// Determine favicon url
let image_url = scrape_link_tags(client, target_url, size)
.await
.unwrap_or_else(|_| target_url.join("/favicon.ico").unwrap());

// Fetch the image
let res = client
.get(image_url)
.header(USER_AGENT, BOT_USER_AGENT)
.send()
.await?;

// Render SVGs
if res
.headers()
.get(CONTENT_TYPE)
.is_some_and(|content_type| content_type == "image/svg+xml")
{
let svg = res.text().await?;
return Ok(Self::from_svg_str(svg, size));
}

// Get HTTP response body
let body = res.bytes().await?;
let cursor = io::Cursor::new(body);

// Create reader and attempt to guess image format
let image_reader = image::io::Reader::new(cursor)
.with_guessed_format()
.expect("Cursor IO shouldn't fail");

// Decode the image!
let image_format = image_reader.format();
let image_data = tokio::task::spawn_blocking(move || {
match image_format {
// Use `webp` crate to decode WebPs
Some(image::ImageFormat::WebP) => {
let data = image_reader.into_inner().into_inner();
let decoder = webp::Decoder::new(&data);
decoder
.decode()
.ok_or(FetchFaviconError::CannotDecode)
.map(|webp| webp.to_image())
}

// Use image to decode other
Some(_) => image_reader.decode().map_err(|e| e.into()),

// We don't know the format
None => Err(FetchFaviconError::CannotDecode),
}
})
.await??;

Ok(Self {
data: image_data,
format: image_format,
})
}
}
95 changes: 95 additions & 0 deletions src/favicon_image/fetch/scrape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//! Methods for scraping a website to determine the available favicon urls

use reqwest::{header::USER_AGENT, Client};
use thiserror::Error;
use url::Url;

use super::BOT_USER_AGENT;

/// A candidate favicon `<link>` tag collected while scraping a page
#[derive(Debug, Clone)]
struct Link {
/// Value of the tag's `href` attribute; joined onto the page url before use
href: String,
/// Number parsed from the part of the `sizes` attribute before the first `x`;
/// `0` when the attribute is missing or cannot be parsed
size: usize,
}

/// Errors that can occur while scraping a page for favicon links
#[derive(Error, Debug)]
pub enum ScrapeError {
/// An HTTP client error from `reqwest`, forwarded unchanged
#[error(transparent)]
Network(#[from] reqwest::Error),

/// The `tl` parser failed to parse the page's HTML
#[error(transparent)]
HTMLParse(#[from] tl::ParseError),

/// A link's `href` could not be resolved into a url
#[error(transparent)]
URLParse(#[from] url::ParseError),

/// The page contained no usable icon `<link>` tags
#[error("link not found")]
LinkNotFound,
}

/// Scrape the <link /> tags from a given URL to find a favicon url
pub async fn scrape_link_tags(
client: &Client,
url: &Url,
preferred_size: u32,
) -> Result<Url, ScrapeError> {
let res = client
.get(url.clone())
.header(USER_AGENT, BOT_USER_AGENT)
.send()
.await?;
let html = res.text().await?;

let dom = tl::parse(&html, tl::ParserOptions::default())?;
let parser = dom.parser();
let mut links: Vec<_> = dom
.query_selector("link[rel*=\"icon\"]")
.unwrap()
.map(|link| link.get(parser).unwrap().as_tag().unwrap().attributes())
.filter_map(|attr| match attr.get("href").flatten() {
Some(href) => {
if let Some(media) = attr.get("media").flatten() {
if String::from(media.as_utf8_str())
.replace(' ', "")
.to_ascii_lowercase()
.contains("prefers-color-scheme:dark")
{
return None;
}
}
Some(Link {
href: href.as_utf8_str().into_owned(),
size: attr
.get("sizes")
.flatten()
.and_then(|sizes| {
sizes
.as_utf8_str()
.split_once('x')
.and_then(|(size, _)| size.parse().ok())
})
.unwrap_or(0),
})
}
None => None,
})
.collect();

if links.is_empty() {
return Err(ScrapeError::LinkNotFound);
}

links.sort_unstable_by_key(|link| link.size);

// If an icon larger than the preferred size exists, use the closest
// to what we want instead of always using the largest image available
let filtered_links: Vec<_> = links
.iter()
.filter(|link| link.size < preferred_size as usize)
.collect();
if !filtered_links.is_empty() {
return Ok(url.join(&filtered_links.first().unwrap().href)?);
}

Ok(url.join(&links.last().unwrap().href)?)
}
39 changes: 6 additions & 33 deletions src/favicon_image.rs → src/favicon_image/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
//! Wrapper for image data in various formats
//! Implements file and network IO for favicon data

pub mod fetch;
mod svg;

use image::{imageops::FilterType, ImageFormat};
use image::{DynamicImage, RgbaImage};
use resvg::{
tiny_skia,
usvg::{self, fontdb, Options, Size, TreeParsing, TreeTextToPath},
Tree,
};
use std::io;
use thiserror::Error;

Expand Down Expand Up @@ -70,33 +70,6 @@ impl FaviconImage {
..self
}
}

/// Rasterise an svg string to a favicon image with no associated format
///
/// System fonts are loaded on every call so that text can be converted to
/// paths, and the tree size is rescaled against a `size` x `size` target
/// before rendering.
///
/// # Panics
///
/// Panics if `svg` cannot be parsed, if a `Size` or `Pixmap` cannot be built
/// from the requested dimensions, or if the rendered buffer cannot be
/// wrapped as an `RgbaImage`.
pub fn from_svg_str(svg: String, size: u32) -> Self {
let rtree = {
// TODO: include a font file in this project for consistent results
let mut fontdb = fontdb::Database::new();
fontdb.load_system_fonts();

let mut tree = usvg::Tree::from_data(svg.as_bytes(), &Options::default()).unwrap();
tree.convert_text(&fontdb);
tree.size = tree
.size
.scale_to(Size::from_wh(size as f32, size as f32).unwrap());
Tree::from_usvg(&tree)
};

let pixmap_size = rtree.size.to_int_size();
let mut pixmap = tiny_skia::Pixmap::new(pixmap_size.width(), pixmap_size.height()).unwrap();
rtree.render(tiny_skia::Transform::default(), &mut pixmap.as_mut());

// NOTE(review): the pixmap bytes are copied as-is; presumably they are
// premultiplied RGBA, which `RgbaImage` would read as straight alpha — confirm
Self {
data: DynamicImage::ImageRgba8(
RgbaImage::from_raw(pixmap.width(), pixmap.height(), pixmap.data().to_vec())
.unwrap(),
),
format: None,
}
}
}

#[cfg(feature = "server")]
Expand Down
52 changes: 52 additions & 0 deletions src/favicon_image/svg.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//! Svg operations for favicon images

use image::{DynamicImage, RgbaImage};
use lazy_static::lazy_static;
use resvg::{
tiny_skia,
usvg::{self, fontdb, Options, Size, TreeParsing, TreeTextToPath},
Tree,
};

// Load fonts once: the database is built lazily on first use and then shared
// by every SVG rasterisation.
lazy_static! {
    static ref FONT_DB: fontdb::Database = {
        let mut db = fontdb::Database::new();

        // Load system fonts if available
        db.load_system_fonts();

        // Load any fonts in the current directory; if the working directory
        // cannot be determined, only the system fonts are used.
        if let Ok(pwd_path) = std::env::current_dir() {
            db.load_fonts_dir(pwd_path);
        }

        db
    };
}

impl super::FaviconImage {
    /// Rasterise an svg string to a formatless favicon image
    ///
    /// Text is converted to paths using the shared `FONT_DB`, and the tree
    /// size is rescaled against a `size` x `size` target before rendering.
    /// The returned image has `format: None`.
    ///
    /// # Panics
    ///
    /// Panics if `svg` cannot be parsed, if a `Size` or `Pixmap` cannot be
    /// built from the requested dimensions (e.g. `size == 0`), or if the
    /// rendered buffer cannot be wrapped as an `RgbaImage`.
    pub fn from_svg_str(svg: String, size: u32) -> Self {
        let rtree = {
            let mut tree = usvg::Tree::from_data(svg.as_bytes(), &Options::default()).unwrap();
            tree.convert_text(&FONT_DB);
            tree.size = tree
                .size
                .scale_to(Size::from_wh(size as f32, size as f32).unwrap());
            Tree::from_usvg(&tree)
        };

        let pixmap_size = rtree.size.to_int_size();
        let mut pixmap = tiny_skia::Pixmap::new(pixmap_size.width(), pixmap_size.height()).unwrap();
        rtree.render(tiny_skia::Transform::default(), &mut pixmap.as_mut());

        // tiny-skia stores premultiplied RGBA, while `RgbaImage` expects straight
        // (non-premultiplied) alpha. Demultiply each pixel so semi-transparent
        // areas keep their intended colour instead of being darkened.
        let rgba: Vec<u8> = pixmap
            .pixels()
            .iter()
            .flat_map(|pixel| {
                let color = pixel.demultiply();
                [color.red(), color.green(), color.blue(), color.alpha()]
            })
            .collect();

        Self {
            data: DynamicImage::ImageRgba8(
                RgbaImage::from_raw(pixmap.width(), pixmap.height(), rgba).unwrap(),
            ),
            format: None,
        }
    }
}
Loading