Commit: sanitize_html

Kapu1178 committed Apr 21, 2024
1 parent 78de2f4 commit 79558d1

Showing 5 changed files with 257 additions and 1 deletion.
185 changes: 184 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default.

5 changes: 5 additions & 0 deletions Cargo.toml
@@ -63,6 +63,8 @@ dbpnoise = { version = "0.1.2", optional = true }
pathfinding = { version = "4.4", optional = true }
num-integer = { version = "0.1.45", optional = true }
dmi = { version = "0.3.1", optional = true }
ammonia = { version = "4.0.0", optional = true }
maplit = { version = "1.0.2", optional = true }

[features]
default = [
@@ -76,6 +78,7 @@ default = [
"log",
"noise",
"rustls_tls",
"sanitize",
"sql",
"time",
"toml",
@@ -93,6 +96,7 @@ all = [
"log",
"noise",
"rustls_tls",
"sanitize",
"sql",
"time",
"toml",
@@ -116,6 +120,7 @@ http = ["reqwest", "serde", "serde_json", "once_cell", "jobs"]
json = ["serde", "serde_json"]
log = ["chrono"]
sql = ["mysql", "serde", "serde_json", "once_cell", "dashmap", "jobs"]
sanitize = ["ammonia", "maplit", "serde_json"]
time = []
toml = ["serde", "serde_json", "toml-dep"]
url = ["url-dep", "percent-encoding"]
36 changes: 36 additions & 0 deletions dmsrc/sanitize.dm
@@ -0,0 +1,36 @@
/*
 * Takes in a string and json_encode()'d lists to produce a sanitized string.
 * This function operates on whitelists; there is currently no way to blacklist.
 * Args:
 * * text: the string to sanitize.
 * * attribute_whitelist_json: a json_encode()'d list of HTML attributes to allow in the final string.
 * * tag_whitelist_json: a json_encode()'d list of HTML tags to allow in the final string.
 */
#define rustg_sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) RUSTG_CALL(RUST_G, "sanitize_html")(text, attribute_whitelist_json, tag_whitelist_json)

/*
* Here is a recommended default tag whitelist
list(
"b","br",
"center", "code",
"dd", "del", "div", "dl", "dt",
"em",
"font",
"h1", "h2", "h3", "h4", "h5", "h6", "hr",
"i", "ins",
"li",
"menu",
"ol",
"p", "pre",
"span", "strong",
"table",
"tbody",
"td",
"th",
"thead",
"tfoot",
"tr",
"u",
"ul",
)
*/
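
For reference, a minimal DM-side usage sketch. The proc name, list names, and whitelist contents below are illustrative only and are not shipped by this commit; both whitelists are passed as json_encode()'d lists, in the argument order the macro expects.

/proc/sanitize_user_html(raw_text)
	// Illustrative whitelists; pick your own tags and attributes.
	var/static/list/tag_whitelist = list("b", "br", "em", "i", "p", "strong", "u")
	var/static/list/attribute_whitelist = list("class", "title")
	return rustg_sanitize_html(raw_text, json_encode(attribute_whitelist), json_encode(tag_whitelist))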
2 changes: 2 additions & 0 deletions src/lib.rs
@@ -36,6 +36,8 @@ pub mod pathfinder;
pub mod redis_pubsub;
#[cfg(feature = "redis_reliablequeue")]
pub mod redis_reliablequeue;
#[cfg(feature = "sanitize")]
pub mod sanitize;
#[cfg(feature = "sql")]
pub mod sql;
#[cfg(feature = "time")]
30 changes: 30 additions & 0 deletions src/sanitize.rs
@@ -0,0 +1,30 @@
use crate::error::Result;
use maplit::hashset;
use std::collections::HashSet;

byond_fn!(fn sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) {
    // On failure, hand the error message back to BYOND instead of a sanitized string.
    match seriously_sanitize_html(text, attribute_whitelist_json, tag_whitelist_json) {
        Ok(r) => Some(r),
        Err(e) => Some(e.to_string()),
    }
});

fn seriously_sanitize_html(
    text: &str,
    attribute_whitelist_json: &str,
    tag_whitelist_json: &str,
) -> Result<String> {
    let attribute_whitelist: HashSet<&str> = serde_json::from_str(attribute_whitelist_json)?;
    let tag_whitelist: HashSet<&str> = serde_json::from_str(tag_whitelist_json)?;

    // Start from ammonia's default set of allowed URL schemes and additionally allow byond:// links.
    let mut allowed_url_schemes = ammonia::Builder::default().clone_url_schemes();
    allowed_url_schemes.insert("byond");

    let sanitized = ammonia::Builder::empty()
        .clean_content_tags(hashset!["script", "style"]) // Remove script and style tags entirely, including their contents.
        .link_rel(Some("noopener")) // https://mathiasbynens.github.io/rel-noopener/
        .url_schemes(allowed_url_schemes)
        .generic_attributes(attribute_whitelist)
        .tags(tag_whitelist)
        .clean(text)
        .to_string();

    Ok(sanitized)
}
