X-Git-Url: http://these/git/?a=blobdiff_plain;f=crates%2Futils%2Fsrc%2Futils%2Fvalidation.rs;h=0c955b12220d08825bf258e83c2a005a7eb67c5c;hb=92568956353f21649ed9aff68b42699c9d036f30;hp=41103332c6d7a2a225159344423ffdcd6dfab32c;hpb=da3e3b6d8a6684d58eecef91c675a696005f8330;p=lemmy.git diff --git a/crates/utils/src/utils/validation.rs b/crates/utils/src/utils/validation.rs index 41103332..0c955b12 100644 --- a/crates/utils/src/utils/validation.rs +++ b/crates/utils/src/utils/validation.rs @@ -1,14 +1,14 @@ -use crate::error::{LemmyError, LemmyResult}; +use crate::error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult}; use itertools::Itertools; use once_cell::sync::Lazy; -use regex::Regex; +use regex::{Regex, RegexBuilder}; use totp_rs::{Secret, TOTP}; use url::Url; static VALID_ACTOR_NAME_REGEX: Lazy = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex")); static VALID_POST_TITLE_REGEX: Lazy = - Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex")); + Lazy::new(|| Regex::new(r".*\S{3,200}.*").expect("compile regex")); static VALID_MATRIX_ID_REGEX: Lazy = Lazy::new(|| { Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex") }); @@ -17,8 +17,69 @@ static CLEAN_URL_PARAMS_REGEX: Lazy = Lazy::new(|| { Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$") .expect("compile regex") }); + const BODY_MAX_LENGTH: usize = 10000; +const POST_BODY_MAX_LENGTH: usize = 50000; const BIO_MAX_LENGTH: usize = 300; +const SITE_NAME_MAX_LENGTH: usize = 20; +const SITE_NAME_MIN_LENGTH: usize = 1; +const SITE_DESCRIPTION_MAX_LENGTH: usize = 150; +//Invisible unicode characters, taken from https://invisible-characters.com/ +const FORBIDDEN_DISPLAY_CHARS: [char; 53] = [ + '\u{0009}', + '\u{00a0}', + '\u{00ad}', + '\u{034f}', + '\u{061c}', + '\u{115f}', + '\u{1160}', + '\u{17b4}', + '\u{17b5}', + '\u{180e}', + '\u{2000}', + '\u{2001}', + '\u{2002}', + '\u{2003}', + '\u{2004}', + '\u{2005}', + '\u{2006}', + '\u{2007}', + '\u{2008}', + '\u{2009}', + '\u{200a}', + '\u{200b}', + '\u{200c}', + '\u{200d}', + '\u{200e}', + '\u{200f}', + '\u{202f}', + '\u{205f}', + '\u{2060}', + '\u{2061}', + '\u{2062}', + '\u{2063}', + '\u{2064}', + '\u{206a}', + '\u{206b}', + '\u{206c}', + '\u{206d}', + '\u{206e}', + '\u{206f}', + '\u{3000}', + '\u{2800}', + '\u{3164}', + '\u{feff}', + '\u{ffa0}', + '\u{1d159}', + '\u{1d173}', + '\u{1d174}', + '\u{1d175}', + '\u{1d176}', + '\u{1d177}', + '\u{1d178}', + '\u{1d179}', + '\u{1d17a}', +]; fn has_newline(name: &str) -> bool { name.contains('\n') @@ -29,7 +90,7 @@ pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> LemmyRes && VALID_ACTOR_NAME_REGEX.is_match(name) && !has_newline(name); if !check { - Err(LemmyError::from_message("invalid_name")) + Err(LemmyErrorType::InvalidName.into()) } else { Ok(()) } @@ -37,13 +98,13 @@ pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> LemmyRes // Can't do a regex here, reverse lookarounds not supported pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> LemmyResult<()> { - let check = !name.starts_with('@') - && !name.starts_with('\u{200b}') + let check = !name.contains(FORBIDDEN_DISPLAY_CHARS) + && !name.starts_with('@') && name.chars().count() >= 3 && name.chars().count() <= actor_name_max_length && !has_newline(name); if !check { - Err(LemmyError::from_message("invalid_username")) + Err(LemmyErrorType::InvalidDisplayName.into()) } else { Ok(()) } @@ -52,7 +113,7 @@ pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> LemmyR pub fn is_valid_matrix_id(matrix_id: &str) -> LemmyResult<()> { let check = VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id); if !check { - Err(LemmyError::from_message("invalid_matrix_id")) + Err(LemmyErrorType::InvalidMatrixId.into()) } else { Ok(()) } @@ -61,18 +122,23 @@ pub fn is_valid_matrix_id(matrix_id: &str) -> LemmyResult<()> { pub fn is_valid_post_title(title: &str) -> LemmyResult<()> { let check = VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title); if !check { - Err(LemmyError::from_message("invalid_post_title")) + Err(LemmyErrorType::InvalidPostTitle.into()) } else { Ok(()) } } /// This could be post bodies, comments, or any description field -pub fn is_valid_body_field(body: &Option) -> LemmyResult<()> { +pub fn is_valid_body_field(body: &Option, post: bool) -> LemmyResult<()> { if let Some(body) = body { - let check = body.chars().count() <= BODY_MAX_LENGTH; + let check = if post { + body.chars().count() <= POST_BODY_MAX_LENGTH + } else { + body.chars().count() <= BODY_MAX_LENGTH + }; + if !check { - Err(LemmyError::from_message("invalid_body_field")) + Err(LemmyErrorType::InvalidBodyField.into()) } else { Ok(()) } @@ -82,14 +148,83 @@ pub fn is_valid_body_field(body: &Option) -> LemmyResult<()> { } pub fn is_valid_bio_field(bio: &str) -> LemmyResult<()> { - let check = bio.chars().count() <= BIO_MAX_LENGTH; - if !check { - Err(LemmyError::from_message("bio_length_overflow")) + max_length_check(bio, BIO_MAX_LENGTH, LemmyErrorType::BioLengthOverflow) +} + +/// Checks the site name length, the limit as defined in the DB. +pub fn site_name_length_check(name: &str) -> LemmyResult<()> { + min_max_length_check( + name, + SITE_NAME_MIN_LENGTH, + SITE_NAME_MAX_LENGTH, + LemmyErrorType::SiteNameRequired, + LemmyErrorType::SiteNameLengthOverflow, + ) +} + +/// Checks the site description length, the limit as defined in the DB. +pub fn site_description_length_check(description: &str) -> LemmyResult<()> { + max_length_check( + description, + SITE_DESCRIPTION_MAX_LENGTH, + LemmyErrorType::SiteDescriptionLengthOverflow, + ) +} + +fn max_length_check(item: &str, max_length: usize, error_type: LemmyErrorType) -> LemmyResult<()> { + if item.len() > max_length { + Err(error_type.into()) + } else { + Ok(()) + } +} + +fn min_max_length_check( + item: &str, + min_length: usize, + max_length: usize, + min_msg: LemmyErrorType, + max_msg: LemmyErrorType, +) -> LemmyResult<()> { + if item.len() > max_length { + Err(max_msg.into()) + } else if item.len() < min_length { + Err(min_msg.into()) } else { Ok(()) } } +/// Attempts to build a regex and check it for common errors before inserting into the DB. +pub fn build_and_check_regex(regex_str_opt: &Option<&str>) -> LemmyResult> { + regex_str_opt.map_or_else( + || Ok(None::), + |regex_str| { + if regex_str.is_empty() { + // If the proposed regex is empty, return as having no regex at all; this is the same + // behavior that happens downstream before the write to the database. + return Ok(None::); + } + + RegexBuilder::new(regex_str) + .case_insensitive(true) + .build() + .with_lemmy_type(LemmyErrorType::InvalidRegex) + .and_then(|regex| { + // NOTE: It is difficult to know, in the universe of user-crafted regex, which ones + // may match against any string text. To keep it simple, we'll match the regex + // against an innocuous string - a single number - which should help catch a regex + // that accidentally matches against all strings. + if regex.is_match("1") { + return Err(LemmyErrorType::PermissiveRegex.into()); + } + + Ok(Some(regex)) + }) + }, + ) +} + pub fn clean_url_params(url: &Url) -> Url { let mut url_out = url.clone(); if url.query().is_some() { @@ -114,13 +249,13 @@ pub fn check_totp_2fa_valid( // Throw an error if their token is missing let token = totp_token .as_deref() - .ok_or_else(|| LemmyError::from_message("missing_totp_token"))?; + .ok_or(LemmyErrorType::MissingTotpToken)?; let totp = build_totp_2fa(site_name, username, totp_secret)?; let check_passed = totp.check_current(token)?; if !check_passed { - return Err(LemmyError::from_message("incorrect_totp token")); + return Err(LemmyErrorType::IncorrectTotpToken.into()); } } @@ -135,7 +270,7 @@ pub fn build_totp_2fa(site_name: &str, username: &str, secret: &str) -> Result Result) -> LemmyResult<()> { + if let Some(url) = url { + if url.scheme() != "http" && url.scheme() != "https" { + return Err(LemmyErrorType::InvalidUrlScheme.into()); + } + } + Ok(()) +} + #[cfg(test)] mod tests { + #![allow(clippy::unwrap_used)] + #![allow(clippy::indexing_slicing)] + use super::build_totp_2fa; - use crate::utils::validation::{ - check_site_visibility_valid, - clean_url_params, - generate_totp_2fa_secret, - is_valid_actor_name, - is_valid_display_name, - is_valid_matrix_id, - is_valid_post_title, + use crate::{ + error::LemmyErrorType, + utils::validation::{ + build_and_check_regex, + check_site_visibility_valid, + check_url_scheme, + clean_url_params, + generate_totp_2fa_secret, + is_valid_actor_name, + is_valid_bio_field, + is_valid_display_name, + is_valid_matrix_id, + is_valid_post_title, + site_description_length_check, + site_name_length_check, + BIO_MAX_LENGTH, + SITE_DESCRIPTION_MAX_LENGTH, + SITE_NAME_MAX_LENGTH, + }, }; use url::Url; @@ -246,6 +402,120 @@ mod tests { assert!(totp.is_ok()); } + #[test] + fn test_valid_site_name() { + let valid_names = [ + (0..SITE_NAME_MAX_LENGTH).map(|_| 'A').collect::(), + String::from("A"), + ]; + let invalid_names = [ + ( + &(0..SITE_NAME_MAX_LENGTH + 1) + .map(|_| 'A') + .collect::(), + LemmyErrorType::SiteNameLengthOverflow, + ), + (&String::new(), LemmyErrorType::SiteNameRequired), + ]; + + valid_names.iter().for_each(|valid_name| { + assert!( + site_name_length_check(valid_name).is_ok(), + "Expected {} of length {} to be Ok.", + valid_name, + valid_name.len() + ) + }); + + invalid_names + .iter() + .for_each(|(invalid_name, expected_err)| { + let result = site_name_length_check(invalid_name); + + assert!(result.is_err()); + assert!( + result.unwrap_err().error_type.eq(&expected_err.clone()), + "Testing {}, expected error {}", + invalid_name, + expected_err + ); + }); + } + + #[test] + fn test_valid_bio() { + assert!(is_valid_bio_field(&(0..BIO_MAX_LENGTH).map(|_| 'A').collect::()).is_ok()); + + let invalid_result = + is_valid_bio_field(&(0..BIO_MAX_LENGTH + 1).map(|_| 'A').collect::()); + + assert!( + invalid_result.is_err() + && invalid_result + .unwrap_err() + .error_type + .eq(&LemmyErrorType::BioLengthOverflow) + ); + } + + #[test] + fn test_valid_site_description() { + assert!(site_description_length_check( + &(0..SITE_DESCRIPTION_MAX_LENGTH) + .map(|_| 'A') + .collect::() + ) + .is_ok()); + + let invalid_result = site_description_length_check( + &(0..SITE_DESCRIPTION_MAX_LENGTH + 1) + .map(|_| 'A') + .collect::(), + ); + + assert!( + invalid_result.is_err() + && invalid_result + .unwrap_err() + .error_type + .eq(&LemmyErrorType::SiteDescriptionLengthOverflow) + ); + } + + #[test] + fn test_valid_slur_regex() { + let valid_regexes = [&None, &Some(""), &Some("(foo|bar)")]; + + valid_regexes.iter().for_each(|regex| { + let result = build_and_check_regex(regex); + + assert!(result.is_ok(), "Testing regex: {:?}", regex); + }); + } + + #[test] + fn test_too_permissive_slur_regex() { + let match_everything_regexes = [ + (&Some("["), LemmyErrorType::InvalidRegex), + (&Some("(foo|bar|)"), LemmyErrorType::PermissiveRegex), + (&Some(".*"), LemmyErrorType::PermissiveRegex), + ]; + + match_everything_regexes + .iter() + .for_each(|(regex_str, expected_err)| { + let result = build_and_check_regex(regex_str); + + assert!(result.is_err()); + assert!( + result.unwrap_err().error_type.eq(&expected_err.clone()), + "Testing regex {:?}, expected error {}", + regex_str, + expected_err + ); + }); + } + #[test] fn test_check_site_visibility_valid() { assert!(check_site_visibility_valid(true, true, &None, &None).is_err()); @@ -257,4 +527,13 @@ mod tests { assert!(check_site_visibility_valid(false, false, &Some(true), &None).is_ok()); assert!(check_site_visibility_valid(false, false, &None, &Some(true)).is_ok()); } + + #[test] + fn test_check_url_scheme() { + assert!(check_url_scheme(&None).is_ok()); + assert!(check_url_scheme(&Some(Url::parse("http://example.com").unwrap())).is_ok()); + assert!(check_url_scheme(&Some(Url::parse("https://example.com").unwrap())).is_ok()); + assert!(check_url_scheme(&Some(Url::parse("ftp://example.com").unwrap())).is_err()); + assert!(check_url_scheme(&Some(Url::parse("javascript:void").unwrap())).is_err()); + } }