-use crate::error::{LemmyError, LemmyResult};
+use crate::error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult};
use itertools::Itertools;
use once_cell::sync::Lazy;
-use regex::Regex;
+use regex::{Regex, RegexBuilder};
use totp_rs::{Secret, TOTP};
use url::Url;
// Actor names: three or more ASCII letters, digits, or underscores, and nothing else.
static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
// Post titles: must contain a run of at least three consecutive non-whitespace characters.
// NOTE(review): the pattern is unanchored and wrapped in `.*`, so the `200` upper bound on the
// `+` line does not appear to cap the overall title length; confirm whether a separate
// length check is intended.
static VALID_POST_TITLE_REGEX: Lazy<Regex> =
- Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
+ Lazy::new(|| Regex::new(r".*\S{3,200}.*").expect("compile regex"));
// Matrix IDs: `@`, a local part, `:`, then a dotted server name ending in two or more letters.
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
});
Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
.expect("compile regex")
});
+
// Length limits used by the validators in this file: `BODY_MAX_LENGTH` and
// `POST_BODY_MAX_LENGTH` by `is_valid_body_field` (the latter when `post` is true),
// `BIO_MAX_LENGTH` by `is_valid_bio_field`, and the `SITE_*` limits by the site name and
// site description length checks.
const BODY_MAX_LENGTH: usize = 10000;
+const POST_BODY_MAX_LENGTH: usize = 50000;
const BIO_MAX_LENGTH: usize = 300;
+const SITE_NAME_MAX_LENGTH: usize = 20;
+const SITE_NAME_MIN_LENGTH: usize = 1;
+const SITE_DESCRIPTION_MAX_LENGTH: usize = 150;
+// Invisible Unicode characters (source: https://invisible-characters.com/); `is_valid_display_name` rejects any name containing one of them.
+const FORBIDDEN_DISPLAY_CHARS: [char; 53] = [
+ '\u{0009}',
+ '\u{00a0}',
+ '\u{00ad}',
+ '\u{034f}',
+ '\u{061c}',
+ '\u{115f}',
+ '\u{1160}',
+ '\u{17b4}',
+ '\u{17b5}',
+ '\u{180e}',
+ '\u{2000}',
+ '\u{2001}',
+ '\u{2002}',
+ '\u{2003}',
+ '\u{2004}',
+ '\u{2005}',
+ '\u{2006}',
+ '\u{2007}',
+ '\u{2008}',
+ '\u{2009}',
+ '\u{200a}',
+ '\u{200b}',
+ '\u{200c}',
+ '\u{200d}',
+ '\u{200e}',
+ '\u{200f}',
+ '\u{202f}',
+ '\u{205f}',
+ '\u{2060}',
+ '\u{2061}',
+ '\u{2062}',
+ '\u{2063}',
+ '\u{2064}',
+ '\u{206a}',
+ '\u{206b}',
+ '\u{206c}',
+ '\u{206d}',
+ '\u{206e}',
+ '\u{206f}',
+ '\u{3000}',
+ '\u{2800}',
+ '\u{3164}',
+ '\u{feff}',
+ '\u{ffa0}',
+ '\u{1d159}',
+ '\u{1d173}',
+ '\u{1d174}',
+ '\u{1d175}',
+ '\u{1d176}',
+ '\u{1d177}',
+ '\u{1d178}',
+ '\u{1d179}',
+ '\u{1d17a}',
+];
fn has_newline(name: &str) -> bool {
name.contains('\n')
&& VALID_ACTOR_NAME_REGEX.is_match(name)
&& !has_newline(name);
if !check {
- Err(LemmyError::from_message("invalid_name"))
+ Err(LemmyErrorType::InvalidName.into())
} else {
Ok(())
}
// Can't do a regex here, reverse lookarounds not supported
pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> LemmyResult<()> {
- let check = !name.starts_with('@')
- && !name.starts_with('\u{200b}')
+ let check = !name.contains(FORBIDDEN_DISPLAY_CHARS)
+ && !name.starts_with('@')
&& name.chars().count() >= 3
&& name.chars().count() <= actor_name_max_length
&& !has_newline(name);
if !check {
- Err(LemmyError::from_message("invalid_username"))
+ Err(LemmyErrorType::InvalidDisplayName.into())
} else {
Ok(())
}
pub fn is_valid_matrix_id(matrix_id: &str) -> LemmyResult<()> {
let check = VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id);
if !check {
- Err(LemmyError::from_message("invalid_matrix_id"))
+ Err(LemmyErrorType::InvalidMatrixId.into())
} else {
Ok(())
}
/// Checks a post title: it must match `VALID_POST_TITLE_REGEX` and must not contain a newline.
///
/// Returns `Ok(())` when both conditions hold and an error otherwise (`InvalidPostTitle` on the
/// `+` side of this change).
pub fn is_valid_post_title(title: &str) -> LemmyResult<()> {
let check = VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title);
if !check {
- Err(LemmyError::from_message("invalid_post_title"))
+ Err(LemmyErrorType::InvalidPostTitle.into())
} else {
Ok(())
}
}
/// This could be post bodies, comments, or any description field
-pub fn is_valid_body_field(body: &Option<String>) -> LemmyResult<()> {
+pub fn is_valid_body_field(body: &Option<String>, post: bool) -> LemmyResult<()> {
if let Some(body) = body {
- let check = body.chars().count() <= BODY_MAX_LENGTH;
+ let check = if post {
+ body.chars().count() <= POST_BODY_MAX_LENGTH
+ } else {
+ body.chars().count() <= BODY_MAX_LENGTH
+ };
+
if !check {
- Err(LemmyError::from_message("invalid_body_field"))
+ Err(LemmyErrorType::InvalidBodyField.into())
} else {
Ok(())
}
}
/// Validates a user bio against `BIO_MAX_LENGTH`.
///
/// On the `+` side of this change the check is delegated to `max_length_check`, which reports
/// `BioLengthOverflow` for an over-long bio; the removed lines compared `chars().count()` directly.
pub fn is_valid_bio_field(bio: &str) -> LemmyResult<()> {
- let check = bio.chars().count() <= BIO_MAX_LENGTH;
- if !check {
- Err(LemmyError::from_message("bio_length_overflow"))
+ max_length_check(bio, BIO_MAX_LENGTH, LemmyErrorType::BioLengthOverflow)
+}
+
+/// Validates the length of a site name against the limits defined in the DB.
+///
+/// A name shorter than `SITE_NAME_MIN_LENGTH` yields `SiteNameRequired`; one longer than
+/// `SITE_NAME_MAX_LENGTH` yields `SiteNameLengthOverflow`. Length is measured by
+/// `min_max_length_check`.
+pub fn site_name_length_check(name: &str) -> LemmyResult<()> {
+  let too_short = LemmyErrorType::SiteNameRequired;
+  let too_long = LemmyErrorType::SiteNameLengthOverflow;
+  min_max_length_check(name, SITE_NAME_MIN_LENGTH, SITE_NAME_MAX_LENGTH, too_short, too_long)
+}
+
+/// Validates the length of a site description against the limit defined in the DB.
+///
+/// A description longer than `SITE_DESCRIPTION_MAX_LENGTH` (as measured by
+/// `max_length_check`) yields `SiteDescriptionLengthOverflow`.
+pub fn site_description_length_check(description: &str) -> LemmyResult<()> {
+  let too_long = LemmyErrorType::SiteDescriptionLengthOverflow;
+  max_length_check(description, SITE_DESCRIPTION_MAX_LENGTH, too_long)
+}
+
+/// Returns `Err(error_type)` when `item` is longer than `max_length`, otherwise `Ok(())`.
+///
+/// Length is measured in characters (`chars().count()`), not UTF-8 bytes. `str::len` counts
+/// bytes, which would reject text containing multi-byte characters well before it reaches
+/// `max_length` characters.
+fn max_length_check(item: &str, max_length: usize, error_type: LemmyErrorType) -> LemmyResult<()> {
+  if item.chars().count() > max_length {
+    Err(error_type.into())
+  } else {
+    Ok(())
+  }
+}
+
+/// Checks that the length of `item` lies within `min_length..=max_length`.
+///
+/// Returns `Err(max_msg)` when `item` is too long, `Err(min_msg)` when it is too short, and
+/// `Ok(())` otherwise. The upper bound is tested first.
+fn min_max_length_check(
+  item: &str,
+  min_length: usize,
+  max_length: usize,
+  min_msg: LemmyErrorType,
+  max_msg: LemmyErrorType,
+) -> LemmyResult<()> {
+  // Count characters rather than UTF-8 bytes (`str::len`), so multi-byte text is not rejected
+  // before it actually reaches the character limit.
+  let length = item.chars().count();
+  if length > max_length {
+    Err(max_msg.into())
+  } else if length < min_length {
+    Err(min_msg.into())
} else {
Ok(())
}
}
+/// Compiles an optional, user-supplied regex and screens it for common mistakes before it is
+/// written to the DB.
+///
+/// * `None` or an empty string yields `Ok(None)`: an empty pattern is treated as "no regex",
+///   the same behavior that happens downstream before the write to the database.
+/// * A pattern that fails to compile (case-insensitively) yields `InvalidRegex`.
+/// * A pattern that matches the string `"1"` yields `PermissiveRegex`.
+/// * Anything else yields `Ok(Some(regex))`.
+pub fn build_and_check_regex(regex_str_opt: &Option<&str>) -> LemmyResult<Option<Regex>> {
+  let pattern = match regex_str_opt {
+    Some(pattern) if !pattern.is_empty() => *pattern,
+    _ => return Ok(None),
+  };
+
+  let compiled = RegexBuilder::new(pattern)
+    .case_insensitive(true)
+    .build()
+    .with_lemmy_type(LemmyErrorType::InvalidRegex)?;
+
+  // It is hard to know which user-crafted patterns match any text at all. As a simple
+  // heuristic, probe with an innocuous string (a single digit): a pattern that matches it is
+  // most likely one that accidentally matches everything.
+  if compiled.is_match("1") {
+    Err(LemmyErrorType::PermissiveRegex.into())
+  } else {
+    Ok(Some(compiled))
+  }
+}
+
+
pub fn clean_url_params(url: &Url) -> Url {
let mut url_out = url.clone();
if url.query().is_some() {
// Throw an error if their token is missing
let token = totp_token
.as_deref()
- .ok_or_else(|| LemmyError::from_message("missing_totp_token"))?;
+ .ok_or(LemmyErrorType::MissingTotpToken)?;
let totp = build_totp_2fa(site_name, username, totp_secret)?;
let check_passed = totp.check_current(token)?;
if !check_passed {
- return Err(LemmyError::from_message("incorrect_totp token"));
+ return Err(LemmyErrorType::IncorrectTotpToken.into());
}
}
let sec = Secret::Raw(secret.as_bytes().to_vec());
let sec_bytes = sec
.to_bytes()
- .map_err(|_| LemmyError::from_message("Couldnt parse totp secret"))?;
+ .map_err(|_| LemmyErrorType::CouldntParseTotpSecret)?;
TOTP::new(
totp_rs::Algorithm::SHA256,
Some(site_name.to_string()),
username.to_string(),
)
- .map_err(|e| LemmyError::from_error_message(e, "Couldnt generate TOTP"))
+ .with_lemmy_type(LemmyErrorType::CouldntGenerateTotp)
}
pub fn check_site_visibility_valid(
let federation_enabled = new_federation_enabled.unwrap_or(current_federation_enabled);
if private_instance && federation_enabled {
- return Err(LemmyError::from_message(
- "cant_enable_private_instance_and_federation_together",
- ));
+ return Err(LemmyErrorType::CantEnablePrivateInstanceAndFederationTogether.into());
}
Ok(())
}
+/// Accepts a missing URL, or one whose scheme is exactly `http` or `https`.
+///
+/// Any other scheme yields `InvalidUrlScheme`.
+pub fn check_url_scheme(url: &Option<Url>) -> LemmyResult<()> {
+  match url.as_ref().map(Url::scheme) {
+    None | Some("http") | Some("https") => Ok(()),
+    Some(_) => Err(LemmyErrorType::InvalidUrlScheme.into()),
+  }
+}
+
#[cfg(test)]
mod tests {
+ #![allow(clippy::unwrap_used)]
+ #![allow(clippy::indexing_slicing)]
+
use super::build_totp_2fa;
- use crate::utils::validation::{
- check_site_visibility_valid,
- clean_url_params,
- generate_totp_2fa_secret,
- is_valid_actor_name,
- is_valid_display_name,
- is_valid_matrix_id,
- is_valid_post_title,
+ use crate::{
+ error::LemmyErrorType,
+ utils::validation::{
+ build_and_check_regex,
+ check_site_visibility_valid,
+ check_url_scheme,
+ clean_url_params,
+ generate_totp_2fa_secret,
+ is_valid_actor_name,
+ is_valid_bio_field,
+ is_valid_display_name,
+ is_valid_matrix_id,
+ is_valid_post_title,
+ site_description_length_check,
+ site_name_length_check,
+ BIO_MAX_LENGTH,
+ SITE_DESCRIPTION_MAX_LENGTH,
+ SITE_NAME_MAX_LENGTH,
+ },
};
use url::Url;
assert!(totp.is_ok());
}
+  #[test]
+  fn test_valid_site_name() {
+    // Boundary values that must be accepted: exactly the maximum length, and one character.
+    let accepted = ["A".repeat(SITE_NAME_MAX_LENGTH), String::from("A")];
+    for name in &accepted {
+      assert!(
+        site_name_length_check(name).is_ok(),
+        "Expected {} of length {} to be Ok.",
+        name,
+        name.len()
+      );
+    }
+
+    // One past the maximum, and the empty string, each paired with the error it must produce.
+    let rejected = [
+      (
+        "A".repeat(SITE_NAME_MAX_LENGTH + 1),
+        LemmyErrorType::SiteNameLengthOverflow,
+      ),
+      (String::new(), LemmyErrorType::SiteNameRequired),
+    ];
+    for (name, expected_err) in &rejected {
+      let outcome = site_name_length_check(name);
+
+      assert!(outcome.is_err());
+      assert!(
+        outcome.unwrap_err().error_type.eq(expected_err),
+        "Testing {}, expected error {}",
+        name,
+        expected_err
+      );
+    }
+  }
+
+  #[test]
+  fn test_valid_bio() {
+    // A bio of exactly the maximum length is accepted.
+    let at_limit = "A".repeat(BIO_MAX_LENGTH);
+    assert!(is_valid_bio_field(&at_limit).is_ok());
+
+    // One character more must fail with the overflow error.
+    let over_limit = "A".repeat(BIO_MAX_LENGTH + 1);
+    let outcome = is_valid_bio_field(&over_limit);
+
+    assert!(
+      outcome.is_err()
+        && outcome
+          .unwrap_err()
+          .error_type
+          .eq(&LemmyErrorType::BioLengthOverflow)
+    );
+  }
+
+  #[test]
+  fn test_valid_site_description() {
+    // A description of exactly the maximum length is accepted.
+    let at_limit = "A".repeat(SITE_DESCRIPTION_MAX_LENGTH);
+    assert!(site_description_length_check(&at_limit).is_ok());
+
+    // One character more must fail with the overflow error.
+    let over_limit = "A".repeat(SITE_DESCRIPTION_MAX_LENGTH + 1);
+    let outcome = site_description_length_check(&over_limit);
+
+    assert!(
+      outcome.is_err()
+        && outcome
+          .unwrap_err()
+          .error_type
+          .eq(&LemmyErrorType::SiteDescriptionLengthOverflow)
+    );
+  }
+
+  #[test]
+  fn test_valid_slur_regex() {
+    // No regex, an empty regex, and a narrow alternation must all be accepted.
+    for candidate in [None, Some(""), Some("(foo|bar)")] {
+      let outcome = build_and_check_regex(&candidate);
+
+      assert!(outcome.is_ok(), "Testing regex: {:?}", candidate);
+    }
+  }
+
+  #[test]
+  fn test_too_permissive_slur_regex() {
+    // Each pattern is paired with the error it must produce: one that does not compile, and
+    // two that match the probe string.
+    let rejected = [
+      (Some("["), LemmyErrorType::InvalidRegex),
+      (Some("(foo|bar|)"), LemmyErrorType::PermissiveRegex),
+      (Some(".*"), LemmyErrorType::PermissiveRegex),
+    ];
+
+    for (regex_str, expected_err) in &rejected {
+      let outcome = build_and_check_regex(regex_str);
+
+      assert!(outcome.is_err());
+      assert!(
+        outcome.unwrap_err().error_type.eq(expected_err),
+        "Testing regex {:?}, expected error {}",
+        regex_str,
+        expected_err
+      );
+    }
+  }
+
// Exercises `check_site_visibility_valid`: the first argument set must be rejected and the
// other two accepted.
// NOTE(review): the function's signature is not visible in this excerpt. Presumably the
// arguments are the current private-instance and federation flags followed by their optional
// new values; confirm against the definition.
#[test]
fn test_check_site_visibility_valid() {
assert!(check_site_visibility_valid(true, true, &None, &None).is_err());
assert!(check_site_visibility_valid(false, false, &Some(true), &None).is_ok());
assert!(check_site_visibility_valid(false, false, &None, &Some(true)).is_ok());
}
+
+  #[test]
+  fn test_check_url_scheme() {
+    // A missing URL is always fine.
+    assert!(check_url_scheme(&None).is_ok());
+
+    // Only `http` and `https` are accepted.
+    for accepted in ["http://example.com", "https://example.com"] {
+      assert!(check_url_scheme(&Some(Url::parse(accepted).unwrap())).is_ok());
+    }
+
+    // Every other scheme is rejected.
+    for rejected in ["ftp://example.com", "javascript:void"] {
+      assert!(check_url_scheme(&Some(Url::parse(rejected).unwrap())).is_err());
+    }
+  }
}