// Patch context retained from the original paste. The text was a git diff that had
// been collapsed onto two physical lines; the only complete code unit in it is the
// new file `src/boundary.rs`, reproduced below as formatted Rust. The surrounding
// hunks are kept in comments so that nothing from the patch is lost.
//
// diff --git a/justfile b/justfile
// index a6f0b24..4bd1864 100644
// --- a/justfile
// +++ b/justfile
// @@ -3,7 +3,7 @@ test:
//      cargo test --features random
//  watch-test:
// -    watchexec -- "reset && just test"
// +    watchexec -e rs -rc reset -- just test
//  build:
//      cargo build --all
// @@ -18,4 +18,4 @@ watch-doc:
//      watchexec -- "just doc && cargo test --all-features --doc"
//  tree:
// -    tree -I target
// +    tree -I target
// \ No newline at end of file
//
// diff --git a/src/boundary.rs b/src/boundary.rs
// new file mode 100644
// index 0000000..1df82c8
// --- /dev/null
// +++ b/src/boundary.rs
// @@ -0,0 +1,116 @@

/// A rule describing where a string is cut into words.
///
/// `split` evaluates `condition` on the remainder of the input at every character
/// position. When it returns `true`, the word in progress ends `start` characters
/// after that position and the following `len` characters are dropped.
pub struct Boundary {
    /// Predicate evaluated on the remainder of the input, beginning at the
    /// character currently being examined.
    condition: fn(&str) -> bool,
    /// Number of characters, counted from the matching position, that still
    /// belong to the word being closed.
    start: usize,
    /// Number of characters removed from the output after the cut (for example
    /// the delimiter itself). Zero means nothing is removed.
    len: usize,
}

impl Boundary {
    // TODO maybe use graphemes here

    /// Cuts at a single space and removes it.
    pub const SPACE: Boundary = Boundary {
        condition: |s: &str| s.starts_with(' '),
        start: 0,
        len: 1,
    };

    /// Cuts at a single hyphen and removes it.
    pub const HYPHEN: Boundary = Boundary {
        condition: |s: &str| s.starts_with('-'),
        start: 0,
        len: 1,
    };

    /// Cuts at a single underscore and removes it.
    pub const UNDERSCORE: Boundary = Boundary {
        condition: |s: &str| s.starts_with('_'),
        start: 0,
        len: 1,
    };

    /// Cuts between a lowercase character and the uppercase character that
    /// follows it; no characters are removed.
    pub const LOWER_UPPER: Boundary = Boundary {
        condition: |s: &str| {
            let mut chars = s.chars();
            matches!(chars.next(), Some(c) if c.is_lowercase())
                && matches!(chars.next(), Some(c) if c.is_uppercase())
        },
        start: 1,
        len: 0,
    };

    /// Cuts after the first of two uppercase characters that are followed by a
    /// lowercase character (`"XMLRequest"` becomes `"XML"`, `"Request"`); no
    /// characters are removed.
    pub const ACRONYM: Boundary = Boundary {
        condition: |s: &str| {
            let mut chars = s.chars();
            matches!(chars.next(), Some(c) if c.is_uppercase())
                && matches!(chars.next(), Some(c) if c.is_uppercase())
                && matches!(chars.next(), Some(c) if c.is_lowercase())
        },
        start: 1,
        len: 0,
    };

    /// Returns the three delimiter boundaries: space, hyphen and underscore.
    pub const fn default_delimiters() -> [Boundary; 3] {
        [Boundary::SPACE, Boundary::HYPHEN, Boundary::UNDERSCORE]
    }
}

/// Returns the byte offset reached by skipping `chars` characters of `s`,
/// starting at byte offset `from`, clamped to `s.len()`.
///
/// `from` must lie on a character boundary of `s`.
fn byte_offset_after(s: &str, from: usize, chars: usize) -> usize {
    s[from..]
        .char_indices()
        .nth(chars)
        .map_or(s.len(), |(offset, _)| from + offset)
}

// another idea for this algorithm
// build an array of integers where
// 0 means no split
// 1 means the split is left of this char
// 2 means this character is removed
// then I can build the word at the end

/// Splits `s` into words at every position where one of `boundaries` matches.
///
/// Boundaries are tried in slice order at each character position and the first
/// one whose condition holds is applied. The returned slices borrow from `s`.
/// Empty words are kept, so `"a--b"` split on hyphens yields `["a", "", "b"]`,
/// and an empty input yields a single empty word.
///
/// The scan walks character boundaries rather than raw byte indices, and
/// `Boundary::start` / `Boundary::len` are interpreted as character counts, so
/// input containing multi-byte UTF-8 characters is never sliced mid-character.
fn split<'s>(s: &'s str, boundaries: &[Boundary]) -> Vec<&'s str> {
    let mut words = Vec::new();
    let mut last_end = 0;

    for (position, _) in s.char_indices() {
        let rest = &s[position..];
        if let Some(boundary) = boundaries.iter().find(|b| (b.condition)(rest)) {
            // Translate the character-based offsets into byte offsets of `s`.
            let cut_start = byte_offset_after(s, position, boundary.start);
            let cut_end = byte_offset_after(s, cut_start, boundary.len);
            words.push(&s[last_end..cut_start]);
            last_end = cut_end;
        }
    }

    words.push(&s[last_end..]);
    words
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hyphen() {
        let s = "a-b-c";
        let v = split(s, &[Boundary::HYPHEN]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn underscore() {
        let s = "a_b_c";
        let v = split(s, &[Boundary::UNDERSCORE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn space() {
        let s = "a b c";
        let v = split(s, &[Boundary::SPACE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn delimiters() {
        let s = "aaa-bbb_ccc ddd ddd-eee";
        let v = split(s, &Boundary::default_delimiters());
        assert_eq!(v, vec!["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
    }

    #[test]
    fn lower_upper() {
        let s = "lowerUpperUpper";
        let v = split(s, &[Boundary::LOWER_UPPER]);
        assert_eq!(v, vec!["lower", "Upper", "Upper"]);
    }

    #[test]
    fn acronym() {
        let s = "XMLRequest";
        let v = split(s, &[Boundary::ACRONYM]);
        assert_eq!(v, vec!["XML", "Request"]);
    }

    #[test]
    fn multibyte() {
        // Regression: byte-indexed scanning used to slice inside multi-byte characters.
        assert_eq!(split("é-ü", &[Boundary::HYPHEN]), vec!["é", "ü"]);
        assert_eq!(split("caféBar", &[Boundary::LOWER_UPPER]), vec!["café", "Bar"]);
    }
}

// diff --git a/src/lib.rs b/src/lib.rs
// index 37edc4f..4d8b349 100644
// --- a/src/lib.rs
// +++ b/src/lib.rs
// @@ -186,11 +186,13 @@
//  //! This will add two additional cases: Random and PseudoRandom. You can read about their
//  //! construction in the [Case enum](enum.Case.html).
// +mod boundary;
//  mod case;
//  mod converter;
//  mod pattern;
//  mod segmentation;
// +pub use boundary::Boundary as NewBoundary;
//  pub use case::Case;
//  pub use converter::Converter;
//  pub use pattern::Pattern;
// @@ -270,7 +272,8 @@ pub trait Casing> {
//  }
//  impl> Casing for T
// -where T: ToString
// +where
// +    T: ToString,
//  {
//      fn to_case(&self, case: Case) -> String {
//          StateConverter::new(self).to_case(case)