Challenge 64

Word Frequency Counter

Write a word_frequency function that reads a string of text and counts the frequency of each word, ignoring case and punctuation.

Return the results as a sorted list of word-frequency pairs, where words are in lowercase and sorted alphabetically.

// Rust Bytes Issue 73: Word Frequency Counter

assert_eq!(word_frequency(""), Vec::< (String, u32) >::new());
assert_eq!(word_frequency("well-known well-known WELL-KNOWN"), vec![(("well-known".to_string(), 3))]);
assert_eq!(word_frequency("café CAFÉ résumé Résumé"), vec![(("café".to_string(), 2), ("résumé".to_string(), 2))]);
assert_eq!(
    word_frequency("The quick brown fox, the quick Brown FOX!"),
    vec![
        ("brown".to_string(), 2),
        ("fox".to_string(), 2),
        ("quick".to_string(), 2),
        ("the".to_string(), 2),

    ]
);

Write your solution below

// Rust Bytes Issue 73: Word Frequency Counter

pub fn word_frequency(input: &str) -> Vec<(String, u32)> {
    // your implementation goes here
}

#[cfg(test)]
mod tests {
    use super::word_frequency;

    #[test]
    fn basic_case() {
        let input = "The quick brown fox, the quick Brown FOX!";
        let expected = vec![
            ("brown".to_string(), 2),
            ("fox".to_string(), 2),
            ("quick".to_string(), 2),
            ("the".to_string(), 2),
        ];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn empty_string() {
        let input = "";
        let expected: Vec<(String, u32)> = vec![];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn only_punctuation() {
        let input = ".,!?;:-";
        let expected: Vec<(String, u32)> = vec![];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn single_word() {
        let input = "hello";
        let expected = vec![("hello".to_string(), 1)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn multiple_spaces_and_punctuation() {
        let input = "hello,,,   world!!!   HELLO.";
        let expected = vec![("hello".to_string(), 2), ("world".to_string(), 1)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn numbers_and_mixed_chars() {
        let input = "test123 test 123test TEST!";
        let expected = vec![("test".to_string(), 4)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn hyphenated_words() {
        let input = "well-known well-Known WELL-KNOWN";
        let expected = vec![("wellknown".to_string(), 3)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn apostrophes_in_words() {
        let input = "don't Don'T cant can't";
        let expected = vec![("cant".to_string(), 2), ("dont".to_string(), 2)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn unicode_words() {
        let input = "café CAFÉ résumé Résumé";
        let expected = vec![("café".to_string(), 2), ("résumé".to_string(), 2)];
        assert_eq!(word_frequency(input), expected)
    }
}

Solution

Click to Show/Hide Solution
#![allow(unused)]
fn main() {
// Rust Bytes Issue 73: Word Frequency Counter

pub fn word_frequency(input: &str) -> Vec<(String, u32)> {
    input
        .split_whitespace()
        .filter_map(|word| {
            let word = word
                .chars()
                .filter(|c| c.is_alphabetic())
                .collect::<String>()
                .to_lowercase();
            (!word.is_empty()).then_some(word)
        })
        .fold(std::collections::BTreeMap::new(), |mut map, word| {
            *map.entry(word).or_default() += 1;
            map
        })
        .into_iter()
        .collect()
}

#[cfg(test)]
mod tests {
    use super::word_frequency;

    #[test]
    fn basic_case() {
        let input = "The quick brown fox, the quick Brown FOX!";
        let expected = vec![
            ("brown".to_string(), 2),
            ("fox".to_string(), 2),
            ("quick".to_string(), 2),
            ("the".to_string(), 2),
        ];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn empty_string() {
        let input = "";
        let expected: Vec<(String, u32)> = vec![];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn only_punctuation() {
        let input = ".,!?;:-";
        let expected: Vec<(String, u32)> = vec![];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn single_word() {
        let input = "hello";
        let expected = vec![("hello".to_string(), 1)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn multiple_spaces_and_punctuation() {
        let input = "hello,,,   world!!!   HELLO.";
        let expected = vec![("hello".to_string(), 2), ("world".to_string(), 1)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn numbers_and_mixed_chars() {
        let input = "test123 test 123test TEST!";
        let expected = vec![("test".to_string(), 4)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn hyphenated_words() {
        let input = "well-known well-Known WELL-KNOWN";
        let expected = vec![("wellknown".to_string(), 3)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn apostrophes_in_words() {
        let input = "don't Don'T cant can't";
        let expected = vec![("cant".to_string(), 2), ("dont".to_string(), 2)];
        assert_eq!(word_frequency(input), expected);
    }

    #[test]
    fn unicode_words() {
        let input = "café CAFÉ résumé Résumé";
        let expected = vec![("café".to_string(), 2), ("résumé".to_string(), 2)];
        assert_eq!(word_frequency(input), expected)
    }
}
}