R package for simple string manipulation

Description

Clean, wrangle, and parse character [string] vectors using exclusively base R functions.

Usage

Count

Count number of matches.

## count all there/their/they're
chr_count(x, "there|their|they\\S?re", ignore.case = TRUE)
#> [1] 1 1 1 0 0

Remove

Remove text patterns.

## remove URLs
chr_remove_links(x)
#> [1] "this one is @there\n  has #MultipleLines  and \n   @twitter"   
#> [2] "this @one #istotally their and \n  some non-ascii symbols: ¿ ;"
#> [3] "this one is they're "                                          
#> [4] "this one #HasHashtags #afew #ofthem"                           
#> [5] "and more @kearneymw at "

## string together functions with magrittr pipe
library(magrittr)

## remove mentions and extra [white] spaces
chr_remove_mentions(x) %>%
  chr_remove_ws()
#> [1] "this one is has #MultipleLines https://github.com and http://twitter.com"
#> [2] "this #istotally their and some non-ascii symbols: ¿ ;"                   
#> [3] "this one is they're https://github.com"                                  
#> [4] "this one #HasHashtags #afew #ofthem"                                     
#> [5] "and more at https://mikew.com"

## remove hashtags
chr_remove_hashtags(x)
#> [1] "this one is @there\n  has  https://github.com and \n  http://twitter.com @twitter"
#> [2] "this @one  their and \n  some non-ascii symbols: ¿ ;"                             
#> [3] "this one is they're https://github.com"                                           
#> [4] "this one   "                                                                      
#> [5] "and more @kearneymw at https://mikew.com"

## remove hashtags, line breaks, and extra spaces
x %>%
  chr_remove_hashtags() %>%
  chr_remove_linebreaks() %>%
  chr_remove_ws()
#> [1] "this one is @there has https://github.com and http://twitter.com @twitter"
#> [2] "this @one their and some non-ascii symbols: ¿ ;"                          
#> [3] "this one is they're https://github.com"                                   
#> [4] "this one"                                                                 
#> [5] "and more @kearneymw at https://mikew.com"

## remove links and extract words
x %>%
  chr_remove_links() %>%
  chr_remove_mentions() %>%
  chr_extract_words()
#> [[1]]
#> [1] "this"          "one"           "is"            "has"          
#> [5] "MultipleLines" "and"          
#> 
#> [[2]]
#> [1] "this"      "istotally" "their"     "and"       "some"      "non-ascii"
#> [7] "symbols"  
#> 
#> [[3]]
#> [1] "this"    "one"     "is"      "they're"
#> 
#> [[4]]
#> [1] "this"        "one"         "HasHashtags" "afew"        "ofthem"     
#> 
#> [[5]]
#> [1] "and"  "more" "at"

Contributions

Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.