Extract abbreviations from text
extract_abbrev(txt, stopwords = "", connectors = "do|de|o|a")
"Lorem Ipsum STF ergo S.T.F. foo." |> extract_abbrev()
#> [[1]]
#> [1] "STF"
#>
"Lorem Ipsum FEDERAL SERVICE dolor sit S.T.F. foo." |> extract_abbrev()
#> [[1]]
#> [1] "FEDERAL SERVICE"
#>
"Lorem THE FEDERAL SERVICE dolor sit S.T.F. foo." |> extract_abbrev()
#> [[1]]
#> [1] "THE FEDERAL SERVICE"
#>
# Now using the `stopwords` argument to leave words such as "THE" out of the result:
"Lorem THE FEDERAL SERVICE dolor sit S.T.F. foo." |> extract_abbrev(stopwords = "THE")
#> [[1]]
#> [1] "FEDERAL SERVICE"
#>
"Lorem THE FEDERAL SERVICE dolor sit WE S.T.F. foo." |> extract_abbrev(stopwords = c("THE", "WE"))
#> [[1]]
#> [1] "FEDERAL SERVICE"
#>