This function is still under development. At this time, you can generate stopword lists in Portuguese and English.

There are many stopword lists available in R, like:

But none of them is categorized in a way that allows finer-grained control of the words by grammatical category.

Choose the language and which grammar tags to use to generate a stopword list. The lang parameter takes the two-letter language code (e.g. "pt" or "en"), and the categories parameter designates the grammar categories, following the Penn Treebank tag set. See here

POS Tag Description Example Description
CC coordinating conjunction and
CD cardinal number one, third
DT determiner the
EX existential there there is
PP pronoun me, you, he, she, it, we, they
PPZ pronoun
UH interjection oops!
V verb are be have variations of verbs to be, have,
gen_stopwords()
#>  [1] "e"                "ou"               "mas"              "que"             
#>  [5] "porque"           "por que"          "porquê"           "por quê"         
#>  [9] "se"               "como"             "primeiro"         "segundo"         
#> [13] "terceiro"         "quarto"           "quinto"           "sexto"           
#> [17] "sétimo"           "oitavo"           "nono"             "décimo"          
#> [21] "vigésimo"         "trigésimo"        "quadrigésimo"     "quinquagésimo"   
#> [25] "sexagésimo"       "septuagésimo"     "setuagésimo"      "octogésimo"      
#> [29] "nonagésimo"       "centésimo"        "ducentésimo"      "trecentésimo"    
#> [33] "quadringentésimo" "quingentésimo"    "seiscentésimo"    "sexcentésimo"    
#> [37] "septingentésimo"  "setingentésimo"   "octingentésimo"   "nongentésimo"    
#> [41] "milésimo"         "milionésimo"      "bilionésimo"
gen_stopwords(lang = "en", categories = "CC")
#> [1] "and" "but" "or"  "for" "yet" "so"
gen_stopwords(lang = "pt", categories = "CC DT")
#>  [1] "e"       "ou"      "mas"     "que"     "porque"  "por que" "porquê" 
#>  [8] "por quê" "se"      "como"    "o"       "a"       "os"      "as"
# to convert the list into a named vector
gen_stopwords(lang = "en", categories = "CC DT V")
#>  [1] "and"   "but"   "or"    "for"   "yet"   "so"    "a"     "the"   "this" 
#> [10] "am"    "are"   "is"    "be"    "can"   "could" "did"   "do"    "have" 
#> [19] "he"    "it"    "may"   "might" "must"  "need"  "no"    "not"   "now"  
#> [28] "of"    "on"    "she"   "that"  "to"    "was"   "were"

gen_stopwords(lang = "pt")
#>  [1] "e"                "ou"               "mas"              "que"             
#>  [5] "porque"           "por que"          "porquê"           "por quê"         
#>  [9] "se"               "como"             "primeiro"         "segundo"         
#> [13] "terceiro"         "quarto"           "quinto"           "sexto"           
#> [17] "sétimo"           "oitavo"           "nono"             "décimo"          
#> [21] "vigésimo"         "trigésimo"        "quadrigésimo"     "quinquagésimo"   
#> [25] "sexagésimo"       "septuagésimo"     "setuagésimo"      "octogésimo"      
#> [29] "nonagésimo"       "centésimo"        "ducentésimo"      "trecentésimo"    
#> [33] "quadringentésimo" "quingentésimo"    "seiscentésimo"    "sexcentésimo"    
#> [37] "septingentésimo"  "setingentésimo"   "octingentésimo"   "nongentésimo"    
#> [41] "milésimo"         "milionésimo"      "bilionésimo"
gen_stopwords(lang = "pt", categories = "V")
#>   [1] "ser"        "sou"        "sois"       "é"          "és"        
#>   [6] "somos"      "são"        "era"        "eram"       "éramos"    
#>  [11] "serei"      "será"       "serão"      "serás"      "fui"       
#>  [16] "foste"      "foi"        "fomos"      "fostes"     "foram"     
#>  [21] "eras"       "éreis"      "seremos"    "sereis"     "seja"      
#>  [26] "sejam"      "estar"      "estou"      "estás"      "está"      
#>  [31] "estamos"    "estais"     "estão"      "estive"     "estiveste" 
#>  [36] "esteve"     "estivemos"  "estivestes" "estiveram"  "estava"    
#>  [41] "estavas"    "estávamos"  "estáveis"   "estavam"    "estarei"   
#>  [46] "estarás"    "estará"     "estaremos"  "estareis"   "estarão"   
#>  [51] "esteja"     "estejam"    "ter"        "tenham"     "têem"      
#>  [56] "tenho"      "tens"       "tem"        "temos"      "tendes"    
#>  [61] "têm"        "tive"       "tiveste"    "teve"       "tivemos"   
#>  [66] "tivestes"   "tiveram"    "tinha"      "tinhas"     "tínhamos"  
#>  [71] "tínheis"    "tinham"     "terei"      "terás"      "terá"      
#>  [76] "teremos"    "tereis"     "terão"      "teria"      "teriam"    
#>  [81] "haver"      "houve"      "haveria"    "haveriam"   "hei"       
#>  [86] "hás"        "há"         "havemos"    "haveis"     "hão"       
#>  [91] "houver"     "houveres"   "houvermos"  "houverdes"  "houverem"  
#>  [96] "havia"      "havias"     "havíamos"   "havíeis"    "haviam"    
#> [101] "haverei"    "haverás"    "haverá"     "haveremos"  "havereis"  
#> [106] "haverão"    "haja"       "hajam"      "houvera"    "houveram"  
#> [111] "houvesse"

With the vec parameter, it is possible to have three different output formats: list, vector (default) and named vector.

gen_stopwords(lang = "pt", categories = "V", vec = "list")
#> $V
#> $V$ser
#>  [1] "ser"     "sou"     "sois"    "é"       "és"      "somos"   "sois"   
#>  [8] "são"     "era"     "eram"    "éramos"  "serei"   "será"    "serão"  
#> [15] "serás"   "fui"     "foste"   "foi"     "fomos"   "fostes"  "foram"  
#> [22] "era"     "eras"    "era"     "éramos"  "éreis"   "eram"    "serei"  
#> [29] "serás"   "será"    "seremos" "sereis"  "serão"   "seja"    "sejam"  
#> 
#> $V$estar
#>  [1] "estar"      "estou"      "estás"      "está"       "estamos"   
#>  [6] "estais"     "estão"      "estive"     "estiveste"  "esteve"    
#> [11] "estivemos"  "estivestes" "estiveram"  "estava"     "estavas"   
#> [16] "estava"     "estávamos"  "estáveis"   "estavam"    "estarei"   
#> [21] "estarás"    "estará"     "estaremos"  "estareis"   "estarão"   
#> [26] "esteja"     "estejam"   
#> 
#> $V$ter
#>  [1] "ter"      "tenham"   "têem"     "tenho"    "tens"     "tem"     
#>  [7] "temos"    "tendes"   "têm"      "tive"     "tiveste"  "teve"    
#> [13] "tivemos"  "tivestes" "tiveram"  "tinha"    "tinhas"   "tinha"   
#> [19] "tínhamos" "tínheis"  "tinham"   "terei"    "terás"    "terá"    
#> [25] "teremos"  "tereis"   "terão"    "teria"    "teriam"  
#> 
#> $V$haver
#>  [1] "haver"     "houve"     "haveria"   "haveriam"  "hei"       "hás"      
#>  [7] "há"        "havemos"   "haveis"    "hão"       "houver"    "houveres" 
#> [13] "houver"    "houvermos" "houverdes" "houverem"  "havia"     "havias"   
#> [19] "havia"     "havíamos"  "havíeis"   "haviam"    "haverei"   "haverás"  
#> [25] "haverá"    "haveremos" "havereis"  "haverão"   "haja"      "hajam"    
#> [31] "houvera"   "houveram"  "houvesse" 
#> 
#> 
#> $included
#> character(0)
gen_stopwords(lang = "pt", categories = "V", vec = "vec")
#>   [1] "ser"        "sou"        "sois"       "é"          "és"        
#>   [6] "somos"      "são"        "era"        "eram"       "éramos"    
#>  [11] "serei"      "será"       "serão"      "serás"      "fui"       
#>  [16] "foste"      "foi"        "fomos"      "fostes"     "foram"     
#>  [21] "eras"       "éreis"      "seremos"    "sereis"     "seja"      
#>  [26] "sejam"      "estar"      "estou"      "estás"      "está"      
#>  [31] "estamos"    "estais"     "estão"      "estive"     "estiveste" 
#>  [36] "esteve"     "estivemos"  "estivestes" "estiveram"  "estava"    
#>  [41] "estavas"    "estávamos"  "estáveis"   "estavam"    "estarei"   
#>  [46] "estarás"    "estará"     "estaremos"  "estareis"   "estarão"   
#>  [51] "esteja"     "estejam"    "ter"        "tenham"     "têem"      
#>  [56] "tenho"      "tens"       "tem"        "temos"      "tendes"    
#>  [61] "têm"        "tive"       "tiveste"    "teve"       "tivemos"   
#>  [66] "tivestes"   "tiveram"    "tinha"      "tinhas"     "tínhamos"  
#>  [71] "tínheis"    "tinham"     "terei"      "terás"      "terá"      
#>  [76] "teremos"    "tereis"     "terão"      "teria"      "teriam"    
#>  [81] "haver"      "houve"      "haveria"    "haveriam"   "hei"       
#>  [86] "hás"        "há"         "havemos"    "haveis"     "hão"       
#>  [91] "houver"     "houveres"   "houvermos"  "houverdes"  "houverem"  
#>  [96] "havia"      "havias"     "havíamos"   "havíeis"    "haviam"    
#> [101] "haverei"    "haverás"    "haverá"     "haveremos"  "havereis"  
#> [106] "haverão"    "haja"       "hajam"      "houvera"    "houveram"  
#> [111] "houvesse"
gen_stopwords(lang = "pt", categories = "V", vec = "n_vec")
#>       V.ser1       V.ser2       V.ser3       V.ser4       V.ser5       V.ser6 
#>        "ser"        "sou"       "sois"          "é"         "és"      "somos" 
#>       V.ser7       V.ser8       V.ser9      V.ser10      V.ser11      V.ser12 
#>       "sois"        "são"        "era"       "eram"     "éramos"      "serei" 
#>      V.ser13      V.ser14      V.ser15      V.ser16      V.ser17      V.ser18 
#>       "será"      "serão"      "serás"        "fui"      "foste"        "foi" 
#>      V.ser19      V.ser20      V.ser21      V.ser22      V.ser23      V.ser24 
#>      "fomos"     "fostes"      "foram"        "era"       "eras"        "era" 
#>      V.ser25      V.ser26      V.ser27      V.ser28      V.ser29      V.ser30 
#>     "éramos"      "éreis"       "eram"      "serei"      "serás"       "será" 
#>      V.ser31      V.ser32      V.ser33      V.ser34      V.ser35     V.estar1 
#>    "seremos"     "sereis"      "serão"       "seja"      "sejam"      "estar" 
#>     V.estar2     V.estar3     V.estar4     V.estar5     V.estar6     V.estar7 
#>      "estou"      "estás"       "está"    "estamos"     "estais"      "estão" 
#>     V.estar8     V.estar9    V.estar10    V.estar11    V.estar12    V.estar13 
#>     "estive"  "estiveste"     "esteve"  "estivemos" "estivestes"  "estiveram" 
#>    V.estar14    V.estar15    V.estar16    V.estar17    V.estar18    V.estar19 
#>     "estava"    "estavas"     "estava"  "estávamos"   "estáveis"    "estavam" 
#>    V.estar20    V.estar21    V.estar22    V.estar23    V.estar24    V.estar25 
#>    "estarei"    "estarás"     "estará"  "estaremos"   "estareis"    "estarão" 
#>    V.estar26    V.estar27       V.ter1       V.ter2       V.ter3       V.ter4 
#>     "esteja"    "estejam"        "ter"     "tenham"       "têem"      "tenho" 
#>       V.ter5       V.ter6       V.ter7       V.ter8       V.ter9      V.ter10 
#>       "tens"        "tem"      "temos"     "tendes"        "têm"       "tive" 
#>      V.ter11      V.ter12      V.ter13      V.ter14      V.ter15      V.ter16 
#>    "tiveste"       "teve"    "tivemos"   "tivestes"    "tiveram"      "tinha" 
#>      V.ter17      V.ter18      V.ter19      V.ter20      V.ter21      V.ter22 
#>     "tinhas"      "tinha"   "tínhamos"    "tínheis"     "tinham"      "terei" 
#>      V.ter23      V.ter24      V.ter25      V.ter26      V.ter27      V.ter28 
#>      "terás"       "terá"    "teremos"     "tereis"      "terão"      "teria" 
#>      V.ter29     V.haver1     V.haver2     V.haver3     V.haver4     V.haver5 
#>     "teriam"      "haver"      "houve"    "haveria"   "haveriam"        "hei" 
#>     V.haver6     V.haver7     V.haver8     V.haver9    V.haver10    V.haver11 
#>        "hás"         "há"    "havemos"     "haveis"        "hão"     "houver" 
#>    V.haver12    V.haver13    V.haver14    V.haver15    V.haver16    V.haver17 
#>   "houveres"     "houver"  "houvermos"  "houverdes"   "houverem"      "havia" 
#>    V.haver18    V.haver19    V.haver20    V.haver21    V.haver22    V.haver23 
#>     "havias"      "havia"   "havíamos"    "havíeis"     "haviam"    "haverei" 
#>    V.haver24    V.haver25    V.haver26    V.haver27    V.haver28    V.haver29 
#>    "haverás"     "haverá"  "haveremos"   "havereis"    "haverão"       "haja" 
#>    V.haver30    V.haver31    V.haver32    V.haver33 
#>      "hajam"    "houvera"   "houveram"   "houvesse"

To use only certain kinds of verbs, like only the variations/conjugations of the Portuguese verb ser:

my_sw <- gen_stopwords("pt", "V", vec = "list")
my_sw$V$ser
#>  [1] "ser"     "sou"     "sois"    "é"       "és"      "somos"   "sois"   
#>  [8] "são"     "era"     "eram"    "éramos"  "serei"   "será"    "serão"  
#> [15] "serás"   "fui"     "foste"   "foi"     "fomos"   "fostes"  "foram"  
#> [22] "era"     "eras"    "era"     "éramos"  "éreis"   "eram"    "serei"  
#> [29] "serás"   "será"    "seremos" "sereis"  "serão"   "seja"    "sejam"
# or shorter
gen_stopwords("pt", "V", vec = "list")$V$ser
#>  [1] "ser"     "sou"     "sois"    "é"       "és"      "somos"   "sois"   
#>  [8] "são"     "era"     "eram"    "éramos"  "serei"   "será"    "serão"  
#> [15] "serás"   "fui"     "foste"   "foi"     "fomos"   "fostes"  "foram"  
#> [22] "era"     "eras"    "era"     "éramos"  "éreis"   "eram"    "serei"  
#> [29] "serás"   "será"    "seremos" "sereis"  "serão"   "seja"    "sejam"

To see all the categories and their respective terms, run the following code:

show_sw("en")
#> $IN
#> [1] "of"  "for" "in"  "by" 
#> 
#> $DT
#> [1] "a"    "the"  "this"
#> 
#> $CC
#> [1] "and" "but" "or"  "for" "but" "yet" "so" 
#> 
#> $CD
#>  [1] "zero"    "one"     "two"     "three"   "four"    "five"    "first"  
#>  [8] "second"  "third"   "fourth"  "fifth"   "sixth"   "seventh" "eighth" 
#> [15] "ninth"   "tenth"  
#> 
#> $JJ
#> [1] "blue"  "happy" "sad"  
#> 
#> $JJR
#> [1] "bluer"   "happier"
#> 
#> $JJS
#> [1] "bluest"   "happiest"
#> 
#> $MD
#> [1] "could" "will" 
#> 
#> $PP
#> [1] "I"   "you" "he"  "she" "it"  "we" 
#> 
#> $PRP
#> [1] "I"   "you" "he"  "she" "we" 
#> 
#> $PPZ
#> [1] "your" "my"   "mine" "ours" "his"  "her" 
#> 
#> $RB
#> [1] "however"   "usually"   "naturally" "here"      "good"     
#> 
#> $RBR
#> [1] "better"
#> 
#> $UH
#> [1] "aha"
#> 
#> $RP
#> [1] "up"  "off"
#> 
#> $V
#>  [1] "am"    "are"   "is"    "be"    "can"   "could" "did"   "do"    "have" 
#> [10] "he"    "is"    "it"    "may"   "might" "must"  "need"  "no"    "not"  
#> [19] "now"   "of"    "on"    "or"    "she"   "that"  "the"   "to"    "was"  
#> [28] "were" 
#> 
#> $question
#> [1] "what"    "when"    "where"   "who"     "whom"    "why"     "because"
#> 
#> $PT
#> [1] "Mr."   "Mrs."  "Miss"  "Ms."   "Sir"   "Madam" "Dr."   "Prof."
show_sw("en", as_vector = TRUE)
#>  [1] "a"         "aha"       "am"        "and"       "are"       "be"       
#>  [7] "because"   "better"    "blue"      "bluer"     "bluest"    "but"      
#> [13] "by"        "can"       "could"     "did"       "do"        "Dr."      
#> [19] "eighth"    "fifth"     "first"     "five"      "for"       "four"     
#> [25] "fourth"    "good"      "happier"   "happiest"  "happy"     "have"     
#> [31] "he"        "her"       "here"      "his"       "however"   "I"        
#> [37] "in"        "is"        "it"        "Madam"     "may"       "might"    
#> [43] "mine"      "Miss"      "Mr."       "Mrs."      "Ms."       "must"     
#> [49] "my"        "naturally" "need"      "ninth"     "no"        "not"      
#> [55] "now"       "of"        "off"       "on"        "one"       "or"       
#> [61] "ours"      "Prof."     "sad"       "second"    "seventh"   "she"      
#> [67] "Sir"       "sixth"     "so"        "tenth"     "that"      "the"      
#> [73] "third"     "this"      "three"     "to"        "two"       "up"       
#> [79] "usually"   "was"       "we"        "were"      "what"      "when"     
#> [85] "where"     "who"       "whom"      "why"       "will"      "yet"      
#> [91] "you"       "your"      "zero"
show_sw("pt", as_vector = TRUE)
#>   [1] "a"                "ah"               "ai"              
#>   [4] "aquele"           "as"               "assim"           
#>   [7] "bilionésimo"      "bosta"            "caralho"         
#>  [10] "caramba"          "centésimo"        "cocô"            
#>  [13] "com"              "como"             "concluindo"      
#>  [16] "Conclusão"        "conosco"          "consectário"     
#>  [19] "consequentemente" "consigo"          "convosco"        
#>  [22] "credo"            "da"               "das"             
#>  [25] "de"               "décimo"           "decorre"         
#>  [28] "dela"             "delas"            "dele"            
#>  [31] "deles"            "depreende-se"     "desse modo"      
#>  [34] "do"               "dos"              "Doutor"          
#>  [37] "Doutora"          "Doutores"         "Dr."             
#>  [40] "Dra."             "ducentésimo"      "e"               
#>  [43] "é"                "eh"               "ei"              
#>  [46] "ela"              "elas"             "ele"             
#>  [49] "eles"             "em"               "em suma"         
#>  [52] "em vista disso"   "enfim"            "então"           
#>  [55] "era"              "eram"             "éramos"          
#>  [58] "eras"             "éreis"            "és"              
#>  [61] "esse"             "está"             "estais"          
#>  [64] "estamos"          "estão"            "estar"           
#>  [67] "estará"           "estarão"          "estarás"         
#>  [70] "estarei"          "estareis"         "estaremos"       
#>  [73] "estás"            "estava"           "estavam"         
#>  [76] "estávamos"        "estavas"          "estáveis"        
#>  [79] "este"             "esteja"           "estejam"         
#>  [82] "esteve"           "estive"           "estivemos"       
#>  [85] "estiveram"        "estiveste"        "estivestes"      
#>  [88] "estou"            "eu"               "Excelentíssima"  
#>  [91] "Excelentíssimo"   "Excelentíssimos"  "Exmo."           
#>  [94] "Exmos."           "finalmente"       "fiu"             
#>  [97] "foi"              "fomos"            "foram"           
#> [100] "foste"            "fostes"           "fui"             
#> [103] "há"               "haja"             "hajam"           
#> [106] "hão"              "hás"              "haveis"          
#> [109] "havemos"          "haver"            "haverá"          
#> [112] "haverão"          "haverás"          "haverei"         
#> [115] "havereis"         "haveremos"        "haveria"         
#> [118] "haveriam"         "havia"            "haviam"          
#> [121] "havíamos"         "havias"           "havíeis"         
#> [124] "hei"              "houve"            "houver"          
#> [127] "houvera"          "houveram"         "houverdes"       
#> [130] "houverem"         "houveres"         "houvermos"       
#> [133] "houvesse"         "isso"             "isto"            
#> [136] "lhe"              "logo"             "mas"             
#> [139] "me"               "meo"              "merda"           
#> [142] "meu"              "milésimo"         "milionésimo"     
#> [145] "mim"              "na"               "né"              
#> [148] "neh"              "no"               "nonagésimo"      
#> [151] "nongentésimo"     "nono"             "nos"             
#> [154] "nós"              "nossa"            "nossas"          
#> [157] "nosso"            "nossos"           "o"               
#> [160] "octingentésimo"   "octogésimo"       "oh"              
#> [163] "oitavo"           "opa"              "os"              
#> [166] "ou"               "ou seja"          "para"            
#> [169] "pois"             "por"              "por isso"        
#> [172] "por que"          "por quê"          "porque"          
#> [175] "porquê"           "porra"            "portanto"        
#> [178] "pqp"              "primeiro"         "psiu"            
#> [181] "quadrigésimo"     "quadringentésimo" "quarto"          
#> [184] "que"              "quingentésimo"    "quinquagésimo"   
#> [187] "quinto"           "Rev.ma"           "são"             
#> [190] "se"               "segue-se"         "segundo"         
#> [193] "seiscentésimo"    "seja"             "sejam"           
#> [196] "sem"              "Senhor"           "Senhoras"        
#> [199] "Senhores"         "septingentésimo"  "septuagésimo"    
#> [202] "ser"              "será"             "serão"           
#> [205] "serás"            "serei"            "sereis"          
#> [208] "seremos"          "sétimo"           "setingentésimo"  
#> [211] "setuagésimo"      "seu"              "sexagésimo"      
#> [214] "sexcentésimo"     "sexto"            "si"              
#> [217] "sois"             "somos"            "sou"             
#> [220] "Sr."              "Sra."             "sua"             
#> [223] "te"               "têem"             "tem"             
#> [226] "têm"              "temos"            "tendes"          
#> [229] "tenham"           "tenho"            "tens"            
#> [232] "ter"              "terá"             "terão"           
#> [235] "terás"            "terceiro"         "terei"           
#> [238] "tereis"           "teremos"          "teria"           
#> [241] "teriam"           "teu"              "teve"            
#> [244] "ti"               "tinha"            "tinham"          
#> [247] "tínhamos"         "tinhas"           "tínheis"         
#> [250] "tive"             "tivemos"          "tiveram"         
#> [253] "tiveste"          "tivestes"         "trecentésimo"    
#> [256] "trigésimo"        "tu"               "uhu"             
#> [259] "ui"               "uou"              "V.A."            
#> [262] "V.Ex."            "V.Ex.a"           "V.Ex.as"         
#> [265] "V.Mag"            "V.S."             "V.S.a"           
#> [268] "vigésimo"         "você"             "vocês"           
#> [271] "vos"              "vós"              "vtnc"            
#> [274] "VV.AA."           "wow"