Skip to contents

Get the pairs of co-occurring entities from a POS dataframe (generated by filter_by_query() |> parsePOS()).

Usage

get_cooc_entities(pos, loop = FALSE, freq = TRUE, lower_case = FALSE)

Arguments

pos

a dataframe generated by filter_by_query() |> parsePOS()

loop

if TRUE (default: FALSE), includes loops (self-references, i.e. an entity paired with itself) in the graph

freq

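if TRUE (default: TRUE), returns the count (frequency) of edges/co-occurrences

lower_case

if TRUE (default: FALSE), entity names are converted to lower case in the returned graphs and nodes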

Examples

x <- txt_wiki |> filter_by_query("Police")
x <- x |> parsePOS()
get_cooc_entities(x)
#> $graphs
#> # A tibble: 88 × 3
#>    n1               n2                                 freq
#>    <chr>            <chr>                             <int>
#>  1 Ted_Kaczynski_'s Industrial_Society_and_Its_Future     2
#>  2 Altoona          Industrial_Society_and_Its_Future     1
#>  3 Altoona          McDonald                              1
#>  4 Altoona          Ted_Kaczynski_'s                      1
#>  5 Central_Park     Altoona                               1
#>  6 Central_Park     Industrial_Society_and_Its_Future     1
#>  7 Central_Park     Mangione                              1
#>  8 Central_Park     McDonald                              1
#>  9 Central_Park     New_York_City                         1
#> 10 Central_Park     San_Francisco                         1
#> # ℹ 78 more rows
#> 
#> $isolated_nodes
#>       node freq
#> 1 American    1
#> 
#> $nodes
#> # A tibble: 18 × 2
#>    node                                      freq
#>    <chr>                                    <int>
#>  1 Industrial_Society_and_Its_Future            2
#>  2 New_Jersey                                   2
#>  3 Ted_Kaczynski_'s                             2
#>  4 Altoona                                      1
#>  5 American                                     1
#>  6 Central_Park                                 1
#>  7 Joseph_Kenny                                 1
#>  8 Mangione                                     1
#>  9 Manhattan                                    1
#> 10 McDonald                                     1
#> 11 NYPD                                         1
#> 12 New_York                                     1
#> 13 New_York_City                                1
#> 14 Pennsylvania                                 1
#> 15 San_Francisco                                1
#> 16 Upper_Manhattan                              1
#> 17 the_George_Washington_Bridge_Bus_Station     1
#> 18 the_San_Francisco_Police_Department          1
#> 
# with loops (self-references)
get_cooc_entities(x, loop = TRUE)
#> $graphs
#> # A tibble: 103 × 3
#>    n1                                   n2                                 freq
#>    <chr>                                <chr>                             <int>
#>  1 Mangione                             Mangione                             10
#>  2 Pennsylvania                         Mangione                              8
#>  3 Central_Park                         Mangione                              5
#>  4 George_Washington_Bridge_Bus_Station Mangione                              5
#>  5 Mangione                             Industrial_Society_and_Its_Future     5
#>  6 Mangione                             Ted_Kaczynski_'s                      5
#>  7 New_Jersey                           Mangione                              5
#>  8 New_York_City                        Mangione                              5
#>  9 Upper_Manhattan                      Mangione                              5
#> 10 San_Francisco_Police_Department      Mangione                              4
#> # ℹ 93 more rows
#> 
#> $isolated_nodes
#>       node freq
#> 1 American    1
#> 
#> $nodes
#> # A tibble: 18 × 2
#>    node                                      freq
#>    <chr>                                    <int>
#>  1 Mangione                                     5
#>  2 Industrial_Society_and_Its_Future            2
#>  3 New_Jersey                                   2
#>  4 Pennsylvania                                 2
#>  5 Ted_Kaczynski_'s                             2
#>  6 Altoona                                      1
#>  7 American                                     1
#>  8 Central_Park                                 1
#>  9 Joseph_Kenny                                 1
#> 10 Manhattan                                    1
#> 11 McDonald                                     1
#> 12 NYPD                                         1
#> 13 New_York                                     1
#> 14 New_York_City                                1
#> 15 San_Francisco                                1
#> 16 Upper_Manhattan                              1
#> 17 the_George_Washington_Bridge_Bus_Station     1
#> 18 the_San_Francisco_Police_Department          1
#> 
get_cooc_entities(x, lower_case = TRUE)
#> $graphs
#> # A tibble: 88 × 3
#>    n1               n2                                 freq
#>    <chr>            <chr>                             <int>
#>  1 ted_kaczynski_'s industrial_society_and_its_future     2
#>  2 altoona          industrial_society_and_its_future     1
#>  3 altoona          mcdonald                              1
#>  4 altoona          ted_kaczynski_'s                      1
#>  5 central_park     altoona                               1
#>  6 central_park     industrial_society_and_its_future     1
#>  7 central_park     mangione                              1
#>  8 central_park     mcdonald                              1
#>  9 central_park     new_york_city                         1
#> 10 central_park     san_francisco                         1
#> # ℹ 78 more rows
#> 
#> $isolated_nodes
#>       node freq
#> 1 american    1
#> 
#> $nodes
#> # A tibble: 18 × 2
#>    node                                      freq
#>    <chr>                                    <int>
#>  1 industrial_society_and_its_future            2
#>  2 new_jersey                                   2
#>  3 ted_kaczynski_'s                             2
#>  4 altoona                                      1
#>  5 american                                     1
#>  6 central_park                                 1
#>  7 joseph_kenny                                 1
#>  8 mangione                                     1
#>  9 manhattan                                    1
#> 10 mcdonald                                     1
#> 11 new_york                                     1
#> 12 new_york_city                                1
#> 13 nypd                                         1
#> 14 pennsylvania                                 1
#> 15 san_francisco                                1
#> 16 the_george_washington_bridge_bus_station     1
#> 17 the_san_francisco_police_department          1
#> 18 upper_manhattan                              1
#>