Get the pairs of co-occurring entities from a POS data frame (obtained with filter_by_query() |> parsePOS()).
Examples
x <- txt_wiki |> filter_by_query("Police")
x <- x |> parsePOS()
get_cooc_entities(x)
#> $graphs
#> # A tibble: 88 × 3
#> n1 n2 freq
#> <chr> <chr> <int>
#> 1 Ted_Kaczynski_'s Industrial_Society_and_Its_Future 2
#> 2 Altoona Industrial_Society_and_Its_Future 1
#> 3 Altoona McDonald 1
#> 4 Altoona Ted_Kaczynski_'s 1
#> 5 Central_Park Altoona 1
#> 6 Central_Park Industrial_Society_and_Its_Future 1
#> 7 Central_Park Mangione 1
#> 8 Central_Park McDonald 1
#> 9 Central_Park New_York_City 1
#> 10 Central_Park San_Francisco 1
#> # ℹ 78 more rows
#>
#> $isolated_nodes
#> node freq
#> 1 American 1
#>
#> $nodes
#> # A tibble: 18 × 2
#> node freq
#> <chr> <int>
#> 1 Industrial_Society_and_Its_Future 2
#> 2 New_Jersey 2
#> 3 Ted_Kaczynski_'s 2
#> 4 Altoona 1
#> 5 American 1
#> 6 Central_Park 1
#> 7 Joseph_Kenny 1
#> 8 Mangione 1
#> 9 Manhattan 1
#> 10 McDonald 1
#> 11 NYPD 1
#> 12 New_York 1
#> 13 New_York_City 1
#> 14 Pennsylvania 1
#> 15 San_Francisco 1
#> 16 Upper_Manhattan 1
#> 17 the_George_Washington_Bridge_Bus_Station 1
#> 18 the_San_Francisco_Police_Department 1
#>
# with loops (self-references)
get_cooc_entities(x, loop = TRUE)
#> $graphs
#> # A tibble: 103 × 3
#> n1 n2 freq
#> <chr> <chr> <int>
#> 1 Mangione Mangione 10
#> 2 Pennsylvania Mangione 8
#> 3 Central_Park Mangione 5
#> 4 George_Washington_Bridge_Bus_Station Mangione 5
#> 5 Mangione Industrial_Society_and_Its_Future 5
#> 6 Mangione Ted_Kaczynski_'s 5
#> 7 New_Jersey Mangione 5
#> 8 New_York_City Mangione 5
#> 9 Upper_Manhattan Mangione 5
#> 10 San_Francisco_Police_Department Mangione 4
#> # ℹ 93 more rows
#>
#> $isolated_nodes
#> node freq
#> 1 American 1
#>
#> $nodes
#> # A tibble: 18 × 2
#> node freq
#> <chr> <int>
#> 1 Mangione 5
#> 2 Industrial_Society_and_Its_Future 2
#> 3 New_Jersey 2
#> 4 Pennsylvania 2
#> 5 Ted_Kaczynski_'s 2
#> 6 Altoona 1
#> 7 American 1
#> 8 Central_Park 1
#> 9 Joseph_Kenny 1
#> 10 Manhattan 1
#> 11 McDonald 1
#> 12 NYPD 1
#> 13 New_York 1
#> 14 New_York_City 1
#> 15 San_Francisco 1
#> 16 Upper_Manhattan 1
#> 17 the_George_Washington_Bridge_Bus_Station 1
#> 18 the_San_Francisco_Police_Department 1
#>
get_cooc_entities(x, lower_case = TRUE)
#> $graphs
#> # A tibble: 88 × 3
#> n1 n2 freq
#> <chr> <chr> <int>
#> 1 ted_kaczynski_'s industrial_society_and_its_future 2
#> 2 altoona industrial_society_and_its_future 1
#> 3 altoona mcdonald 1
#> 4 altoona ted_kaczynski_'s 1
#> 5 central_park altoona 1
#> 6 central_park industrial_society_and_its_future 1
#> 7 central_park mangione 1
#> 8 central_park mcdonald 1
#> 9 central_park new_york_city 1
#> 10 central_park san_francisco 1
#> # ℹ 78 more rows
#>
#> $isolated_nodes
#> node freq
#> 1 american 1
#>
#> $nodes
#> # A tibble: 18 × 2
#> node freq
#> <chr> <int>
#> 1 industrial_society_and_its_future 2
#> 2 new_jersey 2
#> 3 ted_kaczynski_'s 2
#> 4 altoona 1
#> 5 american 1
#> 6 central_park 1
#> 7 joseph_kenny 1
#> 8 mangione 1
#> 9 manhattan 1
#> 10 mcdonald 1
#> 11 new_york 1
#> 12 new_york_city 1
#> 13 nypd 1
#> 14 pennsylvania 1
#> 15 san_francisco 1
#> 16 the_george_washington_bridge_bus_station 1
#> 17 the_san_francisco_police_department 1
#> 18 upper_manhattan 1
#>