Get the pairs of entities from a POS data frame (created with filter_by_query() |> parsePOS() or with spacyr::spacy_parse()).
Examples
x <- txt_wiki |> filter_by_query("Police")
x <- x |> parsePOS()
get_cooc_entities(x)
#> $graphs
#> # A tibble: 88 × 3
#> n1 n2 freq
#> <chr> <chr> <int>
#> 1 Ted_Kaczynski_'s Industrial_Society_and_Its_Future 2
#> 2 Altoona Industrial_Society_and_Its_Future 1
#> 3 Altoona McDonald 1
#> 4 Altoona Ted_Kaczynski_'s 1
#> 5 Central_Park Altoona 1
#> 6 Central_Park Industrial_Society_and_Its_Future 1
#> 7 Central_Park Mangione 1
#> 8 Central_Park McDonald 1
#> 9 Central_Park New_York_City 1
#> 10 Central_Park San_Francisco 1
#> # ℹ 78 more rows
#>
#> $isolated_nodes
#> # A tibble: 1 × 2
#> node freq
#> <chr> <int>
#> 1 American 1
#>
#> $nodes
#> # A tibble: 17 × 2
#> node freq
#> <chr> <int>
#> 1 Industrial_Society_and_Its_Future 2
#> 2 New_Jersey 2
#> 3 Ted_Kaczynski_'s 2
#> 4 Altoona 1
#> 5 Central_Park 1
#> 6 Joseph_Kenny 1
#> 7 Mangione 1
#> 8 Manhattan 1
#> 9 McDonald 1
#> 10 NYPD 1
#> 11 New_York 1
#> 12 New_York_City 1
#> 13 Pennsylvania 1
#> 14 San_Francisco 1
#> 15 Upper_Manhattan 1
#> 16 the_George_Washington_Bridge_Bus_Station 1
#> 17 the_San_Francisco_Police_Department 1
#>
# with loops / self-references
get_cooc_entities(x, loop = TRUE)
#> $graphs
#> # A tibble: 103 × 3
#> n1 n2 freq
#> <chr> <chr> <int>
#> 1 Mangione Mangione 10
#> 2 Pennsylvania Mangione 8
#> 3 Central_Park Mangione 5
#> 4 George_Washington_Bridge_Bus_Station Mangione 5
#> 5 Mangione Industrial_Society_and_Its_Future 5
#> 6 Mangione Ted_Kaczynski_'s 5
#> 7 New_Jersey Mangione 5
#> 8 New_York_City Mangione 5
#> 9 Upper_Manhattan Mangione 5
#> 10 San_Francisco_Police_Department Mangione 4
#> # ℹ 93 more rows
#>
#> $isolated_nodes
#> # A tibble: 1 × 2
#> node freq
#> <chr> <int>
#> 1 American 1
#>
#> $nodes
#> # A tibble: 17 × 2
#> node freq
#> <chr> <int>
#> 1 Mangione 5
#> 2 Industrial_Society_and_Its_Future 2
#> 3 New_Jersey 2
#> 4 Pennsylvania 2
#> 5 Ted_Kaczynski_'s 2
#> 6 Altoona 1
#> 7 Central_Park 1
#> 8 Joseph_Kenny 1
#> 9 Manhattan 1
#> 10 McDonald 1
#> 11 NYPD 1
#> 12 New_York 1
#> 13 New_York_City 1
#> 14 San_Francisco 1
#> 15 Upper_Manhattan 1
#> 16 the_George_Washington_Bridge_Bus_Station 1
#> 17 the_San_Francisco_Police_Department 1
#>
get_cooc_entities(x, lower_case = TRUE)
#> $graphs
#> # A tibble: 88 × 3
#> n1 n2 freq
#> <chr> <chr> <int>
#> 1 ted_kaczynski_'s industrial_society_and_its_future 2
#> 2 altoona industrial_society_and_its_future 1
#> 3 altoona mcdonald 1
#> 4 altoona ted_kaczynski_'s 1
#> 5 central_park altoona 1
#> 6 central_park industrial_society_and_its_future 1
#> 7 central_park mangione 1
#> 8 central_park mcdonald 1
#> 9 central_park new_york_city 1
#> 10 central_park san_francisco 1
#> # ℹ 78 more rows
#>
#> $isolated_nodes
#> # A tibble: 1 × 2
#> node freq
#> <chr> <int>
#> 1 american 1
#>
#> $nodes
#> # A tibble: 17 × 2
#> node freq
#> <chr> <int>
#> 1 industrial_society_and_its_future 2
#> 2 new_jersey 2
#> 3 ted_kaczynski_'s 2
#> 4 altoona 1
#> 5 central_park 1
#> 6 joseph_kenny 1
#> 7 mangione 1
#> 8 manhattan 1
#> 9 mcdonald 1
#> 10 new_york 1
#> 11 new_york_city 1
#> 12 nypd 1
#> 13 pennsylvania 1
#> 14 san_francisco 1
#> 15 the_george_washington_bridge_bus_station 1
#> 16 the_san_francisco_police_department 1
#> 17 upper_manhattan 1
#>