Skip to contents

Find infections per individual

Usage

cg_annotate_episodes_find(
  cg,
  infection_cols,
  infection_present,
  episode_days = 14,
  episode_numbers_col = episode_number,
  episode_start_col = episode_start,
  episode_end_col = episode_end
)

Arguments

cg

a chronogram

infection_cols

A vector of column names that contain information regarding infection status, such as symptoms, LFT results, PCR results, or sequencing results.

infection_present

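Strings used in each of infection_cols to indicate the presence of infection, given in the same order as infection_cols. Each string is used as a stringr::str_detect() pattern on its corresponding column, so matching is case-sensitive and regular-expression syntax (e.g. "^severe") is supported. Empty strings are an error, as they would otherwise match everything.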

episode_days

The number of days to scan forwards and backwards in time to consider events a single episode. For example, is symptom onset 10d after a positive PCR test a single episode? The default is 14d.

episode_numbers_col

The column name to use for episode numbers. Default is episode_number (unquoted).

episode_start_col, episode_end_col

The column names to store the episode start and end dates (defaults: episode_start and episode_end, both unquoted).

Value

A chronogram, with episode numbers annotated.

Examples

## Example 1: A small study ##-------------------------------------
data(built_smallstudy)
cg_small <- built_smallstudy$chronogram
infections_to_add <- built_smallstudy$infections_to_add

## add infection data to chronogram ##
cg_small <- cg_add_experiment(cg_small, infections_to_add)

## now infection finding ##
cg_small_inf <- cg_annotate_episodes_find(cg_small,
  infection_cols = c("LFT", "PCR", "symptoms"),
  infection_present = c("pos", "Post", "^severe")
)
#> Parsed: infection_cols and infection_present
#>           
#> Searching in the [[column]], for the "text": 
#> 
#> stringr::str_detect(.data[["LFT"]], "pos") ~ "yes"
#> 
#> stringr::str_detect(.data[["PCR"]], "Post") ~ "yes"
#> 
#> stringr::str_detect(.data[["symptoms"]], "^severe") ~ "yes"
#> 
#> 
#> ...detecting will be exact.
#>           Capitals, spelling etc must be precise
#> 
#> Joining with `by = join_by(calendar_date, elig_study_id)`

summary(cg_small_inf$episode_number)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.000   1.000   1.000   1.077   1.000   2.000    1934 

## exact text matching ##------------------------------------------
test2 <- cg_annotate_episodes_find(cg_small_inf,
  infection_cols = c("LFT", "PCR", "symptoms"),
  infection_present = c("Pos", "Post", "^mild")
)
#> Parsed: infection_cols and infection_present
#>           
#> Searching in the [[column]], for the "text": 
#> 
#> stringr::str_detect(.data[["LFT"]], "Pos") ~ "yes"
#> 
#> stringr::str_detect(.data[["PCR"]], "Post") ~ "yes"
#> 
#> stringr::str_detect(.data[["symptoms"]], "^mild") ~ "yes"
#> 
#> 
#> ...detecting will be exact.
#>           Capitals, spelling etc must be precise
#> 
#> Joining with `by = join_by(calendar_date, elig_study_id, episode_number,
#> episode_start, episode_end)`

summary(test2$episode_number)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.000   1.000   1.000   1.077   1.000   2.000    1934 

## empty strings will error (as they otherwise match everything) ##
test3a <- 
try(
cg_annotate_episodes_find(cg_small_inf,
  infection_cols = c("LFT", "PCR", "symptoms"),
  infection_present = c("pos", "Post", "")
)
)
#> Parsed: infection_cols and infection_present
#>           
#> Searching in the [[column]], for the "text": 
#> 
#> stringr::str_detect(.data[["LFT"]], "pos") ~ "yes"
#> 
#> stringr::str_detect(.data[["PCR"]], "Post") ~ "yes"
#> 
#> stringr::str_detect(.data[["symptoms"]], "") ~ "yes"
#> Error in cg_annotate_episodes_find(cg_small_inf, infection_cols = c("LFT",  : 
#>   infection_present seems to have one more empty condition(s)
## a 'random' string will not error ##-----------------------------
test3b <- try(
cg_annotate_episodes_find(cg_small_inf,
  infection_cols = c("LFT", "PCR", "symptoms"),
  infection_present = c("pos", "Post", "a")
)
)
#> Parsed: infection_cols and infection_present
#>           
#> Searching in the [[column]], for the "text": 
#> 
#> stringr::str_detect(.data[["LFT"]], "pos") ~ "yes"
#> 
#> stringr::str_detect(.data[["PCR"]], "Post") ~ "yes"
#> 
#> stringr::str_detect(.data[["symptoms"]], "a") ~ "yes"
#> 
#> 
#> ...detecting will be exact.
#>           Capitals, spelling etc must be precise
#> 
#> Joining with `by = join_by(calendar_date, elig_study_id, episode_number,
#> episode_start, episode_end)`