Skip to contents

Get variants data for NY state

# Location labels used to pick New York City records out of the New York
# state subset via the `District` column.
counties <- c(
  "New York", "New York City", "New York County", "Queens County",
  "Bronx County", "Bronx", "Brooklyn", "Manhattan", "Queens",
  "Richmond County"
)

# Load the GISAID metadata snapshot stored on disk as a qs file.
gisaid_metadata <- qs::qread("~/data/epicov/metadata_tsv_2024_04_11.qs")

# Keep only records whose country is "USA" and whose host is "Human".
gisaid_usa <- gisaid_metadata %>%
  filter(Country == "USA", Host == "Human")

# Format the metadata, order it by state and collection month, and drop
# records whose lineage is recorded as "Unknown".
gisaid_usa <- gisaid_usa %>%
  FormatGISAIDMetadata() %>%
  arrange(State, MonthYearCollected) %>%
  filter(pangolin_lineage != "Unknown")

# Narrow down to New York state, then to the districts listed in `counties`.
gisaid_NY <- filter(gisaid_usa, State == "New York")
gisaid_NYC <- filter(gisaid_NY, District %in% counties)

# VOC definitions supplied by GetVOCs(); passed as `vocs` when collapsing
# lineages below.
vocs <- GetVOCs()

# Extra lineage-pattern -> label pairs handed to CollapseLineageToVOCs() via
# `custom_voc_mapping`. Every B.1 sub-lineage listed here is labelled "B.1".
custom_voc_mapping <- list(
  "JN.1" = "JN.1", "JN.1.*" = "JN.1",
  "HV.1" = "HV.1", "HV.1.*" = "HV.1",
  "B.1" = "B.1",
  "B.1.1.306" = "B.1", "B.1.1.306.*" = "B.1",
  "B.1.1.326" = "B.1",
  "B.1.36.29" = "B.1",
  "B.1.560" = "B.1",
  "B.1.1" = "B.1",
  "B.1.210" = "B.1",
  "B.1.36.8" = "B.1",
  "B.1.36" = "B.1", "B.1.36.*" = "B.1"
)

# Drop records whose lineage is "None", then collapse the remaining lineages
# using the VOC list and the custom mapping (summarize = FALSE).
gisaid_NYC <- gisaid_NYC %>%
  filter(pangolin_lineage != "None") %>%
  CollapseLineageToVOCs(
    variant_df = .,
    vocs = vocs,
    custom_voc_mapping = custom_voc_mapping,
    summarize = FALSE
  )

Get weekly cases for New York City

# Daily case counts from the NYC Health coronavirus-data repository. Only the
# date and case-count columns are kept; they are renamed by name (rather than
# by position) to `date` and `daily_cases`.
confirmed <- read_csv(
  "https://raw.githubusercontent.com/nychealth/coronavirus-data/master/trends/data-by-day.csv"
) %>%
  select(date = date_of_interest, daily_cases = CASE_COUNT)

# NOTE(review): `date` is parsed below with datefmt "%m/%d/%Y", so it is
# presumably still a character column here -- confirm that yearweek() parses
# this format as intended.
confirmed$WeekYear <- tsibble::yearweek(confirmed$date)
confirmed$MonthYear <- GetMonthYear(confirmed$date, datefmt = "%m/%d/%Y")

# One row per week: the mean of the daily case counts in that week (ignoring
# missing days), rounded up. The week column is renamed to `WeekYearCollected`.
confirmed_subset_dateweekwise_long <- confirmed %>%
  group_by(WeekYear) %>%
  summarise(n = ceiling(mean(daily_cases, na.rm = TRUE))) %>%
  arrange(WeekYear) %>%
  rename(WeekYearCollected = WeekYear)

# Week-wise summary of the NYC sequences.
gisaid_NYC_weekwise <- SummarizeVariantsWeekwise(gisaid_NYC)

Distribution of variants

# Month-wise summary of the NYC sequences, labelled with State = "NYC".
state_month_counts <- SummarizeVariantsMonthwise(gisaid_NYC)
state_month_counts[["State"]] <- "NYC"

# Convert the counts to prevalence, then collapse lineages with the same VOC
# list and custom mapping used for the sequence-level data.
state_month_prevalence <- state_month_counts %>%
  CountsToPrevalence() %>%
  CollapseLineageToVOCs(
    variant_df = .,
    vocs = vocs,
    custom_voc_mapping = custom_voc_mapping,
    summarize = FALSE
  )

# Stacked bar plot of the monthly prevalence; the bare name prints it.
p5 <- StackedBarPlotPrevalence(state_month_prevalence)
p5

Project weekly cases to variant prevalence data from GISAID

# Total count per collapsed lineage across all weeks.
lineage_totals <- gisaid_NYC_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n))

# Keep only lineages whose total exceeds 50; which() skips NA totals.
voc_to_keep <- unique(
  lineage_totals$lineage_collapsed[which(lineage_totals$n_sum > 50)]
)
gisaid_NYC_weekwise <- gisaid_NYC_weekwise %>%
  filter(lineage_collapsed %in% voc_to_keep)

# Fit the weekly model from the variant counts and the weekly case averages,
# build the animation, and save it as a GIF under docs/articles.
newyork_cases_pred_prob_sel_long <- FitMultinomWeekly(
  gisaid_NYC_weekwise,
  confirmed_subset_dateweekwise_long
)
the_anim <- PlotVariantPrevalenceAnimated(
  newyork_cases_pred_prob_sel_long,
  title = "Estimated cases (weekly average) in New York City by variant",
  caption = "**Source: gisaid.org and NYC Health**<br>",
  date_breaks = "120 days"
)
gganimate::anim_save(
  filename = here::here("docs/articles/NYC_animated.gif"),
  animation = the_anim
)

Look at cases after April 2023:

# Weekly average of daily cases (rounded up), restricted to months after
# April 2023.
# NOTE(review): the comparison against the string "April 2023" is only
# chronological if MonthYear is a date-like class rather than plain
# character -- confirm against GetMonthYear().
confirmed_subset_dateweekwise_long <- confirmed %>%
  filter(MonthYear > "April 2023") %>%
  group_by(WeekYear) %>%
  summarise(n = ceiling(mean(daily_cases, na.rm = TRUE))) %>%
  arrange(WeekYear) %>%
  rename(WeekYearCollected = WeekYear)

# Same time restriction applied to the sequence data, then summarised by week.
gisaid_NYC_subset <- gisaid_NYC %>% filter(MonthYearCollected > "April 2023")
gisaid_weekwise <- SummarizeVariantsWeekwise(gisaid_NYC_subset)

# Keep lineages whose total count in this window is greater than 1.
voc_to_keep <- gisaid_weekwise %>%
  group_by(lineage_collapsed) %>%
  summarise(n_sum = sum(n)) %>%
  filter(n_sum > 1) %>%
  pull(lineage_collapsed) %>%
  unique()
gisaid_weekwise <- gisaid_weekwise %>% filter(lineage_collapsed %in% voc_to_keep)

# Fit the weekly model on the subset, build the animation, and save it as a
# GIF under docs/articles.
cases_pred_prob_sel_long <- FitMultinomWeekly(gisaid_weekwise, confirmed_subset_dateweekwise_long)
the_anim <- PlotVariantPrevalenceAnimated(cases_pred_prob_sel_long, title = "Estimated cases (weekly average) in New York City by variant", caption = "**Source: gisaid.org and NYC Health**<br>", date_breaks = "30 days")
## `geom_line()`: Each group consists of only one observation.
##  Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
##  Do you need to adjust the group aesthetic?
gganimate::anim_save(filename = here::here("docs/articles/NYC_animated_2023.gif"), animation = the_anim)