We tried covariate models

Some worked, some didnt

BLUF: not using covariates for the small copepods, will retain DOY for the large copepods

# from each output folder in pyindex, 
outdir <- here::here("pyindex")
moddirs <- list.dirs(outdir) 
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)


# function to apply extracting info
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
    fill = TRUE, header = FALSE)
  
  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
  
  #FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }
  
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }
  
  
  #RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
  
  # read parameter estimates, object is called parameter_Estimates
  if(file.exists(file.path(d.name, "parameter_estimates.RData"))) {
    load(file.path(d.name, "parameter_estimates.RData"))
    
    AIC <- parameter_estimates$AIC[1]  
    converged <- parameter_estimates$Convergence_check[1]
    fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
    randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
    
  }else if(file.exists(file.path(d.name, "parameter_estimates.txt"))){
    
    reptext <- readLines(file.path(d.name, "parameter_estimates.txt"))
    
    AIC <- as.double(reptext[grep(reptext, pattern = "AIC")+1])
    converged <- reptext[grep(reptext, pattern = "Convergence_check")+1]
    fixedcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][2])
    randomcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][3])
    
  }else{
    
    AIC <- NA_real_
    converged <- NA_character_
    fixedcoeff <- NA_integer_
    randomcoeff <- NA_integer_
  }
  
  
  #index <- read.csv(file.path(d.name, "Index.csv"))
  
  
  # return model attributes as a dataframe
  out <- data.frame(modname = modname,
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale =  fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff#,
                    #index = index
  )
    
    return(out)

}


modcompare <- purrr::map_dfr(moddirs, getmodinfo)

modselect <- modcompare |>
  dplyr::mutate(season = dplyr::case_when(stringr::str_detect(modname, "_fall_") ~ "Fall",
                            stringr::str_detect(modname, "spring") ~ "Spring",
                            stringr::str_detect(modname, "_all_") ~ "Annual",
                            TRUE ~ as.character(NA))) |>
  dplyr::mutate(converged2 = dplyr::case_when(stringr::str_detect(converged, "no evidence") ~ "likely",
                                stringr::str_detect(converged, "is likely not") ~ "unlikely",
                                TRUE ~ as.character(NA))) |>
  dplyr::mutate(copegroup = stringr::str_extract(modname, "[^_]+")) |>
  #dplyr::mutate(modname = str_extract(modname, '(?<=allagg_).*')) |>
  dplyr::group_by(copegroup, season) |>
  dplyr::mutate(deltaAIC = AIC-min(AIC)) |>
  dplyr::select(copegroup, modname, season, deltaAIC, fixedcoeff,
         randomcoeff, use_anisotropy, 
         omega1, omega2, epsilon1, epsilon2, 
         beta1, beta2, AIC, converged2) |>
  dplyr::arrange(copegroup, season, AIC)

# DT::datatable(modselect, rownames = FALSE, 
#               options= list(pageLength = 25, scrollX = TRUE),
#               caption = "Comparison of delta AIC values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures. See text for model descriptions.")

# flextable::flextable(modselect) %>%
#                        #dplyr::select(-c(use_anisotropy, 
#          #omega1, omega2, epsilon1, epsilon2, 
#          #beta1, beta2))
#          #) %>%
#   flextable::set_header_labels(modname = "Model name",
#                                season = "Season",
#                                #deltaAIC = "dAIC",
#                                fixedcoeff = "N fixed",
#                                randomcoeff = "N random",
#                                converged2 = "Convergence") |>
#   #flextable::set_caption("Comparison of delta AIC (dAIC) values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures, with number of fixed (N fixed) and random (N random) coefficients. See text for model descriptions associated with each model name.") %>%
#   flextable::fontsize(size = 9, part = "all") %>%
#   flextable::colformat_double(digits = 2) |>
#   flextable::set_table_properties(layout = "autofit", width = 1)

stratlook <- data.frame(Stratum = c("Stratum_1",
                                    "Stratum_2",
                                    "Stratum_3",
                                    "Stratum_4",
                                    "Stratum_5",
                                    "Stratum_6",
                                    "Stratum_7"),
                        Region  = c("AllEPU",
                                    "her_sp",
                                    "her_fa",
                                    "MAB",
                                    "GB",
                                    "GOM",
                                    "SS"))

# function to apply extracting info
getmodindex <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  if(file.exists(file.path(d.name,"Index.csv"))){
    index <- read.csv(file.path(d.name, "Index.csv"))
  }else{
    stopifnot()
  }
  # return model indices as a dataframe
  out <- data.frame(modname = modname,
                    index
  )
  
  return(out)
}

modcompareindex <- purrr::map_dfr(moddirs, purrr::possibly(getmodindex, otherwise = NULL))

splitoutput <- modcompareindex %>%
  dplyr::mutate(Season = modname |> map(str_split, pattern = "_") |> map_chr(c(1,2))) %>%
  dplyr::mutate(Data = modname |> map(str_split, pattern = "_") |> map_chr(c(1,1))) %>%
  dplyr::mutate(Estimate = ifelse(Estimate == 0, NA, Estimate)) |>
  dplyr::left_join(stratlook) #%>%
  #dplyr::filter(Region %in% c(GOM", "GB", "MAB","SS", "AllEPU")) use all regions

zoomax <- max(splitoutput$Estimate, na.rm=T)


zootsmean <- splitoutput %>%
  dplyr::group_by(modname, Region) %>%
  dplyr::mutate(fmean = mean(Estimate, na.rm=T))

plot_zooindices <- function(splitoutput, plotdata, plotregions, plottitle){
  
  filterEPUs <- plotregions #c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU")
  
  seasons <- splitoutput |> dplyr::filter(Data==plotdata) |> dplyr::select(Season) |> dplyr::distinct()
  
  ncols <- dim(seasons)[1]
  
  currentMonth <- lubridate::month(Sys.Date())
  currentYear <- lubridate::year(Sys.Date())
  if (currentMonth > 4) {
    endShade <- currentYear
  } else {
    endShade <- currentYear - 1
  }
  shadedRegion <- c(endShade-9,endShade)
  
  shade.alpha <- 0.3
  shade.fill <- "lightgrey"
  lwd <- 1
  pcex <- 2
  trend.alpha <- 0.5
  trend.size <- 2
  hline.size <- 1
  line.size <- 2
  hline.alpha <- 0.35
  hline.lty <- "dashed"
  label.size <- 5
  hjust.label <- 1.5
  letter_size <- 4
  errorbar.width <- 0.25
  x.shade.min <- shadedRegion[1]
  x.shade.max <- shadedRegion[2]
  
  setup <- list(
    shade.alpha = shade.alpha,
    shade.fill =shade.fill,
    lwd = lwd,
    pcex = pcex,
    trend.alpha = trend.alpha,
    trend.size = trend.size,
    line.size = line.size,
    hline.size = hline.size,
    hline.alpha = hline.alpha,
    hline.lty = hline.lty,
    errorbar.width = errorbar.width,
    label.size = label.size,
    hjust.label = hjust.label,
    letter_size = letter_size,
    x.shade.min = x.shade.min,
    x.shade.max = x.shade.max
  )
  
  
  fix<- splitoutput |>
    dplyr::filter(Data %in% plotdata, #c("calfin"),
                  Region %in% filterEPUs) |>
    dplyr::group_by(Region, Season) |>
    dplyr::summarise(max = max(Estimate, na.rm=T))
  
  p <- splitoutput |>
    dplyr::filter(Data %in% plotdata, #c("calfin"),
                  Region %in% filterEPUs) |>
    dplyr::group_by(Region, Season) |>
    dplyr::left_join(fix) |>
    dplyr::mutate(#Value = Value/resca,
      Mean = as.numeric(Estimate),
      SE = Std..Error.for.Estimate,
      Mean = Mean/max,
      SE = SE/max,
      Upper = Mean + SE,
      Lower = Mean - SE) |>
    ggplot2::ggplot(ggplot2::aes(x = Time, y = Mean, linetype = modname, group = modname))+
    ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
                      xmin = setup$x.shade.min , xmax = setup$x.shade.max,
                      ymin = -Inf, ymax = Inf) +
    ggplot2::geom_ribbon(ggplot2::aes(ymin = Lower, ymax = Upper, fill = Season), alpha = 0.5)+
    ggplot2::geom_point()+
    ggplot2::geom_line()+
    ggplot2::ggtitle(plottitle)+
    ggplot2::ylab(expression("Relative abundance"))+
    ggplot2::xlab(ggplot2::element_blank())+
    ggplot2::facet_wrap(Region~Season, ncol = ncols, 
                        labeller = label_wrap_gen(multi_line=FALSE))+
    ecodata::geom_gls()+
    ecodata::theme_ts()+
    ecodata::theme_facet()+
    ecodata::theme_title() +
    ggplot2::theme(legend.position = "bottom")
  
  return(p)
}

Comparison of small copepod models with and without covariates

Overview of small copepod model indices (no covariates), including the herring larval season model (Sep-Feb).

splitnocov <- splitoutput |> dplyr::filter(!str_detect(modname, "_doy"))

plot_zooindices(splitoutput = splitnocov, 
             plotdata = "smallcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Small copepods")

Fall small copepod trends

Do we get the same trend from the day of year covariate models and those without covariates? Scale is different.

plot_indices_comp <- function(splitoutput, 
                                 plotdata, 
                                 plotregions, 
                                 plottitle,
                                 comptype){
  
  filterEPUs <- plotregions #c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU")
  
  seasons <- splitoutput |> dplyr::filter(Data==plotdata) |> dplyr::select(Season) |> dplyr::distinct()
  
  ncols <- dim(seasons)[1]
  
  currentMonth <- lubridate::month(Sys.Date())
  currentYear <- lubridate::year(Sys.Date())
  if (currentMonth > 4) {
    endShade <- currentYear
  } else {
    endShade <- currentYear - 1
  }
  shadedRegion <- c(endShade-9,endShade)
  
  shade.alpha <- 0.3
  shade.fill <- "lightgrey"
  lwd <- 1
  pcex <- 2
  trend.alpha <- 0.5
  trend.size <- 2
  hline.size <- 1
  line.size <- 2
  hline.alpha <- 0.35
  hline.lty <- "dashed"
  label.size <- 5
  hjust.label <- 1.5
  letter_size <- 4
  errorbar.width <- 0.25
  x.shade.min <- shadedRegion[1]
  x.shade.max <- shadedRegion[2]
  
  setup <- list(
    shade.alpha = shade.alpha,
    shade.fill =shade.fill,
    lwd = lwd,
    pcex = pcex,
    trend.alpha = trend.alpha,
    trend.size = trend.size,
    line.size = line.size,
    hline.size = hline.size,
    hline.alpha = hline.alpha,
    hline.lty = hline.lty,
    errorbar.width = errorbar.width,
    label.size = label.size,
    hjust.label = hjust.label,
    letter_size = letter_size,
    x.shade.min = x.shade.min,
    x.shade.max = x.shade.max
  )
  
  if(comptype=="trend"){
    
    tsmean <- splitoutput |>
      dplyr::filter(Data %in% plotdata,
                  Region %in% filterEPUs) |>
      dplyr::group_by(modname, Region) |>
      dplyr::mutate(fmean = mean(Estimate, na.rm=TRUE))
    
    p <-ggplot(tsmean, aes(x=Time, y=((Estimate-fmean)/fmean), colour=modname)) +
      #geom_errorbar(aes(ymin=(Estimate+Std..Error.for.Estimate - fmean)/fmean,
      #                  ymax=(Estimate-Std..Error.for.Estimate - fmean)/fmean))+
      geom_point(aes(shape=modname))+
      geom_line()+
      #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
      facet_wrap(~Region, scales = "free_y", ncol = 1) +
      #scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
      ylab("Relative index scaled to time series mean")  +
      ggtitle(paste(plottitle, "Trend comparison")) +
      theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0),
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))
  }
    
  if(comptype=="scale"){
    
    scalecomp <- splitoutput |>
      dplyr::filter(Data %in% plotdata,
                    Region %in% filterEPUs) 
    
    indexmax <-  max(scalecomp$Estimate, na.rm=TRUE)
    
    p <-ggplot(scalecomp, aes(x=Time, y=Estimate, colour=modname)) +
      #geom_errorbar(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate))+
      geom_ribbon(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate, fill=modname), linetype = 0, alpha = 0.15)+
      geom_point(aes(shape=modname))+
      geom_line()+
      #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
      facet_wrap(~Region, scales = "free_y", ncol = 1) +
      scale_y_continuous(labels=function(x)round(x/indexmax, digits = 2))+
      ylab("Relative index scaled to maximum")  +
      ggtitle(paste(plottitle, "Scale comparison"))+
      theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0),
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))
    
  }  
  
  return(p)
}

fallonly <- splitoutput |> dplyr::filter(Season=="fall")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "smallcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Small copepods (All)",
             comptype = "trend")

fallonly <- splitoutput |> dplyr::filter(Season=="fall")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "smallcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Small copepods (All)",
             comptype = "scale")

Spring large copepod models

The Day of Year covariate model did converge in spring for large copepods.

fallonly <- splitoutput |> dplyr::filter(Season=="spring")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "lgcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Large copepods (All)",
             comptype = "trend")

fallonly <- splitoutput |> dplyr::filter(Season=="spring")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "lgcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Large copepods (All)",
             comptype = "scale")

Copepod Model Results: Covariate comparisons

Sarah Gaichas

2024-09-10

We tried covariate models

Comparison of small copepod models with and without covariates

Fall small copepod trends

Spring large copepod models