We tried covariate models

Some worked, some didnt

BLUF: not using covariates for the small copepods, will retain DOY for the large copepods

# from each output folder in pyindex, 
outdir <- here::here("pyindex")
moddirs <- list.dirs(outdir) 
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)


# function to apply extracting info
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
    fill = TRUE, header = FALSE)
  
  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
  
  #FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }
  
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }
  
  
  #RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
  
  # read parameter estimates, object is called parameter_Estimates
  if(file.exists(file.path(d.name, "parameter_estimates.RData"))) {
    load(file.path(d.name, "parameter_estimates.RData"))
    
    AIC <- parameter_estimates$AIC[1]  
    converged <- parameter_estimates$Convergence_check[1]
    fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
    randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
    
  }else if(file.exists(file.path(d.name, "parameter_estimates.txt"))){
    
    reptext <- readLines(file.path(d.name, "parameter_estimates.txt"))
    
    AIC <- as.double(reptext[grep(reptext, pattern = "AIC")+1])
    converged <- reptext[grep(reptext, pattern = "Convergence_check")+1]
    fixedcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][2])
    randomcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][3])
    
  }else{
    
    AIC <- NA_real_
    converged <- NA_character_
    fixedcoeff <- NA_integer_
    randomcoeff <- NA_integer_
  }
  
  
  #index <- read.csv(file.path(d.name, "Index.csv"))
  
  
  # return model attributes as a dataframe
  out <- data.frame(modname = modname,
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale =  fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff#,
                    #index = index
  )
    
    return(out)

}


modcompare <- purrr::map_dfr(moddirs, getmodinfo)

modselect <- modcompare |>
  dplyr::mutate(season = dplyr::case_when(stringr::str_detect(modname, "_fall_") ~ "Fall",
                            stringr::str_detect(modname, "spring") ~ "Spring",
                            stringr::str_detect(modname, "_all_") ~ "Annual",
                            TRUE ~ as.character(NA))) |>
  dplyr::mutate(converged2 = dplyr::case_when(stringr::str_detect(converged, "no evidence") ~ "likely",
                                stringr::str_detect(converged, "is likely not") ~ "unlikely",
                                TRUE ~ as.character(NA))) |>
  dplyr::mutate(copegroup = stringr::str_extract(modname, "[^_]+")) |>
  #dplyr::mutate(modname = str_extract(modname, '(?<=allagg_).*')) |>
  dplyr::group_by(copegroup, season) |>
  dplyr::mutate(deltaAIC = AIC-min(AIC)) |>
  dplyr::select(copegroup, modname, season, deltaAIC, fixedcoeff,
         randomcoeff, use_anisotropy, 
         omega1, omega2, epsilon1, epsilon2, 
         beta1, beta2, AIC, converged2) |>
  dplyr::arrange(copegroup, season, AIC)

# DT::datatable(modselect, rownames = FALSE, 
#               options= list(pageLength = 25, scrollX = TRUE),
#               caption = "Comparison of delta AIC values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures. See text for model descriptions.")

# flextable::flextable(modselect) %>%
#                        #dplyr::select(-c(use_anisotropy, 
#          #omega1, omega2, epsilon1, epsilon2, 
#          #beta1, beta2))
#          #) %>%
#   flextable::set_header_labels(modname = "Model name",
#                                season = "Season",
#                                #deltaAIC = "dAIC",
#                                fixedcoeff = "N fixed",
#                                randomcoeff = "N random",
#                                converged2 = "Convergence") |>
#   #flextable::set_caption("Comparison of delta AIC (dAIC) values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures, with number of fixed (N fixed) and random (N random) coefficients. See text for model descriptions associated with each model name.") %>%
#   flextable::fontsize(size = 9, part = "all") %>%
#   flextable::colformat_double(digits = 2) |>
#   flextable::set_table_properties(layout = "autofit", width = 1)
stratlook <- data.frame(Stratum = c("Stratum_1",
                                    "Stratum_2",
                                    "Stratum_3",
                                    "Stratum_4",
                                    "Stratum_5",
                                    "Stratum_6",
                                    "Stratum_7"),
                        Region  = c("AllEPU",
                                    "her_sp",
                                    "her_fa",
                                    "MAB",
                                    "GB",
                                    "GOM",
                                    "SS"))

# function to apply extracting info
getmodindex <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  if(file.exists(file.path(d.name,"Index.csv"))){
    index <- read.csv(file.path(d.name, "Index.csv"))
  }else{
    stopifnot()
  }
  # return model indices as a dataframe
  out <- data.frame(modname = modname,
                    index
  )
  
  return(out)
}

modcompareindex <- purrr::map_dfr(moddirs, purrr::possibly(getmodindex, otherwise = NULL))

splitoutput <- modcompareindex %>%
  dplyr::mutate(Season = modname |> map(str_split, pattern = "_") |> map_chr(c(1,2))) %>%
  dplyr::mutate(Data = modname |> map(str_split, pattern = "_") |> map_chr(c(1,1))) %>%
  dplyr::mutate(Estimate = ifelse(Estimate == 0, NA, Estimate)) |>
  dplyr::left_join(stratlook) #%>%
  #dplyr::filter(Region %in% c(GOM", "GB", "MAB","SS", "AllEPU")) use all regions

zoomax <- max(splitoutput$Estimate, na.rm=T)


zootsmean <- splitoutput %>%
  dplyr::group_by(modname, Region) %>%
  dplyr::mutate(fmean = mean(Estimate, na.rm=T)) 
plot_zooindices <- function(splitoutput, plotdata, plotregions, plottitle){
  
  filterEPUs <- plotregions #c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU")
  
  seasons <- splitoutput |> dplyr::filter(Data==plotdata) |> dplyr::select(Season) |> dplyr::distinct()
  
  ncols <- dim(seasons)[1]
  
  currentMonth <- lubridate::month(Sys.Date())
  currentYear <- lubridate::year(Sys.Date())
  if (currentMonth > 4) {
    endShade <- currentYear
  } else {
    endShade <- currentYear - 1
  }
  shadedRegion <- c(endShade-9,endShade)
  
  shade.alpha <- 0.3
  shade.fill <- "lightgrey"
  lwd <- 1
  pcex <- 2
  trend.alpha <- 0.5
  trend.size <- 2
  hline.size <- 1
  line.size <- 2
  hline.alpha <- 0.35
  hline.lty <- "dashed"
  label.size <- 5
  hjust.label <- 1.5
  letter_size <- 4
  errorbar.width <- 0.25
  x.shade.min <- shadedRegion[1]
  x.shade.max <- shadedRegion[2]
  
  setup <- list(
    shade.alpha = shade.alpha,
    shade.fill =shade.fill,
    lwd = lwd,
    pcex = pcex,
    trend.alpha = trend.alpha,
    trend.size = trend.size,
    line.size = line.size,
    hline.size = hline.size,
    hline.alpha = hline.alpha,
    hline.lty = hline.lty,
    errorbar.width = errorbar.width,
    label.size = label.size,
    hjust.label = hjust.label,
    letter_size = letter_size,
    x.shade.min = x.shade.min,
    x.shade.max = x.shade.max
  )
  
  
  fix<- splitoutput |>
    dplyr::filter(Data %in% plotdata, #c("calfin"),
                  Region %in% filterEPUs) |>
    dplyr::group_by(Region, Season) |>
    dplyr::summarise(max = max(Estimate, na.rm=T))
  
  p <- splitoutput |>
    dplyr::filter(Data %in% plotdata, #c("calfin"),
                  Region %in% filterEPUs) |>
    dplyr::group_by(Region, Season) |>
    dplyr::left_join(fix) |>
    dplyr::mutate(#Value = Value/resca,
      Mean = as.numeric(Estimate),
      SE = Std..Error.for.Estimate,
      Mean = Mean/max,
      SE = SE/max,
      Upper = Mean + SE,
      Lower = Mean - SE) |>
    ggplot2::ggplot(ggplot2::aes(x = Time, y = Mean, linetype = modname, group = modname))+
    ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
                      xmin = setup$x.shade.min , xmax = setup$x.shade.max,
                      ymin = -Inf, ymax = Inf) +
    ggplot2::geom_ribbon(ggplot2::aes(ymin = Lower, ymax = Upper, fill = Season), alpha = 0.5)+
    ggplot2::geom_point()+
    ggplot2::geom_line()+
    ggplot2::ggtitle(plottitle)+
    ggplot2::ylab(expression("Relative abundance"))+
    ggplot2::xlab(ggplot2::element_blank())+
    ggplot2::facet_wrap(Region~Season, ncol = ncols, 
                        labeller = label_wrap_gen(multi_line=FALSE))+
    ecodata::geom_gls()+
    ecodata::theme_ts()+
    ecodata::theme_facet()+
    ecodata::theme_title() +
    ggplot2::theme(legend.position = "bottom")
  
  return(p)
}

Comparison of small copepod models with and without covariates

Overview of small copepod model indices (no covariates), including the herring larval season model (Sep-Feb).

splitnocov <- splitoutput |> dplyr::filter(!str_detect(modname, "_doy"))

plot_zooindices(splitoutput = splitnocov, 
             plotdata = "smallcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Small copepods") 

Spring large copepod models

The Day of Year covariate model did converge in spring for large copepods.

fallonly <- splitoutput |> dplyr::filter(Season=="spring")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "lgcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Large copepods (All)",
             comptype = "trend")

fallonly <- splitoutput |> dplyr::filter(Season=="spring")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "lgcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Large copepods (All)",
             comptype = "scale")