1 Summary

This index uses herring observed in predator stomachs to evaluate potential herring trends for the 2025 RT assessment.

Methods are based on the forage index for Bluefish (Gaichas et al. 2023) and the State of the Ecosystem forage index. These indices were inspired by Ng et al. (2021), which specifically looked at herring in stomachs of predators, but for each predator separately.

Will discuss a good starting point with the team. Preliminary models started in 1973 and used data through 2022. Data are thin prior to 1982 and I can’t bring in SST data from OISST before then, so for these examples we start in 1982. The herring RT model will start in 1987 so a 1982 start covers that.

2 Workflow

Operational updates to the forage index submitted to the State of the Ecosystem report are in the forageindex github repository: https://github.com/NOAA-EDAB/forageindex

Data input files are in the folder fhdat and were processed with the script VASTherringWG_ProcessInputDat.R in that folder: https://github.com/NOAA-EDAB/forageindex/blob/main/fhdat/VASTherringWG_ProcessInputDat.R

# Streamlined version of CreateVASTInputs.Rmd for operational updates to forage index indicators
# February 2024
#   This one uses only herring in stomachs, otherwise the same as SOE forage index
#   To be presented to the 2025 herring RT WG

library(tidyverse)
library(here)
library(dendextend)

# Load NEFSC stomach data received from Brian Smith

# object is called `allfh`
load(here("fhdat/allfh.Rdata"))

#object is called allfh21
load(here("fhdat/allfh21.Rdata"))

#object is called allfh22
load(here("fhdat/allfh22.Rdata"))

# bind all NEFSC stomach datasets
allfh <- allfh %>%
  dplyr::bind_rows(allfh21) |>
  dplyr::bind_rows(allfh22)

###############################################################################
# read predator similarity info to generate predator list
# Input NEFSC food habits overlap matrix:

dietoverlap <- read_csv(here("fhdat/tgmat.2022-02-15.csv"))

# use dendextend functions to get list
d_dietoverlap <- dist(dietoverlap)

guilds <- hclust(d_dietoverlap, method = "complete")

dend <- as.dendrogram(guilds)

dend <- color_branches(dend, k=6) # Brian uses 6 categories

labels(dend) <- paste(as.character(names(dietoverlap[-1]))[order.dendrogram(dend)],
                      "(",labels(dend),")", 
                      sep = "")

pisccomplete <- partition_leaves(dend)[[
  which_node(dend, c("Bluefish..S(37)", "Bluefish..M(36)", "Bluefish..L(35)"))
]]


# Filter NEFSC food habits data with predator list
pisccompletedf <- data.frame("COMNAME" = toupper(str_remove(pisccomplete, "\\..*")),
                             "SizeCat" = str_remove(str_extract(pisccomplete, "\\..*[:upper:]+"), "\\.."),
                             "feedguild" = "pisccomplete")

fh.nefsc.pisc.pisccomplete <- allfh %>%
  #filter(pynam != "EMPTY") %>%
  left_join(pisccompletedf, by = c("pdcomnam" = "COMNAME",
                                   "sizecat" = "SizeCat")) %>%
  filter(!is.na(feedguild))


##############################################################################
# Get prey list from NEFSC and NEAMAP
# Irrelevant! The prey list is now Atlantic herring and unid clupeids.

# preycount  <- fh.nefsc.pisc.pisccomplete %>%
#   #group_by(year, season, pdcomnam, pynam) %>%
#   group_by(pdcomnam, pynam) %>%
#   summarise(count = n()) %>%
#   #arrange(desc(count))
#   pivot_wider(names_from = pdcomnam, values_from = count)
# 
# 
# gencomlist <- allfh %>%
#   select(pynam, pycomnam2, gencom2) %>%
#   distinct()
# 
# NEFSCblueprey <- preycount %>%
#   #filter(BLUEFISH > 9) %>%
#   filter(!pynam %in% c("EMPTY", "BLOWN",
#                        "FISH", "OSTEICHTHYES",
#                        "ANIMAL REMAINS",
#                        "FISH SCALES")) %>%
#   #filter(!str_detect(pynam, "SHRIMP|CRAB")) %>%
#   left_join(gencomlist) %>%
#   filter(!gencom2 %in% c("ARTHROPODA", "ANNELIDA",
#                          "CNIDARIA", "UROCHORDATA",
#                          "ECHINODERMATA", "WORMS",
#                          "BRACHIOPODA", "COMB JELLIES",
#                          "BRYOZOA", "SPONGES",
#                          "MISCELLANEOUS", "OTHER")) %>%
#   arrange(desc(BLUEFISH))
# 
# # March 2023, formally add NEAMAP to prey decisions
# NEAMAPblueprey <- read.csv(here("fhdat/Full Prey List_Common Names.csv")) %>%
#   #filter(BLUEFISH > 9) %>%
#   filter(!SCIENTIFIC.NAME %in% c("Actinopterygii", "fish scales",
#                                  "Decapoda (megalope)", 
#                                  "unidentified material",
#                                  "Plantae",
#                                  "unidentified animal"))
# 
# NEAMAPprey <- NEAMAPblueprey %>%
#   dplyr::select(COMMON.NAME, SCIENTIFIC.NAME, BLUEFISH) %>%
#   dplyr::filter(!is.na(BLUEFISH)) %>%
#   dplyr::mutate(pynam2 = tolower(SCIENTIFIC.NAME),
#                 pynam2 = stringr::str_replace(pynam2, "spp.", "sp")) %>%
#   dplyr::rename(NEAMAP = BLUEFISH)
# 
# 
# NEFSCprey <- NEFSCblueprey %>%
#   dplyr::select(pycomnam2, pynam, BLUEFISH) %>%
#   dplyr::filter(!is.na(BLUEFISH)) %>%
#   dplyr::mutate(pynam2 = tolower(pynam)) %>%
#   dplyr::rename(NEFSC = BLUEFISH)
# 
# # new criteria March 2023, >20 observations NEAMAP+NEFSC, but keep mackerel
# # removes the flatfish order (too broad) and unid Urophycis previously in NEAMAP
# blueprey <- NEFSCprey %>% 
#   dplyr::full_join(NEAMAPprey) %>%
#   dplyr::mutate(NEAMAP = ifelse(is.na(NEAMAP), 0, NEAMAP),
#                 NEFSC = ifelse(is.na(NEFSC), 0, NEFSC),
#                 total = NEFSC + NEAMAP,
#                 PREY = ifelse(is.na(SCIENTIFIC.NAME), pynam, SCIENTIFIC.NAME),
#                 COMMON = ifelse(is.na(COMMON.NAME), pycomnam2, COMMON.NAME),
#                 pynam = ifelse(is.na(pynam), toupper(pynam2), pynam)) %>%
#   dplyr::arrange(desc(total)) %>%
#   dplyr::filter(total>20 | pynam=="SCOMBER SCOMBRUS") %>% # >20 leaves out mackerel
#   dplyr::mutate(COMMON = case_when(pynam=="ILLEX SP" ~ "Shortfin squids",
#                                    pynam2=="teuthida" ~ "Unidentified squids",
#                                    TRUE ~ COMMON)) %>%
#   dplyr::mutate(PREY = stringr::str_to_sentence(PREY),
#                 COMMON = stringr::str_to_sentence(COMMON))


fh.nefsc.pisc.pisccomplete.herring <- fh.nefsc.pisc.pisccomplete %>%
  mutate(herring = case_when(pynam %in% c("CLUPEA HARENGUS",
                                          "CLUPEIDAE",
                                          "CLUPEA HARENGUS SCALES",
                                          "CLUPEIDAE SCALES",
                                          "CLUPEA HARENGUS LARVAE",
                                          "CLUPEIDAE LARVAE")  ~ "herring",
                              TRUE ~ "othprey"))

###############################################################################
# Make the NEFSC dataset aggregating prey based on prey list

herringpyall_stn <- fh.nefsc.pisc.pisccomplete.herring %>%
  #create id linking cruise6_station
  #create season_ng spring and fall Spring=Jan-May, Fall=June-Dec
  mutate(id = paste0(cruise6, "_", station),
         year = as.numeric(year),
         month = as.numeric(month),
         season_ng = case_when(month <= 6 ~ "SPRING",
                               month >= 7 ~ "FALL",
                               TRUE ~ as.character(NA))
  ) %>%
  dplyr::select(year, season_ng, id, stratum,
                pynam, pyamtw, pywgti, pyvoli, herring, 
                pdcomnam, pdid, pdlen, pdsvol, pdswgt, 
                beglat, beglon, declat, declon, 
                bottemp, surftemp, setdepth) %>%
  group_by(id) %>%
  #mean blueprey g per stomach per tow: sum all blueprey g/n stomachs in tow
  mutate(herringwt = case_when(herring == "herring" ~ pyamtw,
                              TRUE ~ 0.0),
         herringpynam = case_when(herring == "herring" ~ pynam,
                               TRUE ~ NA_character_)) 

# Optional: save at prey disaggregated stage for paper
#saveRDS(bluepyall_stn, here("fhdat/bluepyall_stn.rds"))

# Now get station data in one line
stndat <- herringpyall_stn %>%
  dplyr::select(year, season_ng, id, 
                beglat, beglon, declat, declon, 
                bottemp, surftemp, setdepth) %>%
  distinct()

#pisc stomachs in tow count pdid for each pred and sum
piscstom <- herringpyall_stn %>%
  group_by(id, pdcomnam) %>%
  summarise(nstompd = n_distinct(pdid)) %>%
  group_by(id) %>%
  summarise(nstomtot = sum(nstompd))

#mean and var pred length per tow
pisclen <- herringpyall_stn %>%
  summarise(meanpisclen = mean(pdlen),
            varpisclen = var(pdlen))

# Aggregated prey at station level with predator covariates
herringpyagg_stn <- herringpyall_stn %>%
  summarise(sumherringwt = sum(herringwt),
            nherringsp = n_distinct(herringpynam, na.rm = T),
            npreysp = n_distinct(pynam),
            npiscsp = n_distinct(pdcomnam)) %>%
  left_join(piscstom) %>%
  mutate(meanherringwt = sumherringwt/nstomtot) %>%
  left_join(pisclen) %>%
  left_join(stndat)

# save at same stage as before, writing over old file
#saveRDS(bluepyagg_stn, here("fhdat/bluepyagg_stn.rds"))

# current dataset, fix declon, add vessel, rename NEFSC
#nefsc_bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn.rds")) %>%
nefsc_herringpyagg_stn <- herringpyagg_stn %>%
  mutate(declon = -declon,
         vessel = case_when(year<2009 ~ "AL",
                            year>=2009 ~ "HB", 
                            TRUE ~ as.character(NA)))

##############################################################################
# Add NEAMAP to make full aggregated stomach dataset

# Read in NEAMAP updated input from Jim Gartland, reformat with same names
neamap_herringagg_stn <- read_csv(here("fhdat/NEAMAP_Mean stomach weights_Bluefish Prey_wWQ_AH Only.csv")) %>%  
  mutate(vessel = "NEAMAP") %>%
  rename(id = station,
         sumherringwt = sumbluepreywt,
         nherringsp = nbfpreyspp,
         #npreysp = ,
         npiscsp = npiscspp,
         nstomtot = nstomtot, 
         meanherringwt = meanbluepreywt,
         meanpisclen = meanpisclen.simple, 
         #varpisclen = ,
         season_ng = season,
         declat  = lat,
         declon = lon,
         bottemp = bWT,
         #surftemp = , 
         setdepth = depthm) 


# combine NEAMAP and NEFSC
herringagg_stn_all <-  nefsc_herringpyagg_stn %>%
  bind_rows(neamap_herringagg_stn) 

# Save before SST integration step
#saveRDS(bluepyagg_stn_all, here("fhdat/bluepyagg_stn_all.rds"))

###############################################################################
# Add SST into NEAMAP and reintegrate into full dataset

# Read back in if needed for SST
#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

NEFSCstations <- allfh %>%
  dplyr::mutate(id = paste0(cruise6, "_", station),
                year = as.numeric(year),
                month = as.numeric(month),
                day = as.numeric(day),
                declon = -declon) %>%
  dplyr::select(id, year, month, day, declat, declon) %>%
  dplyr::distinct()

# Need NEAMAP SST update! This is the old file
NEAMAPstationSST <- read.csv(here("fhdat/NEAMAP SST_2007_2022_Nov2023.csv"))

NEAMAPstations <- NEAMAPstationSST %>%
  dplyr::mutate(id = station,
                year = as.numeric(year),
                month = as.numeric(month),
                day = as.numeric(day)) %>%
  dplyr::select(id, year, month, day) %>%
  dplyr::distinct()

# remake diethauls
diethauls <- herringagg_stn_all %>%
  dplyr::select(id, declat, declon)

NEFSCstations <- dplyr::select(NEFSCstations, c(-declat, -declon))

Allstations <- bind_rows(NEFSCstations, NEAMAPstations)

#station id, lat lon, year month day
diethauls <- left_join(diethauls, Allstations)

#add year month day to diet data
herringagg_stn_all <- left_join(herringagg_stn_all, diethauls)

# add NEAMAP SST to surftemp field
NEAMAPidSST <- NEAMAPstationSST %>%
  mutate(id = station) %>%
  dplyr::select(id, SST)

herringagg_stn_all <- left_join(herringagg_stn_all, NEAMAPidSST, by="id") %>%
  mutate(surftemp = coalesce(surftemp, SST)) %>%
  dplyr::select(-SST)

# save merged dataset with day, month, and NEAMAP surftemp, same name
#saveRDS(bluepyagg_stn_all, here("fhdat/bluepyagg_stn_all.rds"))

###############################################################################
#Now match stations to OISST

#make an SST dataframe for 2022! Add to saved sst_data in data-raw/gridded

library(sf)
library(raster)
library(terra)
library(nngeo)

# already have all OISST data as dataframes on this repo

# # Bastille function from https://github.com/kimberly-bastille/ecopull/blob/main/R/utils.R
# 
# nc_to_raster <- function(nc,
#                          varname,
#                          extent = c(0, 360, -90, 90),
#                          crop = raster::extent(280, 300, 30, 50),
#                          show_images = FALSE) {
#   
#   message("Reading .nc as brick...")
#   
#   r <- raster::brick(nc, varname = varname)
#   
#   message("Setting CRS...")
#   raster::crs(r) <- "+proj=longlat +lat_1=35 +lat_2=45 +lat_0=40 +lon_0=-77 +x_0=0 +y_0=0 +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
#   
#   # not sure if this is necessary?
#   raster::extent(r) <- raster::extent(extent)
#   
#   if(show_images){
#     par(mfrow = c(1,2))
#     raster::plot(r, 1, sub = "Full dataset")
#   }
#   
#   message("Cropping data...")
#   ne_data <- raster::crop(r, crop)
#   #ne_data <- raster::rotate(ne_data) add here for future pulls
#   
#   if(show_images){
#     raster::plot(ne_data, 1, sub = "Cropped dataset")
#     par(mfrow = c(1,1))
#   }
#   
#   message("Done!")
#   
#   return(ne_data)
# }
# 
# # function to convert to dataframe based on
# # https://towardsdatascience.com/transforming-spatial-data-to-tabular-data-in-r-4dab139f311f
# 
# raster_to_sstdf <- function(brick,
#                             rotate=TRUE){
#   
#   if(rotate) brick_r <- raster::rotate(brick)
#   brick_r <- raster::crop(brick_r, raster::extent(-77,-65,35,45))
#   sstdf <- as.data.frame(raster::rasterToPoints(brick_r, spatial = TRUE))
#   sstdf <- sstdf %>%
#     dplyr::rename(Lon = x,
#                   Lat = y) %>%
#     tidyr::pivot_longer(cols = starts_with("X"),
#                         names_to = c("year", "month", "day"),
#                         names_prefix = "X",
#                         names_sep = "\\.",
#                         values_to = "sst",
#     )
#   return(sstdf)
# }
# 
# # pull the OISST data as raster brick, modified from 
# # https://github.com/kimberly-bastille/ecopull/blob/main/.github/workflows/pull_satellite_data.yml
# 
# varname <- "sst"
# 
# # 1985-2021 previously pulled, processed and stored. add 2022.
# # add 1981-1984 to extend back in time. No OISST before 1981.
# # 1981 is only Sept-Dec so don't use
# 
# years <- #1982:1984 # 2022
# for(i in years) {
#   name <- paste0(i, ".nc")
#   dir.create(here::here("data-raw","gridded", "sst_data"), recursive = TRUE)
#   filename <- here::here("data-raw","gridded", "sst_data", paste0("test_", i, ".grd"))
#   url <- paste0("https://downloads.psl.noaa.gov/Datasets/noaa.oisst.v2.highres/sst.day.mean.", i, ".nc")
#   download.file(url, destfile = name)
#   
#   text <- knitr::knit_expand(text = "test_{{year}} <- nc_to_raster(nc = name, varname = varname)
#                                      raster::writeRaster(test_{{year}}, filename = filename, overwrite=TRUE)",
#                              year = i)
#   print(text)
#   try(eval(parse(text = text)))
#   unlink(name) # remove nc file to save space
#   print(paste("finished",i))
# }
# 
# 
# # convert raster to dataframe
# #years <- 2022
# for(i in years) {
#   name <- get(paste0("test_",i))
#   filename <- here::here("data-raw","gridded", "sst_data", paste0("sst", i, ".rds"))
#   text <- knitr::knit_expand(text = "sst{{year}} <- raster_to_sstdf(brick = name)
#                                      saveRDS(sst{{year}}, filename)",
#                              year = i)
#   print(text)
#   try(eval(parse(text = text)))
#}
#read in diet data with month day fields

#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

stations <- herringagg_stn_all %>%
  dplyr::mutate(day = str_pad(day, 2, pad='0'),
                month = str_pad(month, 2, pad='0'),
                yrmody = as.numeric(paste0(year, month, day))) %>%
  dplyr::select(id, declon, declat, year, yrmody) %>%
  na.omit() %>%
  sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE) 



#list of SST dataframes
SSTdfs <- list.files(here("data-raw/gridded/sst_data/"), pattern = "*.rds")

dietstn_OISST <- tibble()


for(df in SSTdfs){
  sstdf <- readRDS(paste0(here("data-raw/gridded/sst_data/", df)))
  
  # keep only bluefish dates in SST year
  stationsyr <- stations %>%
    filter(year == unique(sstdf$year))
  
  # keep only sst days in bluefish dataset
  sstdf_survdays <- sstdf %>%
    dplyr::mutate(yrmody = as.numeric(paste0(year, month, day)) )%>%
    dplyr::filter(yrmody %in% unique(stationsyr$yrmody)) %>%
    dplyr::mutate(year = as.numeric(year),
                  month = as.numeric(month),
                  day = as.numeric(day),
                  declon = Lon,
                  declat = Lat) %>%
    dplyr::select(-Lon, -Lat) %>%
    sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE)  
  
  # now join by nearest neighbor and date
  
  #https://stackoverflow.com/questions/71959927/spatial-join-two-data-frames-by-nearest-feature-and-date-in-r      
  
  yrdietOISST <- do.call('rbind', lapply(split(stationsyr, 1:nrow(stationsyr)), function(x) {
    sf::st_join(x, sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),],
                #join = st_nearest_feature
                join = st_nn, k = 1, progress = FALSE
    )
  }))
  
  #   #datatable solution--works but doesnt seem faster?
  #    df1 <- data.table(stationsyr)
  #   
  #  .nearest_samedate <- function(x) {
  #    st_join(st_as_sf(x), sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),], join = st_nearest_feature)
  #  }
  # # 
  #  yrdietOISST <- df1[, .nearest_samedate(.SD), by = list(1:nrow(df1))]
  
  dietstn_OISST <- rbind(dietstn_OISST, yrdietOISST)
  
}

#saveRDS(dietstn_OISST, here("data-raw/dietstn_OISST.rds"))

# Now join with OISST dataset

#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))
#dietstn_OISST <- readRDS(here("data-raw/dietstn_OISST.rds"))


dietstn_OISST_merge <- dietstn_OISST %>%
  dplyr::rename(declon = declon.x,
                declat = declat.x,
                year = year.x,
                oisst = sst) %>%
  dplyr::select(id, oisst) %>%
  sf::st_drop_geometry()

herringagg_stn_all_OISST <- left_join(herringagg_stn_all, dietstn_OISST_merge)

saveRDS(herringagg_stn_all_OISST, here("fhdat/herringagg_stn_all_OISST_1982-2022.rds"))

VAST models were run using the script VASTunivariate_seasonalherringindex.R in the folder VASTscripts: https://github.com/NOAA-EDAB/forageindex/blob/main/VASTscripts/VASTunivariate_seasonalherringindex.R

# This is the exact VAST code used in Gaichas et al 2023, but with data years 
# extended from 1973-2022
# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization

# Load packages
library(here)
library(dplyr)
library(VAST)

#Read in data, separate spring and fall, and rename columns for VAST:

# this dataset created in SSTmethods.Rmd

herringagg_stn <- readRDS(here::here("fhdat/herringagg_stn_all_OISST_1982-2022.rds"))

# make SST column that uses surftemp unless missing or 0
# there are 3 surftemp 0 values in the dataset, all with oisst > 15
herringagg_stn <- herringagg_stn %>%
  dplyr::mutate(sstfill = ifelse((is.na(surftemp)|surftemp==0), oisst, surftemp))

#herringagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion

# code Vessel as AL=0, HB=1, NEAMAP=2

herringagg_stn_fall <- herringagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanherringwt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()

herringagg_stn_spring <- herringagg_stn %>%
  filter(season_ng == "SPRING") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 =1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanherringwt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()


# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302

# use only MAB, GB, GOM, SS EPUs 
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE

MAB <- c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510)
GB  <- c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550)
GOM <- c(1220, 1240, 1260:1290, 1360:1400, 3560:3830)
SS  <- c(1300:1352, 3840:3990)

coast3nmbuffst <- readRDS(here::here("spatialdat/coast3nmbuffst.rds"))

# Mid Atlantic
MAB2 <- coast3nmbuffst %>% 
  dplyr::filter(stratum_number %in% MAB) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# Georges Bank EPU
GB2 <- coast3nmbuffst %>% 
  dplyr::filter(stratum_number %in% GB) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# gulf of maine EPU (for SOE)
GOM2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% GOM) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# scotian shelf EPU (for SOE)
SS2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% SS) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()


# needed to cover the whole northwest atlantic grid
allother2 <- coast3nmbuffst %>%
  dplyr::filter(!stratum_number %in% c(MAB, GB, GOM, SS)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# all epus
allEPU2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% c(MAB, GB, GOM, SS)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# # configs same FieldConfig as below but formatted differently
# FieldConfig <- c(
#   "Omega1"   = 0,   # number of spatial variation factors (0, 1, AR1)
#   "Epsilon1" = 0,   # number of spatio-temporal factors
#   "Omega2"   = 0, 
#   "Epsilon2" = 0
# ) 

# default configs, not really specified anyway
FieldConfig = matrix( "IID", ncol=2, nrow=3, 
                      dimnames=list(c("Omega","Epsilon","Beta"),c("Component_1","Component_2")))


RhoConfig <- c(
  "Beta1" = 0,      # temporal structure on years (intercepts) 
  "Beta2" = 0, 
  "Epsilon1" = 0,   # temporal structure on spatio-temporal variation
  "Epsilon2" = 0
) 
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1

OverdispersionConfig    <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight

strata.limits <- as.list(c("AllEPU" = allEPU2, 
                           "MAB" = MAB2,
                           "GB" = GB2,
                           "GOM" = GOM2,
                           "allother" = allother2))

settings = make_settings( n_x = 500, 
                          Region = "northwest_atlantic",
                          Version = "VAST_v14_0_1", #needed to prevent error from newer dev version number
                          #strata.limits = list('All_areas' = 1:1e5), full area
                          strata.limits = strata.limits,
                          purpose = "index2", 
                          bias.correct = TRUE,
                          #use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          #FieldConfig = FieldConfig,
                          #RhoConfig = RhoConfig,
                          OverdispersionConfig = OverdispersionConfig
                          )


New_Extrapolation_List <- readRDS(here::here("spatialdat/CustomExtrapolationList.rds"))

# select dataset and set directory for output

#########################################################
# Run model fall

season <- c("fall_500_lennosst_ALLsplit_biascorrect")

working_dir <- here::here(sprintf("herringpyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

# subset for faster testing
#herringagg_stn_fall <- herringagg_stn_fall %>% filter(Year<1990)                       

fit <- fit_model(
  settings = settings, 
  extrapolation_list = New_Extrapolation_List,
  Lat_i = herringagg_stn_fall$Lat, 
  Lon_i = herringagg_stn_fall$Lon, 
  t_i = herringagg_stn_fall$Year, 
  b_i = as_units(herringagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(herringagg_stn_fall)),
  v_i = herringagg_stn_fall$Vessel,
  Q_ik = as.matrix(herringagg_stn_fall[,c("npiscsp", 
                                         "meanpisclen", 
                                         "sstfill"
                                         )]),
  #Use_REML = TRUE,
  working_dir = paste0(working_dir, "/"))

saveRDS(fit, file = paste0(working_dir, "/fit.rds"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

######################################################
# Run model spring

season <- c("spring_500_lennosst_ALLsplit_biascorrect")

working_dir <- here::here(sprintf("herringpyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}                         
  
# subset for faster testing
#herringagg_stn_spring <- herringagg_stn_spring %>% filter(Year<1990) 

fit <- fit_model( settings = settings,  
                 extrapolation_list = New_Extrapolation_List,
                 Lat_i = herringagg_stn_spring[,'Lat'], 
                 Lon_i = herringagg_stn_spring[,'Lon'], 
                 t_i = herringagg_stn_spring[,'Year'], 
                 b_i = as_units(herringagg_stn_spring[,'Catch_g'], 'g'), 
                 a_i = rep(1, nrow(herringagg_stn_spring)),
                 v_i = herringagg_stn_spring$Vessel,
                 Q_ik = as.matrix(herringagg_stn_spring[,c("npiscsp", 
                                                          "meanpisclen", 
                                                          "sstfill"
                                                          )]),
                # Use_REML = TRUE,
                 working_dir = paste0(working_dir, "/"))

saveRDS(fit, file = paste0(working_dir, "/fit.rds"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

3 Working group recommendations, March 2024

3.1 Curate the predator list

The predator list used in initial runs was the same used for bluefish diet, which includes herring and 19 other prey. Therefore it included predators that may feed on smaller forage fish (squids).

This is the full piscivore list, derived from Brian Smith’s prey similarity matrix and using cluster analysis to identify the predator/sizes that have highest diet similarity to all size classes of bluefish:

# object is called `allfh`
load(here("fhdat/allfh.Rdata"))

#object is called allfh21
load(here("fhdat/allfh21.Rdata"))

#object is called allfh22
load(here("fhdat/allfh22.Rdata"))

# bind all NEFSC stomach datasets
allfh <- allfh %>%
  dplyr::bind_rows(allfh21) |>
  dplyr::bind_rows(allfh22)

dietoverlap <- read_csv(here("fhdat/tgmat.2022-02-15.csv"))

d_dietoverlap <- dist(dietoverlap)

# again directly from https://cran.r-project.org/web/packages/dendextend/vignettes/Cluster_Analysis.html
hclust_methods <- c("ward.D", "single", "complete", "average", "mcquitty", 
        "median", "centroid", "ward.D2")
diet_dendlist <- dendlist()
for(i in seq_along(hclust_methods)) {
   hc_diet <- hclust(d_dietoverlap, method = hclust_methods[i])   
   diet_dendlist <- dendlist(diet_dendlist, as.dendrogram(hc_diet))
}
names(diet_dendlist) <- hclust_methods

# preds <- list()
# 
# for(i in 1:8) {
#   dendi <- diet_dendlist[[i]]
#   namei <- names(diet_dendlist)[i]
#   labels(dendi) <- paste(as.character(names(dietoverlap[-1]))[order.dendrogram(dendi)],
#                            "(",labels(dendi),")", 
#                            sep = "")
#   #preds[[namei]] <- partition_leaves(dendi)[[which_node(dendi, c("35", "36", "37"))]]
#   preds[[namei]] <- partition_leaves(dendi)[[which_node(dendi, c("Bluefish..S(37)", "Bluefish..M(36)", "Bluefish..L(35)"))]]
# 
# }

dendi <- diet_dendlist$complete

labels(dendi) <- paste(as.character(names(dietoverlap[-1]))[order.dendrogram(dendi)],
                           "(",labels(dendi),")", 
                           sep = "")
  
pisccomplete <- partition_leaves(dendi)[[which_node(dendi, c("Bluefish..S(37)", "Bluefish..M(36)", "Bluefish..L(35)"))]]
  

sizedefs <- allfh %>% 
  select(pdcomnam, sizecat, pdlen) %>% 
  group_by(pdcomnam, sizecat) %>% 
  summarize(minlen = min(pdlen), maxlen = max(pdlen))

# add these in as a NEAMAP column
# +  Summer Flounder 21-70 cm
# +  Silver Hake 21-76 cm
# +  Weakfish 26-50 cm
# +  Atlantic Cod 81-150 cm 
# +  Bluefish 3 – 118 cm
# +  Striped Bass 31 – 120 cm
# +  Spanish Mackerel 10 – 33.5 cm 
# +  Spotted Sea Trout 15.5 – 34 cm 
# +  Spiny Dogfish 36 – 117 cm
# +  Goosefish 5 – 124 cm  

NEAMAPpisc <- data.frame("comname" = c("Summer Flounder",
                                       "Silver Hake", 
                                       "Weakfish", 
                                       "Atlantic Cod", 
                                       "Bluefish", 
                                       "Striped Bass", 
                                       "Spanish Mackerel", 
                                       "Spotted Sea Trout", 
                                       "Spiny Dogfish", 
                                       "Goosefish"),
                         "mincm" = c(21, 21, 26, 81, 3, 31, 10, 15.5, 36, 5),
                         "maxcm" = c(70, 76, 50, 150, 118, 120, 33.5, 34, 117, 124)
                         ) %>%
  dplyr::mutate(comname = stringr::str_to_sentence(comname),
                NEAMAP = TRUE)


pisccompletedf <- data.frame("COMNAME" = toupper(str_remove(pisccomplete, "\\..*")),
                              "SizeCat" = str_remove(str_extract(pisccomplete, "\\..*[:upper:]+"), "\\.."),
                              "feedguild" = "pisccomplete") %>%
  left_join(sizedefs, by=c("COMNAME" = "pdcomnam", 
                          "SizeCat" = "sizecat")) %>%
  arrange(COMNAME, minlen)

piscall <- pisccompletedf %>%
  dplyr::mutate(comname = stringr::str_to_sentence(COMNAME)) %>%
  dplyr::group_by(comname) %>%
  dplyr::summarise(mincm = min(minlen),
                   maxcm = max(maxlen),
                   NEFSC = TRUE) %>%
  dplyr::full_join(NEAMAPpisc) %>%
  dplyr::mutate(survey = dplyr::case_when(NEFSC & NEAMAP ~ "Both",
                                          NEFSC ~ "NEFSC",
                                          NEAMAP ~ "NEAMAP")) %>%
  dplyr::arrange(comname)

flextable::flextable(piscall %>% dplyr::select(-c(NEFSC, NEAMAP))) %>%
  flextable::set_header_labels(comname = 'Predator name', 
                               #SizeCat = "Size category", 
                               mincm = "Minimum length (cm)",
                               maxcm = "Maximum length (cm)",
                               survey = "Survey") %>%
  flextable::set_caption("Predators with highest diet similarity to Bluefish, Northeast Fisheries Science Center (NEFSC, years 1973-2021) and Northeast Area Monitoring and Assessment Program (NEAMAP, years 2007-2021) databases.") %>%
  #flextable::align_text_col(j=survey, align = "right", header = TRUE, footer = TRUE) %>%
  #flextable::autofit()
  flextable::width(width = c(3, 1, 1, 1))

Table 3.1: Predators with highest diet similarity to Bluefish, Northeast Fisheries Science Center (NEFSC, years 1973-2021) and Northeast Area Monitoring and Assessment Program (NEAMAP, years 2007-2021) databases.
Predator name	Minimum length (cm)	Maximum length (cm)	Survey
Atlantic cod	81.0	150.0	Both
Atlantic halibut	31.0	90.0	NEFSC
Bluefish	3.0	118.0	Both
Buckler dory	21.0	50.0	NEFSC
Cusk	51.0	104.0	NEFSC
Fourspot flounder	41.0	49.0	NEFSC
Goosefish	5.0	124.0	Both
Longfin squid	1.0	30.0	NEFSC
Northern shortfin squid	3.0	30.0	NEFSC
Pollock	51.0	120.0	NEFSC
Red hake	41.0	98.0	NEFSC
Sea raven	4.0	68.0	NEFSC
Silver hake	21.0	76.0	Both
Spanish mackerel	10.0	33.5	NEAMAP
Spiny dogfish	36.0	117.0	Both
Spotted hake	21.0	40.0	NEFSC
Spotted sea trout	15.5	34.0	NEAMAP
Striped bass	31.0	120.0	Both
Summer flounder	21.0	70.0	Both
Thorny skate	81.0	108.0	NEFSC
Weakfish	26.0	50.0	Both
White hake	21.0	136.0	NEFSC

Here is an alternative:

Jim Gartland used the list of predators that had at least 5 locations with herring prey 1978-2019 across both NEFSC and NEAMAP surveys, but also only included predators sampled consistently over the full time series.

Let’s count all possible herring observations by predator in NEFSC. They are in the table, but for the index lets leave out observations of eggs and larvae so this covers the sizes and ages modeled in the assessment. This is a count of stomachs.

preycount <- allfh %>%
  filter(pdcomnam != "",
         pynam %in% c("CLUPEA HARENGUS",
                      "CLUPEIDAE",
                      "CLUPEA HARENGUS SCALES",
                      "CLUPEIDAE SCALES",
                      "CLUPEA HARENGUS LARVAE",
                      "CLUPEA HARENGUS EGGS",
                      "CLUPEIDAE LARVAE",
                      "CLUPEIDAE EGGS"
         )) %>%
  mutate(id = paste0(cruise6, "_", station)) |>
  group_by(pdcomnam, pynam) %>%
  summarise(count = n()) %>%
  ungroup() |>
  pivot_wider(names_from = pynam, values_from = count) |>
  arrange(desc(`CLUPEA HARENGUS`))

flextable::flextable(preycount)

Now a count of stations by predator with CLUPEA HARENGUS only

AHstncount <- allfh %>%
  filter(pdcomnam != "",
         pynam %in% c("CLUPEA HARENGUS"
         )) %>%
  mutate(id = paste0(cruise6, "_", station)) |>
  dplyr::select(pdcomnam, id) %>%
  dplyr::distinct() %>%
  group_by(pdcomnam) |>
  summarise(count = n()) |>
  arrange(desc(count))

flextable::flextable(AHstncount)

pdcomnam	count
SPINY DOGFISH	572
SILVER HAKE	278
ATLANTIC COD	248
WHITE HAKE	185
GOOSEFISH	127
WINTER SKATE	56
POLLOCK	42
STRIPED BASS	35
SUMMER FLOUNDER	32
BLUEFISH	28
THORNY SKATE	19
RED HAKE	18
HADDOCK	16
SEA RAVEN	14
ATLANTIC HALIBUT	13
BARNDOOR SKATE	12
LITTLE SKATE	7
WINDOWPANE	7
ATLANTIC HERRING	6
LONGHORN SCULPIN	4
ATLANTIC MACKEREL	3
SMOOTH DOGFISH	3
WEAKFISH	3
BUCKLER DORY	2
SPOTTED HAKE	2
ACADIAN REDFISH	1
BLACK SEA BASS	1
FOURSPOT FLOUNDER	1
YELLOWTAIL FLOUNDER	1

Applying Jim’s criteria (CLUPEA HARENGUS only in 5 or more stations across the dataset) we include everything down to herring.

If we look at station counts by year to establish consistent sampling across the time series, do we drop any? These are all stations where the predator was sampled in a year, whether they had herring in stomachs or not.

AHstncountpreds <- AHstncount |> dplyr::filter(count >= 5) |> dplyr::select(pdcomnam)

sampstncountyr <- allfh %>%
  dplyr::filter(pdcomnam != "",
         pdcomnam %in% AHstncountpreds$pdcomnam) |>
  dplyr::mutate(id = paste0(cruise6, "_", station)) |>
  dplyr::select(year, pdcomnam, id) %>%
  dplyr::distinct() %>%
  dplyr::group_by(year, pdcomnam) |>
  dplyr::summarise(count = n()) |>
  tidyr::pivot_wider(names_from = year, values_from = count) 

flextable::flextable(sampstncountyr)

I’ll need to ask what we meant by consistent across the time series, but its possible that would drop striped bass and barndoor skate.

Compare the number of stations with herring and the sampling across time from the piscivore list with Jim G’s list:

altpredlist <- AHstncountpreds |>
  dplyr::mutate(comname = stringr::str_to_sentence(AHstncountpreds$pdcomnam))

predlists <- piscall |>
  dplyr::full_join(altpredlist)

flextable::flextable(predlists)

comname	mincm	maxcm	NEFSC	NEAMAP	survey	pdcomnam
Atlantic cod	81.0	150.0	TRUE	TRUE	Both	ATLANTIC COD
Atlantic halibut	31.0	90.0	TRUE		NEFSC	ATLANTIC HALIBUT
Bluefish	3.0	118.0	TRUE	TRUE	Both	BLUEFISH
Buckler dory	21.0	50.0	TRUE		NEFSC
Cusk	51.0	104.0	TRUE		NEFSC
Fourspot flounder	41.0	49.0	TRUE		NEFSC
Goosefish	5.0	124.0	TRUE	TRUE	Both	GOOSEFISH
Longfin squid	1.0	30.0	TRUE		NEFSC
Northern shortfin squid	3.0	30.0	TRUE		NEFSC
Pollock	51.0	120.0	TRUE		NEFSC	POLLOCK
Red hake	41.0	98.0	TRUE		NEFSC	RED HAKE
Sea raven	4.0	68.0	TRUE		NEFSC	SEA RAVEN
Silver hake	21.0	76.0	TRUE	TRUE	Both	SILVER HAKE
Spanish mackerel	10.0	33.5		TRUE	NEAMAP
Spiny dogfish	36.0	117.0	TRUE	TRUE	Both	SPINY DOGFISH
Spotted hake	21.0	40.0	TRUE		NEFSC
Spotted sea trout	15.5	34.0		TRUE	NEAMAP
Striped bass	31.0	120.0	TRUE	TRUE	Both	STRIPED BASS
Summer flounder	21.0	70.0	TRUE	TRUE	Both	SUMMER FLOUNDER
Thorny skate	81.0	108.0	TRUE		NEFSC	THORNY SKATE
Weakfish	26.0	50.0	TRUE	TRUE	Both
White hake	21.0	136.0	TRUE		NEFSC	WHITE HAKE
Winter skate						WINTER SKATE
Haddock						HADDOCK
Barndoor skate						BARNDOOR SKATE
Little skate						LITTLE SKATE
Windowpane						WINDOWPANE
Atlantic herring						ATLANTIC HERRING

The current NEAMAP input data includes the list of piscivores. I would need to request a different dataset to use the other list. That said, there weren’t many herring in the NEAMAP dataset anyway.

The main question for the WG is whether we want to include haddock and the skates, which are not in the piscivore dataset. This would also require a new NEAMAP pull.

3.2 Evaluate spatial footprint

This depends on the predator list. For interest/sensitivity testing, I’ll first use the piscivore predator list (full forage index predator field) to do the footprint.

Then I’ll look at how the footprint changes with different predator lists.

Can run the model with each combination.

4 Results

4.1 How many piscivore locations had herring?

4.1.1 Piscivore predator list

Not that many. Seems a spring model will run anyway using the forage index configuration but a fall model will not. This is the full time series from the stomach data for piscivores, we aren’t using all of it in the model.

herringagg_stn <- readRDS(here::here("fhdat/herringagg_stn_all_OISST_1982-2022.rds"))

# make SST column that uses surftemp unless missing or 0
# there are 3 surftemp 0 values in the dataset, all with oisst > 15
herringagg_stn <- herringagg_stn %>%
  dplyr::mutate(sstfill = ifelse((is.na(surftemp)|surftemp==0), oisst, surftemp))

#herringagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion

# code Vessel as AL=0, HB=1, NEAMAP=2

herringagg_stn_all <- herringagg_stn %>%
  #ungroup() %>%
  #filter(season_ng == "FALL") |>
  #,year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
  ) %>% 
  dplyr::select(Catch_g = meanherringwt, #use bluepywt for individuals input in example
                Year = year,
                Vessel,
                AreaSwept_km2,
                Lat = declat,
                Lon = declon,
                meanpisclen,
                npiscsp,
                #bottemp, #this leaves out many stations for NEFSC
                #surftemp, #this leaves out many stations for NEFSC
                #oisst, #leaves out everything before 1982
                sstfill,
                season_ng
  ) %>%
  na.omit() %>%
  as.data.frame()

nonzeroherring <- herringagg_stn_all |>
  dplyr::filter(Catch_g > 0) |> 
  dplyr::group_by(season_ng, Year) |> 
  dplyr::summarise(n = dplyr::n()) |>
  tidyr::pivot_wider(names_from = "season_ng", names_prefix = "Herring", values_from = "n")

tows <- herringagg_stn_all |>
  dplyr::group_by(season_ng, Year) |> 
  dplyr::summarise(n = dplyr::n()) |>
  tidyr::pivot_wider(names_from = "season_ng", names_prefix = "Total", values_from = "n")

pisctows <- dplyr::left_join(tows, nonzeroherring) |>
  dplyr::select(Year, TotalSPRING, HerringSPRING, TotalFALL, HerringFALL)

tab1 <- flextable::flextable(pisctows) |>
  flextable::set_caption("Number of piscivore stomach locations by season and year (TotalSPRING and TotalFALL), and number of piscivore stomach locations having nonzero herring prey (HerringSPRING and HerringFALL).") 

flextable::colformat_double(tab1, big.mark = "", digits = 0)

Table 4.1: Number of piscivore stomach locations by season and year (TotalSPRING and TotalFALL), and number of piscivore stomach locations having nonzero herring prey (HerringSPRING and HerringFALL).
Year	TotalSPRING	HerringSPRING	TotalFALL	HerringFALL
1973	64	2	103
1974	83	5	77	3
1975	14		70	2
1976	84	2	67	2
1977	143		162	2
1978	136	1	332	8
1979	171	8	380	11
1980	155	2	248	2
1981	200	3	181	3
1982	238	5	193	1
1983	224	1	168	2
1984	205	1	205	6
1985	255	6	233	3
1986	266	10	208	9
1987	252	9	232	16
1988	192	14	226	18
1989	242	14	267	19
1990	245	9	287	29
1991	280	25	348	57
1992	335	41	413	89
1993	353	46	403	85
1994	325	38	309	53
1995	395	53	362	67
1996	368	43	309	28
1997	375	52	288	33
1998	450	55	340	43
1999	469	76	341	31
2000	438	48	295	30
2001	416	45	293	30
2002	448	39	293	23
2003	343	25	301	18
2004	385	31	287	17
2005	341	19	290	19
2006	414	24	329	12
2007	411	15	454	25
2008	424	15	464	23
2009	506	25	475	25
2010	438	21	452	17
2011	421	23	441	18
2012	457	33	458	36
2013	488	28	483	24
2014	403	22	453	14
2015	446	17	461	25
2016	450	20	473	19
2017	353	8	260	7
2018	352	4	400	2
2019	419	3	453	8
2020	112		136
2021	391	16	425	5
2022	434	4	387	4

4.2 Evaluate spatial footprint by season

Visual: stations with herring observations across all years by season, 1982-2022, using the piscivores list

herringagg_stn_fall <- herringagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanherringwt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()

herringagg_stn_spring <- herringagg_stn %>%
  filter(season_ng == "SPRING") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 =1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanherringwt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()

nonzerofall <- herringagg_stn_fall |>
  dplyr::filter(Catch_g > 0,
                Year > 1981)

nonzerospring <- herringagg_stn_spring |>
  dplyr::filter(Catch_g > 0,
                Year > 1981)

Fall <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = nonzerofall, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Fall herring in stomachs 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))

Spring <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = nonzerospring, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Spring herring in stomachs 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))
 
Spring + Fall

How does this compare to herring survey strata (in green here)?

# from Jon
herring_spring <- c(01010, 01020, 01030, 01040, 01050, 01060, 01070, 01080, 01090, 
                    01100, 01110, 01120, 01130, 01140, 01150, 01160, 01170, 01180, 
                    01190, 01200, 01210, 01220, 01230, 01240, 01250, 01260, 01270, 
                    01280, 01290, 01300, 01360, 01370, 01380, 01390, 01400, 01610, 
                    01620, 01630, 01640, 01650, 01660, 01670, 01680, 01690, 01700, 
                    01710, 01720, 01730, 01740, 01750, 01760)
herring_fall <- c(01050, 01060, 01070, 01080, 01090, 01100, 01110, 01120, 01130, 
                  01140, 01150, 01160, 01170, 01180, 01190, 01200, 01210, 01220, 
                  01230, 01240, 01250, 01260, 01270, 01280, 01290, 01300, 01360, 
                  01370, 01380, 01390, 01400)

herring_springgrid <-  FishStatsUtils::northwest_atlantic_grid %>%
  filter(stratum_number %in% herring_spring)

herring_fallgrid <-  FishStatsUtils::northwest_atlantic_grid %>%
  filter(stratum_number %in% herring_fall)

Fall <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = herring_fallgrid, aes(x = Lon, y = Lat),  colour = "green", size=0.05, alpha=0.1) +
  geom_point(data = nonzerofall, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Fall herring in stomachs 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))

Spring <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = herring_springgrid, aes(x = Lon, y = Lat),  colour = "green", size=0.05, alpha=0.1) +
  geom_point(data = nonzerospring, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Spring herring in stomachs 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))
 
Spring + Fall

Distribution from NEFSC BTS tows

survdat_nobio <- readRDS("~/Documents/0_Data/benthosindex/data/survdat_nobio.rds")

survdat_herring_tows <- survdat_nobio$survdat |>
  dplyr::filter(SVSPP == 32) |> # Atlantic herring
  dplyr::select(CRUISE6, STATION, STRATUM, SVSPP, YEAR, SEASON, LAT, LON, ABUNDANCE, BIOMASS) |>
  dplyr::distinct()

saveRDS(survdat_herring_tows, here::here("herringpyindex/survdat_herring_tows.rds"))

survdat_herring_tows <- readRDS(here::here("herringpyindex/survdat_herring_tows.rds"))

surv_herr_fall <- survdat_herring_tows |>
  dplyr::filter(SEASON == "FALL",
                YEAR > 1981)
  
surv_herr_spring <- survdat_herring_tows |>
  dplyr::filter(SEASON == "SPRING",
                YEAR > 1981)

Fall <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = herring_fallgrid, aes(x = Lon, y = Lat),  colour = "green", size=0.05, alpha=0.1) +
  geom_point(data = surv_herr_fall, aes(x = LON, y = LAT), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Fall herring NEFSC BTS 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))

Spring <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = herring_springgrid, aes(x = Lon, y = Lat),  colour = "green", size=0.05, alpha=0.1) +
  geom_point(data = surv_herr_spring, aes(x = LON, y = LAT), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Spring herring NEFSC BTS 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))
 
Spring + Fall

I think we can define the fall model domain southern extent using the herring BTS strata.

4.3 Now run models limiting strata to those with herring observations

4.4 Fall model using forage index setup: failed

About the same number of positive observations in spring and fall.

List of estimated fixed and random effects:
    Coefficient_name Number_of_coefficients   Type
1           beta1_ft                     48  Fixed
2           beta2_ft                     48  Fixed
3       L_epsilon1_z                      1  Fixed
4       L_epsilon2_z                      1  Fixed
5         L_omega1_z                      1  Fixed
6         L_omega2_z                      1  Fixed
7          lambda1_k                      3  Fixed
8          lambda2_k                      3  Fixed
9         ln_H_input                      2  Fixed
10         logkappa1                      1  Fixed
11         logkappa2                      1  Fixed
12         logSigmaM                      1  Fixed
13 Epsiloninput1_sff                  27750 Random
14 Epsiloninput2_sff                  27750 Random
15    Omegainput1_sf                    555 Random
16    Omegainput2_sf                    555 Random

### Checking model at initial values
All fixed effects have a nonzero gradient

... After 167 iterations...

The following parameters appear to be approaching zero:
          Param starting_value Lower           MLE Upper final_gradient
55 L_epsilon1_z              1  -Inf -8.171138e-07   Inf  -7.579271e-05
Please turn off factor-model variance parameters `L_` that are approaching zero and re-run the model

Error: Please change model structure to avoid problems with parameter estimates and then re-try; see details in `?check_fit`
In addition: Warning message:
In nlminb(start = startpar, objective = fn, gradient = gr, control = nlminb.control,  :
  NA/NaN function evaluation

Trying spring model

4.5 Spring results using forage index setup: ran

Check diagnostics.

Abundance index:

SOEinputs <- function(infile, season, outfile) {
  
  splitoutput <- read.csv(infile)
  
  # warning, hardcoded. obviously
  stratlook <- data.frame(Stratum = c("Stratum_1",
                                      "Stratum_2",
                                      "Stratum_3",
                                      "Stratum_4",
                                      "Stratum_5",
                                      "Stratum_6",
                                      "Stratum_7",
                                      "Stratum_8",
                                      "Stratum_9",
                                      "Stratum_10",
                                      "Stratum_11",
                                      "Stratum_12",
                                      "Stratum_13",
                                      "Stratum_14",
                                      "Stratum_15"),
                          Region  = c("AllEPU", 
                                      "MABGB", 
                                      "MABGBstate", 
                                      "MABGBfed", 
                                      "MAB",
                                      "GB",
                                      "GOM",
                                      "bfall",
                                      "bfin",
                                      "bfoff",
                                      "MABGBalbinshore",
                                      "MABGBothoffshore",
                                      "albbfin",
                                      "albbfall",
                                      "allother"))
  
  forageindex <- splitoutput %>%
    left_join(stratlook) %>%
    dplyr::select(Time, 
                  EPU = Region, 
                  "Forage Fish Biomass Estimate" = Estimate, 
                  "Forage Fish Biomass Estimate SE" = Std..Error.for.Estimate) %>%
    tidyr::pivot_longer(c("Forage Fish Biomass Estimate", "Forage Fish Biomass Estimate SE"), 
                        names_to = "Var", values_to = "Value") %>%
    dplyr::filter(EPU %in% c("MAB", "GB", "GOM", "AllEPU")) %>%
    dplyr::mutate(Units = "relative grams per stomach") %>%
    dplyr::select(Time, Var, Value, EPU, Units)
  
  forageindex$Var <- stringr::str_c(season, forageindex$Var, sep = " ")
  
  #readr::write_csv(forageindex, outfile)
  #saveRDS(forageindex, outfile)
  
} 


# make data files

SOEinputs(infile = here::here("herringpyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect/Index.csv"),
          season = "Spring", 
           outfile = here::here("herringpyindex/springherringindex.rds"))


forageindex <- readRDS(here::here("herringpyindex/springherringindex.rds"))

# test plot
foragewide <- forageindex %>%
  pivot_wider(names_from = Var, values_from = Value)


ggplot(foragewide, aes(x=Time, y=`Spring Forage Fish Biomass Estimate`, colour = EPU)) +
  geom_errorbar(aes(ymin=`Spring Forage Fish Biomass Estimate`+`Spring Forage Fish Biomass Estimate SE`,
                    ymax=`Spring Forage Fish Biomass Estimate`-`Spring Forage Fish Biomass Estimate SE`))+
  geom_point()+
  geom_line()

Log density pattern:

knitr::include_graphics(here::here("herringpyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect/ln_density-predicted.png"))

Spatial residuals:

knitr::include_graphics(here::here("herringpyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect/quantile_residuals_on_map.png"))

How much data in spring?

knitr::include_graphics(here::here("herringpyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect/Data_by_year.png"))

4.6 Model selection

Same initial as forage index to see if spatial RE, spatio-temporal RE, and aniso make sense.

Comparisons of AIC are presented for both the first (spatial and spatio-temporal random effects, Table Sb4.2) and second (catchability covariates, Table Sb4.3) steps of model selection.

# from each output folder in pyindex, 
outdir <- here::here("herringpyindex/modsel1")
moddirs <- list.dirs(outdir) 
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)


# function to apply extracting info
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
    fill = TRUE, header = FALSE)
  
  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
  
  #FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }
  
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }
  
  
  #RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
  
  # read parameter estimates, object is called parameter_Estimates
  if(file.exists(file.path(d.name, "parameter_estimates.RData"))) {
    load(file.path(d.name, "parameter_estimates.RData"))
    
    AIC <- parameter_estimates$AIC[1]  
    converged <- parameter_estimates$Convergence_check[1]
    fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
    randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
    
  }else if(file.exists(file.path(d.name, "parameter_estimates.txt"))){
    
    reptext <- readLines(file.path(d.name, "parameter_estimates.txt"))
    
    AIC <- as.double(reptext[grep(reptext, pattern = "AIC")+1])
    converged <- reptext[grep(reptext, pattern = "Convergence_check")+1]
    fixedcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][2])
    randomcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][3])
    
  }else{
    
    AIC <- NA_real_
    converged <- NA_character_
    fixedcoeff <- NA_integer_
    randomcoeff <- NA_integer_
  }
  
  
  #index <- read.csv(file.path(d.name, "Index.csv"))
  
  
  # return model attributes as a dataframe
  out <- data.frame(modname = modname,
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale =  fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff#,
                    #index = index
  )
    
    return(out)

}


modcompare <- purrr::map_dfr(moddirs, getmodinfo)

Models compared using REML are identified by model name (“modname” in Table Sb4.2) which always includes all prey aggregated, season (“all” for annual models of months 1-12, “fall” for models of months 7-12, and “spring” for models of months 1-6), number of knots (500 for all models), and which fixed and random spatial and spatio-temporal effects were included in which linear predictor (1 or 2). The names for model options and associated VAST model settings are:

Model selection 1 (spatial, spatio-temporal effects, no covariates) options (following Ng et al. (2021)) and naming: * All models set Use_REML = TRUE in fit_model specifications.
* Modeled effects, model name suffix, and VAST settings by model:

“_alleffectson” = Spatial and spatio-temporal random effects plus anisotropy in both linear predictors; FieldConfig default (all IID)
“_noaniso” = Spatial and spatio-temporal random effects in both linear predictors with anisotopy turned off; FieldConfig default (all IID) and use_anisotopy = FALSE
“_noomeps2” = Spatial and spatio-temporal random effects plus anisotropy only in linear predictor 1; FieldConfig = 0 for Omega2, Epsilon2
“_noomeps2_noaniso” = Spatial and spatio-temporal random effects only in linear predictor 1 with anisotopy turned off; FieldConfig = 0 for Omega2, Epsilon2 and use_anisotopy = FALSE
“_noomeps2_noeps1” = Spatial random effects plus anisotropy only in linear predictor 1; FieldConfig = 0 for Omega2, Epsilon2, Epsilon1
“_noomeps2_noeps1_noaniso” = Spatial random effects only in linear predictor 1 with anisotopy turned off; FieldConfig = 0 for Omega2, Epsilon2, Epsilon1 and use_anisotopy = FALSE
“_noomeps12” = Anisotropy, but no spatial or spatio-temporal random effects in either linear predictor; FieldConfig = 0 for both Omega, Epsilon
“_noomeps12_noaniso” = No spatial or spatio-temporal random effects in either linear predictor; FieldConfig = 0 for both Omega, Epsilon and use_anisotopy = FALSE

modselect <- modcompare %>%
  dplyr::mutate(season = case_when(str_detect(modname, "_fall_") ~ "Fall",
                            str_detect(modname, "spring") ~ "Spring",
                            str_detect(modname, "_all_") ~ "Annual",
                            TRUE ~ as.character(NA))) %>%
  dplyr::mutate(converged2 = case_when(str_detect(converged, "no evidence") ~ "likely",
                                str_detect(converged, "is likely not") ~ "unlikely",
                                TRUE ~ as.character(NA))) %>%
  dplyr::mutate(modname = str_extract(modname, '(?<=allagg_).*')) |>
  dplyr::group_by(season) %>%
  dplyr::mutate(deltaAIC = AIC-min(AIC, na.rm = TRUE)) %>%
  dplyr::select(modname, season, deltaAIC, fixedcoeff,
         randomcoeff, use_anisotropy, 
         omega1, omega2, epsilon1, epsilon2, 
         beta1, beta2, AIC, converged2) %>%
  dplyr::arrange(season, AIC)

# DT::datatable(modselect, rownames = FALSE, 
#               options= list(pageLength = 25, scrollX = TRUE),
#               caption = "Comparison of delta AIC values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures. See text for model descriptions.")

flextable::flextable(modselect %>%
                       dplyr::select(-c(use_anisotropy, 
         omega1, omega2, epsilon1, epsilon2, 
         beta1, beta2))) %>%
  flextable::set_header_labels(modname = "Model name",
                               season = "Season",
                               deltaAIC = "dAIC",
                               fixedcoeff = "N fixed",
                               randomcoeff = "N random",
                               converged2 = "Convergence") |>
  flextable::set_caption("Comparison of delta AIC (dAIC) values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures, with number of fixed (N fixed) and random (N random) coefficients. See text for model descriptions associated with each model name.") %>%
  flextable::fontsize(size = 9, part = "all") %>%
  flextable::colformat_double(digits = 2) |>
  flextable::set_table_properties(layout = "autofit", width = 1)

Table 4.2: Comparison of delta AIC (dAIC) values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures, with number of fixed (N fixed) and random (N random) coefficients. See text for model descriptions associated with each model name.
Model name	Season	dAIC	N fixed	N random	AIC	Convergence
all_500_alleffectson	Annual	0.00	9	58,238	28,152.41	likely
all_500_noaniso	Annual	28.55	7	58,238	28,180.96	likely
all_500_noomeps2	Annual	495.74	6	29,168	28,648.15	likely
all_500_noomeps2_noeps1	Annual	653.09	5	668	28,805.50	likely
all_500_noomeps2_noeps1_noaniso	Annual	663.78	3	668	28,816.19	likely
all_500_noomeps12	Annual	1,791.57	1	98	29,943.98	likely
all_500_noomeps12_noaniso	Annual	1,791.57	1	98	29,943.98	likely
all_500_noomeps2_noaniso	Annual		4	29,168
fall_500_alleffectson	Fall	0.00	9	56,706	12,722.49	likely
fall_500_noaniso	Fall	1.41	7	56,706	12,723.90	likely
fall_500_noomeps2	Fall	207.81	6	28,401	12,930.31	likely
fall_500_noomeps2_noaniso	Fall	208.64	4	28,401	12,931.13	likely
fall_500_noomeps2_noeps1_noaniso	Fall	277.62	3	651	13,000.11	likely
fall_500_noomeps2_noeps1	Fall	278.58	5	651	13,001.07	likely
fall_500_noomeps12	Fall	1,872.36	1	96	14,594.85	likely
fall_500_noomeps12_noaniso	Fall	1,872.36	1	96	14,594.85	likely
spring_500_alleffectson	Spring	0.00	9	58,234	14,265.42	likely
spring_500_noaniso	Spring	32.16	7	58,234	14,297.57	likely
spring_500_noomeps2	Spring	229.09	6	29,164	14,494.51	likely
spring_500_noomeps2_noaniso	Spring	250.10	4	29,164	14,515.52	likely
spring_500_noomeps2_noeps1	Spring	427.77	5	664	14,693.18	likely
spring_500_noomeps2_noeps1_noaniso	Spring	436.14	3	664	14,701.56	likely
spring_500_noomeps12	Spring	832.88	1	94	15,098.30	likely
spring_500_noomeps12_noaniso	Spring	832.88	1	94	15,098.30	likely

The annual model without anisotopy and with spatial and spatio-temporal random effects only in linear predictor 1 errored out; all other models completed and converged.

Spatial and spatiotemporal random effects were supported for both linear predictors for annual, fall, and spring models. For annual and spring models, estimating anisotropy (directional correlation) was supported as well, but the dAIC for the fall model suggests that estimating anisotriopy is only slightly more supported than not estimating it.

For now lets estimate spatial and spatiotemporal random effects with anisotropy for all models to test the catchability covariates.

For the second step of model selection with different combinations of vessel effects and or catchability covariates, “modname” in Table Sb4.3 follows a similar pattern as above, with all prey aggregated (“allagg” for all models), season (“all” for annual models of months 1-12, “fall” for models of months 7-12, and “spring” for models of months 1-6), number of knots (500 for all models), and which vessel effects or covariates were included. The names for model options and associated VAST model settings are:

Model selection 2 (covariates) options, FieldConfig default (all IID), with anisotropy:

“_base” = No vessel overdispersion or length/number covariates
“_len” = Predator mean length covariate
“_num” = Number of predator species covariate
“_lennum” = Predator mean length and number of predator species covariates
“_sst” = Combined in situ and OISST covariate
“_lensst” = Predator mean length and SST covariates
“_numsst” = Number of predator species and SST covariates
“_lennumsst” = Predator mean length, number of predator species, and SST covariates
“_eta10” = Overdispersion (vessel effect) in first linear predictor (prey encounter)
“_eta11” = Overdispersion (vessel effect) in both linear predictors (prey encounter and weight)

Strangely, the fall models all errored out with the same error as above, parameter going to 0. Even the same configuration that worked using REML above.

# from each output folder in pyindex, 
outdir <- here("herringpyindex/modsel2")
moddirs <- list.dirs(outdir) 
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)

modcompare2 <- purrr::map_dfr(moddirs, getmodinfo)

modselect2 <- modcompare2 %>%
  dplyr::mutate(season = case_when(str_detect(modname, "_fall_") ~ "Fall",
                            str_detect(modname, "spring") ~ "Spring",
                            str_detect(modname, "_all_") ~ "Annual",
                            TRUE ~ as.character(NA))) %>%
  dplyr::mutate(converged2 = case_when(str_detect(converged, "no evidence") ~ "likely",
                                str_detect(converged, "is likely not") ~ "unlikely",
                                TRUE ~ as.character(NA))) %>%
  dplyr::group_by(season) %>%
  dplyr::mutate(deltaAIC = AIC-min(AIC)) %>%
  dplyr::select(modname, season, deltaAIC, fixedcoeff,
         randomcoeff, use_anisotropy, 
         omega1, omega2, epsilon1, epsilon2, 
         beta1, beta2, AIC, converged2) %>%
  dplyr::arrange(season, AIC)

# DT::datatable(modselect2, rownames = FALSE, 
#               options= list(pageLength = 50, scrollX = TRUE),
#               caption = "Comparison of delta AIC values for alternative vessel effects and catchability covariates. See text for model descriptions.")

flextable::flextable(modselect2%>%
                       dplyr::select(-c(use_anisotropy, 
         omega1, omega2, epsilon1, epsilon2, 
         beta1, beta2))) %>%
  flextable::set_header_labels(modname = "Model name",
                               season = "Season",
                               deltaAIC = "dAIC",
                               fixedcoeff = "N fixed coefficients",
                               randomcoeff = "N random coefficients",
                               converged2 = "Convergence") |>
  flextable::set_caption("Comparison of delta AIC (dAIC) values for alternative vessel effects and catchability covariates. See text for model descriptions associated with each model name.") %>%
  flextable::fontsize(size = 9, part = "all") %>%
  flextable::colformat_double(digits = 2) |>
  flextable::set_table_properties(layout = "autofit", width = .9)

Table 4.3: Comparison of delta AIC (dAIC) values for alternative vessel effects and catchability covariates. See text for model descriptions associated with each model name.
Model name	Season	dAIC	N fixed coefficients	N random coefficients	AIC	Convergence
allagg_all_500_lennumsst	Annual	0.00	113	58,140	27,309.22	likely
allagg_all_500_lennum	Annual	21.58	111	58,140	27,330.80	likely
allagg_all_500_lensst	Annual	366.40	111	58,140	27,675.61	likely
allagg_all_500_len	Annual	389.19	109	58,140	27,698.41	likely
allagg_all_500_numsst	Annual	486.77	111	58,140	27,795.99	likely
allagg_all_500_num	Annual	601.05	109	58,140	27,910.27	likely
allagg_all_500_sst	Annual	884.92	109	58,140	28,194.14	likely
allagg_all_500_eta11	Annual	966.88	109	58,146	28,276.10	likely
allagg_all_500_eta10	Annual	988.51	108	58,143	28,297.73	likely
allagg_all_500_base	Annual	1,000.40	107	58,140	28,309.62	likely
allagg_fall_500_base	Fall
allagg_fall_500_eta10	Fall
allagg_fall_500_eta11	Fall
allagg_fall_500_len	Fall
allagg_fall_500_lennum	Fall
allagg_fall_500_lennumsst	Fall
allagg_fall_500_lensst	Fall
allagg_fall_500_num	Fall
allagg_fall_500_numsst	Fall
allagg_fall_500_sst	Fall
allagg_spring_500_lennumsst	Spring	0.00	109	58,140	14,025.71	likely
allagg_spring_500_lennum	Spring	32.89	107	58,140	14,058.60	likely
allagg_spring_500_lensst	Spring	165.98	107	58,140	14,191.69	likely
allagg_spring_500_len	Spring	194.04	105	58,140	14,219.75	likely
allagg_spring_500_numsst	Spring	260.78	107	58,140	14,286.49	likely
allagg_spring_500_num	Spring	300.30	105	58,140	14,326.01	likely
allagg_spring_500_eta11	Spring	426.99	105	58,146	14,452.70	likely
allagg_spring_500_sst	Spring	430.33	105	58,140	14,456.04	likely
allagg_spring_500_eta10	Spring	447.13	104	58,143	14,472.84	likely
allagg_spring_500_base	Spring	464.35	103	58,140	14,490.06	likely

For the annual and spring models, the same set of covariates from the full forage model were best supported by the data: number of predator species at a station, mean length of predators at a station, and SST at the station.

So it looks like the spring model above, which is bias corrected, is the preferred model based on model selection.

Need to sort out what is going on with fall.

References

Gaichas, Sarah K., James Gartland, Brian E. Smith, Anthony D. Wood, Elizabeth L. Ng, Michael Celestino, Katie Drew, Abigail S. Tyrell, and James T. Thorson. 2023. “Assessing Small Pelagic Fish Trends in Space and Time Using Piscivore Stomach Contents.” Canadian Journal of Fisheries and Aquatic Sciences, October, cjfas-2023-0093. https://doi.org/10.1139/cjfas-2023-0093.

Ng, Elizabeth L, Jonathan J Deroba, Timothy E Essington, Arnaud Grüss, Brian E Smith, and James T Thorson. 2021. “Predator Stomach Contents Can Provide Accurate Indices of Prey Biomass.” ICES Journal of Marine Science 78 (3): 1146–59. https://doi.org/10.1093/icesjms/fsab026.

Herring index from stomachs, evaluate spatial footprint

Data updated through 2022

Sarah Gaichas and James Gartland, based on work by Elizabeth Ng

05 June, 2024