1 Summary

Based on comparisons below, I’m satisfied that the methods update between last years SOE and this doesn’t fundamentally change the trend. I’m also fine with starting the time series earlier. However, looking at the coverage, we may not want to start the model at the absolute beginning of the time series.

Will discuss a good starting point with the team, but for now submit the 1982 starting point with data through 2022.

2 Workflow

Operational updates to the forage index submitted to the State of the Ecosystem report are in the forageindex github repository: https://github.com/NOAA-EDAB/forageindex

Data input files are in the folder fhdat and were processed with the script VASTforage_ProcessInputDat.R in that folder: https://github.com/NOAA-EDAB/forageindex/blob/main/fhdat/VASTforage_ProcessInputDat.R

# Streamlined version of CreateVASTInputs.Rmd for operational updates to forage index indicators
# October 2023
#   This one is updating with 2022 NEFSC and NEAMAP data and OISST
#   To be used in the 2024 State of the Ecosystem report

library(tidyverse)
library(here)
library(dendextend)

# Load NEFSC stomach data received from Brian Smith

# object is called `allfh`
load(here("fhdat/allfh.Rdata"))

#object is called allfh21
load(here("fhdat/allfh21.Rdata"))

#object is called allfh22
load(here("fhdat/allfh22.Rdata"))

# bind all NEFSC stomach datasets
allfh <- allfh %>%
  dplyr::bind_rows(allfh21) |>
  dplyr::bind_rows(allfh22)

###############################################################################
# read predator similarity info to generate predator list
# Input NEFSC food habits overlap matrix:

dietoverlap <- read_csv(here("fhdat/tgmat.2022-02-15.csv"))

# use dendextend functions to get list
d_dietoverlap <- dist(dietoverlap)

guilds <- hclust(d_dietoverlap, method = "complete")

dend <- as.dendrogram(guilds)

dend <- color_branches(dend, k=6) # Brian uses 6 categories

labels(dend) <- paste(as.character(names(dietoverlap[-1]))[order.dendrogram(dend)],
                      "(",labels(dend),")", 
                      sep = "")

pisccomplete <- partition_leaves(dend)[[
  which_node(dend, c("Bluefish..S(37)", "Bluefish..M(36)", "Bluefish..L(35)"))
]]


# Filter NEFSC food habits data with predator list
pisccompletedf <- data.frame("COMNAME" = toupper(str_remove(pisccomplete, "\\..*")),
                             "SizeCat" = str_remove(str_extract(pisccomplete, "\\..*[:upper:]+"), "\\.."),
                             "feedguild" = "pisccomplete")

fh.nefsc.pisc.pisccomplete <- allfh %>%
  #filter(pynam != "EMPTY") %>%
  left_join(pisccompletedf, by = c("pdcomnam" = "COMNAME",
                                   "sizecat" = "SizeCat")) %>%
  filter(!is.na(feedguild))


##############################################################################
# Get prey list from NEFSC and NEAMAP

preycount  <- fh.nefsc.pisc.pisccomplete %>%
  #group_by(year, season, pdcomnam, pynam) %>%
  group_by(pdcomnam, pynam) %>%
  summarise(count = n()) %>%
  #arrange(desc(count))
  pivot_wider(names_from = pdcomnam, values_from = count)


gencomlist <- allfh %>%
  select(pynam, pycomnam2, gencom2) %>%
  distinct()

NEFSCblueprey <- preycount %>%
  #filter(BLUEFISH > 9) %>%
  filter(!pynam %in% c("EMPTY", "BLOWN",
                       "FISH", "OSTEICHTHYES",
                       "ANIMAL REMAINS",
                       "FISH SCALES")) %>%
  #filter(!str_detect(pynam, "SHRIMP|CRAB")) %>%
  left_join(gencomlist) %>%
  filter(!gencom2 %in% c("ARTHROPODA", "ANNELIDA",
                         "CNIDARIA", "UROCHORDATA",
                         "ECHINODERMATA", "WORMS",
                         "BRACHIOPODA", "COMB JELLIES",
                         "BRYOZOA", "SPONGES",
                         "MISCELLANEOUS", "OTHER")) %>%
  arrange(desc(BLUEFISH))

# March 2023, formally add NEAMAP to prey decisions
NEAMAPblueprey <- read.csv(here("fhdat/Full Prey List_Common Names.csv")) %>%
  #filter(BLUEFISH > 9) %>%
  filter(!SCIENTIFIC.NAME %in% c("Actinopterygii", "fish scales",
                                 "Decapoda (megalope)", 
                                 "unidentified material",
                                 "Plantae",
                                 "unidentified animal"))

NEAMAPprey <- NEAMAPblueprey %>%
  dplyr::select(COMMON.NAME, SCIENTIFIC.NAME, BLUEFISH) %>%
  dplyr::filter(!is.na(BLUEFISH)) %>%
  dplyr::mutate(pynam2 = tolower(SCIENTIFIC.NAME),
                pynam2 = stringr::str_replace(pynam2, "spp.", "sp")) %>%
  dplyr::rename(NEAMAP = BLUEFISH)


NEFSCprey <- NEFSCblueprey %>%
  dplyr::select(pycomnam2, pynam, BLUEFISH) %>%
  dplyr::filter(!is.na(BLUEFISH)) %>%
  dplyr::mutate(pynam2 = tolower(pynam)) %>%
  dplyr::rename(NEFSC = BLUEFISH)

# new criteria March 2023, >20 observations NEAMAP+NEFSC, but keep mackerel
# removes the flatfish order (too broad) and unid Urophycis previously in NEAMAP
blueprey <- NEFSCprey %>% 
  dplyr::full_join(NEAMAPprey) %>%
  dplyr::mutate(NEAMAP = ifelse(is.na(NEAMAP), 0, NEAMAP),
                NEFSC = ifelse(is.na(NEFSC), 0, NEFSC),
                total = NEFSC + NEAMAP,
                PREY = ifelse(is.na(SCIENTIFIC.NAME), pynam, SCIENTIFIC.NAME),
                COMMON = ifelse(is.na(COMMON.NAME), pycomnam2, COMMON.NAME),
                pynam = ifelse(is.na(pynam), toupper(pynam2), pynam)) %>%
  dplyr::arrange(desc(total)) %>%
  dplyr::filter(total>20 | pynam=="SCOMBER SCOMBRUS") %>% # >20 leaves out mackerel
  dplyr::mutate(COMMON = case_when(pynam=="ILLEX SP" ~ "Shortfin squids",
                                   pynam2=="teuthida" ~ "Unidentified squids",
                                   TRUE ~ COMMON)) %>%
  dplyr::mutate(PREY = stringr::str_to_sentence(PREY),
                COMMON = stringr::str_to_sentence(COMMON))


fh.nefsc.pisc.pisccomplete.blueprey <- fh.nefsc.pisc.pisccomplete %>%
  mutate(blueprey = case_when(pynam %in% blueprey$pynam ~ "blueprey",
                              TRUE ~ "othprey"))

###############################################################################
# Make the NEFSC dataset aggregating prey based on prey list

bluepyall_stn <- fh.nefsc.pisc.pisccomplete.blueprey %>%
  #create id linking cruise6_station
  #create season_ng spring and fall Spring=Jan-May, Fall=June-Dec
  mutate(id = paste0(cruise6, "_", station),
         year = as.numeric(year),
         month = as.numeric(month),
         season_ng = case_when(month <= 6 ~ "SPRING",
                               month >= 7 ~ "FALL",
                               TRUE ~ as.character(NA))
  ) %>%
  dplyr::select(year, season_ng, id, stratum,
                pynam, pyamtw, pywgti, pyvoli, blueprey, 
                pdcomnam, pdid, pdlen, pdsvol, pdswgt, 
                beglat, beglon, declat, declon, 
                bottemp, surftemp, setdepth) %>%
  group_by(id) %>%
  #mean blueprey g per stomach per tow: sum all blueprey g/n stomachs in tow
  mutate(bluepywt = case_when(blueprey == "blueprey" ~ pyamtw,
                              TRUE ~ 0.0),
         bluepynam = case_when(blueprey == "blueprey" ~ pynam,
                               TRUE ~ NA_character_)) 

# Optional: save at prey disaggregated stage for paper
#saveRDS(bluepyall_stn, here("fhdat/bluepyall_stn.rds"))

# Now get station data in one line
stndat <- bluepyall_stn %>%
  dplyr::select(year, season_ng, id, 
                beglat, beglon, declat, declon, 
                bottemp, surftemp, setdepth) %>%
  distinct()

#pisc stomachs in tow count pdid for each pred and sum
piscstom <- bluepyall_stn %>%
  group_by(id, pdcomnam) %>%
  summarise(nstompd = n_distinct(pdid)) %>%
  group_by(id) %>%
  summarise(nstomtot = sum(nstompd))

#mean and var pred length per tow
pisclen <- bluepyall_stn %>%
  summarise(meanpisclen = mean(pdlen),
            varpisclen = var(pdlen))

# Aggregated prey at station level with predator covariates
bluepyagg_stn <- bluepyall_stn %>%
  summarise(sumbluepywt = sum(bluepywt),
            nbluepysp = n_distinct(bluepynam, na.rm = T),
            npreysp = n_distinct(pynam),
            npiscsp = n_distinct(pdcomnam)) %>%
  left_join(piscstom) %>%
  mutate(meanbluepywt = sumbluepywt/nstomtot) %>%
  left_join(pisclen) %>%
  left_join(stndat)

# save at same stage as before, writing over old file
#saveRDS(bluepyagg_stn, here("fhdat/bluepyagg_stn.rds"))

# current dataset, fix declon, add vessel, rename NEFSC
#nefsc_bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn.rds")) %>%
nefsc_bluepyagg_stn <- bluepyagg_stn %>%
  mutate(declon = -declon,
         vessel = case_when(year<2009 ~ "AL",
                            year>=2009 ~ "HB", 
                            TRUE ~ as.character(NA)))

##############################################################################
# Add NEAMAP to make full aggregated stomach dataset

# Read in NEAMAP updated input from Jim Gartland, reformat with same names
neamap_bluepreyagg_stn <- read_csv(here("fhdat/NEAMAP_Mean stomach weights_Bluefish Prey_Oct2023.csv")) %>%  
  mutate(vessel = "NEAMAP") %>%
  rename(id = station,
         sumbluepywt = sumbluepreywt,
         nbluepysp = nbfpreyspp,
         #npreysp = ,
         npiscsp = npiscspp,
         nstomtot = nstomtot, 
         meanbluepywt = meanbluepreywt,
         meanpisclen = meanpisclen.simple, 
         #varpisclen = ,
         season_ng = season,
         declat  = lat,
         declon = lon,
         bottemp = bWT,
         #surftemp = , 
         setdepth = depthm) 


# combine NEAMAP and NEFSC
bluepyagg_stn_all <-  nefsc_bluepyagg_stn %>%
  bind_rows(neamap_bluepreyagg_stn) 

# Save before SST integration step
#saveRDS(bluepyagg_stn_all, here("fhdat/bluepyagg_stn_all.rds"))

###############################################################################
# Add SST into NEAMAP and reintegrate into full dataset

# Read back in if needed for SST
#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

NEFSCstations <- allfh %>%
  dplyr::mutate(id = paste0(cruise6, "_", station),
                year = as.numeric(year),
                month = as.numeric(month),
                day = as.numeric(day),
                declon = -declon) %>%
  dplyr::select(id, year, month, day, declat, declon) %>%
  dplyr::distinct()

# Need NEAMAP SST update! This is the old file
NEAMAPstationSST <- read.csv(here("fhdat/NEAMAP SST_2007_2022.csv"))

NEAMAPstations <- NEAMAPstationSST %>%
  dplyr::mutate(id = station,
                year = as.numeric(year),
                month = as.numeric(month),
                day = as.numeric(day)) %>%
  dplyr::select(id, year, month, day) %>%
  dplyr::distinct()

# remake diethauls
diethauls <- bluepyagg_stn_all %>%
  dplyr::select(id, declat, declon)

NEFSCstations <- dplyr::select(NEFSCstations, c(-declat, -declon))

Allstations <- bind_rows(NEFSCstations, NEAMAPstations)

#station id, lat lon, year month day
diethauls <- left_join(diethauls, Allstations)

#add year month day to diet data
bluepyagg_stn_all <- left_join(bluepyagg_stn_all, diethauls)

# add NEAMAP SST to surftemp field
NEAMAPidSST <- NEAMAPstationSST %>%
  mutate(id = station) %>%
  dplyr::select(id, SST)

bluepyagg_stn_all <- left_join(bluepyagg_stn_all, NEAMAPidSST, by="id") %>%
  mutate(surftemp = coalesce(surftemp, SST)) %>%
  dplyr::select(-SST)

# save merged dataset with day, month, and NEAMAP surftemp, same name
#saveRDS(bluepyagg_stn_all, here("fhdat/bluepyagg_stn_all.rds"))

###############################################################################
#Now match stations to OISST

#make an SST dataframe for 2022! Add to saved sst_data in data-raw/gridded

library(sf)
library(raster)
library(terra)
library(nngeo)

# Bastille function from https://github.com/kimberly-bastille/ecopull/blob/main/R/utils.R

nc_to_raster <- function(nc,
                         varname,
                         extent = c(0, 360, -90, 90),
                         crop = raster::extent(280, 300, 30, 50),
                         show_images = FALSE) {
  
  message("Reading .nc as brick...")
  
  r <- raster::brick(nc, varname = varname)
  
  message("Setting CRS...")
  raster::crs(r) <- "+proj=longlat +lat_1=35 +lat_2=45 +lat_0=40 +lon_0=-77 +x_0=0 +y_0=0 +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
  
  # not sure if this is necessary?
  raster::extent(r) <- raster::extent(extent)
  
  if(show_images){
    par(mfrow = c(1,2))
    raster::plot(r, 1, sub = "Full dataset")
  }
  
  message("Cropping data...")
  ne_data <- raster::crop(r, crop)
  #ne_data <- raster::rotate(ne_data) add here for future pulls
  
  if(show_images){
    raster::plot(ne_data, 1, sub = "Cropped dataset")
    par(mfrow = c(1,1))
  }
  
  message("Done!")
  
  return(ne_data)
}

# function to convert to dataframe based on
# https://towardsdatascience.com/transforming-spatial-data-to-tabular-data-in-r-4dab139f311f

raster_to_sstdf <- function(brick,
                            rotate=TRUE){
  
  if(rotate) brick_r <- raster::rotate(brick)
  brick_r <- raster::crop(brick_r, raster::extent(-77,-65,35,45))
  sstdf <- as.data.frame(raster::rasterToPoints(brick_r, spatial = TRUE))
  sstdf <- sstdf %>%
    dplyr::rename(Lon = x,
                  Lat = y) %>%
    tidyr::pivot_longer(cols = starts_with("X"),
                        names_to = c("year", "month", "day"),
                        names_prefix = "X",
                        names_sep = "\\.",
                        values_to = "sst",
    )
  return(sstdf)
}

# pull the OISST data as raster brick, modified from 
# https://github.com/kimberly-bastille/ecopull/blob/main/.github/workflows/pull_satellite_data.yml

varname <- "sst"

# 1985-2021 previously pulled, processed and stored. add 2022.
# add 1981-1984 to extend back in time. No OISST before 1981.
# 1981 is only Sept-Dec so don't use

years <- #1982:1984 # 2022
for(i in years) {
  name <- paste0(i, ".nc")
  dir.create(here::here("data-raw","gridded", "sst_data"), recursive = TRUE)
  filename <- here::here("data-raw","gridded", "sst_data", paste0("test_", i, ".grd"))
  url <- paste0("https://downloads.psl.noaa.gov/Datasets/noaa.oisst.v2.highres/sst.day.mean.", i, ".nc")
  download.file(url, destfile = name)
  
  text <- knitr::knit_expand(text = "test_{{year}} <- nc_to_raster(nc = name, varname = varname)
                                     raster::writeRaster(test_{{year}}, filename = filename, overwrite=TRUE)",
                             year = i)
  print(text)
  try(eval(parse(text = text)))
  unlink(name) # remove nc file to save space
  print(paste("finished",i))
}


# convert raster to dataframe
#years <- 2022
for(i in years) {
  name <- get(paste0("test_",i))
  filename <- here::here("data-raw","gridded", "sst_data", paste0("sst", i, ".rds"))
  text <- knitr::knit_expand(text = "sst{{year}} <- raster_to_sstdf(brick = name)
                                     saveRDS(sst{{year}}, filename)",
                             year = i)
  print(text)
  try(eval(parse(text = text)))
}
#read in diet data with month day fields

#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

stations <- bluepyagg_stn_all %>%
  dplyr::mutate(day = str_pad(day, 2, pad='0'),
                month = str_pad(month, 2, pad='0'),
                yrmody = as.numeric(paste0(year, month, day))) %>%
  dplyr::select(id, declon, declat, year, yrmody) %>%
  na.omit() %>%
  sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE) 



#list of SST dataframes
SSTdfs <- list.files(here("data-raw/gridded/sst_data/"), pattern = "*.rds")

dietstn_OISST <- tibble()


for(df in SSTdfs){
  sstdf <- readRDS(paste0(here("data-raw/gridded/sst_data/", df)))
  
  # keep only bluefish dates in SST year
  stationsyr <- stations %>%
    filter(year == unique(sstdf$year))
  
  # keep only sst days in bluefish dataset
  sstdf_survdays <- sstdf %>%
    dplyr::mutate(yrmody = as.numeric(paste0(year, month, day)) )%>%
    dplyr::filter(yrmody %in% unique(stationsyr$yrmody)) %>%
    dplyr::mutate(year = as.numeric(year),
                  month = as.numeric(month),
                  day = as.numeric(day),
                  declon = Lon,
                  declat = Lat) %>%
    dplyr::select(-Lon, -Lat) %>%
    sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE)  
  
  # now join by nearest neighbor and date
  
  #https://stackoverflow.com/questions/71959927/spatial-join-two-data-frames-by-nearest-feature-and-date-in-r      
  
  yrdietOISST <- do.call('rbind', lapply(split(stationsyr, 1:nrow(stationsyr)), function(x) {
    sf::st_join(x, sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),],
                #join = st_nearest_feature
                join = st_nn, k = 1, progress = FALSE
    )
  }))
  
  #   #datatable solution--works but doesnt seem faster?
  #    df1 <- data.table(stationsyr)
  #   
  #  .nearest_samedate <- function(x) {
  #    st_join(st_as_sf(x), sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),], join = st_nearest_feature)
  #  }
  # # 
  #  yrdietOISST <- df1[, .nearest_samedate(.SD), by = list(1:nrow(df1))]
  
  dietstn_OISST <- rbind(dietstn_OISST, yrdietOISST)
  
}

#saveRDS(dietstn_OISST, here("data-raw/dietstn_OISST.rds"))

# Now join with OISST dataset

#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))
#dietstn_OISST <- readRDS(here("data-raw/dietstn_OISST.rds"))


dietstn_OISST_merge <- dietstn_OISST %>%
  dplyr::rename(declon = declon.x,
                declat = declat.x,
                year = year.x,
                oisst = sst) %>%
  dplyr::select(id, oisst) %>%
  sf::st_drop_geometry()

bluepyagg_stn_all_OISST <- left_join(bluepyagg_stn_all, dietstn_OISST_merge)

saveRDS(bluepyagg_stn_all_OISST, here("fhdat/bluepyagg_stn_all_OISST_1982-2022.rds"))

VAST models were run using the script VASTunivariate_seasonalforageindex_operational.R in the folder VASTscripts: https://github.com/NOAA-EDAB/forageindex/blob/main/VASTscripts/VASTunivariate_seasonalforageindex_operational.R

# This is the exact VAST code used in Gaichas et al 2023, but with data years 
# extended from 1973-2022
# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization

# Load packages
library(here)
library(dplyr)
library(VAST)

#Read in data, separate spring and fall, and rename columns for VAST:

# this dataset created in SSTmethods.Rmd

bluepyagg_stn <- readRDS(here::here("fhdat/bluepyagg_stn_all_OISST_1982-2022.rds"))

# make SST column that uses surftemp unless missing or 0
# there are 3 surftemp 0 values in the dataset, all with oisst > 15
bluepyagg_stn <- bluepyagg_stn %>%
  dplyr::mutate(sstfill = ifelse((is.na(surftemp)|surftemp==0), oisst, surftemp))

#bluepyagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion

# code Vessel as AL=0, HB=1, NEAMAP=2

bluepyagg_stn_fall <- bluepyagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanbluepywt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()

bluepyagg_stn_spring <- bluepyagg_stn %>%
  filter(season_ng == "SPRING") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 =1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanbluepywt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()


# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302

# use only MAB, GB, GOM, SS EPUs 
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE

bfinshore <- c(3020, 3050, 3080, 3110, 3140, 3170, 3200, 3230, 
              3260, 3290, 3320, 3350, 3380, 3410, 3440, 3450, 3460)

bfoffshore <- c(1010, 1730, 1690, 1650, 1050, 1060, 1090, 1100, 1250, 1200, 1190, 1610)

MAB <- c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510)
GB  <- c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550)
GOM <- c(1220, 1240, 1260:1290, 1360:1400, 3560:3830)
SS  <- c(1300:1352, 3840:3990)

MABGBinshore <- c(3010:3450, 3460, 3470, 3480, 3490, 3500, 3510, 3520:3550)

MABGBoffshore <- c(1010:1080, 1090, 1100:1120,1130:1210, 1230, 1250, 1600:1750)

coast3nmbuffst <- readRDS(here::here("spatialdat/coast3nmbuffst.rds"))

MAB2 <- coast3nmbuffst %>% 
  dplyr::filter(stratum_number %in% MAB) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# MAB state waters
MAB2state <- MAB2 %>%
  dplyr::filter(stratum_number2 %% 10 == 1) 

# MAB federal waters
MAB2fed <- MAB2 %>%
  dplyr::filter(stratum_number2 %% 10 == 2) 

# Georges Bank EPU
GB2 <- coast3nmbuffst %>% 
  dplyr::filter(stratum_number %in% GB) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# GB state waters
GB2state <- GB2 %>%
  dplyr::filter(stratum_number2 %% 10 == 1) 

#GB federal waters
GB2fed <- GB2 %>%
  dplyr::filter(stratum_number2 %% 10 == 2) 

# whole bluefish domain MABG
MABGB2 <- dplyr::bind_rows(MAB2, GB2)

# MABGB state waters
MABGBstate <- dplyr::bind_rows(MAB2state, GB2state)

# MABGB federal waters
MABGBfed <- dplyr::bind_rows(MAB2fed, GB2fed)

# gulf of maine EPU (for SOE)
GOM2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% GOM) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# scotian shelf EPU (for SOE)
SS2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% SS) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# previous bluefish strata
bfinshore2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% bfinshore) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# additional new bluefish strata 2022
bfoffshore2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% bfoffshore) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# all bluefish strata
bfall2 <- dplyr::bind_rows(bfinshore2, bfoffshore2)

# albatross inshore strata
albinshore2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% setdiff(MABGBinshore, bfinshore)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# Albold for WHAM input
albbfinshore <- dplyr::bind_rows(albinshore2, bfinshore2)

# Albnew for WHAM input
albbfall <- dplyr::bind_rows(albinshore2, bfall2)

# offshore of all bluefish survey strata
MABGBothoffshore2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% setdiff(MABGBoffshore, bfoffshore)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# needed to cover the whole northwest atlantic grid
allother2 <- coast3nmbuffst %>%
  dplyr::filter(!stratum_number %in% c(MAB, GB, GOM, SS)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# all epus
allEPU2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% c(MAB, GB, GOM, SS)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# # configs same FieldConfig as below but formatted differently
# FieldConfig <- c(
#   "Omega1"   = 0,   # number of spatial variation factors (0, 1, AR1)
#   "Epsilon1" = 0,   # number of spatio-temporal factors
#   "Omega2"   = 0, 
#   "Epsilon2" = 0
# ) 

# default configs, not really specified anyway
FieldConfig = matrix( "IID", ncol=2, nrow=3, 
                      dimnames=list(c("Omega","Epsilon","Beta"),c("Component_1","Component_2")))


RhoConfig <- c(
  "Beta1" = 0,      # temporal structure on years (intercepts) 
  "Beta2" = 0, 
  "Epsilon1" = 0,   # temporal structure on spatio-temporal variation
  "Epsilon2" = 0
) 
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1

OverdispersionConfig    <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight

strata.limits <- as.list(c("AllEPU" = allEPU2, 
                           "MABGB" = MABGB2,
                           "MABGBstate" = MABGBstate,
                           "MABGBfed" = MABGBfed,
                           "MAB" = MAB2,
                           "GB" = GB2,
                           "GOM" = GOM2,
                           "bfall" = bfall2,
                           "bfin" = bfinshore2,
                           "bfoff" = bfoffshore2,
                           "MABGBalbinshore" = albinshore2,
                           "MABGBothoffshore" = MABGBothoffshore2,
                           "albbfin" = albbfinshore,
                           "albbfall" = albbfall,
                           "allother" = allother2))

settings = make_settings( n_x = 500, 
                          Region = "northwest_atlantic",
                          Version = "VAST_v14_0_1", #needed to prevent error from newer dev version number
                          #strata.limits = list('All_areas' = 1:1e5), full area
                          strata.limits = strata.limits,
                          purpose = "index2", 
                          bias.correct = TRUE,
                          #use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          #FieldConfig = FieldConfig,
                          #RhoConfig = RhoConfig,
                          OverdispersionConfig = OverdispersionConfig
                          )


New_Extrapolation_List <- readRDS(here::here("spatialdat/CustomExtrapolationList.rds"))

# select dataset and set directory for output

#########################################################
# Run model fall

season <- c("fall_500_lennosst_ALLsplit_biascorrect")

working_dir <- here::here(sprintf("SOEpyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

# subset for faster testing
#bluepyagg_stn_fall <- bluepyagg_stn_fall %>% filter(Year<1990)                       

fit <- fit_model(
  settings = settings, 
  extrapolation_list = New_Extrapolation_List,
  Lat_i = bluepyagg_stn_fall$Lat, 
  Lon_i = bluepyagg_stn_fall$Lon, 
  t_i = bluepyagg_stn_fall$Year, 
  b_i = as_units(bluepyagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(bluepyagg_stn_fall)),
  v_i = bluepyagg_stn_fall$Vessel,
  Q_ik = as.matrix(bluepyagg_stn_fall[,c("npiscsp", 
                                         "meanpisclen", 
                                         "sstfill"
                                         )]),
  #Use_REML = TRUE,
  working_dir = paste0(working_dir, "/"))

saveRDS(fit, file = paste0(working_dir, "/fit.rds"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

######################################################
# Run model spring

season <- c("spring_500_lennosst_ALLsplit_biascorrect")

working_dir <- here::here(sprintf("SOEpyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}                         
  
# subset for faster testing
#bluepyagg_stn_spring <- bluepyagg_stn_spring %>% filter(Year<1990) 

fit <- fit_model( settings = settings,  
                 extrapolation_list = New_Extrapolation_List,
                 Lat_i = bluepyagg_stn_spring[,'Lat'], 
                 Lon_i = bluepyagg_stn_spring[,'Lon'], 
                 t_i = bluepyagg_stn_spring[,'Year'], 
                 b_i = as_units(bluepyagg_stn_spring[,'Catch_g'], 'g'), 
                 a_i = rep(1, nrow(bluepyagg_stn_spring)),
                 v_i = bluepyagg_stn_spring$Vessel,
                 Q_ik = as.matrix(bluepyagg_stn_spring[,c("npiscsp", 
                                                          "meanpisclen", 
                                                          "sstfill"
                                                          )]),
                # Use_REML = TRUE,
                 working_dir = paste0(working_dir, "/"))

saveRDS(fit, file = paste0(working_dir, "/fit.rds"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

Model output was saved in the folder SOEpyindex for three different time series options: 1973-2022, 1982-2022, and 1985-2022.

The script to create the SOE forage indices from the VAST output is in the SOEpyindex folder: https://github.com/NOAA-EDAB/forageindex/blob/main/SOEpyindex/SOE-VASTForageIndices.R

# create csv for ecodata input
# aim for similar structure to other ecodata datasets


library(dplyr)
library(ggplot2)
library(tidyr)

SOEinputs <- function(infile, season, outfile) {
  
  splitoutput <- read.csv(infile)
  
  # warning, hardcoded. obviously
  stratlook <- data.frame(Stratum = c("Stratum_1",
                                      "Stratum_2",
                                      "Stratum_3",
                                      "Stratum_4",
                                      "Stratum_5",
                                      "Stratum_6",
                                      "Stratum_7",
                                      "Stratum_8",
                                      "Stratum_9",
                                      "Stratum_10",
                                      "Stratum_11",
                                      "Stratum_12",
                                      "Stratum_13",
                                      "Stratum_14",
                                      "Stratum_15"),
                          Region  = c("AllEPU", 
                                      "MABGB", 
                                      "MABGBstate", 
                                      "MABGBfed", 
                                      "MAB",
                                      "GB",
                                      "GOM",
                                      "bfall",
                                      "bfin",
                                      "bfoff",
                                      "MABGBalbinshore",
                                      "MABGBothoffshore",
                                      "albbfin",
                                      "albbfall",
                                      "allother"))
  
  forageindex <- splitoutput %>%
    left_join(stratlook) %>%
    dplyr::select(Time, 
                  EPU = Region, 
                  "Forage Fish Biomass Estimate" = Estimate, 
                  "Forage Fish Biomass Estimate SE" = Std..Error.for.Estimate) %>%
    tidyr::pivot_longer(c("Forage Fish Biomass Estimate", "Forage Fish Biomass Estimate SE"), 
                        names_to = "Var", values_to = "Value") %>%
    dplyr::filter(EPU %in% c("MAB", "GB", "GOM", "AllEPU")) %>%
    dplyr::mutate(Units = "relative grams per stomach") %>%
    dplyr::select(Time, Var, Value, EPU, Units)
  
  forageindex$Var <- stringr::str_c(season, forageindex$Var, sep = " ")
  
  #readr::write_csv(forageindex, outfile)
  saveRDS(forageindex, outfile)
  
} 


# make data files
SOEinputs(infile = "SOEpyindex/1982-2022/allagg_fall_500_lennosst_ALLsplit_biascorrect/Index.csv",
          season = "Fall", 
           outfile = "SOEpyindex/fallforageindex.rds")

SOEinputs(infile = "SOEpyindex/1982-2022/allagg_spring_500_lennosst_ALLsplit_biascorrect/Index.csv",
          season = "Spring", 
           outfile = "SOEpyindex/springforageindex.rds")

# SOEinputs(infile = "pyindex/allagg_annual_500_lennosst_ALLsplit_biascorrect/Index.csv",
#           season = "Annual", 
#            outfile = "toSOE/annualforageindex.rds")



# test plot
# foragewide <- forageindex %>%
#   pivot_wider(names_from = Var, values_from = Value)
# 
# 
# ggplot(foragewide, aes(x=Time, y=`Forage Fish Biomass Estimate`, colour = EPU)) +
#   geom_errorbar(aes(ymin=`Forage Fish Biomass Estimate`+`Forage Fish Biomass Estimate SE`, 
#                     ymax=`Forage Fish Biomass Estimate`-`Forage Fish Biomass Estimate SE`))+
#   geom_point()+
#   geom_line()

Code blocks below setup the index comparisons in the remainder of this document.

# compare 2022 prey list to paper update preylist for indices

stratlook <- data.frame(Stratum = c("Stratum_1",
                                    "Stratum_2",
                                    "Stratum_3",
                                    "Stratum_4",
                                    "Stratum_5",
                                    "Stratum_6",
                                    "Stratum_7",
                                    "Stratum_8",
                                    "Stratum_9",
                                    "Stratum_10",
                                    "Stratum_11",
                                    "Stratum_12",
                                    "Stratum_13",
                                    "Stratum_14",
                                    "Stratum_15"),
                        Region  = c("AllEPU", 
                                    "MABGB", 
                                    "MABGBstate", 
                                    "MABGBfed", 
                                    "MAB",
                                    "GB",
                                    "GOM",
                                    "bfall",
                                    "bfin",
                                    "bfoff",
                                    "MABGBalbinshore",
                                    "MABGBothoffshore",
                                    "albbfin",
                                    "albbfall",
                                    "allother"))

# from each output folder in pyindex, 
outdir <- here("SOEpyindex")
moddirs <- list.dirs(outdir)
# only get pred sensitivities and original full models
fullfall <- grep("/allagg_fall_", moddirs, value = TRUE)
fullspring <- grep("/allagg_spring_", moddirs, value = TRUE)
moddirs <- c(fullfall, fullspring)
# get original published version 1985-2021
outdir1 <- here("pyindex")
moddirs1 <- list.dirs(outdir1)
RTfall <- grep("pyindex/2022/allagg_fall_", moddirs1, value = TRUE)
RTspring <- grep("pyindex/2022/allagg_spring_", moddirs1, value = TRUE)
paperfall <- grep("pyindex/allagg_fall_", moddirs1, value = TRUE)
paperspring <- grep("pyindex/allagg_spring_", moddirs1, value = TRUE)
moddirs1 <- c(RTfall, RTspring, paperfall, paperspring)
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)
modnames1 <- list.dirs(outdir1, full.names = FALSE)

# function to apply extracting info
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  modnamepre <- modpath[length(modpath)-1]
  
  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
    fill = TRUE, header = FALSE)
  
  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
  
  #FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }
  
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }
  
  
  #RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
  
  # read parameter estimates, object is called parameter_Estimates
  load(file.path(d.name, "parameter_estimates.RData"))
  
  AIC <- parameter_estimates$AIC[1]  
  converged <- parameter_estimates$Convergence_check[1]
  fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
  randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
  
  #index <- read.csv(file.path(d.name, "Index.csv"))
  
  # return model attributes as a dataframe
  out <- data.frame(modname = paste0(modnamepre,"_", modname),
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale =  fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff#,
                    #index = index
  )
    
    return(out)

}

# getfitdat <- function(d.name){
#   # read settings
#   modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
#   modname <- modpath[length(modpath)]
#   
#   readRDS(file.path(d.name, "fit.rds"))
#   nstations <- fit$data_list$n_i # this is grid size not input N stations
#   out <- data.frame(modname = modname,
#                     nstations = nstations
#   )
#   
#   return(out)
# }


# function to apply extracting info
getmodindex <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  modnamepre <- modpath[length(modpath)-1]
  
  index <- read.csv(file.path(d.name, "Index.csv"))
  # return model indices as a dataframe
  out <- data.frame(modname = paste0(modnamepre,"_", modname),
                    index
  )
  
  return(out)
}

moddirsall <- c(moddirs, moddirs1)

modcompare <- purrr::map_dfr(moddirsall, getmodinfo)

modcompareindex <- purrr::map_dfr(moddirsall, getmodindex)

3 Trend comparisons

3.1 Methods change and adding one year

Lets compare last year’s SOE submission which was used in the 2022 Bluefish RT (2022) with the slight adjustment in prey list for the published CJFAS paper (pyindex) and with the same methods as in the CJFAS paper but extending data by 1 year to 2022.

This is fall trend.

splitoutput <- modcompareindex %>%
  dplyr::mutate(Season = modname |> map(str_split, pattern = "_") |> map_chr(c(1,3))) %>%
  dplyr::mutate(Data = modname |> map(str_split, pattern = "_") |> map_chr(c(1,1))) %>% 
  dplyr::left_join(stratlook) %>%
  dplyr::filter(Region %in% c("GOM", "GB", "MAB","MABGBstate", "bfin")) %>%
  dplyr::mutate(Type = ifelse(Region %in% c("GOM", "GB", "MAB"), "Ecoregion", "Bluefish"),
                Region = case_when(Region == "MABGBstate" ~ "StateWaters",
                                   Region == "bfin" ~ "SurveyBluefish",
                                   TRUE ~ Region))

foragemax <- max(splitoutput$Estimate)


foragetsmean <- splitoutput %>%
  dplyr::group_by(modname, Region) %>%
  dplyr::mutate(fmean = mean(Estimate)) 


ggplot(foragetsmean |> filter(Season =="fall",
                             Data %in% c("2022",
                                         "pyindex",
                                         "1985-2022")), 
       aes(x=Time, y=((Estimate-fmean)/fmean), colour=Data)) +
  #geom_errorbar(aes(ymin=(Estimate+Std..Error.for.Estimate - fmean)/fmean, 
  #                  ymax=(Estimate-Std..Error.for.Estimate - fmean)/fmean))+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  #scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to time series mean")  +
  ggtitle("Fall models: trend comparison") +
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Trend comparison between fall forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

Figure 3.1: Trend comparison between fall forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

This is spring trend.

ggplot(foragetsmean |> filter(Season =="spring",
                             Data %in% c("2022",
                                         "pyindex",
                                         "1985-2022")), 
       aes(x=Time, y=((Estimate-fmean)/fmean), colour=Data)) +
  #geom_errorbar(aes(ymin=(Estimate+Std..Error.for.Estimate - fmean)/fmean,
  #                  ymax=(Estimate-Std..Error.for.Estimate - fmean)/fmean))+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  #scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to time series mean")  +
  ggtitle("Spring models: trend comparison")+
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Trend comparison between spring forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

Figure 3.2: Trend comparison between spring forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

3.2 How far back can we go?

Spatial coverage for stomachs in fall

mapfall <- paste0(moddirs[1], "/Data_by_year.png")

knitr::include_graphics(mapfall)

Spatial coverage for stomachs in spring

mapspring <- paste0(moddirs[4], "/Data_by_year.png")

knitr::include_graphics(mapspring)

Now lets compare trends when we add 2022 and also extend the data back from the CJFAS dataset, with starting points at the beginning of stomach collections (1973-2022), at the earliest full OISST dataset for SST substitution (1982-2022), and the time series start used last year and for bluefish (1985-2022).

This is fall.

ggplot(foragetsmean |> filter(Season =="fall",
                             Data %in% c("1973-2022",
                                         "1982-2022",
                                         "1985-2022")), 
       aes(x=Time, y=((Estimate-fmean)/fmean), colour=Data)) +
  #geom_errorbar(aes(ymin=(Estimate+Std..Error.for.Estimate - fmean)/fmean,
  #                  ymax=(Estimate-Std..Error.for.Estimate - fmean)/fmean))+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  #scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to time series mean")  +
  ggtitle("Fall models: trend comparison")+
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Figure 3.3: Trend comparison between fall forage indices using CJFAS published method with datasets from 1973-2022, 1982-2022, and 1985-2022.

This is spring.

ggplot(foragetsmean |> filter(Season =="spring",
                             Data %in% c("1973-2022",
                                         "1982-2022",
                                         "1985-2022")), 
       aes(x=Time, y=((Estimate-fmean)/fmean), colour=Data)) +
  #geom_errorbar(aes(ymin=(Estimate+Std..Error.for.Estimate - fmean)/fmean,
  #                  ymax=(Estimate-Std..Error.for.Estimate - fmean)/fmean))+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  #scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to time series mean")  +
  ggtitle("Spring models: trend comparison")+
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Figure 3.4: Trend comparison between spring forage indices using CJFAS published method with datasets from 1973-2022, 1982-2022, and 1985-2022.

4 Scale comparisons

We don’t use scale much, but in case someone does here they are, scaled relative to the max observed across all datasets.

4.1 Methods change and adding one year

Same comparisons as above: fall

ggplot(splitoutput |> filter(Season =="fall",
                             Data %in% c("2022",
                                         "pyindex",
                                         "1985-2022")), 
       aes(x=Time, y=Estimate, colour=Data)) +
  #geom_errorbar(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate))+
  geom_ribbon(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate, fill=Data), linetype = 0, alpha = 0.15)+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to maximum")  +
  ggtitle("Fall models: scale comparison")+
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Scale comparison between fall forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

Figure 4.1: Scale comparison between fall forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

Spring

ggplot(splitoutput |> filter(Season =="spring",
                             Data %in% c("2022",
                                         "pyindex",
                                         "1985-2022")), 
       aes(x=Time, y=Estimate, colour=Data)) +
  #geom_errorbar(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate))+
  geom_ribbon(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate, fill=Data), linetype = 0, alpha = 0.15)+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to maximum")  +
  ggtitle("Spring models: scale comparison")+
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Figure 4.2: Scale comparison between spring forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

4.2 How far back can we go?

Same comparisons as above: fall

ggplot(splitoutput |> filter(Season =="fall",
                             Data %in% c("1973-2022",
                                         "1982-2022",
                                         "1985-2022")), 
       aes(x=Time, y=Estimate, colour=Data)) +
  #geom_errorbar(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate))+
  geom_ribbon(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate, fill=Data), linetype = 0, alpha = 0.15)+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to maximum")  +
  ggtitle("Fall models: scale comparison")+
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Figure 4.3: Scale comparison between fall forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

Spring

ggplot(splitoutput |> filter(Season =="spring",
                             Data %in% c("1973-2022",
                                         "1982-2022",
                                         "1985-2022")), 
       aes(x=Time, y=Estimate, colour=Data)) +
  #geom_errorbar(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate))+
  geom_ribbon(aes(ymin=Estimate+Std..Error.for.Estimate, ymax=Estimate-Std..Error.for.Estimate, fill=Data), linetype = 0, alpha = 0.15)+
  geom_point(aes(shape=Data))+
  geom_line()+
  #acet_wrap(~fct_relevel(Type, "Ecoregion", "Bluefish"), scales = "free_y") +
  facet_wrap(~Region, scales = "free_y", ncol = 1) +
  scale_y_continuous(labels=function(x)round(x/foragemax, digits = 2))+
  ylab("Relative forage biomass scaled to maximum")  +
  ggtitle("Spring models: scale comparison")+
  theme(#legend.position = c(1, 0),
        #legend.justification = c(1, 0)
        legend.position = "bottom",
        legend.text = element_text(size=rel(0.5)),
        legend.key.size = unit(0.5, 'lines'),
        legend.title = element_text(size=rel(0.5)))

Figure 4.4: Scale comparison between spring forage indices using 1985-2021 data for 2022 Bluefish RT and 2023 SOE (2022), CJFAS published (pyindex), and CJFAS published index updated to end in 2022 (1985-2022).

5 Center of gravity exploration

Has forage shifted along the coast?

library(FishStatsUtils)

fitfall <- VAST::reload_model(readRDS(here::here("SOEpyindex/1982-2022/allagg_fall_500_lennosst_ALLsplit_biascorrect/fit.rds")))

cogoutfall <- FishStatsUtils::plot_range_index(Sdreport = fitfall$parameter_estimates$SD, 
                                           Report = fitfall$Report, 
                                           TmbData = fitfall$data_list,
                                           year_labels = as.numeric(fitfall$year_labels),
                                           years_to_plot = fitfall$years_to_plot,
                                           Znames = colnames(fitfall$data_list$Z_xm),
                                           PlotDir = here::here("test")) #already have plots, will delete this directory

saveRDS(cogoutfall, here::here("SOEpyindex/1982-2022/allagg_fall_500_lennosst_ALLsplit_biascorrect/cogout.rds"))

fitspring <- VAST::reload_model(readRDS(here::here("SOEpyindex/1982-2022/allagg_spring_500_lennosst_ALLsplit_biascorrect/fit.rds")))

cogoutspring <- FishStatsUtils::plot_range_index(Sdreport = fitspring$parameter_estimates$SD, 
                                           Report = fitspring$Report, 
                                           TmbData = fitspring$data_list, 
                                           year_labels = as.numeric(fitspring$year_labels),
                                           years_to_plot = fitspring$years_to_plot,
                                           Znames = colnames(fitspring$data_list$Z_xm),
                                           PlotDir = here::here("test"))

saveRDS(cogoutspring, here::here("SOEpyindex/1982-2022/allagg_spring_500_lennosst_ALLsplit_biascorrect/cogout.rds"))

unlink(here::here("test"), recursive=TRUE) #removes directory with unneeded plots

Fall center of gravity, forage index, significantly more North and East over time

cogoutfall <- readRDS(here::here("SOEpyindex/1982-2022/allagg_fall_500_lennosst_ALLsplit_biascorrect/cogout.rds"))

cogdat <- as.data.frame(cogoutfall$COG_Table) |>
  dplyr::mutate(direction = ifelse(m==1, "Eastward", "Northward"))

ggplot2::ggplot(cogdat, ggplot2::aes(x = Year, y = COG_hat)) + 
  ggplot2::geom_point() + 
  ecodata::geom_gls() + 
  ggplot2::labs(y = "Center of gravity, km") +
  ggplot2::facet_wrap(~direction, scales = "free_y") + 
  ecodata::theme_facet()

Spring center of gravity, forage index, no significant trend

cogoutspring <- readRDS(here::here("SOEpyindex/1982-2022/allagg_spring_500_lennosst_ALLsplit_biascorrect/cogout.rds"))

cogdat <- as.data.frame(cogoutspring$COG_Table) |>
  dplyr::mutate(direction = ifelse(m==1, "Eastward", "Northward"))

ggplot2::ggplot(cogdat, ggplot2::aes(x = Year, y = COG_hat)) + 
  ggplot2::geom_point() + 
  ecodata::geom_gls() + 
  ggplot2::labs(y = "Center of gravity, km") +
  ggplot2::facet_wrap(~direction, scales = "free_y") + 
  ecodata::theme_facet()

update get function for ecodata to add cog to existing forage_index dataset

## Forage Index
raw.dir<- here::here("data-raw/")
#ann<- "annualforageindex - Sarah Gaichas - NOAA Federal.rds"
fal<- "fallforageindex - Sarah Gaichas - NOAA Federal.rds"
spr<- "springforageindex - Sarah Gaichas - NOAA Federal.rds"
falcog <- "fallforagecog.rds"
sprcog <- "springforagecog.rds"

get_forage_index <- function(save_clean = F){

  #annual<-readRDS(file.path(raw.dir, ann))
  fall<- readRDS(file.path(raw.dir, fal))
  spring<-readRDS(file.path(raw.dir, spr))
  fallcog <- readRDS(file.path(raw.dir, falcog))
  springcog <-readRDS(file.path(raw.dir, sprcog))

  forage_index<- rbind(fall, spring, fallcog, springcog)


  if (save_clean){
    usethis::use_data(forage_index, overwrite = T)
  } else {
    return(forage_index)
  }
}
get_forage_index(save_clean = T)

update plot function

plot_forage_index <- function(shadedRegion = NULL,
                              report="MidAtlantic",
                              varName = "index") {

  setup <- ecodata::plot_setup(shadedRegion = shadedRegion,
                               report=report)
  
  if (varName == "index") {
    
  if (report == "MidAtlantic") {
    filterEPUs <- c("MAB")
  } else {
    filterEPUs <- c("GB", "GOM")
  }

  fix<- ecodata::forage_index |>
    dplyr::filter(Var %in% c("Fall Forage Fish Biomass Estimate",
                             "Spring Forage Fish Biomass Estimate"),
                  EPU %in% filterEPUs) |>
    dplyr::group_by(EPU) |>
    dplyr::summarise(max = max(Value))

  p <- ecodata::forage_index |>
    dplyr::filter(Var %in% c("Fall Forage Fish Biomass Estimate",
                             "Fall Forage Fish Biomass Estimate SE",
                             "Spring Forage Fish Biomass Estimate",
                             "Spring Forage Fish Biomass Estimate SE"),
                  EPU %in% filterEPUs) |>
    dplyr::group_by(EPU) |>
    tidyr::separate(Var, into = c("Season", "A", "B", "C", "D", "Var")) |>
    dplyr::mutate(Var = tidyr::replace_na(Var, "Mean")) |> #,
                  #max = as.numeric(Value)) |>
    tidyr::pivot_wider(names_from = Var, values_from = Value) |>
    dplyr::left_join(fix) |>
    dplyr::mutate(#Value = Value/resca,
      Mean = as.numeric(Mean),
      #max = as.numeric(Value),
      Mean = Mean/max,
      SE = SE/max,
      Upper = Mean + SE,
      Lower = Mean - SE) |>
    ggplot2::ggplot(ggplot2::aes(x = Time, y = Mean, group = Season))+
    ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
        xmin = setup$x.shade.min , xmax = setup$x.shade.max,
        ymin = -Inf, ymax = Inf) +
    ggplot2::geom_ribbon(ggplot2::aes(ymin = Lower, ymax = Upper, fill = Season), alpha = 0.5)+
    ggplot2::geom_point()+
    ggplot2::geom_line()+
    ggplot2::ggtitle("")+
    ggplot2::ylab(expression("Relative forage biomass"))+
    ggplot2::xlab(ggplot2::element_blank())+
    ggplot2::facet_wrap(.~EPU)+
    ecodata::geom_gls()+
    ecodata::theme_ts()+
    ecodata::theme_facet()+
    ecodata::theme_title()

    if (report == "NewEngland") {
      p <- p +
        ggplot2::theme(legend.position = "bottom",
                       legend.title = ggplot2::element_blank())

    }
  }
  
  if (varName == "cog"){
    
    p <- ecodata::forage_index |>
      dplyr::filter(Var %in% c("Fall Eastward Forage Fish Center of Gravity",
                               "Fall Eastward Forage Fish Center of Gravity SE",
                               "Fall Northward Forage Fish Center of Gravity",
                               "Fall Northward Forage Fish Center of Gravity SE",
                               "Spring Eastward Forage Fish Center of Gravity",
                               "Spring Eastward Forage Fish Center of Gravity SE",
                               "Spring Northward Forage Fish Center of Gravity",
                               "Spring Northward Forage Fish Center of Gravity SE")) |>
      tidyr::separate(Var, into = c("Season", "Direction", "A", "B", "C", "D", "E", "Var")) |>
      dplyr::mutate(Var = tidyr::replace_na(Var, "Mean")) |> 
      tidyr::pivot_wider(names_from = Var, values_from = Value) |>
      dplyr::mutate(Mean = as.numeric(Mean),
                    Upper = Mean + SE,
                    Lower = Mean - SE) |>
      ggplot2::ggplot(ggplot2::aes(x = Time, y = Mean, group = Season))+
      ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
                        xmin = setup$x.shade.min , xmax = setup$x.shade.max,
                        ymin = -Inf, ymax = Inf) +
      ggplot2::geom_ribbon(ggplot2::aes(ymin = Lower, ymax = Upper, fill = Season), alpha = 0.3)+ #
      ggplot2::geom_point()+
      ggplot2::geom_line()+
      ggplot2::ggtitle("")+
      ggplot2::ylab(expression("Forage Center of Gravity, km"))+
      ggplot2::xlab(ggplot2::element_blank())+
      ggplot2::facet_wrap(~Direction, scales = "free_y")+ #Season
      ecodata::geom_gls()+
      ecodata::theme_ts()+
      ecodata::theme_facet()+
      ecodata::theme_title()

  }

    return(p)

}

attr(plot_forage_index,"report") <- c("MidAtlantic","NewEngland")
attr(plot_forage_index, "varName") <- c("index", "cog")

forage COG indices produced by SOE-VASTForageCOG.R

fallforagecog <- readRDS(here::here("SOEpyindex/fallforagecog.rds"))
springforagecog <- readRDS(here::here("SOEpyindex/springforagecog.rds"))

setup <- ecodata::plot_setup(shadedRegion = c(2014,2023), report = "MidAtlantic")

testplot <- dplyr::bind_rows(fallforagecog, springforagecog) |>
  
  dplyr::filter(Var %in% c("Fall Eastward Forage Fish Center of Gravity",
                               "Fall Eastward Forage Fish Center of Gravity SE",
                               "Fall Northward Forage Fish Center of Gravity",
                               "Fall Northward Forage Fish Center of Gravity SE",
                               "Spring Eastward Forage Fish Center of Gravity",
                               "Spring Eastward Forage Fish Center of Gravity SE",
                               "Spring Northward Forage Fish Center of Gravity",
                               "Spring Northward Forage Fish Center of Gravity SE")) |>
      tidyr::separate(Var, into = c("Season", "Direction", "A", "B", "C", "D", "E", "Var")) |>
      dplyr::mutate(Var = tidyr::replace_na(Var, "Mean")) |> 
      tidyr::pivot_wider(names_from = Var, values_from = Value) |>
      dplyr::mutate(Mean = as.numeric(Mean),
                    Upper = Mean + SE,
                    Lower = Mean - SE) |>
      ggplot2::ggplot(ggplot2::aes(x = Time, y = Mean, group = Season))+
      ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
                        xmin = setup$x.shade.min , xmax = setup$x.shade.max,
                        ymin = -Inf, ymax = Inf) +
      ggplot2::geom_ribbon(ggplot2::aes(ymin = Lower, ymax = Upper, fill = Season), alpha = 0.3)+ #
      ggplot2::geom_point()+
      ggplot2::geom_line()+
      ggplot2::ggtitle("")+
      ggplot2::ylab(expression("Forage Center of Gravity, km"))+
      ggplot2::xlab(ggplot2::element_blank())+
      ggplot2::facet_wrap(~Direction, scales = "free_y")+ #Season
      ecodata::geom_gls()+
      ecodata::theme_ts()+
      ecodata::theme_facet()+
      ecodata::theme_title()

testplot

Forage Index update for 2024 SOE

Data updated through 2022

Sarah Gaichas, James Gartland, Brian Smith, Anthony Wood, Elizabeth Ng, Michael Celestino, Katie Drew, Abigail Tyrell, and James Thorson

25 January, 2024