Introduction

The Atlantic herring research track working group is interested in exploring zooplankton indices within the assessment.

These indices could potentially explain:

  1. herring growth/condition
  2. herring recruitment
  3. patterns in herring “survival” random effects

We have zooplankton indices already in ecodata at the EPU scale. These were reviewed with the working group in March 2024 (see here).

However, we want to examine the scale of herring assessment and different combinations of zooplankton species than currently exist in ecodata.

We’ll try VAST models for index standardization using the ECOMON data as an input.

Set of decisions:

  1. zooplankton species
  2. which zooplankton metric (area or volume)
  3. spatial footprint
  4. temporal scale (seasonal? annual?)
  5. potential covariates

Explore data

Zooplankton data

Read in file obtained from Scott Large, source of his zooplankton models:

ecomonall <- read_csv("data/EcoMon_Plankton_Data_v3_8.csv")

names(ecomonall)
##   [1] "cruise_name"      "station"          "zoo_gear"        
##   [4] "ich_gear"         "lat"              "lon"             
##   [7] "date"             "time"             "depth"           
##  [10] "sfc_temp"         "sfc_salt"         "btm_temp"        
##  [13] "btm_salt"         "volume_1m2"       "ctyp_10m2"       
##  [16] "calfin_10m2"      "pseudo_10m2"      "penilia_10m2"    
##  [19] "tlong_10m2"       "cham_10m2"        "echino_10m2"     
##  [22] "larvaceans_10m2"  "para_10m2"        "gas_10m2"        
##  [25] "acarspp_10m2"     "mlucens_10m2"     "evadnespp_10m2"  
##  [28] "salps_10m2"       "oithspp_10m2"     "cirr_10m2"       
##  [31] "chaeto_10m2"      "hyper_10m2"       "gam_10m2"        
##  [34] "evadnord_10m2"    "calminor_10m2"    "copepoda_10m2"   
##  [37] "clauso_10m2"      "dec_10m2"         "euph_10m2"       
##  [40] "prot_10m2"        "acarlong_10m2"    "euc_10m2"        
##  [43] "pel_10m2"         "poly_10m2"        "podon_10m2"      
##  [46] "fish_10m2"        "bry_10m2"         "fur_10m2"        
##  [49] "calspp_10m2"      "oncaea_10m2"      "cory_10m2"       
##  [52] "ost_10m2"         "tstyl_10m2"       "oithspin_10m2"   
##  [55] "mysids_10m2"      "temspp_10m2"      "tort_10m2"       
##  [58] "paraspp_10m2"     "scyphz_10m2"      "anthz_10m2"      
##  [61] "siph_10m2"        "hydrom_10m2"      "coel_10m2"       
##  [64] "ctenop_10m2"      "euph1_10m2"       "thysin_10m2"     
##  [67] "megan_10m2"       "thysra_10m2"      "thyslo_10m2"     
##  [70] "eupham_10m2"      "euphkr_10m2"      "euphspp_10m2"    
##  [73] "thysgr_10m2"      "nemaspp_10m2"     "stylspp_10m2"    
##  [76] "stylel_10m2"      "nemame_10m2"      "thysspp_10m2"    
##  [79] "shysac_10m2"      "thypsp_10m2"      "nemabo_10m2"     
##  [82] "thecos_10m2"      "spirre_10m2"      "spirhe_10m2"     
##  [85] "spirin_10m2"      "spirtr_10m2"      "spirspp_10m2"    
##  [88] "clispp_10m2"      "crevir_10m2"      "diatri_10m2"     
##  [91] "clicus_10m2"      "clipyr_10m2"      "cavunc_10m2"     
##  [94] "cavinf_10m2"      "cavlon_10m2"      "stysub_10m2"     
##  [97] "spirbu_10m2"      "crespp_10m2"      "cavspp_10m2"     
## [100] "cavoli_10m2x"     "gymnos_10m2"      "pnespp_10m2"     
## [103] "paedol_10m2"      "clilim_10m2"      "pnepau_10m2"     
## [106] "volume_100m3"     "ctyp_100m3"       "calfin_100m3"    
## [109] "pseudo_100m3"     "penilia_100m3"    "tlong_100m3"     
## [112] "cham_100m3"       "echino_100m3"     "larvaceans_100m3"
## [115] "para_100m3"       "gas_100m3"        "acarspp_100m3"   
## [118] "mlucens_100m3"    "evadnespp_100m3"  "salps_100m3"     
## [121] "oithspp_100m3"    "cirr_100m3"       "chaeto_100m3"    
## [124] "hyper_100m3"      "gam_100m3"        "evadnord_100m3"  
## [127] "calminor_100m3"   "copepoda_100m3"   "clauso"          
## [130] "dec_100m3"        "euph_100m3"       "prot_100m3"      
## [133] "acarlong_100m3"   "euc_100m3"        "pel_100m3"       
## [136] "poly_100m3"       "podon_100m3"      "fish_100m3"      
## [139] "bry_100m3"        "fur_100m3"        "calspp_100m3"    
## [142] "oncaea_100m3"     "cory_100m3"       "ost_100m3"       
## [145] "tstyl_100m3"      "oithspin_100m3"   "mysids_100m3"    
## [148] "temspp_100m3"     "tort_100m3"       "paraspp_100m3"   
## [151] "scyphz_100m3"     "anthz_100m3"      "siph_100m3"      
## [154] "hydrom_100m3"     "coel_100m3"       "ctenop_100m3"    
## [157] "euph1_100m3"      "thysin_100m3"     "megan_100m3"     
## [160] "thysra_100m3"     "thyslo_100m3"     "eupham_100m3"    
## [163] "euphkr_100m3"     "euphspp_100m3"    "thysgr_100m3"    
## [166] "nemaspp_100m3"    "stylspp_100m3"    "stylel_100m3"    
## [169] "nemame_100m3"     "thysspp_100m3"    "shysac_100m3"    
## [172] "thypsp_100m3"     "nemabo_100m3"     "thecos_100m3"    
## [175] "spirre_100m3"     "spirhe_100m3"     "spirin_100m3"    
## [178] "spirtr_100m3"     "spirspp_100m3"    "clispp_100m3"    
## [181] "crevir_100m3"     "diatri_100m3"     "clicus_100m3"    
## [184] "clipyr_100m3"     "cavunc_100m3"     "cavinf_100m3"    
## [187] "cavlon_100m3"     "stysub_100m3"     "spirbu_100m3"    
## [190] "crespp_100m3"     "cavspp_100m3"     "cavoli_100m3x"   
## [193] "gymnos_100m3"     "pnespp_100m3"     "paedol_100m3"    
## [196] "clilim_100m3"     "pnepau_100m3"     "nofish_10m2"     
## [199] "bretyr_10m2"      "cluhar_10m2"      "cycspp_10m2"     
## [202] "diaspp_10m2"      "cermad_10m2"      "benspp_10m2"     
## [205] "urospp_10m2"      "enccim_10m2"      "gadmor_10m2"     
## [208] "melaeg_10m2"      "polvir_10m2"      "meralb_10m2"     
## [211] "merbil_10m2"      "centstr_10m2"     "pomsal_10m2"     
## [214] "cynreg_10m2"      "leixan_10m2"      "menspp_10m2"     
## [217] "micund_10m2"      "tauads_10m2"      "tauoni_10m2"     
## [220] "auxspp_10m2"      "scosco_10m2"      "pepspp_10m2"     
## [223] "sebspp_10m2"      "prispp_10m2"      "myoaen_10m2"     
## [226] "myooct_10m2"      "ammspp_10m2"      "phogun_10m2"     
## [229] "ulvsub_10m2"      "anaspp_10m2"      "citarc_10m2"     
## [232] "etrspp_10m2"      "syaspp_10m2"      "botspp_10m2"     
## [235] "hipobl_10m2"      "parden_10m2"      "pseame_10m2"     
## [238] "hippla_10m2"      "limfer_10m2"      "glycyn_10m2"     
## [241] "scoaqu_10m2"      "sypspp_10m2"      "lopame_10m2"     
## [244] "nofish_100m3"     "bretyr_100m3"     "cluhar_100m3"    
## [247] "cycspp_100m3"     "diaspp_100m3"     "cermad_100m3"    
## [250] "benspp_100m3"     "urospp_100m3"     "enccim_100m3"    
## [253] "gadmor_100m3"     "melaeg_100m3"     "polvir_100m3"    
## [256] "meralb_100m3"     "merbil_100m3"     "centstr_100m3"   
## [259] "pomsal_100m3"     "cynreg_100m3"     "leixan_100m3"    
## [262] "menspp_100m3"     "micund_100m3"     "tauads_100m3"    
## [265] "tauoni_100m3"     "auxspp_100m3"     "scosco_100m3"    
## [268] "pepspp_100m3"     "sebspp_100m3"     "prispp_100m3"    
## [271] "myoaen_100m3"     "myooct_100m3"     "ammspp_100m3"    
## [274] "phogun_100m3"     "ulvsub_100m3"     "anaspp_100m3"    
## [277] "citarc_100m3"     "etrspp_100m3"     "syaspp_100m3"    
## [280] "botspp_100m3"     "hipobl_100m3"     "parden_100m3"    
## [283] "pseame_100m3"     "hippla_100m3"     "limfer_100m3"    
## [286] "glycyn_100m3"     "scoaqu_100m3"     "sypspp_100m3"    
## [289] "lopame_100m3"

A lookup of these column headings is here: https://www.fisheries.noaa.gov/inport/item/35054

Herring diets

What herring eat from our food habits data is summarized here: https://fwdp.shinyapps.io/tm2020/

Copepods are highest diet proportion, followed by well digested prey (AR), and euphausiids. These are aggregate categories so I should look at the breakdown to species.

# run the diet script that we used for bluefish? gives diet by season and year
# Load NEFSC stomach data received from Brian Smith

# put the get_diet function on this repo in R folder
source(here::here("R/get_diet.R"))

# what is the code
ecodata::species_groupings |> dplyr::filter(RPATH == "AtlHerring")

# get diet takes SVSPP as argument
herringdiet <- get_diet(32) 

saveRDS(herringdiet, here::here("data/herringdiet.rds"))
herringdiet <- readRDS(here::here("data/herringdiet.rds"))

p1 <-   ggplot(herringdiet, aes(year, relmsw, fill=prey)) +
   geom_bar(stat = "identity") + #
   ylab("Percent in Diet") +
   xlab("Year") +
   facet_wrap("season", nrow=4) +
   theme_bw() +
   viridis::scale_fill_viridis(discrete = TRUE) +
   theme(legend.position = "none"
         #legend.position="bottom",
         #legend.text=element_text(size=5)
         ) +
    geom_bar_interactive(stat = "identity", aes(tooltip = prey, data_id = prey))

ggiraph(code = print(p1), height=14)  

Initial decisions

1. Zooplankton species: VAST Categories

Maybe start with total zooplankton volume, Copepoda, Euphausiids, and Calanus finmarchicus? And keep both spatial units. These would be separate univariate models, though we could try a multivariate model with all too.

It appears that Copepoda in the ecomon dataset is not all copepods together, as I hoped, but is unid copepods. Very few are in the data.

So I will sum everything that Harvey classified as a copeopod in this spreadsheet that he shared with Scott.

sumcopepods <- ecomonall |>
  dplyr::rowwise() |>
  dplyr::mutate(allcopepods_10m2 = sum(ctyp_10m2,
                                       calfin_10m2,
                                       pseudo_10m2,
                                       tlong_10m2,
                                       cham_10m2,
                                       para_10m2,
                                       acarspp_10m2,
                                       mlucens_10m2,
                                       calminor_10m2,
                                       clauso_10m2,
                                       acarlong_10m2,
                                       euc_10m2,
                                       fur_10m2,
                                       calspp_10m2,
                                       ost_10m2,
                                       temspp_10m2,
                                       tort_10m2,
                                       paraspp_10m2,
                                       na.rm = TRUE
  ),
  allcopepods_100m3 = sum(ctyp_100m3,
                          calfin_100m3,
                          pseudo_100m3,
                          tlong_100m3,
                          cham_100m3,
                          para_100m3,
                          acarspp_100m3,
                          mlucens_100m3,
                          calminor_100m3,
                          clauso,
                          acarlong_100m3,
                          euc_100m3,
                          fur_100m3,
                          calspp_100m3,
                          ost_100m3,
                          temspp_100m3,
                          tort_100m3,
                          paraspp_100m3,
                          na.rm = TRUE
  ) 
  ) |>
  dplyr::select(cruise_name, station, lat, lon, date, time, depth, 
                sfc_temp, sfc_salt, btm_temp, btm_salt, volume_1m2, volume_100m3,
                allcopepods_10m2, allcopepods_100m3)
  

herringfood <- ecomonall |>
    dplyr::left_join(sumcopepods) |>
    dplyr::select(cruise_name, station, lat, lon, date, time, depth, 
                  sfc_temp, sfc_salt, btm_temp, btm_salt, volume_1m2, volume_100m3,
                  allcopepods_10m2, allcopepods_100m3, euph_10m2, euph_100m3,
                  hyper_10m2, hyper_100m3, calfin_10m2, calfin_100m3) |>
    dplyr::mutate(date = lubridate::dmy(date),
                  year = lubridate::year(date),
                  month = lubridate::month(date),
                  day = lubridate::day(date),
                  stationid = paste0(cruise_name, "_", station),
                  season_ng = case_when(month <= 6 ~ "SPRING",
                                        month >= 7 ~ "FALL",
                                        TRUE ~ as.character(NA)),
                  season_4 = case_when(month %in% c(12,1,2) ~ "winter",
                                       month %in% c(3,4,5) ~ "spring",
                                       month %in% c(6,7,8) ~ "summer",
                                       month %in% c(9,10,11) ~ "fall",
                                       TRUE ~ as.character(NA)))

nonzeroobs <- herringfood |>
  dplyr::group_by(year, month) |>
  dplyr::summarise(totzoo = sum(volume_100m3 != 0, na.rm = TRUE),
                   cope =  sum(allcopepods_100m3 != 0, na.rm = TRUE),
                   euph = sum(euph_100m3 != 0, na.rm = TRUE), 
                   hyp = sum(hyper_100m3 != 0, na.rm = TRUE),
                   calfin = sum(calfin_100m3 != 0, na.rm = TRUE))

So we will look at total zooplankton displacement volume: volume_1m2, volume_100m3

Total euphausiids: euph_10m2, euph_100m3

Total hyperiids: hyper_10m2, hyper_100m3

Calanus finmarchicus: calfin_10m2, calfin_100m3

Total copeopds: summing over the following names for each of the area or volume metrics, only area shown here

ctyp_10m2, calfin_10m2, pseudo_10m2, tlong_10m2, cham_10m2, para_10m2, acarspp_10m2, mlucens_10m2, calminor_10m2, clauso_10m2, acarlong_10m2, euc_10m2, fur_10m2, calspp_10m2, ost_10m2, temspp_10m2, tort_10m2, paraspp_10m2

These are the corresponding species names, with Large copepods denoted by *

Centropages typicus, Calanus finmarchicus*, Pseudocalanus spp., Temora longicornis, Centropages hamatus, Paracalanus parvus, Acartia spp., Metridia lucens*, Calanus minor*, Clausocalanus arcuicornis, Acartia longiremis, Eucalanus spp.*, Clausocalanus furcatus, Calanus spp.*, Temora stylifera, Temora spp., Tortanus discaudatus, Paracalanus spp.

Added four more categories based on discussion with ToR 1 subgroup:

Small copepods ALL:

Centropages typicus, Pseudocalanus spp., Temora longicornis, Centropages hamatus, Paracalanus parvus, Acartia spp., Clausocalanus arcuicornis, Acartia longiremis, Clausocalanus furcatus, Temora stylifera, Temora spp., Tortanus discaudatus, Paracalanus spp.

Large copepods ALL:

Calanus finmarchicus*, Metridia lucens*, Calanus minor*, Eucalanus spp.*, Calanus spp.*

Small copeopods SOE (used in small-large index):

Centropages typicus, Pseudocalanus spp., Temora longicornis, Centropages hamatus

Large copeopds SOE (used in small-large index) = Calanus finmarchicus*

Dataset years extend from 1977 to 2021

(As of June 21, data expanded to include Fall 2022, but table still covers old dataset because I’ve been asked not to distribute the new dataset yet. Models run after 21 June will use data through 2022 from new dataset.)

This is a list of stations with nonzero samples by year and month to better understand seasonal coverage.

DT::datatable(nonzeroobs)

2. Which metric?

Ryan recommends the numbers per 100 cubic meters of filtered water volume _100m3 data, which should match what is used in the SOE indices. However, he thinks that anomalies calculated from the area _10m2 data should be the same as those calculated from the volume based numbers. This makes sense to me.

Harvey is using the numbers per 100 cubic meters for other analyses as well.

Let’s start with the _100m3 numbers for each category. We can try one with the other metric and see if it does make a difference as well.

3. Spatial scale options:

  1. Herring survey strata (differ by spring and fall)
  2. EPUs for comparison with SOE datasets
  3. Full shelf/AllEPU

Lets look at the spatial extent of the data in each season to see if we need to limit model domain. A test model of total zoop biovolume (present at each station in many years) failed using the full set of EPUs because the spatial random effects parameter was headed for 0. This is what happened to the herring index fall model when there were no observations in the southern end of the domain.

This is zooplankton volume by season (half years)

Now using data processed in VASTzoopindex_processinputs.R

herringfood_stn <- readRDS(here::here("data/herringfood_stn_all.rds"))

# make SST column that uses surftemp unless missing or 0

#herringfood_stn <- herringfood_stn %>%
#  dplyr::mutate(sstfill = ifelse((is.na(sfc_temp)|sfc_temp==0), oisst, sfc_temp))

# code Vessel as AL=0, HB=1, NEAMAP=2

herringfood_stn_fall <- herringfood_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL", # Annual model for MRIP index
         year > 1976) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = 1 #as.numeric(as.factor(vessel))-1
  ) %>% 
  dplyr::select(Catch_g = volume_100m3, #use megabenwt for individuals input in example
                Year = year,
                Vessel,
                AreaSwept_km2,
                Lat = lat,
                Lon = lon #,
                #btm_temp, #this leaves out many stations
                #sfc_temp, #this leaves out many stations
                #oisst,
                #sstfill
  ) %>%
  na.omit() %>%
  as.data.frame()

herringfood_stn_spring <- herringfood_stn %>%
  #ungroup() %>%
  filter(season_ng == "SPRING", # Annual model for MRIP index
         year > 1976) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = 1 #as.numeric(as.factor(vessel))-1
  ) %>% 
  dplyr::select(Catch_g = volume_100m3, #use megabenwt for individuals input in example
                Year = year,
                Vessel,
                AreaSwept_km2,
                Lat = lat,
                Lon = lon #,
                #btm_temp, #this leaves out many stations
                #sfc_temp, #this leaves out many stations
                #oisst,
                #sstfill
  ) %>%
  na.omit() %>%
  as.data.frame()


nonzerofall <- herringfood_stn_fall |>
  dplyr::filter(Catch_g > 0) #,
                #Year > 1981)

nonzerospring <- herringfood_stn_spring |>
  dplyr::filter(Catch_g > 0) #,
               # Year > 1981)

Fall <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = nonzerofall, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Fall zoo biovolume 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))

Spring <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = nonzerospring, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Spring zoo biovolume 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))
 
Spring + Fall

4. Temporal scale options:

I need to decide how to break up the data temporally, thinking by season associated with herring population processes. So there could be a winter index, a spring index, a summer index, and a fall index. What months?

Mark W’s herring spawn timing info could be useful here. Do they feed while spawning?

Alternatively, at least two seasons should match the bottom trawl survey timing if we want to use these indices as covariates for availability/catchability.

Mark W’s spawn timing paper also looked at changes in survey timing.

What is NEFSC BTS survey timing? Histograms of survey month grouped by FALL, SPRING:

survdat <- readRDS(url("https://github.com/NOAA-EDAB/benthosindex/raw/main/data/survdat_nobio.rds"))

survdat$survdat |>
  dplyr::mutate(date = lubridate::ymd_hms(EST_TOWDATE),
                month = lubridate::month(date)) |>
  dplyr::group_by(SEASON) |>
  dplyr::group_walk(~ hist(.x$month, 
                           breaks = c(1:12),
                           right = TRUE))

  1. 4 models, 2 matching survey seasons (spring/fall) and try two for winter and summer to cover other portions of herring life history. Current designations are
    • “winter” months 12, 1, 2
    • “spring” months 3, 4, 5
    • “summer” months 6, 7, 8
    • “fall” months 9, 10, 11
  2. Alternatively, roll winter into spring and summer into fall as done for bluefish.
    • “SPRING” months 1-6
    • “FALL” months 7-12
  3. Not sure an annual model makes sense for zooplankton but we could try it, may compare with SOE indices?

5. Covariates

Probably at a minimum temperature should be explored. Catchability or habitat? Its both

Day of year or day of season? potential catchability covariate for this high turnover group

A frontal index might be useful? As a habitat covaraite?

Temperature

We will need to merge in alternative temperature to use all the data since some are NAs.

We have 32693 rows (stations) total, 25513 that have surface temperature, and 22824 that have bottom temperature.

Likely surface temperature is more relevant for zooplankton?

Missing temperature data by year and season (SPRING and FALL):

stnsummary <- herringfood_stn %>%
  #dplyr::filter(year>1984) %>%
  dplyr::group_by(year, season_ng) %>%
  dplyr::summarise(nstation = n(),
                   hastemp = sum(!is.na(sfc_temp))) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(percentmiss = (nstation - hastemp)/nstation*100) %>%
  tidyr::pivot_wider(names_from = season_ng,
    values_from = c(nstation, hastemp, percentmiss)) %>%
  dplyr::select(year, nstation_SPRING, hastemp_SPRING, percentmiss_SPRING,
                nstation_FALL, hastemp_FALL, percentmiss_FALL)

flextable::flextable(stnsummary) %>%
  flextable::set_header_labels(year = 'Year', 
                               nstation_SPRING = "Spring N stations", 
                               hastemp_SPRING = "Spring N stations with in situ temperature",
                               percentmiss_SPRING = "Spring percent missing temperature",
                               nstation_FALL = "Fall N stations", 
                               hastemp_FALL = "Fall N stations with in situ temperature",
                               percentmiss_FALL = "Fall percent missing temperature") %>%
  flextable::set_caption("Number of survey stations by year and season with in situ sea surface temperature measurements.") %>%
  flextable::colformat_double(big.mark = "", digits = 0) %>%
  #flextable::autofit()
  flextable::width(width = 1)

Missing temperature data by year and season (winter, spring, summer, fall):

stnsummary <- herringfood_stn %>%
  #dplyr::filter(year>1984) %>%
  dplyr::group_by(year, season_4) %>%
  dplyr::summarise(nstation = n(),
                   hastemp = sum(!is.na(sfc_temp))) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(percentmiss = (nstation - hastemp)/nstation*100) %>%
  tidyr::pivot_wider(names_from = season_4,
    values_from = c(nstation, hastemp, percentmiss)) %>%
  dplyr::select(year, nstation_winter, hastemp_winter, percentmiss_winter,
                nstation_spring, hastemp_spring, percentmiss_spring,
                nstation_summer, hastemp_summer, percentmiss_summer,
                nstation_fall, hastemp_fall, percentmiss_fall)

flextable::flextable(stnsummary) %>%
  flextable::set_header_labels(year = 'Year',
                               nstation_winter = "Winter N stations", 
                               hastemp_winter = "Winter N stations with in situ temperature", 
                               percentmiss_winter = "Winter percent missing temperature",
                               nstation_spring = "Spring N stations",
                               hastemp_spring = "Spring N stations with in situ temperature",
                               percentmiss_spring = "Spring percent missing temperature",
                               nstation_summer = "Summer N stations", 
                               hastemp_summer = "Summer N stations with in situ temperature", 
                               percentmiss_summer = "Summer percent missing temperature",
                               nstation_fall = "Fall N stations",
                               hastemp_fall = "Fall N stations with in situ temperature",
                               percentmiss_fall = "Fall percent missing temperature") %>%
  flextable::set_caption("Number of survey stations by year and season with in situ sea surface temperature measurements.") %>%
  flextable::colformat_double(big.mark = "", digits = 0) %>%
  #flextable::autofit()
  flextable::width(width = 1)

This can be done with the OISST data as was done for the forage index, or the bottom temperature data as was done for the benthos index.

Day of year

We can derive from date field in dataset, convert to season for seasonal models