Workspace setup

library(CircStats) # for von Mises distribution
library(Rcpp) # likelihood in C++
library(RcppArmadillo) # likelihood in C++
library(sp) # for great circle distances
library(geosphere) # for great circle bearings
library(maps) # for map plots
library(raster)
library(spatstat)
library(sf)
library(geoR)
library(ggplot2)
library(doParallel)
library(foreach)
set.seed(1234)

Habitat simulation

The simulation domain was defined as a 2,500 km\(^2\) area (50 \(\times\) 50 km), discretised into a regular grid with a spatial resolution of 100 m. Habitat suitability was represented as a binary chequerboard pattern consisting of alternating 2.5 \(\times\) 2.5 km cells, classified as suitable (value = 1) and unsuitable (value = 0).

# Define grid in meters: 100 m resolution over 50 km
x_grid <- seq(0, by = 100, length.out = 500)  # 0 to 50,000 meters
y_grid <- seq(0, by = 100, length.out = 500)

# Create coordinate grid
coord_df <- expand.grid(x = x_grid, y = y_grid)

# Checkerboard pattern with 2.5 km squares
x_band <- floor(coord_df$x / 2500) %% 2
y_band <- floor(coord_df$y / 2500) %% 2


df <- data.frame(
  x = coord_df$x,
  y = coord_df$y,
  value = (x_band == y_band) + 0
)

df$value <- (df$value - min(df$value)) / (max(df$value) - min(df$value))

cov_raster <- rasterFromXYZ(df)
crs(cov_raster) <- "+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs"

# Save cov_raster
save(cov_raster, file = "cov_raster.RData")

ggplot(df, aes(x = x, y = y, fill = value)) +
  geom_tile() + 
  labs(fill = "Value") + 
  xlab("X coordinates")+
  ylab("Y coordinates") +
  scale_fill_viridis_c()+
  coord_fixed(expand = FALSE) + 
  theme_bw() 

Central place

centre <- c(24950, 24950)
ggplot(df, aes(x = x, y = y, fill = value)) +
  geom_raster() + 
  geom_point(aes(x=centre[1], y=centre[2]), col="white",size=3)+
  labs(fill = "Value") + 
  xlab("X coordinates")+
  ylab("Y coordinates") +
  scale_fill_viridis_c()+
  theme_bw()

Movement data simulation

Behavioural states

Following Michelot et al. (2017), each central place foraging (CPF) movement path was represented as a sequence of four behavioural states:

  1. outbound travel (fast, highly directed movement away from the central place),
  2. search (moderately fast movement with some directional persistence)
  3. active foraging (slower, less directed and more sinuous movement), and
  4. inbound travel (fast, directed movement towards the central place).

State transition model

Transitions between behavioural states were governed by a probabilistic matrix informed by Euclidean distance to the colony, time since departure from the nest, and habitat suitability. Transition probabilities were computed using a multinomial logit formulation, with parameters chosen to reflect biologically realistic behaviour—for example, an increased probability of switching to foraging in suitable habitat (Michelot et al. 2017).

\[ T = \begin{pmatrix} 1 - \gamma_{1,2} & \gamma_{1,2} & 0 & 0 \\ 0 & 1 - \gamma_{2,3} - \gamma_{2,4} & \gamma_{2,3} & \gamma_{2,4} \\ 0 & 0 & 1 - \gamma_{3,4} & \gamma_{3,4} \\ 0 & 0 & 0 & 1 \end{pmatrix} \]

where \(\gamma_{A, B}\) represents the probability of switching from behavioural state A to behavioural state B and behaviours are numbered according to their canonical progression during a single foraging trip (i.e., outbound (1) \(\rightarrow\) search (2) \(\rightarrow\) foraging (3) \(\rightarrow\) (4) inbound). Each row and column in \(T\) corresponds to a separate behaviour: values along the diagonal give the probabilities of remaining in the same state, whereas off-diagonal values denote probabilities of switching from the row state to the column state. For instance, \(\gamma_{1,2}\) corresponds to the probability of transitioning from outbound travel to search.

We defined:

\[\gamma_{1,2} = \operatorname{logit}^{-1}(\eta_{1,2})$, $\gamma_{2,3} = e^{\eta_{2,3}} / D\]

\[\gamma_{2,4} = e^{\eta_{2,4}} / D\]

\[\gamma_{3,4} = \operatorname{logit}^{-1}(\eta_{3,4})\]

with \(D = 1 + e^{\eta_{2,3}} + e^{\eta_{2,4}}\).

The linear predictors were specified as:

  • \(\eta_{1,2}= -3 + 0.0005 \times \text{distance to colony} + 1.5 \times \text{habitat}\)
  • \(\eta_{2,3}= -5 + 3 \times \text{habitat}\)
  • \(\eta_{2,4}= -6 + 0.05 \times \text{time since departure}\)
  • \(\eta_{3,4}= -5 + 0.015 \times \text{time since departure}\)

This parameterisation increases the probability of switching from outbound travel (1) or search (2) to active foraging (3) in suitable habitat and increases the probability of inbound travel (4) as time since departure grows, thereby capturing key features of CPF behaviour (Michelot et al. 2017).

addLogit_rcpp <- function(x1, x2) {
  p1 <- exp(x1)
  p2 <- exp(x2)
  res <- c(p1 / (1 + p1 + p2), p2 / (1 + p1 + p2), 1 - (p1 / (1 + p1 + p2)) - (p2 / (1 + p1 + p2)))
  return(res)
}

trMatrix_rcpp <- function(beta, dist, time, cov) {
  
  trMat <- matrix(ncol=4, nrow=4)
  
  trMat[1, 1] <- 1 - plogis(beta[1] + beta[2] * dist + beta[3] * cov) # prob of staying in stage 1
  trMat[1, 2] <- plogis(beta[1] + beta[2] * dist + beta[3] * cov)# prob of going from stage 1 to 2 as a function of cov
  trMat[1, 3] <- 0 # prob of going from stage 1 to 3
  trMat[1, 4] <- 0 # prob of going from stage 1 to 4
  
  foo23 <- beta[4] + beta[5] * cov # prob of going from stage 2 to 3 as a function of cov
  foo24 <- beta[6] + beta[7] * time # prob of going from stage 2 to 4 as a function of time
  
  r2 <- addLogit_rcpp(foo23, foo24)
  
  trMat[2, 1] <- 0 # prob of going from stage 2 to 1 
  trMat[2, 2] <- r2[3] # prob of staying stage 2 
  trMat[2, 3] <- r2[1] # prob of going from stage 2 to 3 
  trMat[2, 4] <- r2[2] # prob of going from stage 2 to 4 
  
  trMat[3, 1] <- 0 # prob of going from stage 3 to 1 
  trMat[3, 2] <- 0 # prob of going from stage 3 to 2 
  trMat[3, 3] <- 1 - plogis(beta[8] + beta[9] * time) # prob of staying in stage 3
  trMat[3, 4] <- plogis(beta[8] + beta[9] * time) # prob of going from stage 3 to 4 as a function of time 
  
  trMat[4, 1] <- 0 #prob of staying in stage 4
  trMat[4, 2] <- 0 #prob of staying in stage 4
  trMat[4, 3] <- 0 #prob of staying in stage 4
  trMat[4, 4] <- 1 #prob of staying in stage 4
  
  return(trMat)
}

#linear predictors
beta <- c(-3, 0.0005, 1.5,   # stage 1 → 2
          -5, 3,              # stage 2 → 3
          -6, 0.05,           # stage 2 → 4
          -5, 0.015)          # stage 3 → 4

#Number of states
nbStates<-4

State-specific parameters

Directional travel (outbound and inbound) was modelled as a biased correlated random walk, incorporating repulsion from the colony during outbound movement and attraction towards the colony during inbound movement. In contrast, search and foraging behaviours were modelled as correlated random walks. Step lengths and turning angles were modelled using gamma and von Mises distributions, respectively, with state-specific parameter values.

Behavioural state (No.) μ σ θ κ
Outbound (1) 571.8 195.4 \(\alpha_t + \pi\) 14
Search (2) 350.0 190.0 0 5
Foraging (3) 108.0 131.2 \(\pi\) 0.84
Inbound (4) 571.8 195.4 \(\alpha_t\) 14

New locations were then calculated from sampled step lengths and turning angles using great-circle geometry implemented in the R package geosphere (Hijmans 2026).

#choose model parameters
mu <- c(571.8,350, 108, 571.8) # step mean - in meters for 1 min interval (original values for kittiwakes in 5 min)
sigma <- c(195.4, 190 ,131.2, 195.4) # step SD
am <- c(-0.00647766,3.14001249)
kappa <- c(14, 5, 0.84, 14) # angle concentration - (Larger values lead to lower variance of the von Mises distribution, and hence higher persistence in direction.)

Step selection

At each time step, we drew 10,000 candidate steps from the movement kernel associated with the current behavioural state. We calculated a relative selection weight for each candidate by exponentiating the linear predictor and then selected one end point probabilistically as the animal’s next location. Each candidate step \(i\) was evaluated using a log‑linear model of the form:

\[ \log(w_i) = \beta_1 \times L_i + \beta_2 \times \log L_i + \beta_3 \times h_i \]

\[ w_i = \exp(\eta_i) \quad \mathrm{Pr}(i) = \frac{\exp(\eta_i)}{\sum_j \exp(\eta_j)} \]

where \(L_i\) is the step length, \(h_i\) is the habitat covariate and \(\beta_1, \beta_2\) and \(\beta_3\) are the corresponding selection coefficients.

The coefficients for step length and log step length were derived analytically from the gamma distribution parameters (Klappstein, Thomas, and Michelot 2023; Avgar et al. 2016) while the habitat coefficient \(\beta_3\) was varied systematically across simulations with values ranging from 0.5 to 1.5 (in 0.25 increments) as well as 0 (no habitat selection) to assess how models performed under different selection strengths.

Individuals were initialised at the colony at time \(t = 0\), with the first step generated by drawing a direction \(\phi_0 \sim \mathrm{von\ Mises}(0, 1)\) and a step length \(L_0 \sim \mathrm{Gamma}(k, \theta)\). The latter was parameterised in terms of the state-specific mean \(\mu\) and standard deviation \(\sigma\) for outbound travel, such that \(k = \mu^2 / \sigma^2\) and \(\theta = \sigma^2 / \mu\). The resulting displacement defines the first location, and the initial turning angle was set as the direction from this location back towards the colony.

### SSF parameters
# Equations exist to determine the scale and shape parameters of a gamma distribution
# based on its mean and sd
scale <- sigma^2 / mu
shape <- mu^2 / sigma^2

# The equation presented in Appendix A gives the relationship between the
# gamma distribution parameters and the selection coefficients (referred to as β1 and β2 for L and log(L) respectively).
# β1 −1/scale and β2 = shape − 2.
beta1 <- -1 / scale
beta2 <- shape - 2

cov_beta <- 0 #0.5, 0.75, 1, 1.25, 1.5, 1.75,2

ssf_formula <- as.formula(" ~ cov")

n_zeros <- 10000

Simulate animal tracks

We simulate a full population of 1,000 individuals and generated 25 complete tracks per individual in each simulation.

nbTracks<-25
nbID<-1000 #corresponds to all population

simulate_one_ID <- function(id,
                            nbTracks,
                            centre,
                            mu, sigma, kappa, am,
                            ssf_betas, ssf_formula,
                            cov_raster,
                            beta,
                            nbStates,
                            n_zeros) {
  
  # precompute gamma params
  gamma_shape <- mu^2 / sigma^2
  gamma_scale <- sigma^2 / mu
  
  id_tracks <- vector("list", nbTracks)
  
  for (tr in seq_len(nbTracks)) {
    
    track_list <- vector("list", 5000)  # upper bound, trimmed later
    k <- 1
    
    state <- 1
    time <- 0
    
    phi <- CircStats::rvm(1, 0, 1)
    len <- rgamma(1, shape = gamma_shape[1], scale = gamma_scale[1])
    newLoc <- centre + c(len * cos(phi), len * sin(phi))
    angle <- atan2(centre[2] - newLoc[2], centre[1] - newLoc[1])
    
    track_list[[k]] <- data.frame(
      Track_id = tr, x = centre[1], y = centre[2],
      step = NA, angle = NA, bearing = NA,
      state = 1, time = 0, ID = id
    )
    k <- k + 1
    
    track_list[[k]] <- data.frame(
      Track_id = tr, x = newLoc[1], y = newLoc[2],
      step = len, angle = angle, bearing = phi,
      state = 1, time = 1, ID = id
    )
    k <- k + 1
    time <- 1
    
    while (!(state == 4 && sqrt(sum((newLoc - centre)^2)) < 1000)) {
      
      time <- time + 1
      phi0 <- phi
      
      dist <- sqrt(sum((newLoc - centre)^2))
      cov  <- raster::extract(cov_raster, matrix(newLoc, nrow = 1))
      
      trMat <- trMatrix_rcpp(beta, dist, time, cov)
      state <- sample(seq_len(nbStates), 1, prob = trMat[state, ])
      
      if (state %in% c(1, 4)) {
        angle <- atan2(centre[2] - newLoc[2], centre[1] - newLoc[1])
        phi <- CircStats::rvm(
          n_zeros,
          mean = angle + ifelse(state == 1, pi, 0),
          k = kappa[state]
        )
      } else {
        angle <- CircStats::rvm(n_zeros, am[state - 1], kappa[state])
        phi <- phi + angle
      }
      
      len <- rgamma(
        n_zeros,
        shape = gamma_shape[state],
        scale = gamma_scale[state]
      )
      
      dx <- len * cos(phi)
      dy <- len * sin(phi)
      endpoints <- cbind(newLoc[1] + dx, newLoc[2] + dy)
      
      cov_vals <- raster::extract(cov_raster, endpoints)
      valid <- !is.na(cov_vals)
      
      if (!any(valid)) next
      
      endpoints <- endpoints[valid, , drop = FALSE]
      cov_vals <- cov_vals[valid]
      len      <- len[valid]
      phi      <- phi[valid]
      
      # SSF (~ cov only) — FIXED
      ssf_df <- data.frame(cov = cov_vals)
      
      mod_matrix <- model.matrix(ssf_formula, ssf_df)
      mod_matrix <- mod_matrix[, !colnames(mod_matrix) == "(Intercept)", drop = FALSE]
      
      linear_pred <- mod_matrix %*% ssf_betas[4, state]
      ssf_prob <- as.numeric(exp(linear_pred))
      ssf_prob <- ssf_prob / sum(ssf_prob)
      
      # boundary filter
      inside <- endpoints[,1] >= 100 & endpoints[,1] <= 49900 &
        endpoints[,2] >= 100 & endpoints[,2] <= 49900
      
      ssf_prob[!inside] <- 0
      if (sum(ssf_prob) == 0) next
      ssf_prob <- ssf_prob / sum(ssf_prob)
      
      chosen <- sample.int(length(ssf_prob), 1, prob = ssf_prob)
      
      newLoc <- endpoints[chosen, ]
      phi    <- phi[chosen]
      len    <- len[chosen]
      
      track_list[[k]] <- data.frame(
        Track_id = tr,
        x = newLoc[1], y = newLoc[2],
        step = len,
        angle = phi - phi0,
        bearing = phi,
        state = state,
        time = time,
        ID = id
      )
      k <- k + 1
    }
    
    id_tracks[[tr]] <- do.call(
      rbind,
      track_list[seq_len(k - 1)]
    )
  }
  
  do.call(rbind, id_tracks)
}

library(doParallel)
library(foreach)

ncores <- parallel::detectCores() - 1
cl <- makeCluster(ncores)
registerDoParallel(cl)

for (c in seq_along(cov_beta)) {
  
  message("Running cov_beta = ", cov_beta[c])
  
  ## scenario-specific parameters
  cov_gamma  <- c(1, 1, 1, 1)
  cov_coefs  <- cov_beta[c] * cov_gamma
  
  ssf_betas <- matrix(
    c(beta1, beta2, kappa, cov_coefs),
    ncol = nbStates,
    byrow = TRUE
  )
  
  ## parallel over individuals
  data <- foreach(
    id = seq_len(nbID),
    .combine  = rbind,
    .packages = c("CircStats", "raster"),
    .export   = c(
      "simulate_one_ID",
      "trMatrix_rcpp",
      "cov_raster",
      "ssf_betas",
      "ssf_formula",
      "mu", "sigma", "kappa", "am",
      "beta",
      "centre",
      "nbTracks",
      "nbStates",
      "n_zeros")
  ) %dopar% {
    
    simulate_one_ID(
      id         = id,
      nbTracks   = nbTracks,
      centre     = centre,
      mu         = mu,
      sigma      = sigma,
      kappa      = kappa,
      am         = am,
      ssf_betas  = ssf_betas,
      ssf_formula = ssf_formula,        
      cov_raster = cov_raster,
      beta       = beta,
      nbStates   = nbStates,
      n_zeros    = n_zeros
    )
  }
  
  ## save per-scenario
  data_sf <- sf::st_as_sf(data, coords = c("x", "y"), crs = 32630)
  
  save(
    data_sf,
    file = paste0("CPF_cov", cov_beta[c], "_", nbID, "ind.RData")
  )
}

stopCluster(cl)

# plot tracks
ggplot() +
  geom_tile(data=df, aes(x,y,fill=factor(value)), alpha=0.1)+
  geom_path(data= data, aes(x, y, col= factor(state), group = ID))+
  scale_fill_manual(values = c("lightgrey", "darkgreen"), name = "Habitat", labels=c("Unsuitable", "Suitable"))+
   scale_colour_manual(values = c("black", "darkorange", "darkred", "grey"), name = "Behaviours", labels=c("Outbound", "Search", "Foraging", "Inbound"))+
  theme_bw() +
  coord_fixed()

References

Avgar, Tal, Jonathan R Potts, Mark A Lewis, and Mark S Boyce. 2016. “Integrated Step Selection Analysis: Bridging the Gap Between Resource Selection and Animal Movement.” Methods in Ecology and Evolution 7 (5): 619–30. https://doi.org/10.1111/2041-210x.12528.
Hijmans, Robert J. 2026. “Geosphere: Spherical Trigonometry.”
Klappstein, N J, L Thomas, and T Michelot. 2023. “Flexible Hidden Markov Models for Behaviour-Dependent Habitat Selection.” Movement Ecology 11 (1): 30. https://doi.org/10.1186/s40462-023-00392-3.
Michelot, Théo, Roland Langrock, Sophie Bestley, Ian D Jonsen, Theoni Photopoulou, and Toby A Patterson. 2017. “Estimation and Simulation of Foraging Trips in Land-Based Marine Predators.” Ecology 98 (7): 1932–44. https://doi.org/10.1002/ecy.1880.