library(CircStats) # for von Mises distribution
library(Rcpp) # likelihood in C++
library(RcppArmadillo) # likelihood in C++
library(sp) # for great circle distances
library(geosphere) # for great circle bearings
library(maps) # for map plots
library(raster)
library(spatstat)
library(sf)
library(geoR)
library(ggplot2)
library(doParallel)
library(foreach)
set.seed(1234)
The simulation domain was defined as a 2,500 km\(^2\) area (50 \(\times\) 50 km), discretised into a regular grid with a spatial resolution of 100 m. Habitat suitability was represented as a binary chequerboard pattern consisting of alternating 2.5 \(\times\) 2.5 km cells, classified as suitable (value = 1) and unsuitable (value = 0).
# Define grid in meters: 100 m resolution over 50 km
x_grid <- seq(0, by = 100, length.out = 500) # 0 to 50,000 meters
y_grid <- seq(0, by = 100, length.out = 500)
# Create coordinate grid
coord_df <- expand.grid(x = x_grid, y = y_grid)
# Checkerboard pattern with 2.5 km squares
x_band <- floor(coord_df$x / 2500) %% 2
y_band <- floor(coord_df$y / 2500) %% 2
df <- data.frame(
x = coord_df$x,
y = coord_df$y,
value = (x_band == y_band) + 0
)
df$value <- (df$value - min(df$value)) / (max(df$value) - min(df$value))
cov_raster <- rasterFromXYZ(df)
crs(cov_raster) <- "+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs"
# Save cov_raster
save(cov_raster, file = "cov_raster.RData")
ggplot(df, aes(x = x, y = y, fill = value)) +
geom_tile() +
labs(fill = "Value") +
xlab("X coordinates")+
ylab("Y coordinates") +
scale_fill_viridis_c()+
coord_fixed(expand = FALSE) +
theme_bw()
centre <- c(24950, 24950)
ggplot(df, aes(x = x, y = y, fill = value)) +
geom_raster() +
geom_point(aes(x=centre[1], y=centre[2]), col="white",size=3)+
labs(fill = "Value") +
xlab("X coordinates")+
ylab("Y coordinates") +
scale_fill_viridis_c()+
theme_bw()
Following Michelot et al. (2017), each central place foraging (CPF) movement path was represented as a sequence of four behavioural states:
Transitions between behavioural states were governed by a probabilistic matrix informed by Euclidean distance to the colony, time since departure from the nest, and habitat suitability. Transition probabilities were computed using a multinomial logit formulation, with parameters chosen to reflect biologically realistic behaviour—for example, an increased probability of switching to foraging in suitable habitat (Michelot et al. 2017).
\[ T = \begin{pmatrix} 1 - \gamma_{1,2} & \gamma_{1,2} & 0 & 0 \\ 0 & 1 - \gamma_{2,3} - \gamma_{2,4} & \gamma_{2,3} & \gamma_{2,4} \\ 0 & 0 & 1 - \gamma_{3,4} & \gamma_{3,4} \\ 0 & 0 & 0 & 1 \end{pmatrix} \]
where \(\gamma_{A, B}\) represents the probability of switching from behavioural state A to behavioural state B and behaviours are numbered according to their canonical progression during a single foraging trip (i.e., outbound (1) \(\rightarrow\) search (2) \(\rightarrow\) foraging (3) \(\rightarrow\) (4) inbound). Each row and column in \(T\) corresponds to a separate behaviour: values along the diagonal give the probabilities of remaining in the same state, whereas off-diagonal values denote probabilities of switching from the row state to the column state. For instance, \(\gamma_{1,2}\) corresponds to the probability of transitioning from outbound travel to search.
We defined:
\[\gamma_{1,2} = \operatorname{logit}^{-1}(\eta_{1,2})$, $\gamma_{2,3} = e^{\eta_{2,3}} / D\]
\[\gamma_{2,4} = e^{\eta_{2,4}} / D\]
\[\gamma_{3,4} = \operatorname{logit}^{-1}(\eta_{3,4})\]
with \(D = 1 + e^{\eta_{2,3}} + e^{\eta_{2,4}}\).
The linear predictors were specified as:
This parameterisation increases the probability of switching from outbound travel (1) or search (2) to active foraging (3) in suitable habitat and increases the probability of inbound travel (4) as time since departure grows, thereby capturing key features of CPF behaviour (Michelot et al. 2017).
addLogit_rcpp <- function(x1, x2) {
p1 <- exp(x1)
p2 <- exp(x2)
res <- c(p1 / (1 + p1 + p2), p2 / (1 + p1 + p2), 1 - (p1 / (1 + p1 + p2)) - (p2 / (1 + p1 + p2)))
return(res)
}
trMatrix_rcpp <- function(beta, dist, time, cov) {
trMat <- matrix(ncol=4, nrow=4)
trMat[1, 1] <- 1 - plogis(beta[1] + beta[2] * dist + beta[3] * cov) # prob of staying in stage 1
trMat[1, 2] <- plogis(beta[1] + beta[2] * dist + beta[3] * cov)# prob of going from stage 1 to 2 as a function of cov
trMat[1, 3] <- 0 # prob of going from stage 1 to 3
trMat[1, 4] <- 0 # prob of going from stage 1 to 4
foo23 <- beta[4] + beta[5] * cov # prob of going from stage 2 to 3 as a function of cov
foo24 <- beta[6] + beta[7] * time # prob of going from stage 2 to 4 as a function of time
r2 <- addLogit_rcpp(foo23, foo24)
trMat[2, 1] <- 0 # prob of going from stage 2 to 1
trMat[2, 2] <- r2[3] # prob of staying stage 2
trMat[2, 3] <- r2[1] # prob of going from stage 2 to 3
trMat[2, 4] <- r2[2] # prob of going from stage 2 to 4
trMat[3, 1] <- 0 # prob of going from stage 3 to 1
trMat[3, 2] <- 0 # prob of going from stage 3 to 2
trMat[3, 3] <- 1 - plogis(beta[8] + beta[9] * time) # prob of staying in stage 3
trMat[3, 4] <- plogis(beta[8] + beta[9] * time) # prob of going from stage 3 to 4 as a function of time
trMat[4, 1] <- 0 #prob of staying in stage 4
trMat[4, 2] <- 0 #prob of staying in stage 4
trMat[4, 3] <- 0 #prob of staying in stage 4
trMat[4, 4] <- 1 #prob of staying in stage 4
return(trMat)
}
#linear predictors
beta <- c(-3, 0.0005, 1.5, # stage 1 → 2
-5, 3, # stage 2 → 3
-6, 0.05, # stage 2 → 4
-5, 0.015) # stage 3 → 4
#Number of states
nbStates<-4
Directional travel (outbound and inbound) was modelled as a biased correlated random walk, incorporating repulsion from the colony during outbound movement and attraction towards the colony during inbound movement. In contrast, search and foraging behaviours were modelled as correlated random walks. Step lengths and turning angles were modelled using gamma and von Mises distributions, respectively, with state-specific parameter values.
| Behavioural state (No.) | μ | σ | θ | κ |
|---|---|---|---|---|
| Outbound (1) | 571.8 | 195.4 | \(\alpha_t + \pi\) | 14 |
| Search (2) | 350.0 | 190.0 | 0 | 5 |
| Foraging (3) | 108.0 | 131.2 | \(\pi\) | 0.84 |
| Inbound (4) | 571.8 | 195.4 | \(\alpha_t\) | 14 |
New locations were then calculated from sampled step lengths and
turning angles using great-circle geometry implemented in the R package
geosphere (Hijmans 2026).
#choose model parameters
mu <- c(571.8,350, 108, 571.8) # step mean - in meters for 1 min interval (original values for kittiwakes in 5 min)
sigma <- c(195.4, 190 ,131.2, 195.4) # step SD
am <- c(-0.00647766,3.14001249)
kappa <- c(14, 5, 0.84, 14) # angle concentration - (Larger values lead to lower variance of the von Mises distribution, and hence higher persistence in direction.)
At each time step, we drew 10,000 candidate steps from the movement kernel associated with the current behavioural state. We calculated a relative selection weight for each candidate by exponentiating the linear predictor and then selected one end point probabilistically as the animal’s next location. Each candidate step \(i\) was evaluated using a log‑linear model of the form:
\[ \log(w_i) = \beta_1 \times L_i + \beta_2 \times \log L_i + \beta_3 \times h_i \]
\[ w_i = \exp(\eta_i) \quad \mathrm{Pr}(i) = \frac{\exp(\eta_i)}{\sum_j \exp(\eta_j)} \]
where \(L_i\) is the step length, \(h_i\) is the habitat covariate and \(\beta_1, \beta_2\) and \(\beta_3\) are the corresponding selection coefficients.
The coefficients for step length and log step length were derived analytically from the gamma distribution parameters (Klappstein, Thomas, and Michelot 2023; Avgar et al. 2016) while the habitat coefficient \(\beta_3\) was varied systematically across simulations with values ranging from 0.5 to 1.5 (in 0.25 increments) as well as 0 (no habitat selection) to assess how models performed under different selection strengths.
Individuals were initialised at the colony at time \(t = 0\), with the first step generated by drawing a direction \(\phi_0 \sim \mathrm{von\ Mises}(0, 1)\) and a step length \(L_0 \sim \mathrm{Gamma}(k, \theta)\). The latter was parameterised in terms of the state-specific mean \(\mu\) and standard deviation \(\sigma\) for outbound travel, such that \(k = \mu^2 / \sigma^2\) and \(\theta = \sigma^2 / \mu\). The resulting displacement defines the first location, and the initial turning angle was set as the direction from this location back towards the colony.
### SSF parameters
# Equations exist to determine the scale and shape parameters of a gamma distribution
# based on its mean and sd
scale <- sigma^2 / mu
shape <- mu^2 / sigma^2
# The equation presented in Appendix A gives the relationship between the
# gamma distribution parameters and the selection coefficients (referred to as β1 and β2 for L and log(L) respectively).
# β1 −1/scale and β2 = shape − 2.
beta1 <- -1 / scale
beta2 <- shape - 2
cov_beta <- 0 #0.5, 0.75, 1, 1.25, 1.5, 1.75,2
ssf_formula <- as.formula(" ~ cov")
n_zeros <- 10000
We simulate a full population of 1,000 individuals and generated 25 complete tracks per individual in each simulation.
nbTracks<-25
nbID<-1000 #corresponds to all population
simulate_one_ID <- function(id,
nbTracks,
centre,
mu, sigma, kappa, am,
ssf_betas, ssf_formula,
cov_raster,
beta,
nbStates,
n_zeros) {
# precompute gamma params
gamma_shape <- mu^2 / sigma^2
gamma_scale <- sigma^2 / mu
id_tracks <- vector("list", nbTracks)
for (tr in seq_len(nbTracks)) {
track_list <- vector("list", 5000) # upper bound, trimmed later
k <- 1
state <- 1
time <- 0
phi <- CircStats::rvm(1, 0, 1)
len <- rgamma(1, shape = gamma_shape[1], scale = gamma_scale[1])
newLoc <- centre + c(len * cos(phi), len * sin(phi))
angle <- atan2(centre[2] - newLoc[2], centre[1] - newLoc[1])
track_list[[k]] <- data.frame(
Track_id = tr, x = centre[1], y = centre[2],
step = NA, angle = NA, bearing = NA,
state = 1, time = 0, ID = id
)
k <- k + 1
track_list[[k]] <- data.frame(
Track_id = tr, x = newLoc[1], y = newLoc[2],
step = len, angle = angle, bearing = phi,
state = 1, time = 1, ID = id
)
k <- k + 1
time <- 1
while (!(state == 4 && sqrt(sum((newLoc - centre)^2)) < 1000)) {
time <- time + 1
phi0 <- phi
dist <- sqrt(sum((newLoc - centre)^2))
cov <- raster::extract(cov_raster, matrix(newLoc, nrow = 1))
trMat <- trMatrix_rcpp(beta, dist, time, cov)
state <- sample(seq_len(nbStates), 1, prob = trMat[state, ])
if (state %in% c(1, 4)) {
angle <- atan2(centre[2] - newLoc[2], centre[1] - newLoc[1])
phi <- CircStats::rvm(
n_zeros,
mean = angle + ifelse(state == 1, pi, 0),
k = kappa[state]
)
} else {
angle <- CircStats::rvm(n_zeros, am[state - 1], kappa[state])
phi <- phi + angle
}
len <- rgamma(
n_zeros,
shape = gamma_shape[state],
scale = gamma_scale[state]
)
dx <- len * cos(phi)
dy <- len * sin(phi)
endpoints <- cbind(newLoc[1] + dx, newLoc[2] + dy)
cov_vals <- raster::extract(cov_raster, endpoints)
valid <- !is.na(cov_vals)
if (!any(valid)) next
endpoints <- endpoints[valid, , drop = FALSE]
cov_vals <- cov_vals[valid]
len <- len[valid]
phi <- phi[valid]
# SSF (~ cov only) — FIXED
ssf_df <- data.frame(cov = cov_vals)
mod_matrix <- model.matrix(ssf_formula, ssf_df)
mod_matrix <- mod_matrix[, !colnames(mod_matrix) == "(Intercept)", drop = FALSE]
linear_pred <- mod_matrix %*% ssf_betas[4, state]
ssf_prob <- as.numeric(exp(linear_pred))
ssf_prob <- ssf_prob / sum(ssf_prob)
# boundary filter
inside <- endpoints[,1] >= 100 & endpoints[,1] <= 49900 &
endpoints[,2] >= 100 & endpoints[,2] <= 49900
ssf_prob[!inside] <- 0
if (sum(ssf_prob) == 0) next
ssf_prob <- ssf_prob / sum(ssf_prob)
chosen <- sample.int(length(ssf_prob), 1, prob = ssf_prob)
newLoc <- endpoints[chosen, ]
phi <- phi[chosen]
len <- len[chosen]
track_list[[k]] <- data.frame(
Track_id = tr,
x = newLoc[1], y = newLoc[2],
step = len,
angle = phi - phi0,
bearing = phi,
state = state,
time = time,
ID = id
)
k <- k + 1
}
id_tracks[[tr]] <- do.call(
rbind,
track_list[seq_len(k - 1)]
)
}
do.call(rbind, id_tracks)
}
library(doParallel)
library(foreach)
ncores <- parallel::detectCores() - 1
cl <- makeCluster(ncores)
registerDoParallel(cl)
for (c in seq_along(cov_beta)) {
message("Running cov_beta = ", cov_beta[c])
## scenario-specific parameters
cov_gamma <- c(1, 1, 1, 1)
cov_coefs <- cov_beta[c] * cov_gamma
ssf_betas <- matrix(
c(beta1, beta2, kappa, cov_coefs),
ncol = nbStates,
byrow = TRUE
)
## parallel over individuals
data <- foreach(
id = seq_len(nbID),
.combine = rbind,
.packages = c("CircStats", "raster"),
.export = c(
"simulate_one_ID",
"trMatrix_rcpp",
"cov_raster",
"ssf_betas",
"ssf_formula",
"mu", "sigma", "kappa", "am",
"beta",
"centre",
"nbTracks",
"nbStates",
"n_zeros")
) %dopar% {
simulate_one_ID(
id = id,
nbTracks = nbTracks,
centre = centre,
mu = mu,
sigma = sigma,
kappa = kappa,
am = am,
ssf_betas = ssf_betas,
ssf_formula = ssf_formula,
cov_raster = cov_raster,
beta = beta,
nbStates = nbStates,
n_zeros = n_zeros
)
}
## save per-scenario
data_sf <- sf::st_as_sf(data, coords = c("x", "y"), crs = 32630)
save(
data_sf,
file = paste0("CPF_cov", cov_beta[c], "_", nbID, "ind.RData")
)
}
stopCluster(cl)
# plot tracks
ggplot() +
geom_tile(data=df, aes(x,y,fill=factor(value)), alpha=0.1)+
geom_path(data= data, aes(x, y, col= factor(state), group = ID))+
scale_fill_manual(values = c("lightgrey", "darkgreen"), name = "Habitat", labels=c("Unsuitable", "Suitable"))+
scale_colour_manual(values = c("black", "darkorange", "darkred", "grey"), name = "Behaviours", labels=c("Outbound", "Search", "Foraging", "Inbound"))+
theme_bw() +
coord_fixed()