In this file, all code for producing the results of the paper is documented. The code can be shown by clicking the Code buttons, and hidden again for better readability by clicking the Hide buttons.
The code is written in R:
R Core Team (2022). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.
The following code chunk loads the required R packages.
library(ggplot2) ## For nice plotting
library(stats4) ## For maximum likelihood facilities (mle())
library(writexl) ## For writing data frames into Excel files
The following code chunk defines functions for likelihood calculations and simulations.
## Function returning 2 x the negative log-likelihood of a trace, up to a constant.
## The trace is assumed to be an Ornstein-Uhlenbeck process with constant mean mu
## and rate parameter alpha.
loglikOU.fun = function(pars, data, delta){
  ## pars = (alpha, mu, sigma2) are the parameters to be estimated in the model:
  ## dXt = - alpha * ( Xt - mu ) dt + sigma * dWt
  ## data is a trace
  ## delta is the time step between observations
  alpha0 = max(pars[1], 0.001)  # Ensuring alpha is positive
  mu0 = pars[2]                 # Mean
  sigma2 = max(pars[3], 1e-6)   # Infinitesimal variance, must be strictly positive
  n = length(data)
  Xupp = data[2:n]
  Xlow = data[1:(n-1)]
  gamma2 = sigma2/(2*alpha0)    # Asymptotic (stationary) variance
  rho0 = exp(-alpha0*delta)     # Autocorrelation over one time step
  m.part = Xupp - Xlow*rho0 - mu0*(1-rho0)  # Observation minus mean of transition distribution
  v.part = gamma2*(1-rho0^2)                # Variance of transition distribution
  loglik = - n*log(v.part) - sum(m.part^2/v.part)
  -loglik
}
## Function returning the estimated OU parameters.
## Input is a data trace, the time step delta between observations, and
## optionally starting values for the optimization.
estimate.OU = function(data, delta, initial.values = NULL){
  if(is.null(initial.values)){
    n = length(data)
    Xupp = data[2:n]
    Xlow = data[1:(n-1)]
    mu = mean(data)  ## Starting value for mu0
    alpha = -log(sum((Xupp-mu)*(Xlow-mu))/sum((Xlow-mu)^2))/delta  ## Starting value for alpha0
    s2 = mean(diff(data)^2)/delta  ## Quadratic variation, starting value for sigma^2
    par.init = c(alpha, mu, s2)    ## Collect starting values
  }
  if(!is.null(initial.values)) par.init = initial.values
  minuslogl = function(alpha, mu, sigma2){
    logLik = tryCatch(loglikOU.fun(pars = c(alpha, mu, sigma2), data = data, delta = delta),
                      error = function(e) NA_real_)  ## Return NA on numerical failure
    if(is.na(logLik)){
      return(10000)
    } else {
      return(logLik)
    }
  }
  temp = stats4::mle(minuslogl = minuslogl,
                     start = list(alpha = par.init[1], mu = par.init[2], sigma2 = par.init[3]))
  return(temp)
}
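## Quick sanity check of estimate.OU (an illustrative sketch, not part of the
## original analysis): simulate an exact OU trace with known parameters and
## verify that the estimates land close to the truth.
set.seed(1)
alpha.true = 3; mu.true = 0.25; sigma2.true = 0.3; delta.test = 1/12
rho.test = exp(-alpha.true*delta.test)
x.test = numeric(5000); x.test[1] = mu.true
for(i in 2:5000){  ## Exact OU transition: X_{t+delta} | X_t is Gaussian
  x.test[i] = mu.true + (x.test[i-1] - mu.true)*rho.test +
    rnorm(1, 0, sqrt(sigma2.true/(2*alpha.true)*(1 - rho.test^2)))
}
round(coef(estimate.OU(data = x.test, delta = delta.test)), 2)  ## Should be near (3, 0.25, 0.3)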
## Function returning 2 x the negative log-likelihood of a trace, up to a constant,
## from the model
## dXt = - (a*(Xt - m)^2 + lambda) dt + sigma * dWt
## assuming alpha0, mu0 and sigma20 known.
## Strang splitting estimator based on a Taylor expansion around mu(lambda).
loglik.fun = function(pars, data, delta, alpha0, mu0, sigma20, pen = 0){
  ## pars are the parameters to be estimated, tau and a
  ## data is a trace under linearly changing lambda
  ## delta is the time step between observations
  ## alpha0, mu0, sigma20 are parameters already estimated in the OU process
  ## from the stationary part
  tau = pars[1]          # Ramping time
  a = max(0.1, pars[2])  # Factor in front of (x-m)^2 in drift; positive
  m = mu0 - alpha0/(2*a)      # Constant mean shift
  lambda0 = -alpha0^2/(4*a)   # Stationary level of control parameter
  sigma2 = sigma20            # Infinitesimal variance
  n = length(data)
  Xupp = data[2:n]
  Xlow = data[1:(n-1)]
  time = delta*(1:(n-1))
  lam.seq = lambda0*(1 - time/tau)   # Time-varying lambda
  alpha.seq = 2*sqrt(-a*lam.seq)     # Time-varying rate parameter
  gamma2.seq = sigma2/(2*alpha.seq)  # Time-varying asymptotic variance
  rho.seq = exp(-alpha.seq*delta)    # Time-varying autocorrelation
  mu.seq = m + sqrt(-lam.seq/a)      # Time-varying mean
  ## Calculating the Strang splitting scheme pseudo-likelihood
  fh.half.tmp = a*delta*(Xlow - mu.seq)/2
  fh.half = (mu.seq*fh.half.tmp + Xlow)/(fh.half.tmp + 1)
  fh.half.inv = (mu.seq*fh.half.tmp - Xupp)/(fh.half.tmp - 1)
  mu.h = fh.half*rho.seq + mu.seq*(1-rho.seq)
  m.part = fh.half.inv - mu.h
  var.part = gamma2.seq*(1-rho.seq^2)
  det.Dfh.half.inv = 1/(a*delta*(Xupp - mu.seq)/2 - 1)^2
  loglik = - sum(log(var.part)) - sum(m.part^2/var.part) +
    2*sum(log(det.Dfh.half.inv)) - pen*n*(1/a - 1)*(a < 1)
  return(-loglik)
}
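## Sanity check of the reparameterization above (an illustrative sketch, not
## part of the original analysis): with m = mu0 - alpha0/(2a) and
## lambda0 = -alpha0^2/(4a), the stable fixed point of the drift
## -(a*(x - m)^2 + lambda0) sits at mu0, and the linearized decay rate there
## equals alpha0, matching the OU approximation of the stationary part.
check.repar = function(alpha0, mu0, a){
  m = mu0 - alpha0/(2*a)
  lambda0 = -alpha0^2/(4*a)
  x.stable = m + sqrt(-lambda0/a)    ## Stable fixed point of the drift
  drift.deriv = -2*a*(x.stable - m)  ## Derivative of the drift at the fixed point
  c(x.stable = x.stable, decay.rate = -drift.deriv)
}
check.repar(alpha0 = 3, mu0 = 0.25, a = 0.9)  ## Should return (0.25, 3)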
## Function returning the estimated ramping parameters tau and a.
## Input is a data trace from the ramping period, the time step delta between
## observations, starting values, the baseline OU parameters, and the penalty.
estimate.tipping = function(data, delta, initial.values = c(100, 1),
                            alpha0, mu0, sigma20, pen = 0){
  par.init = initial.values
  minuslogl = function(pars){
    logLik = tryCatch(loglik.fun(pars = pars, data = data, delta = delta,
                                 alpha0 = alpha0, mu0 = mu0, sigma20 = sigma20, pen = pen),
                      error = function(e) NA_real_)  ## Return NA on numerical failure
    if(is.na(logLik)){
      return(10000)
    } else {
      return(logLik)
    }
  }
  temp = optim(par = c(tau = par.init[1], a = par.init[2]),
               fn = minuslogl, method = "Nelder-Mead", hessian = FALSE)
  return(temp)
}
## Function returning a trajectory of X(t) up to the crossing time of a barrier,
## as a function of the initial condition X0, the size of the noise sigma,
## the integration time step dt, and the maximum number of steps Ymax,
## with time-varying lambda:
X.traj <- function(sigma = 0.1, lambda0 = -2, tau = 1000, m = 0, a = 1,
                   T0 = 0, X0 = sqrt(2), dt = 0.1, Ymax = 1000000){
  ## T0: Time before ramping starts
  ## Ymax: Max number of simulated points, if tipping does not happen
  Y = 0             ## Counting integration steps up to tipping
  xbarrier = m - 2  ## Barrier below the unstable fixed point at start
  Xtraj = X0
  X = X0
  ## Simulation during stationary period, constant lambda
  while(X > xbarrier & Y < T0/dt){
    X = S.onestep(sigma = sigma, lambda = lambda0,
                  m = m, a = a, X0 = X, dt = dt)
    Xtraj = c(Xtraj, X)
    Y = Y + 1
  }
  ## Simulation after lambda starts increasing
  while(X > xbarrier & Y < Ymax){
    time = dt * (Y - T0/dt)          ## Time after lambda starts increasing
    lambda = lambda0*(1 - time/tau)  ## Linearly ramped control parameter
    X = S.onestep(sigma = sigma, lambda = lambda,
                  m = m, a = a, X0 = X, dt = dt)
    Xtraj = c(Xtraj, X)
    Y = Y + 1
  }
  Y = Y*dt
  return(list(FPT = Y, X = Xtraj))
}
## Function simulating one time step of the simple model
## using the Euler-Maruyama scheme
S.onestep <- function(sigma = 0.1, lambda = 0, m = 0, a = 1, X0 = 1, dt = 0.1){
  dWt = rnorm(1, 0, 1)  ## Draws a standard normal increment
  Y = X0 - a*(X0-m)^2*dt - lambda*dt + sqrt(dt)*sigma*dWt
  return(Y)
}
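## Minimal usage sketch of the simulators above (not part of the original
## analysis): one trajectory with lambda ramped from -2 towards 0 over tau = 200,
## returning the first-passage time (FPT) of the barrier and the path.
demo.traj = X.traj(sigma = 0.3, lambda0 = -2, tau = 200, m = 0, a = 1,
                   T0 = 50, X0 = sqrt(2), dt = 0.01, Ymax = 10^5)
demo.traj$FPT  ## Time until tipping (in model time units)
## plot(seq(0, by = 0.01, length.out = length(demo.traj$X)), demo.traj$X,
##      type = "l", xlab = "Time", ylab = "X")  ## Uncomment to inspect the path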
The following code chunk loads the AMOC fingerprint data and estimates parameters. Estimates are reported.
###############################
### Estimation on AMOC data ###
###############################
## Read data
AMOC.data = read.table("AMOCdata.txt", header = TRUE)
## time: calendar time in years
## AMOC0: SST (sea surface temperature) in the subpolar gyre, with the monthly mean subtracted
## AMOC1: AMOC0 with the global mean SST subtracted
## AMOC2: AMOC0 with two times the global mean SST subtracted (Arctic amplification)
## GM: global mean SST
## Adding a fingerprint with 3 times the global mean SST subtracted, for robustness analysis
AMOC.data$AMOC3 = AMOC.data$AMOC0 - 3 * AMOC.data$GM
## Estimating Ornstein-Uhlenbeck parameters from data up to year 1924
## before linear ramping of control parameter lambda starts
## Baseline data: Subset of data for the years 1870-1924
t0 = 1924
data.0 = AMOC.data[AMOC.data$time <= t0, "AMOC2"]
## Estimate parameters
temp = estimate.OU(data = data.0, delta = 1/12)
alpha0 = unname(coef(temp)["alpha"])
mu0 = unname(coef(temp)["mu"])
sigma2 = unname(coef(temp)["sigma2"])
## Ramping data: Subset of data for the years 1925-2020,
## with 2 times the global mean SST subtracted
data.2 = AMOC.data[AMOC.data$time > t0, "AMOC2"]
## Estimate ramping parameters
temp = estimate.tipping(data = data.2, delta = 1/12, initial.values = c(100,1),
                        alpha0 = alpha0, mu0 = mu0, sigma20 = sigma2,
                        pen = 0.004)
tau = unname(temp$par[1])
a = unname(temp$par[2])
m = mu0 - alpha0/(2 * a)
lambda0 = -alpha0^2/(4 * a)
tc = tau + t0
## Collect estimates
round(c(t0 = t0, alpha0 = alpha0, mu0 = mu0, sigma2 = sigma2, tau = tau,
        a = a, m = m, lambda0 = lambda0, tc = tc), 2)
## t0 alpha0 mu0 sigma2 tau a m lambda0 tc
## 1924.00 3.06 0.25 0.30 132.52 0.87 -1.51 -2.69 2056.52
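## Approximate standard errors for the baseline OU parameters (an illustrative
## sketch, not part of the original analysis). Since loglikOU.fun returns twice
## the negative log-likelihood, the asymptotic covariance is approximately twice
## the one reported by vcov for the mle fit. The name temp.OU is introduced here
## to avoid overwriting variables used above.
temp.OU = estimate.OU(data = data.0, delta = 1/12)
round(sqrt(diag(2 * vcov(temp.OU))), 3)  ## Approximate SEs of (alpha0, mu0, sigma2)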
In the following code chunk, trajectories from the fitted model are simulated. This chunk is not evaluated; it has already been run, with the results stored in the file “SimulatedTraces.Rdata” to save computing time. The file contains 1000 simulated traces from the model with parameters estimated from the AMOC fingerprint, and it is loaded in the next code chunk. To rerun the simulation, remove “eval = FALSE” in the header of the chunk.
## This chunk is not evaluated, but has been run, and results stored in the file
## "SimulatedTraces.Rdata" for saving computer time. The file contains 1000 simulated
## traces from the model with parameters estimated from the AMOC fingerprint
## To run it, remove "eval = FALSE" above
### SIMULATIONS ###
set.seed(123) ## For reproducibility
nsim = 1000 ## Number of simulations
#Define parameters
T0 = t0-1870 ## Time length of stationary dynamics (lambda constant)
sig = sqrt(sigma2) ## Infinitesimal standard deviation: sigma
time = AMOC.data$time
n = length(time)
lam.seq = lambda0*(1-pmax(0,(time-t0))/tau) ## Time varying lambda
alpha.seq = 2*sqrt(-a*lam.seq) ## Time varying alpha
mu.seq = m + sqrt(-lam.seq/a) ## Time varying mu
var0 = sigma2/(2*alpha0) ## Baseline stationary variance
Delta_t = 1/12 ## Time step in years (one measurement per month)
nloop = 20 ## Number of simulation steps between observation steps
tp = 2020 - t0 ## Time length of non-stationary dynamics (lambda linearly increasing)
nt = ceiling(tp/Delta_t) ## Total number of points from t_0 to today
##Simulate process with lambda -> 0, starting at time t_0
xx = matrix(NA, n, nsim)
for (isim in 1:nsim){
  ## Initial condition is random from the stationary distribution
  x0 = m + sqrt(-lambda0/a) + rnorm(1, mean = 0, sd = sig/sqrt(2*alpha0))
  xxx = X.traj(sigma = sig, lambda0 = lambda0, tau = tau, m = m, a = a,
               T0 = T0, X0 = x0, dt = Delta_t/nloop,
               Ymax = nloop*n-1)
  nx = length(xxx$X)               ## If tipping happens early, trajectory is shorter than nt
  xxxx = xxx$X[seq(1, nx, nloop)]  ## Only keep points at observed times
  nxx = length(xxxx)
  xx[1:nxx, isim] = xxxx
}
xxtime <- time[1:n]
xx.data = data.frame(X = c(xx), time = rep(xxtime, nsim),
repetition = rep(1:nsim, each = n),
model = rep(m + sqrt(-lam.seq[1:n]/a), nsim))
## End of simulation of nsim paths
## Save for later use
#save(xx.data, nsim, Delta_t, file = "SimulatedTraces.Rdata")
In the following code chunk, parameters are estimated on each of the 1000 simulated trajectories. In this way, 1000 sets of parameter estimates are obtained; the 2.5 and 97.5 percentiles then provide a 95% confidence interval, and the 16.5 and 83.5 percentiles provide a 66% confidence interval. A 66% confidence interval corresponds to the definition of “likely” used by the Intergovernmental Panel on Climate Change (IPCC).
##############################
## Estimation on each trace ##
##############################
load("SimulatedTraces.Rdata")
pen = 0.004 ## Penalization, optimal value determined by cross-validation
estim.matrix = matrix(0, ncol = 8, nrow = nsim) ## Matrix to hold estimates
dimnames(estim.matrix)[[2]] = c("alpha0","mu0","lambda0", "tau", "s2", "m", "a", "tc")
for(isim in 1:nsim){
  ## Get data from simulation isim after time t0, when linear ramping has started
  xxi = xx.data[xx.data$time > t0 & xx.data$repetition == isim, "X"]
  xxi = xxi[xxi > -1.2]  ## Remove final data points in case the trace has tipped
  ## Get baseline data from simulation isim, before time t0
  xxi.0 = xx.data[xx.data$time <= t0 & xx.data$repetition == isim, "X"]
  nx = length(xxi)
  temp1 = estimate.OU(data = xxi.0, delta = Delta_t)
  estim.matrix[isim, "alpha0"] = coef(temp1)[1]
  estim.matrix[isim, "mu0"] = coef(temp1)[2]
  estim.matrix[isim, "s2"] = coef(temp1)[3]
  temp2 = estimate.tipping(data = xxi, delta = Delta_t, initial.values = c(100, 1),
                           alpha0 = coef(temp1)[1], mu0 = coef(temp1)[2],
                           sigma20 = coef(temp1)[3], pen = pen)
  estim.matrix[isim, "tau"] = temp2$par[1]
  estim.matrix[isim, "a"] = temp2$par[2]
}
estim.matrix[,"m"] = estim.matrix[,"mu0"] - estim.matrix[,"alpha0"]/(2*estim.matrix[,"a"])
estim.matrix[,"lambda0"] = - estim.matrix[,"alpha0"]^2/(4*estim.matrix[,"a"])
estim.matrix[,"tc"] = estim.matrix[,"tau"] + t0
#write_xlsx(as.data.frame(estim.matrix), "EstimMatrix_1000repetitions.xlsx")
true.data = data.frame(truevalue = c(alpha0, mu0, lambda0, tau, sigma2, m, a, tau + t0),
                       type = dimnames(estim.matrix)[[2]])
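## Percentile-based confidence intervals for all parameters at once
## (an illustrative sketch, not part of the original analysis):
round(apply(estim.matrix, 2, quantile, prob = c(0.025, 0.165, 0.5, 0.835, 0.975)), 2)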
The percentiles for the estimates of the tipping time are reported. The plot shows histograms of all the estimates, with blue lines indicating the values used in the simulations. This confirms that the estimation procedure works as intended and also provides empirical variances of the estimators. Some of these histograms are also shown in Figure 6 of the paper.
round(quantile(estim.matrix[,"tc"], prob = c(0.025, 0.165, 0.5, 0.835, 0.975)))
## 2.5% 16.5% 50% 83.5% 97.5%
## 2025 2039 2053 2070 2095
## Put estimates in data frame for plotting
estim.data = data.frame(estimate = c(estim.matrix),
                        type = rep(dimnames(estim.matrix)[[2]],
                                   each = dim(estim.matrix)[[1]]))
ggplot(data = estim.data, aes(x = estimate)) +
geom_histogram() +
geom_vline(data = true.data, aes(xintercept = truevalue), linewidth = 1, color = "blue") +
facet_wrap(~ type, scales = "free", ncol = 4)
The following code produces simulated data to obtain panels like those presented in Figure 4b, c and d of the paper.
The figure illustrates the model, with 10 simulated trajectories on top.
########################################################
## Figures with realizations under changing lambda ##
## Running estimation of variance and autocorrelation ##
########################################################
## Parameter values used in the simulation
tau = 200 ## Ramping time
l0 = -3 ## Baseline control parameter lambda
sigm = 0.25 ## Infinitesimal standard deviation sigma
s2 = sigm^2 ## Infinitesimal variance sigma^2
## Settings for simulation and estimation
rep = 10 ## Number of repetitions of simulated traces
Tw = 50 ## Window size for variance and autocorrelation estimation
t0 = 100 ## Start of change in control parameter lambda
dt = 0.1 ## Time step between observations
nloop = 10 ## Substeps between observation steps to decrease discretization error
alpha0 = 2*sqrt(-l0) ## Baseline alpha in OU approximation
rho0 = exp(-alpha0 * dt) ## Baseline autocorrelation in OU approximation
gam0 = s2/(2*alpha0) ## Baseline variance in OU approximation
vargam0 = 2*gam0^2/(alpha0 * Tw) ## Baseline variance of variance estimator
varrho0 = 2*alpha0*dt^2/Tw ## Baseline variance of autocorrelation estimator
q95 = qnorm(0.95) ## 95% quantile of normal distribution
X0 = sqrt(-l0) ## Starting value for simulation
Tend = t0 + tau ## Length of simulations
time = seq(0, Tend, dt) - t0 ## Time for simulations
n = length(time) ## Number of observation points in simulations
lt = c(rep(l0, t0/dt), (l0*(1 - (0:(tau/dt))/(tau/dt)))) ## Evolution of lambda
mu = sqrt(-lt) ## Evolution of mu
alpha = 2 * sqrt(-lt) ## Evolution of alpha
rho = exp( - alpha * dt) ## Evolution of autocorrelation
gam = s2/(2*alpha) ## Evolution of variance
vargam = 2*gam^2/(alpha*Tw) ## Evolution of variance of var-estimator
varrho = 2*alpha*dt^2/Tw ## Evolution of variance of corr-estimator
## Simulations
X = matrix(-10, ncol = rep, nrow = n)
for (i in 1:rep){
  ## Initial condition is random from the stationary distribution
  x0 = X0 + rnorm(1, mean = 0, sd = sigm/sqrt(2*alpha0))
  xx = X.traj(sigma = sigm, lambda0 = l0, tau = tau, m = 0, a = 1,
              T0 = t0, X0 = x0, dt = dt/nloop,
              Ymax = nloop*n-1)
  nx = length(xx$X)              ## If tipping happens early, trajectory is shorter than n
  xxx = xx$X[seq(1, nx, nloop)]  ## Only keep points at observed times
  nxx = min(n, length(xxx))
  X[1:nxx, i] = xxx[1:nxx]
}
plotdata = data.frame(X = c(X), time = rep(time, rep), rep = rep(1:rep, each = n))
plotdata$X[plotdata$X < -9.9] = NA
ggplot(data = plotdata, aes(x = time, y = pmax(X,-2.2), group = as.factor(rep))) +
geom_line(color = "gray") +
geom_line(data = data.frame(X = mu, time = time), color = "blue", size = 0.8) +
geom_line(data = data.frame(X = -mu, time = time), color = "blue", size = 0.8, linetype = "dashed") +
ylim(-2.2, 2.2) +
ylab("Realizations") + xlab("Time (years)")
Figures showing variance and autocorrelation estimates in running windows with confidence bands.
### Variance and autocorrelation estimation in running windows
### Data is detrended within each window
var.matrix = matrix(-1, ncol = rep, nrow = n - Tw/dt)
rho.matrix = matrix(-1, ncol = rep, nrow = n - Tw/dt)
for(i in 1:(n - Tw/dt)){
  for(j in 1:rep){
    data.i = X[i:(i+Tw/dt), j]            ## Get data in running window
    data.i = data.i[data.i > -2.3]        ## Remove data if the trace has tipped
    time.i = (1:length(data.i))*dt        ## Time points for data in running window
    temp = lm(data.i ~ time.i)            ## Fit a linear trend
    data.i = data.i - temp$fitted.values  ## Subtract the linear trend
    var.matrix[i, j] = var(data.i)        ## Variance estimate in running window
    rho.matrix[i, j] = acf(data.i, lag.max = 1, plot = FALSE)$acf[2]  ## Lag-1 autocorrelation estimate in running window
  }
}
plotdata.var.rho = data.frame(var = c(var.matrix), rho = c(rho.matrix),
                              time = -t0 + Tw/2 + rep(dt*seq(1, n - Tw/dt, 1), rep),
                              rep = rep(1:rep, each = n - Tw/dt))
## Variance
ggplot(data = plotdata.var.rho, aes(x = time, y = var, group = as.factor(rep))) +
geom_line(color = "gray", size = 0.3) +
geom_line(data = data.frame(var = gam, time = time), color = "blue", size = 1) +
geom_vline(xintercept = tau, size = 1.5) +
geom_hline(yintercept = gam0, size = 1) +
geom_hline(yintercept = gam0 + q95 * sqrt(vargam0), size = 0.7, linetype = "dashed") +
geom_hline(yintercept = gam0 - q95 * sqrt(vargam0), size = 0.7, linetype = "dashed") +
geom_line(data = data.frame(var = gam + q95 * sqrt(vargam), time = time), size = 0.7, linetype = "dashed") +
geom_line(data = data.frame(var = gam - q95 * sqrt(vargam), time = time), size = 0.7, linetype = "dashed") +
ylim(0, 0.05) + xlim(-Tw/2,tau) +
ylab("Variance") + xlab("Time") +
geom_segment(aes(x = 0, y = 0.025, xend = Tw, yend = 0.025), lineend = "butt", size = 1) +
geom_segment(aes(x = 0, y = 0.024, xend = 0, yend = 0.026), lineend = "butt", size = 1) +
geom_segment(aes(x = Tw, y = 0.024, xend = Tw, yend = 0.026), lineend = "butt", size = 1) +
annotate("text", x=Tw/2, y=0.028, label= "window size", size = 5)
## Autocorrelation
ggplot(data = plotdata.var.rho, aes(x = time, y = rho, group = as.factor(rep))) +
geom_line(color = "gray", size = 0.3) +
geom_line(data = data.frame(rho = rho, time = time), color = "blue", size = 1) +
geom_vline(xintercept = tau, size = 1.5) +
geom_hline(yintercept = rho0, size = 1) +
geom_hline(yintercept = rho0 + q95 * sqrt(varrho0), size = 0.7, linetype = "dashed") +
geom_hline(yintercept = rho0 - q95 * sqrt(varrho0), size = 0.7, linetype = "dashed") +
geom_line(data = data.frame(rho = rho - q95 * sqrt(varrho), time = time),
size = 0.7, linetype = "dashed") +
geom_line(data = data.frame(rho = rho + q95 * sqrt(varrho), time = time),
size = 0.7, linetype = "dashed") +
ylim(0.6, 1) + xlim(-Tw/2,tau) +
ylab("Autocorrelation") + xlab("Time") +
geom_segment(aes(x = 0, y = 0.9, xend = Tw, yend = 0.9), lineend = "butt", size = 1) +
geom_segment(aes(x = 0, y = 0.89, xend = 0, yend = 0.91), lineend = "butt", size = 1) +
geom_segment(aes(x = Tw, y = 0.89, xend = Tw, yend = 0.91), lineend = "butt", size = 1) +
annotate("text", x=Tw/2, y=0.925, label= "window size", size = 5)
Here, the normalized residuals are calculated and plotted in a Q-Q plot, similar to Figure 6 in the paper. The Q-Q plot shows a good fit to the model.
## Residuals of the ramping-period data under the fitted model.
## Note: this chunk uses the AMOC estimates of tau, lambda0, a, m and sigma2 from above;
## if the figure chunks have overwritten tau and t0, rerun the AMOC estimation chunk first.
n = length(data.2)
Xupp = data.2[2:n]
Xlow = data.2[1:(n-1)]
time = Delta_t*(1:(n-1))          ## Time since t0, where the ramping starts
lam.seq = lambda0*(1 - time/tau)  ## Time-varying lambda (data.2 starts at t0)
alpha.seq = 2*sqrt(-a*lam.seq)
mu.seq = m + sqrt(-lam.seq/a)
gamma2.seq = sigma2/(2*alpha.seq)
rho.seq = exp(-alpha.seq*Delta_t)
## Strang splitting residuals (same construction as in loglik.fun)
fh.half.tmp = a*Delta_t*(Xlow - mu.seq)/2
fh.half = (mu.seq*fh.half.tmp + Xlow)/(fh.half.tmp + 1)
fh.half.inv = (mu.seq*fh.half.tmp - Xupp)/(fh.half.tmp - 1)
mu.h = fh.half*rho.seq + mu.seq*(1-rho.seq)
m.part = fh.half.inv - mu.h
sd.seq = sqrt(gamma2.seq*(1-rho.seq^2))
norm.resid.S = m.part/sd.seq      ## Normalized residuals
qqnorm(norm.resid.S)
qqline(norm.resid.S)
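## As a complementary, quantitative check of the Q-Q plot (an illustrative
## sketch, not part of the original analysis): a Shapiro-Wilk test of
## normality of the normalized residuals. Under a well-specified model the
## residuals should be approximately standard normal.
shapiro.test(norm.resid.S)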