This is an R Markdown document that documents data analyses for the manuscript Zander et al (Seasonal climate signals preserved in biochemical varves: insights from novel high-resolution sediment scanning techniques). For full interpretation of the data and plots contained here, please see the associated manuscript. Any use of data and plots should refer to Zander et al.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
Sys.setenv(LANGUAGE = "en")
# Check and install pacman if neccesary
if (!require("pacman")) {
install.packages("pacman")
library(pacman)
}
# Load libraries
::p_load(
pacman
data.table, dtw, ggplot2, tidyverse, grid, lineup, vegan, corrplot, psych, distantia,
viridis, pracma, reshape2, mgcv, gridExtra, gamclass, cowplot, mvnormtest )
# Load data
<- read.csv("ZAB_HiRes_XRF.csv")
XRF <- read.csv("ZAB_CNS_2020.csv")
CNS <- read.csv("ZAB_HiRes_HSI.csv")
HSI <- read.csv("Chrono_Tornado.csv")
Chrono <- read.csv("meteo_1966_2020.csv") meteo
# Correction to TOC and TN to account for degration/remineralization using formula from galman et al 2008 (Limnology and Oceanography)
$year <- as.numeric(CNS$year)
CNS$toc_p <- as.numeric(CNS$toc_p)
CNS$toc_corr <- (CNS$toc_p + (23.41 * (2020.3 - CNS$year + 0.5) / (2020.3 - CNS$year + 0.5 + 1) / 100) * CNS$toc_p) # 2020.3 represents approximate date of coring
CNS$tn_corr <- (CNS$tn_p + (36.17 * (2020.3 - CNS$year + 0.5) / (2020.3 - CNS$year + 0.5 + 1) / 100) * CNS$tn_p)
CNS$tc_corr <- CNS$tic_p + CNS$toc_corr
CNS
plot(CNS$tc_corr, CNS$year, type = "l", ylim = c(2000, 2020), main = "Total carbon with and without toc decay correction")
lines(CNS$tc_p, CNS$year, col = "blue")
$toc.n_corr <- CNS$toc_corr / CNS$tn_corr
CNS# Mass accumulation rate caclulation
$MAR <- CNS$calib.thickness.mm * CNS$dry_bulk_density_gcm3
CNS
# removing cracks
<- cbind(XRF[, -c(4:16)], scale(XRF[, c(4:16)]))
XRF_scaled <- XRF_scaled$Ca.KA + XRF_scaled$Fe.KA + XRF_scaled$Si.KA # counts of abundant elements
crack_finder <- cbind(XRF, crack_finder)
XRF <- subset.data.frame(XRF, crack_finder > -2.9) # remove cracks (defined as areas with low abundances of common elements)
XRF
# assign ages based on depths
for (i in 1:nrow(Chrono)) {
setDT(XRF)[comp_depth_mm <= Chrono$Comp.Depth[i] & comp_depth_mm >= Chrono$Comp.Depth[i + 1], year := Chrono$Year[i]]
}
<- subset(XRF, year != 0) # subset only depths with assigned ages
XRF $cum_year_scale <- 0
XRF
# setting each year to be 1 unit long
for (i in 1:nrow(Chrono)) {
setDT(XRF)[year == Chrono$Year[i], year_scale := seq(1, 0.001, length.out = nrow(subset(XRF, year == Chrono$Year[i])))]
}$cum_year_scale <- XRF$year_scale + XRF$year + 0.3
XRF
##### HSI data
$year_scale <- 0
HSI$year_scale <- as.numeric(HSI$year_scale)
HSI$year <- 0
HSI$year <- as.numeric(HSI$year)
HSIfor (i in 1:nrow(Chrono)) {
setDT(HSI)[HSI_depth <= Chrono$HSI_depth[i] & HSI_depth >= Chrono$HSI_depth[i + 1], year := Chrono$Year[i]]
}### calibrating rabd indices according to calibration published in Zander et al., 2021 (Science of Total Environment)
$TChl <- 1560.48 * HSI$`RABD655.685max` - 1578.9
HSI$Bphe <- HSI$RABD845 * 861.18 - 851.65
HSI< 0] <- 0
HSI[HSI
# setting each year to be 1 unit long (HSI version)
<- subset(HSI, year != 0) # only including years with ages
HSI $cum_year_scale <- 0
HSIfor (i in 1:nrow(Chrono)) {
setDT(HSI)[year == Chrono$Year[i], year_scale := seq(1, 0.001, length.out = nrow(subset(HSI, year == Chrono$Year[i])))]
}$cum_year_scale <- HSI$year_scale + HSI$year + 0.3
HSI
#### aligning and regularlizing XRF and HSI data
<- as.data.frame(HSI[, c(11, 3, 4, 9, 10)])
Source <- as.data.frame(XRF[, 21])
Destin <- colnames(Source)
Names
# interpolate data for all columns
<- Destin
Data for (i in 2:ncol(Source)) {
<- approx(Source[, 1], Source[, i], xout = Destin[, 1])
temp <- temp$y
Data[, i]
}# Replace destination depth with interpolated depth (just to be sure it worked)
1] <- temp$x
Data[, # Replace column names with actual names
colnames(Data) <- c("Depth_cm", Names[2:length(Names)])
## Full Dataset !
<- cbind(XRF[, c(21, 20, 18, 4:8, 10:12)], Data[, c(3, 4, 5)])
HiRes_full colnames(HiRes_full) <- c("Age", "Varve Year", "Comp Depth", "Ca", "Fe", "Mn", "Si", "P", "S", "K", "Ti", "Rmean", "TChl", "Bphe")
###### Aligning HSI to XRF using dynamical time warp alignment of Rmean and Ca
<- cbind(HiRes_full[, 1:3], scale(HiRes_full[, 4:14]))
HiRes_full_scaled <- dtw(x = HiRes_full_scaled$Rmean, y = HiRes_full_scaled$Ca, window.type = "sakoechiba",
test_dtw_P2 window.size = 50, step.pattern = symmetricP2, keep = TRUE)
# Sakoe Chiba band is simple symmetrical window (window width here is equal to the thinnest varve width, i.e. <= 1 year)
# Testing showed symmetric P2 is most reasonable step pattern, but this could vary
plot(test_dtw_P2, type = "threeway")
<- na.omit(Data[, 2:5])
HSI_regular <- warp(test_dtw_P2, index.reference = FALSE)
warp <- HSI_regular[warp, ]
HSI_DTW
# example plot of DTW alignment
plot(HiRes_full_scaled$Age-0.3, HiRes_full_scaled$Ca, type = "l", xlim = c(2015, 2020), ylim= c(-2.5,4.5), main = "example plot of DTW alignment", xlab= "C (%)", ylab = "Z-score")
lines(HiRes_full_scaled$Age-0.3, HiRes_full_scaled$Rmean, col = "red")
lines(HiRes_full_scaled$Age-0.3, scale(HSI_DTW$Rmean), col = "blue")
abline(v=c(2015,2016,2017,2018,2019), lty=3)
legend(x = "bottomright", # Position
legend = c("Ca", "Rmean", "Rmean (after DTW)"), # Legend texts
lty = 1, # Line types
col = c("black", "red", "blue"), # Line colors
lwd = 2) # Line width
rm(test_dtw_P2)
# Putting together full, aligned dataset
12:14] <- HSI_DTW[, c(2:4)]
HiRes_full[, colnames(HiRes_full)[c(2, 3)] <- c("varve_year", "comp_depth_mm")
<- cbind(HiRes_full[, 1:3], scale(HiRes_full[, 4:14]))
HiRes_full_scaled <- HiRes_full[HiRes_full$`varve_year` >= 1966] # subset for study period 1966-2019 HiRes_full_sub1
Plotting XRF and HSI data
<- c("#c01d11", "2068c9", "#33799b", "#7c9b21", "#a54a0a", "#b3a22e", "#1798a1", "#a62b84", "#0f692d", "#500f8f") # color vector
color1 <- tidyr::pivot_longer(HiRes_full, cols = c("Ca", "K", "Ti", "Si", "Fe", "S", "Mn", "P"), names_to = "proxy") # make sideways
XRF_piv <- ggplot(XRF_piv, aes(value, comp_depth_mm, color = forcats::as_factor(proxy))) +
xrf_data_plot_depth geom_path() +
scale_color_manual(values = color1) +
scale_y_reverse(limits = c(345.06, 0.6)) +
labs(y = "Depth (mm)", x = "cps", title = element_blank()) +
facet_wrap(~proxy, ncol = 8, scales = "free") +
theme_bw() +
theme(
strip.background = element_blank(),
legend.box.background = element_blank(), legend.position = "none"
)
<- tidyr::pivot_longer(HSI, cols = c("TChl", "Bphe"), names_to = "proxy") # make sideways
HSI_piv <- ggplot(HSI_piv, aes(value, HSI_depth, color = forcats::as_factor(proxy))) +
HSI_data_plot_depth geom_path() +
scale_color_manual(values = c("#0f692d", "#500f8f")) +
scale_y_reverse(limits = c(320.04, 0.54)) +
labs(y = element_blank(), x = "ug/g", title = element_blank()) +
facet_wrap(~proxy, ncol = 8, scales = "free") +
theme_bw() +
theme(
strip.background = element_blank(),
legend.box.background = element_blank(), legend.position = "none"
)
plot_grid(xrf_data_plot_depth, HSI_data_plot_depth, rel_widths = c(4 / 5, 1 / 5))
Plotting CNS, MAR, Cs-137
<- CNS[CNS$year >= 1966 & CNS$year <= 2019, ]
CNS par(mfrow = c(1, 6), mar = c(5.1, 2, 2, 1))
plot(CNS$tc_corr, CNS$year, ylim = c(1966, 2020), type = "o", pch = 16, xlim = c(0, 20), ylab = "Year (CE)", xlab = "C (% weight)")
lines(CNS$toc_corr, CNS$year, lty = 5, type = "o", pch = 16)
lines(CNS$tic_p, CNS$year, lty = 3, type = "o", pch = 16)
plot(CNS$toc.n_corr, CNS$year, ylim = c(1966, 2020), type = "o", pch = 16, xlim = c(0, 12), xlab = "TOC:N Ratio", yaxt = "n", lty = 3, ylab = "")
plot(CNS$tn_corr, CNS$year, ylim = c(1966, 2020), type = "o", pch = 16, xlim = c(0, 2), xlab = "N (% weight)", yaxt = "n", ylab = "")
plot(CNS$ts_p, CNS$year, ylim = c(1966, 2020), type = "o", pch = 16, xlim = c(0, 3), xlab = "S (% weight)", yaxt = "n", ylab = "")
plot(CNS$MAR, CNS$year, ylim = c(1966, 2020), type = "o", pch = 16, xlim = c(0, 2), xlab = "MAR (g cm-2 yr-1)", yaxt = "n", ylab = "")
plot(CNS[is.na(CNS$ZAB_12_1_Cs) == FALSE, ]$ZAB_12_1_Cs, CNS[is.na(CNS$ZAB_12_1_Cs) == FALSE, ]$year,
ylim = c(1966, 2020), type = "o", pch = 16,
xlab = "137Cs (Bq kg-1)", xlim = c(0, 350), yaxt = "n", ylab = ""
)
lines(CNS$Cs, CNS$year, type = "o", lty = 2, pch = 1)
Annual cyle
# First, aligning all data to a fractional varve year scale from 0 to 1
<- rep("A", nrow(HiRes_full_sub1))
A <- as.data.frame(cbind(A, HiRes_full_sub1))
HiRes_full_sub1
<- 0.02 # target resolution in fractional year units (each year will contain 50 data points)
gap
<- cut(HiRes_full_sub1$Age, seq(min(HiRes_full_sub1$Age), max(HiRes_full_sub1$Age + gap), by = gap), right = FALSE)
int <- aggregate(HiRes_full_sub1[c(2, 4:15)], list(HiRes_full_sub1$A, int), mean)
ag $age_new <- seq(1966.3, 2020.28, length.out = nrow(ag))
ag
$year_scale <- ag$age_new - floor(ag$age_new)
ag$year_scale <- round(ag$year_scale, 2)
agfor (i in 1:length(ag$year_scale)) {
if (ag$year_scale[i] <= 0.3) {
$year_scale[i] <- ag$year_scale[i] + 0.7
agelse {
} $year_scale[i] <- ag$year_scale[i] - 0.3
ag
}
}
<- as.data.frame(cbind(ag[16:17], ag[5:15]))
ag 3:13] <- scale(ag[, 3:13])
ag[, <- aggregate(ag[3:13], list(ag$year_scale), FUN = mean)
year_ag
<- tidyr::pivot_longer(year_ag, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy")
year_ag_piv1 <- c("#c01d11", "#33799b", "#7c9b21", "#a54a0a", "#b3a22e", "#1798a1", "#a62b84", "#0f692d", "#500f8f") # color vector
color2 colnames(year_ag_piv1)[1] <- "year_scale"
#### annual cyle plot mean values
<- ggplot(year_ag_piv1, aes(year_scale, value, color = forcats::as_factor(proxy))) +
ann_cycle1 geom_path(alpha = 0.6, lwd = 1.5) +
scale_color_manual(values = color2) +
labs(
x = "",
y = "Z-score",
title = "Average Annual Cycle 1966-2019",
color = "proxy"
+
) theme_bw() +
guides(colour = guide_legend(nrow = 1)) +
theme(legend.position = "bottom")
ann_cycle1
Varve type classification based on multivariate time series clustering of within-varve time series
# preparing data for dissimilarity calculation: transform, detrend and scale data
<- HiRes_full[, c(4:11, 13, 14)]
sequence1 <- sequence1 + 1
sequence1 <- log(sequence1)
sequence1 <- detrend(as.matrix(sequence1))
sequence1 <- as.data.frame(sequence1)
sequence1 $`Bphe`[sequence1$`Bphe` <= sequence1$`Bphe`[6413]] <- sequence1$`Bphe`[6413] # resetting 0 baseline for Bphe after detrending
sequence1<- cbind(HiRes_full$varve_year, XRF$year_scale[1:6413], scale(sequence1))
sequence1 <- as.data.frame(sequence1)
sequence1 colnames(sequence1)[c(1, 2)] <- c("Varve_Year", "Year_scale")
<- subset(sequence1, Varve_Year <= 2019 & Varve_Year >= 1966) # study period
sequence1
<- workflowPsi(
psi1 sequences = sequence1,
grouping.column = "Varve_Year",
time.column = "Year_scale",
method = "euclidean",
diagonal = TRUE,
ignore.blocks = TRUE,
format = "matrix"
)
<- hclust(as.dist(psi1), method = "ward.D2")
hclust_1 plot(hclust_1)
Heatmap of dissimilarity scores
<- psi1
psi1_nozero == 0] <- NA
psi1_nozero[psi1_nozero heatmap(psi1_nozero, Rowv = as.dendrogram(hclust_1), Colv = as.dendrogram(hclust_1), symm = TRUE, col = viridis(256))
<- cutree(hclust_1, h = 5.3)
mycl <- as.data.frame(mycl)
mycl colnames(mycl) <- c("group")
$year <- rownames(mycl) mycl
Bar plot - importance of each element in driving year-to-year dissimilarity
<- workflowImportance(
psi.importance sequences = sequence1,
grouping.column = "Varve_Year",
time.column = "Year_scale",
method = "euclidean",
diagonal = TRUE,
ignore.blocks = TRUE
)
<- psi.importance$psi
psi.df <- psi.importance$psi.drop
psi.drop.df barplot(colMeans(psi.drop.df[, 3:12]))
Varve Type plots
$varve_year <- ag$age_new - 0.3
ag$varve_year <- floor(ag$varve_year)
ag$clust_group <- 0
ag<- seq(2019, 1966, -1)
varve_years for (i in 1:length(ag$varve_year)) {
for (j in 1:length(varve_years)) {
if (ag$varve_year[i] == varve_years[j]) {
$clust_group[i] <- mycl[j, 1]
ag
}
}
}
# proxy clusters mean and 80% CIs
<- aggregate(subset(ag, clust_group == 1), list(subset(ag, clust_group == 1)$year_scale), FUN = mean)
group1_ag <- aggregate(subset(ag, clust_group == 1), list(subset(ag, clust_group == 1)$year_scale), function(x) quantile(x, 0.90))
group1_90 <- aggregate(subset(ag, clust_group == 1), list(subset(ag, clust_group == 1)$year_scale), function(x) quantile(x, 0.10))
group1_10 <- aggregate(subset(ag, clust_group == 2), list(subset(ag, clust_group == 2)$year_scale), FUN = mean)
group2_ag <- aggregate(subset(ag, clust_group == 2), list(subset(ag, clust_group == 2)$year_scale), function(x) quantile(x, 0.9))
group2_90 <- aggregate(subset(ag, clust_group == 2), list(subset(ag, clust_group == 2)$year_scale), function(x) quantile(x, 0.1))
group2_10 <- aggregate(subset(ag, clust_group == 3), list(subset(ag, clust_group == 3)$year_scale), FUN = mean)
group3_ag <- aggregate(subset(ag, clust_group == 3), list(subset(ag, clust_group == 3)$year_scale), function(x) quantile(x, 0.9))
group3_90 <- aggregate(subset(ag, clust_group == 3), list(subset(ag, clust_group == 3)$year_scale), function(x) quantile(x, 0.1))
group3_10 <- aggregate(subset(ag, clust_group == 4), list(subset(ag, clust_group == 4)$year_scale), FUN = mean)
group4_ag <- aggregate(subset(ag, clust_group == 4), list(subset(ag, clust_group == 4)$year_scale), function(x) quantile(x, 0.9))
group4_90 <- aggregate(subset(ag, clust_group == 4), list(subset(ag, clust_group == 4)$year_scale), function(x) quantile(x, 0.1))
group4_10
<- tidyr::pivot_longer(group1_ag, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy")
group1_ag_piv1 <- cbind(group1_ag_piv1[, c(3, 8, 9)], tidyr::pivot_longer(group1_90, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p90th")[9], tidyr::pivot_longer(group1_10, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p10th")[9])
group1_ag_piv1 $group <- "Varve Type 1"
group1_ag_piv1
<- tidyr::pivot_longer(group2_ag, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy")
group2_ag_piv1 <- cbind(group2_ag_piv1[, c(3, 8, 9)], tidyr::pivot_longer(group2_90, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p90th")[9], tidyr::pivot_longer(group2_10, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p10th")[9])
group2_ag_piv1 $group <- "Varve Type 2"
group2_ag_piv1
<- tidyr::pivot_longer(group3_ag, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy")
group3_ag_piv1 <- cbind(group3_ag_piv1[, c(3, 8, 9)], tidyr::pivot_longer(group3_90, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p90th")[9], tidyr::pivot_longer(group3_10, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p10th")[9])
group3_ag_piv1 $group <- "Varve Type 3"
group3_ag_piv1
<- tidyr::pivot_longer(group4_ag, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy")
group4_ag_piv1 <- cbind(group4_ag_piv1[, c(3, 8, 9)], tidyr::pivot_longer(group4_90, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p90th")[9], tidyr::pivot_longer(group4_10, cols = c("Ca", "Ti", "Si", "Fe", "S", "Mn", "P", "TChl", "Bphe"), names_to = "proxy", values_to = "p10th")[9])
group4_ag_piv1 $group <- "Varve Type 4"
group4_ag_piv1
<- rbind(group1_ag_piv1, group2_ag_piv1, group3_ag_piv1, group4_ag_piv1)
groups_all $proxy_group <- "A"
groups_all$proxy %in% c("Ca", "P", "TChl"), ]$proxy_group <- "A"
groups_all[groups_all$proxy %in% c("Fe", "S", "Mn"), ]$proxy_group <- "B"
groups_all[groups_all$proxy %in% c("Ti", "Si", "Bphe"), ]$proxy_group <- "C"
groups_all[groups_all
# plot
<- ggplot(groups_all, aes(year_scale)) +
all_groups_plot2 geom_path(aes(x = year_scale, y = value, color = forcats::as_factor(proxy)), size = 1) +
geom_ribbon(aes(ymin = p10th, ymax = p90th, fill = forcats::as_factor(proxy)), alpha = 0.3) +
scale_color_manual(values = c(color2)) +
scale_fill_manual(values = c(color2)) +
labs(
x = "",
y = "Z-score",
color = "proxy"
+
) theme_bw() +
guides(colour = guide_legend(nrow = 1)) +
theme(legend.position = "bottom") +
facet_grid(rows = vars(group), cols = vars(proxy_group))
all_groups_plot2
Boxplot of seasonal weather by varve type
### aggregate meteo data into annual seasonal values
<- subset.data.frame(meteo, station == "K?TRZYN")
Ket_meteo # substitute missing wind data with mean values
$ws_mean_daily[(29309 - 19173):(29683 - 19173)] <- NA
Ket_meteo<- Ket_meteo %>%
Ket_meteo group_by(day_of_year) %>%
mutate(ws_mean_daily = replace(ws_mean_daily, is.na(ws_mean_daily), mean(ws_mean_daily, na.rm = TRUE))) %>%
ungroup()
<- as.data.frame(Ket_meteo)
Ket_meteo # define varve year as March to March
$varve_year <- Ket_meteo$yy
Ket_meteo$varve_year[Ket_meteo$mm == 1 | Ket_meteo$mm == 2] <- Ket_meteo$yy[Ket_meteo$mm == 1 | Ket_meteo$mm == 2] - 1
Ket_meteo
<- aggregate(Ket_meteo[, c(9, 11, 39)], list(Ket_meteo$varve_year), mean)
meteo_ann_mean colnames(meteo_ann_mean) <- c("varve_year", "Temp_ANN", "Precip_ANN", "Wind_ANN")
<- subset(meteo_ann_mean, varve_year >= 1966)
meteo_ann_mean
# creating empty columns
$Temp_MAM <- 0
meteo_ann_mean$Temp_JJA <- 0
meteo_ann_mean$Temp_SON <- 0
meteo_ann_mean$Temp_DJF <- 0
meteo_ann_mean$Temp_MAM_lag1 <- 0
meteo_ann_mean$Temp_MAMJJA <- 0
meteo_ann_mean
$p90_Precip_MAM <- 0
meteo_ann_mean$p90_Precip_JJA <- 0
meteo_ann_mean$p90_Precip_SON <- 0
meteo_ann_mean$p90_Precip_DJF <- 0
meteo_ann_mean$p90_Precip_MAM_lag1 <- 0
meteo_ann_mean
$p90_Wind_MAM <- 0
meteo_ann_mean$p90_Wind_JJA <- 0
meteo_ann_mean$p90_Wind_SON <- 0
meteo_ann_mean$p90_Wind_DJF <- 0
meteo_ann_mean$p90_Wind_MAM_lag1 <- 0
meteo_ann_mean$MAR_DEC_Wind_Days <- 0
meteo_ann_mean
$Temp_MAR <- 0
meteo_ann_mean$Temp_APR <- 0
meteo_ann_mean$Temp_MAY <- 0
meteo_ann_mean$Temp_JUN <- 0
meteo_ann_mean$Temp_JUL <- 0
meteo_ann_mean$Temp_AUG <- 0
meteo_ann_mean$Temp_SEP <- 0
meteo_ann_mean$Temp_OCT <- 0
meteo_ann_mean$Temp_NOV <- 0
meteo_ann_mean$Temp_DEC <- 0
meteo_ann_mean$Temp_JAN_LAG1 <- 0
meteo_ann_mean$Temp_FEB_LAG1 <- 0
meteo_ann_mean$Temp_MAR_LAG1 <- 0
meteo_ann_mean$Temp_APR_LAG1 <- 0
meteo_ann_mean$Temp_MAY_LAG1 <- 0
meteo_ann_mean
$p90_Precip_MAR <- 0
meteo_ann_mean$p90_Precip_APR <- 0
meteo_ann_mean$p90_Precip_MAY <- 0
meteo_ann_mean$p90_Precip_JUN <- 0
meteo_ann_mean$p90_Precip_JUL <- 0
meteo_ann_mean$p90_Precip_AUG <- 0
meteo_ann_mean$p90_Precip_SEP <- 0
meteo_ann_mean$p90_Precip_OCT <- 0
meteo_ann_mean$p90_Precip_NOV <- 0
meteo_ann_mean$p90_Precip_DEC <- 0
meteo_ann_mean$p90_Precip_JAN_LAG1 <- 0
meteo_ann_mean$p90_Precip_FEB_LAG1 <- 0
meteo_ann_mean$p90_Precip_MAR_LAG1 <- 0
meteo_ann_mean$p90_Precip_APR_LAG1 <- 0
meteo_ann_mean$p90_Precip_MAY_LAG1 <- 0
meteo_ann_mean
$p90_Wind_MAR <- 0
meteo_ann_mean$p90_Wind_APR <- 0
meteo_ann_mean$p90_Wind_MAY <- 0
meteo_ann_mean$p90_Wind_JUN <- 0
meteo_ann_mean$p90_Wind_JUL <- 0
meteo_ann_mean$p90_Wind_AUG <- 0
meteo_ann_mean$p90_Wind_SEP <- 0
meteo_ann_mean$p90_Wind_OCT <- 0
meteo_ann_mean$p90_Wind_NOV <- 0
meteo_ann_mean$p90_Wind_DEC <- 0
meteo_ann_mean$p90_Wind_JAN_LAG1 <- 0
meteo_ann_mean$p90_Wind_FEB_LAG1 <- 0
meteo_ann_mean$p90_Wind_MAR_LAG1 <- 0
meteo_ann_mean$p90_Wind_APR_LAG1 <- 0
meteo_ann_mean$p90_Wind_MAY_LAG1 <- 0
meteo_ann_mean
# filling in with annualized data
<- 7 # threshold for wind days (m/s)
wt
for (i in 1:nrow(meteo_ann_mean)) {
$Temp_MAM[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 5 & mm >= 3)[, 9])
meteo_ann_mean$Temp_JJA[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 8 & mm >= 6)[, 9])
meteo_ann_mean$Temp_SON[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 11 & mm >= 9)[, 9])
meteo_ann_mean$Temp_MAMJJA[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 8 & mm >= 3)[, 9])
meteo_ann_mean<- as.data.frame(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 12)[, 9])
D <- as.data.frame(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i + 1] & mm <= 2)[, 9])
JF colnames(D) <- "a"
colnames(JF) <- "a"
<- rbind(D, JF)
DJF $Temp_DJF[i] <- mean(DJF$a)
meteo_ann_mean
$Temp_MAR[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 3)[, 9])
meteo_ann_mean$Temp_APR[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 4)[, 9])
meteo_ann_mean$Temp_MAY[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 5)[, 9])
meteo_ann_mean$Temp_JUN[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 6)[, 9])
meteo_ann_mean$Temp_JUL[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 7)[, 9])
meteo_ann_mean$Temp_AUG[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 8)[, 9])
meteo_ann_mean$Temp_SEP[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 9)[, 9])
meteo_ann_mean$Temp_OCT[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 10)[, 9])
meteo_ann_mean$Temp_NOV[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 11)[, 9])
meteo_ann_mean$Temp_DEC[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 12)[, 9])
meteo_ann_mean$Temp_JAN_LAG1[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 1)[, 9])
meteo_ann_mean$Temp_FEB_LAG1[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 2)[, 9])
meteo_ann_mean$Temp_MAR_LAG1[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 3)[, 9])
meteo_ann_mean$Temp_APR_LAG1[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 4)[, 9])
meteo_ann_mean$Temp_MAY_LAG1[i] <- mean(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 5)[, 9])
meteo_ann_mean
$p90_Precip_MAM[i] <- stats::quantile((subset(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 5 & mm >= 3)[, 11]), probs = 0.9)
meteo_ann_mean$p90_Precip_JJA[i] <- quantile((subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 8 & mm >= 6)[, 11]), 0.9)
meteo_ann_mean$p90_Precip_SON[i] <- quantile((subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 11 & mm >= 9)[, 11]), 0.9)
meteo_ann_mean<- as.data.frame(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 12)[, 11])
D <- as.data.frame(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i + 1] & mm <= 2)[, 11])
JF colnames(D) <- "a"
colnames(JF) <- "a"
<- rbind(D, JF)
DJF $p90_Precip_DJF[i] <- quantile(DJF$a, 0.9)
meteo_ann_mean
$p90_Precip_MAR[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 3)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_APR[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 4)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_MAY[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 5)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_JUN[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 6)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_JUL[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 7)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_AUG[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 8)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_SEP[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 9)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_OCT[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 10)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_NOV[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 11)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_DEC[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 12)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_JAN_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 1)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_FEB_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 2)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_MAR_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 3)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_APR_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 4)[, 11], probs = 0.9)
meteo_ann_mean$p90_Precip_MAY_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 5)[, 11], probs = 0.9)
meteo_ann_mean
$p90_Wind_MAM[i] <- quantile((subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 5 & mm >= 3)$ws_mean_daily), 0.9)
meteo_ann_mean$p90_Wind_JJA[i] <- quantile((subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 8 & mm >= 6)$ws_mean_daily), 0.9)
meteo_ann_mean$p90_Wind_SON[i] <- quantile((subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm <= 11 & mm >= 9)$ws_mean_daily), 0.9)
meteo_ann_mean<- as.data.frame(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 12)$ws_mean_daily)
D <- as.data.frame(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i + 1] & mm <= 2)$ws_mean_daily)
JF colnames(D) <- "a"
colnames(JF) <- "a"
<- rbind(D, JF)
DJF $p90_Wind_DJF[i] <- quantile(DJF$a, 0.9)
meteo_ann_mean$MAR_DEC_Wind_Days[i] <- nrow(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & ws_mean_daily >= wt & mm >= 3))
meteo_ann_mean
$p90_Wind_MAR[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 3)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_APR[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 4)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_MAY[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 5)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_JUN[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 6)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_JUL[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 7)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_AUG[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 8)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_SEP[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 9)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_OCT[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 10)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_NOV[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 11)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_DEC[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] & mm == 12)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_JAN_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 1)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_FEB_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 2)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_MAR_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 3)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_APR_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 4)[, 39], probs = 0.9)
meteo_ann_mean$p90_Wind_MAY_LAG1[i] <- stats::quantile(subset.data.frame(Ket_meteo, yy == meteo_ann_mean$varve_year[i] + 1 & mm == 5)[, 39], probs = 0.9)
meteo_ann_mean
}
# calculating MAM_lag1 (spring of year after varve year, this may be included in some varves)
$Temp_MAM_lag1 <- c(meteo_ann_mean$Temp_MAM[2:54], NA)
meteo_ann_mean$p90_Wind_MAM_lag1 <- c(meteo_ann_mean$p90_Wind_MAM[2:54], NA)
meteo_ann_mean$p90_Precip_MAM_lag1 <- c(meteo_ann_mean$p90_Precip_MAM[2:54], NA)
meteo_ann_mean
# substituting mean values in years with missing wind data
28, c(18:21, 58:66)] <- colMeans(na.omit(meteo_ann_mean[c(-28, -29), ]))[c(18:21, 58:66)]
meteo_ann_mean[29, c(16:18, 21, 52:60)] <- colMeans(na.omit(meteo_ann_mean[c(-28, -29), ]))[c(16:18, 21, 52:60)]
meteo_ann_mean[
# Boxplot of seasonal weather by varve type
<- as.data.frame(cbind(mycl[54:1, 1], meteo_ann_mean[, 1], scale(meteo_ann_mean[c(5:9, 11:20)])))
meteo_clust colnames(meteo_clust)[c(1, 2)] <- c("Group", "Year")
<- tidyr::pivot_longer(meteo_clust, cols = colnames(meteo_clust)[c(3:17)], names_to = "variable")
meteo_clust_piv $variable <- rep(c("MAM", "JJA", "SON", "DJF", "MAM_lag1"), 54 * 3)
meteo_clust_piv$variable <- factor(meteo_clust_piv$variable, levels = c("MAM", "JJA", "SON", "DJF", "MAM_lag1"))
meteo_clust_piv$met_var <- rep(c("temp", "temp", "temp", "temp", "temp", "p90_precip", "p90_precip", "p90_precip", "p90_precip", "p90_precip", "p90_wind", "p90_wind", "p90_wind", "p90_wind", "p90_wind"), 54)
meteo_clust_piv<- c("#D55E00", "#56B4E9", "#009E73", "#F0E442")
cbPalette <- ggplot() +
groups_plot2 geom_boxplot(data = meteo_clust_piv, aes(x = variable, y = value, fill = factor(Group)), lwd = 0.3, outlier.size = 0.2) +
xlab("Season") +
scale_fill_manual(values = cbPalette) +
ylab("Z-score") +
theme(legend.title = element_blank(), panel.grid = element_blank()) +
facet_grid(rows = vars(met_var))
+ geom_hline(yintercept = 0, linetype = "dashed") groups_plot2
Assessing normality - some variables show positive skew, however ANOVA is not strongly sensitive to normality assumption, so we proceed
## assessing normality
ggplot(data = meteo_clust_piv, aes(x = value)) +
stat_density() +
facet_grid(rows = vars(met_var), cols = vars(variable), scales = "free_y")
<- as.data.frame(meteo_clust[, c(3:17)])
y
"Shapiro-Wilks test"
[1] "Shapiro-Wilks test"
<- function(df, bonf = TRUE, alpha = 0.05) {
shapiro_test_df <- lapply(df, shapiro.test)
l <- do.call("c", lapply(l, "[[", 1))
s <- do.call("c", lapply(l, "[[", 2))
p if (bonf == TRUE) {
<- ifelse(p > alpha / length(l), "H0", "Ha")
sig else {
} <- ifelse(p > alpha, "H0", "Ha")
sig
}return(list(
statistic = s,
p.value = p,
significance = sig,
method = ifelse(bonf == TRUE, "Shapiro-Wilks test with Bonferroni Correction",
"Shapiro-Wilks test without Bonferroni Correction"
)
))
}
shapiro_test_df(y, bonf = TRUE)
$statistic
Temp_MAM.W Temp_JJA.W Temp_SON.W Temp_DJF.W Temp_MAM_lag1.W p90_Precip_MAM.W
0.9702331 0.9735592 0.9792417 0.9707698 0.9708621 0.8881146
p90_Precip_JJA.W p90_Precip_SON.W p90_Precip_DJF.W p90_Precip_MAM_lag1.W p90_Wind_MAM.W p90_Wind_JJA.W
0.9793625 0.9506285 0.9773174 0.8866153 0.9768590 0.9764569
p90_Wind_SON.W p90_Wind_DJF.W p90_Wind_MAM_lag1.W
0.9810630 0.9862046 0.9773622
$p.value
Temp_MAM Temp_JJA Temp_SON Temp_DJF Temp_MAM_lag1 p90_Precip_MAM
0.1973847411 0.2751992104 0.4685928590 0.2083880995 0.2198589151 0.0001132049
p90_Precip_JJA p90_Precip_SON p90_Precip_DJF p90_Precip_MAM_lag1 p90_Wind_MAM p90_Wind_JJA
0.4735658925 0.0264979994 0.3940685521 0.0001164720 0.3776767268 0.3637323773
p90_Wind_SON p90_Wind_DJF p90_Wind_MAM_lag1
0.5469326229 0.7871439074 0.4080749462
$significance
Temp_MAM Temp_JJA Temp_SON Temp_DJF Temp_MAM_lag1 p90_Precip_MAM
"H0" "H0" "H0" "H0" "H0" "Ha"
p90_Precip_JJA p90_Precip_SON p90_Precip_DJF p90_Precip_MAM_lag1 p90_Wind_MAM p90_Wind_JJA
"H0" "H0" "H0" "Ha" "H0" "H0"
p90_Wind_SON p90_Wind_DJF p90_Wind_MAM_lag1
"H0" "H0" "H0"
$method
[1] "Shapiro-Wilks test with Bonferroni Correction"
Analysis of variance. Null Hypothesis: years associated with the four varve types experienced the same meteorological conditions Note: significance code symbols are different here than in the manuscript
# Analysis of variance
<- as.matrix(meteo_clust[, c(3:17)])
y <- manova(y ~ as.factor(meteo_clust$Group))
manova1 "multivariate analysis of variance"
[1] "multivariate analysis of variance"
summary(manova1)
Df Pillai approx F num Df den Df Pr(>F)
as.factor(meteo_clust$Group) 3 1.3648 2.0587 45 111 0.00119 **
Residuals 49
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
"analysis of variance for each meteo variable"
[1] "analysis of variance for each meteo variable"
summary.aov(manova1)
Response Temp_MAM :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 9.634 3.2113 3.7265 0.01717 *
Residuals 49 42.226 0.8618
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Response Temp_JJA :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 10.328 3.4427 4.2776 0.00926 **
Residuals 49 39.437 0.8048
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Response Temp_SON :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 9.516 3.1720 3.8796 0.01445 *
Residuals 49 40.063 0.8176
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Response Temp_DJF :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 4.044 1.34791 1.4606 0.2368
Residuals 49 45.221 0.92287
Response Temp_MAM_lag1 :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 11.214 3.7378 4.4906 0.007316 **
Residuals 49 40.786 0.8324
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Response p90_Precip_MAM :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 4.379 1.45983 1.4974 0.2269
Residuals 49 47.772 0.97494
Response p90_Precip_JJA :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 6.608 2.20273 2.3277 0.08605 .
Residuals 49 46.370 0.94633
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Response p90_Precip_SON :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 2.901 0.96688 0.9485 0.4245
Residuals 49 49.952 1.01943
Response p90_Precip_DJF :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 4.451 1.48374 1.5861 0.2047
Residuals 49 45.839 0.93549
Response p90_Precip_MAM_lag1 :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 3.456 1.1519 1.1627 0.3335
Residuals 49 48.544 0.9907
Response p90_Wind_MAM :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 14.809 4.9363 6.3359 0.001022 **
Residuals 49 38.176 0.7791
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Response p90_Wind_JJA :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 5.054 1.68458 1.7229 0.1745
Residuals 49 47.911 0.97777
Response p90_Wind_SON :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 10.760 3.5867 4.1827 0.01029 *
Residuals 49 42.017 0.8575
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Response p90_Wind_DJF :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 1.919 0.63973 0.6153 0.6084
Residuals 49 50.943 1.03966
Response p90_Wind_MAM_lag1 :
Df Sum Sq Mean Sq F value Pr(>F)
as.factor(meteo_clust$Group) 3 19.538 6.5127 9.8306 3.477e-05 ***
Residuals 49 32.462 0.6625
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
1 observation deleted due to missingness
Assessing normality - some variables are non-normal, but we proceed with correlation analysis. Emphasis is on understanding relationships, not significance tests. Variables with highest correlations are normally distributed.
# calculated annual means for geochemical data
<- aggregate(HiRes_full[, c(4:11, 13, 14)], list(HiRes_full$varve_year), mean)
proxy_ann_mean colnames(proxy_ann_mean)[1] <- "varve_year"
<- proxy_ann_mean[proxy_ann_mean$varve_year >= 1966, ]
proxy_ann_mean <- cbind(proxy_ann_mean[2:11], CNS[54:1, c(11, 37, 39, 38, 41)])
proxy_ann_all colnames(proxy_ann_all)[11:14] <- c("TIC", "TOC", "TC", "TN")
<- tidyr::pivot_longer(proxy_ann_all, cols = colnames(proxy_ann_all), names_to = "proxy")
proxy_ann_all_piv ggplot(data = proxy_ann_all_piv, aes(x = value)) +
stat_density() +
facet_wrap(facets = "proxy", nrow = 3, scales = "free")
"Shapiro-Wilks test"
[1] "Shapiro-Wilks test"
shapiro_test_df(proxy_ann_all, bonf = TRUE)
$statistic
Ca.W Fe.W Mn.W Si.W P.W S.W K.W Ti.W TChl.W Bphe.W TIC.W TOC.W TC.W
0.9696440 0.9131046 0.9241429 0.9814126 0.9476018 0.9637299 0.9572583 0.9853412 0.9112746 0.9838330 0.9713009 0.9797486 0.9418974
TN.W MAR.W
0.9751930 0.9360141
$p.value
Ca Fe Mn Si P S K Ti TChl Bphe
0.1859328483 0.0008274316 0.0021517032 0.5627068717 0.0195840976 0.1012762802 0.0519709111 0.7474558174 0.0007096279 0.6760597969
TIC TOC TC TN MAR
0.2198375108 0.4896885978 0.0111917053 0.3225689073 0.0063794821
$significance
Ca Fe Mn Si P S K Ti TChl Bphe TIC TOC TC TN MAR
"H0" "Ha" "Ha" "H0" "H0" "H0" "H0" "H0" "Ha" "H0" "H0" "H0" "H0" "H0" "H0"
$method
[1] "Shapiro-Wilks test with Bonferroni Correction"
Seasonal correlations. Selected correlations from this analysis are displayed in Table 1 of associated manuscript.
<- meteo_ann_mean[, c(5:21)] # seasonal meteo data
meteo_sub1 54, c(5, 11, 16)] <- colMeans(meteo_sub1[-54, ])[c(5, 11, 16)] # replacing missing data with means
meteo_sub1[
# Seasonal correlations
<- corbetw2mat(meteo_sub1, proxy_ann_all, what = "all")
cor_matrix_seasonal
<- acf(proxy_ann_all, lag = 1, plot = FALSE)
AR_proxies <- AR_proxies$acf
AR_proxies
<- acf(meteo_sub1, lag = 1, plot = FALSE)
AR_meteo <- AR_meteo$acf
AR_meteo
<- matrix(, nrow = ncol(meteo_sub1), ncol = ncol(proxy_ann_all))
adj_n # calculate adjusted n using method of Bretherton et al. (1999)
for (i in 1:ncol(meteo_sub1)) {
for (j in 1:ncol(proxy_ann_all)) {
<- nrow(proxy_ann_all) * (1 - AR_meteo[2, i, i] * AR_proxies[2, j, j]) / (1 + AR_meteo[2, i, i] * AR_proxies[2, j, j])
adj_n[i, j]
}
}> 54] <- 54
adj_n[adj_n colnames(adj_n) <- colnames(proxy_ann_all)
rownames(adj_n) <- colnames(meteo_sub1)
<- corr.p(cor_matrix_seasonal, n = adj_n, adjust = "fdr")$p
pval <- t(cor_matrix_seasonal)
cor_matrix_seasonal <- t(pval)
pval corrplot(cor_matrix_seasonal, method = "color", p.mat = pval, sig.level = c(0.01, 0.05, 0.1), pch = c("."), insig = "label_sig")
Correlations with monthly meteo data and full correlation plot
<- meteo_ann_mean[, 22:66] # monthly meteo data
meteo_sub2 54, c(13:15, 28:30, 43:45)] <- colMeans(meteo_sub2[-54, ])[c(13:15, 28:30, 43:45)] # replacing missing data with means
meteo_sub2[
<- corbetw2mat(proxy_ann_all, meteo_sub2, what = "all")
cor_matrix_months colnames(cor_matrix_months) <- c("Temp MAR", "Temp APR", "Temp MAY", "Temp JUN", "Temp JUL", "Temp AUG", "Temp SEP", "Temp OCT", "Temp NOV", "Temp DEC", "Temp JAN", "Temp FEB", "Temp-MAR", "Temp-APR", "Temp-MAY", "Precip MAR", "Precip APR", "Precip MAY", "Precip JUN", "Precip JUL", "Precip AUG", "Precip SEP", "Precip OCT", "Precip NOV", "Precip DEC", "Precip JAN", "Precip FEB", "Precip-MAR", "Precip-APR", "Precip-MAY", "Wind MAR", "Wind APR", "Wind MAY", "Wind JUN", "Wind JUL", "Wind AUG", "Wind SEP", "Wind OCT", "Wind NOV", "Wind DEC", "Wind JAN", "Wind FEB", "Wind-MAR", "Wind-APR", "Wind-MAY")
<- acf(meteo_sub2, lag = 1, plot = FALSE)
AR_meteo_sub2 <- AR_meteo_sub2$acf
AR_meteo_sub2
<- matrix(, nrow = ncol(proxy_ann_all), ncol = ncol(meteo_sub2))
adj_n_2 # calculate adjusted n using method of Bretherton et al. (1999)
for (i in 1:ncol(proxy_ann_all)) {
for (j in 1:ncol(meteo_sub2)) {
<- nrow(meteo_sub2) * (1 - AR_proxies[2, i, i] * AR_meteo_sub2[2, j, j]) / (1 + AR_proxies[2, i, i] * AR_meteo_sub2[2, j, j])
adj_n_2[i, j]
}
}> 54] <- 54
adj_n_2[adj_n_2 colnames(adj_n_2) <- colnames(meteo_sub2)
rownames(adj_n_2) <- colnames(proxy_ann_all)
<- corr.p(cor_matrix_months, n = adj_n_2, adjust = "fdr")$p
pval_2 par(mfrow = c(2, 2))
corrplot(cor_matrix_months[, 1:15], method = "color", p.mat = pval_2[, 1:15], sig.level = c(0.01, 0.05, 0.1), pch = c("."), insig = "label_sig")
corrplot(cor_matrix_months[, 16:30], method = "color", p.mat = pval_2[, 16:30], sig.level = c(0.01, 0.05, 0.1), pch = c("."), insig = "label_sig")
corrplot(cor_matrix_months[, 31:45], method = "color", p.mat = pval_2[, 31:45], sig.level = c(0.01, 0.05, 0.1), pch = c("."), insig = "label_sig")
corrplot(cor_matrix_seasonal[,c(-5,-11,-16)], method = "color", p.mat = pval[,c(-5,-11,-16)], sig.level = c(0.01, 0.05, 0.1), pch = c("."), insig = "label_sig")
<- rda(proxy_ann_all, meteo_clust[, c(3:6, 8:11, 13:16)], scale = TRUE)
rda_1 RsquareAdj(rda_1)
$r.squared
[1] 0.468402
$adj.r.squared
[1] 0.3128123
summary(rda_1)$cont
$importance
Importance of components:
RDA1 RDA2 RDA3 RDA4 RDA5 RDA6 RDA7 RDA8 RDA9 RDA10 RDA11 RDA12 PC1
Eigenvalue 4.245 0.92460 0.58711 0.52234 0.26917 0.18465 0.142550 0.087522 0.032040 0.022689 0.0062269 0.0020600 2.2679
Proportion Explained 0.283 0.06164 0.03914 0.03482 0.01794 0.01231 0.009503 0.005835 0.002136 0.001513 0.0004151 0.0001373 0.1512
Cumulative Proportion 0.283 0.34465 0.38379 0.41861 0.43655 0.44886 0.458366 0.464201 0.466337 0.467850 0.4682646 0.4684020 0.6196
PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14
Eigenvalue 1.43923 1.0964 0.78670 0.63538 0.54924 0.37014 0.27920 0.2085 0.142169 0.094810 0.052231 0.037459 0.014580
Proportion Explained 0.09595 0.0731 0.05245 0.04236 0.03662 0.02468 0.01861 0.0139 0.009478 0.006321 0.003482 0.002497 0.000972
Cumulative Proportion 0.71554 0.7886 0.84109 0.88344 0.92006 0.94474 0.96335 0.9773 0.986728 0.993049 0.996531 0.999028 1.000000
plot(rda_1, scaling = 3, display = c("cn", "sp"))
<- scores(rda_1, choices = 1:2, scaling = 3, display = "sp")
spe.sc arrows(0, 0, spe.sc[, 1], spe.sc[, 2], length = 0.1, lty = 1, col = "red", lwd = 1)
<- scores(rda_1, choices = 1:2, scaling = 3, display = "sites")
points.rda
points(points.rda[meteo_clust$Group == 1, ], col = "#d55e00", pch = 16) # VT-1
points(points.rda[meteo_clust$Group == 2, ], col = "#56b4e9", pch = 16) # VT-2
points(points.rda[meteo_clust$Group == 3, ], col = "#009e73", pch = 16) # VT-3
points(points.rda[meteo_clust$Group == 4, ], col = "#f0e442", pch = 16) # VT-4
<- as.data.frame(rev(mycl$group))
VT <- cbind(proxy_ann_all, meteo_ann_mean)
Annual_data_comb $VT <- VT[1:54,1]
Annual_data_comb# MAMJJA Temp GAM
<- gam(Temp_MAMJJA ~ s(TC) + s(Ti), data = Annual_data_comb, method = "REML", select = TRUE)
gam_MAMJJA_temp summary(gam_MAMJJA_temp)
Family: gaussian
Link function: identity
Formula:
Temp_MAMJJA ~ s(TC) + s(Ti)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.08264 0.09249 130.6 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(TC) 0.9616 9 2.781 4.47e-06 ***
s(Ti) 0.9065 9 1.077 0.00184 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.548 Deviance explained = 56.4%
-REML = 59.676 Scale est. = 0.46196 n = 54
<- as_tibble(as.data.frame(predict(gam_MAMJJA_temp, se.fit = TRUE, unconditional = TRUE)))
pred_MAMJJA_temp <- bind_cols(Annual_data_comb, pred_MAMJJA_temp) %>%
pred_MAMJJA_temp mutate(upr = fit + 2 * se.fit, lwr = fit - 2 * se.fit)
theme_set(theme_bw())
<- ggplot(Annual_data_comb, aes(x = varve_year, y = Temp_MAMJJA)) +
MAMJJA_temp_plot geom_point() +
scale_x_continuous(breaks = seq(1965, 2020, 5), limits = c(1965, 2020)) +
geom_ribbon(
data = pred_MAMJJA_temp,
mapping = aes(ymin = lwr, ymax = upr, x = varve_year), alpha = 0.4, inherit.aes = FALSE,
fill = "lightblue"
+
) geom_line(
data = pred_MAMJJA_temp,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "black"
+
) labs(x = "varve_year", y = "MAMJJA Temp (°C)") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
MAMJJA_temp_plot
plot(gam_MAMJJA_temp, pages = 1, all.terms = TRUE, shade = TRUE, residuals = TRUE, pch = 1, cex = 1, seWithMean = TRUE, shade.col = "lightblue")
Diagnostic plots
par(mfrow = c(3, 2))
gam.check(gam_MAMJJA_temp)
Method: REML Optimizer: outer newton
full convergence after 12 iterations.
Gradient range [-2.820061e-05,1.069222e-05]
(score 59.67644 & scale 0.4619602).
Hessian positive definite, eigenvalue range [1.536549e-06,26.51675].
Model rank = 19 / 19
Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.
k' edf k-index p-value
s(TC) 9.000 0.962 1.14 0.85
s(Ti) 9.000 0.906 0.98 0.44
acf(gam_MAMJJA_temp$residuals, lag.max = 36, main = "ACF")
pacf(gam_MAMJJA_temp$residuals, lag.max = 36, main = "pACF")
10-fold cross-validated RMSE
<- CVgam(formula = Temp_MAMJJA ~ s(TC) + s(Ti), data = Annual_data_comb, method = "REML") CV_temp
GAMscale CV-mse-GAM
0.4577 0.4797
paste("CV-RMSE (°C) = ", round(sqrt(CV_temp$cvscale), digits = 2))
[1] "CV-RMSE (°C) = 0.69"
paste("CV-RMSE (%) = ", round(100 * sqrt(CV_temp$cvscale) / (max(Annual_data_comb$Temp_MAMJJA) - min(Annual_data_comb$Temp_MAMJJA)), digits = 2))
[1] "CV-RMSE (%) = 14.42"
Split-period calibration and validation (MAMJJA Temperature)
# preparing summary table
<- Annual_data_comb[Annual_data_comb$varve_year <= 1992, ]
Annual_data_comb_cal <- Annual_data_comb[Annual_data_comb$varve_year > 1992, ]
Annual_data_comb_ver
<- data.frame(cal_period = c("1966-1992", "1993-2019"), val_period = c("1993-2019", "1966-1992"), Rsqr_adj = NA, RE = NA, CE = NA, RMSE = NA)
split_period_temp # calibration model
<- gam(Temp_MAMJJA ~ s(TC) + s(Ti), data = Annual_data_comb_cal, method = "REML", select = TRUE)
gam_MAMJJA_temp_cal $Rsqr_adj[1] <- summary(gam_MAMJJA_temp_cal)$r.sq
split_period_temp
# Calculate RE, CE, RMSE
<- as_tibble(as.data.frame(predict(gam_MAMJJA_temp_cal, newdata = Annual_data_comb, se.fit = TRUE, unconditional = TRUE)))
pred_MAMJJA_temp_cal <- as_tibble(as.data.frame(predict(gam_MAMJJA_temp_cal, newdata = Annual_data_comb_ver, se.fit = TRUE, unconditional = TRUE)))
pred_MAMJJA_temp_ver
<- 1 - sum((Annual_data_comb_ver$Temp_MAMJJA - pred_MAMJJA_temp_ver$fit)^2) / sum((Annual_data_comb_ver$Temp_MAMJJA - mean(Annual_data_comb_cal$Temp_MAMJJA))^2)
RE_MAMJJA_temp $RE[1] <- RE_MAMJJA_temp
split_period_temp
<- 1 - sum((Annual_data_comb_ver$Temp_MAMJJA - pred_MAMJJA_temp_ver$fit)^2) / sum((Annual_data_comb_ver$Temp_MAMJJA - mean(Annual_data_comb_ver$Temp_MAMJJA))^2)
CE_MAMJJA_temp $CE[1] <- CE_MAMJJA_temp
split_period_temp
<- sqrt(mean((Annual_data_comb_ver$Temp_MAMJJA - pred_MAMJJA_temp_ver$fit)^2))
RMSEP_MAMJJA_temp $RMSE[1] <- paste(round(RMSEP_MAMJJA_temp, digits = 3), " (", round(100 * RMSEP_MAMJJA_temp / (max(Annual_data_comb$Temp_MAMJJA) - min(Annual_data_comb$Temp_MAMJJA)), digits = 3), "%)")
split_period_temp
# now switching calibration and verification periods
<- gam(Temp_MAMJJA ~ s(TC) + s(Ti), data = Annual_data_comb_ver, method = "REML", select = TRUE)
gam_MAMJJA_temp_ver $Rsqr_adj[2] <- summary(gam_MAMJJA_temp_ver)$r.sq
split_period_temp<- as_tibble(as.data.frame(predict(gam_MAMJJA_temp_ver, newdata = Annual_data_comb_cal, se.fit = TRUE, unconditional = TRUE)))
pred_MAMJJA_temp_ver_2 <- 1 - sum((Annual_data_comb_cal$Temp_MAMJJA - pred_MAMJJA_temp_ver_2$fit)^2) / sum((Annual_data_comb_cal$Temp_MAMJJA - mean(Annual_data_comb_ver$Temp_MAMJJA))^2)
RE_MAMJJA_temp_2 $RE[2] <- RE_MAMJJA_temp_2
split_period_temp
<- 1 - sum((Annual_data_comb_cal$Temp_MAMJJA - pred_MAMJJA_temp_ver_2$fit)^2) / sum((Annual_data_comb_cal$Temp_MAMJJA - mean(Annual_data_comb_cal$Temp_MAMJJA))^2)
CE_MAMJJA_temp_2 $CE[2] <- CE_MAMJJA_temp_2
split_period_temp
<- sqrt(mean((Annual_data_comb_cal$Temp_MAMJJA - pred_MAMJJA_temp_ver_2$fit)^2))
RMSEP_MAMJJA_temp_2 $RMSE[2] <- paste(round(RMSEP_MAMJJA_temp_2, digits = 3), " (", round(100 * RMSEP_MAMJJA_temp_2 / (max(Annual_data_comb$Temp_MAMJJA) - min(Annual_data_comb$Temp_MAMJJA)), digits = 3), "%)")
split_period_temp
split_period_temp
# split period plot
<- as_tibble(as.data.frame(predict(gam_MAMJJA_temp_ver, newdata = Annual_data_comb, se.fit = TRUE, unconditional = TRUE)))
pred_MAMJJA_temp_ver_3 <- bind_cols(Annual_data_comb, pred_MAMJJA_temp_ver_3)
pred_MAMJJA_temp_ver_3 <- as_tibble(as.data.frame(predict(gam_MAMJJA_temp_cal, newdata = Annual_data_comb, se.fit = TRUE, unconditional = TRUE)))
pred_MAMJJA_temp_cal_3 <- bind_cols(Annual_data_comb, pred_MAMJJA_temp_cal_3)
pred_MAMJJA_temp_cal_3 theme_set(theme_bw())
<- ggplot(Annual_data_comb, aes(x = varve_year, y = Temp_MAMJJA)) +
gam_MAMJJA_temp_split geom_point() +
scale_x_continuous(breaks = seq(1965, 2020, 5), limits = c(1965, 2020)) +
geom_line(
data = pred_MAMJJA_temp,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "black"
+
) geom_line(
data = pred_MAMJJA_temp_ver_3,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "red"
+
) geom_line(
data = pred_MAMJJA_temp_cal_3,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "blue"
+
) labs(x = "Year", y = "MAMJJA Temp (°C)") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
gam_MAMJJA_temp_split
# removing missing years
<- Annual_data_comb[Annual_data_comb$varve_year != 1994 & Annual_data_comb$varve_year != 1993, ]
wind_data_comb
<- gam(MAR_DEC_Wind_Days ~ s(MAR) + s(Si), data = wind_data_comb, method = "REML", select = TRUE)
gam_wind_days_MAR_DEC
summary(gam_wind_days_MAR_DEC)
Family: gaussian
Link function: identity
Formula:
MAR_DEC_Wind_Days ~ s(MAR) + s(Si)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 13.3462 0.8181 16.31 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Approximate significance of smooth terms:
edf Ref.df F p-value
s(MAR) 0.9192 9 1.260 0.000817 ***
s(Si) 0.9065 9 1.074 0.001750 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
R-sq.(adj) = 0.478 Deviance explained = 49.6%
-REML = 167.48 Scale est. = 34.804 n = 52
<- as_tibble(as.data.frame(predict(gam_wind_days_MAR_DEC, se.fit = TRUE, unconditional = TRUE)))
pred_wind_days_MAR_DEC <- bind_cols(wind_data_comb, pred_wind_days_MAR_DEC) %>%
pred_wind_days_MAR_DEC mutate(upr = fit + 2 * se.fit, lwr = fit - 2 * se.fit)
theme_set(theme_bw())
<- ggplot(wind_data_comb, aes(x = varve_year, y = MAR_DEC_Wind_Days)) +
wind_days_MAR_DEC_plot geom_point() +
scale_x_continuous(breaks = seq(1965, 2020, 5), limits = c(1965, 2020)) +
geom_ribbon(
data = pred_wind_days_MAR_DEC,
mapping = aes(ymin = lwr, ymax = upr, x = varve_year), alpha = 0.4, inherit.aes = FALSE,
fill = "lightblue"
+
) geom_line(
data = pred_wind_days_MAR_DEC,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "black"
+
) labs(x = "Year", y = "# of days with mean wind speed > 7 m/s")
wind_days_MAR_DEC_plot
plot(gam_wind_days_MAR_DEC, pages = 1, shade = TRUE, residuals = TRUE, pch = 1, cex = 1, seWithMean = TRUE, shade.col = "lightblue")
Diagnostic plots
par(mfrow = c(3, 2))
gam.check(gam_wind_days_MAR_DEC)
Method: REML Optimizer: outer newton
full convergence after 13 iterations.
Gradient range [-6.492196e-05,0.0001896789]
(score 167.4796 & scale 34.80371).
Hessian positive definite, eigenvalue range [2.334065e-05,25.51643].
Model rank = 19 / 19
Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.
k' edf k-index p-value
s(MAR) 9.000 0.919 1.34 0.99
s(Si) 9.000 0.906 0.99 0.43
acf(gam_wind_days_MAR_DEC$residuals, lag.max = 36, main = "ACF")
pacf(gam_wind_days_MAR_DEC$residuals, lag.max = 36, main = "pACF")
10-fold cross-validated RMSE
<- CVgam(formula = MAR_DEC_Wind_Days ~ s(MAR) + s(Si), data = wind_data_comb, method = "REML") CV_wind
GAMscale CV-mse-GAM
34.8747 36.8989
paste("CV-RMSE (days) = ", round(sqrt(CV_wind$cvscale), digits = 2))
[1] "CV-RMSE (days) = 6.07"
paste("CV-RMSE (%) = ", round(100 * sqrt(CV_wind$cvscale) / (max(wind_data_comb$MAR_DEC_Wind_Days) - min(wind_data_comb$MAR_DEC_Wind_Days)), digits = 2))
[1] "CV-RMSE (%) = 18.98"
Split-period calibration and validation
<- wind_data_comb[wind_data_comb$varve_year <= 1991, ]
wind_data_comb_cal <- wind_data_comb[wind_data_comb$varve_year > 1991, ]
wind_data_comb_ver
# preparing summary table
<- data.frame(cal_period = c("1966-1991", "1992, 1995-2019"), val_period = c("1992, 1995-2019", "1966-1991"), Rsqr_adj = NA, RE = NA, CE = NA, RMSE = NA)
split_period_wind # calibration model
<- gam(MAR_DEC_Wind_Days ~ s(MAR) + s(Si), data = wind_data_comb_cal, method = "REML", select = TRUE)
gam_wind_days_MAR_DEC_cal $Rsqr_adj[1] <- summary(gam_wind_days_MAR_DEC_cal)$r.sq
split_period_wind
# Calculate RE, CE, RMSE
<- as_tibble(as.data.frame(predict(gam_wind_days_MAR_DEC_cal, newdata = wind_data_comb_ver, se.fit = TRUE, unconditional = TRUE)))
pred_wind_days_ver <- 1 - sum((wind_data_comb_ver$MAR_DEC_Wind_Days - pred_wind_days_ver$fit)^2) / sum((wind_data_comb_ver$MAR_DEC_Wind_Days - mean(wind_data_comb_cal$MAR_DEC_Wind_Days))^2)
RE_wind_days $RE[1] <- RE_wind_days
split_period_wind
<- 1 - sum((wind_data_comb_ver$MAR_DEC_Wind_Days - pred_wind_days_ver$fit)^2) / sum((wind_data_comb_ver$MAR_DEC_Wind_Days - mean(wind_data_comb_ver$MAR_DEC_Wind_Days))^2)
CE_wind_days $CE[1] <- CE_wind_days
split_period_wind
<- sqrt(mean((wind_data_comb_ver$MAR_DEC_Wind_Days - pred_wind_days_ver$fit)^2))
RMSEP_wind_days $RMSE[1] <- paste(round(RMSEP_wind_days, digits = 3), " (", round(100 * RMSEP_wind_days / (max(wind_data_comb$MAR_DEC_Wind_Days) - min(wind_data_comb$MAR_DEC_Wind_Days)), digits = 3), "%)")
split_period_wind
# now using ver as cal
<- gam(MAR_DEC_Wind_Days ~ s(MAR) + s(Si), data = wind_data_comb_ver, method = "REML", select = TRUE)
gam_wind_days_MAR_DEC_ver $Rsqr_adj[2] <- summary(gam_wind_days_MAR_DEC_ver)$r.sq
split_period_wind
<- as_tibble(as.data.frame(predict(gam_wind_days_MAR_DEC_ver, newdata = wind_data_comb_cal, se.fit = TRUE, unconditional = TRUE)))
pred_wind_days_ver_2 <- 1 - sum((wind_data_comb_cal$MAR_DEC_Wind_Days - pred_wind_days_ver_2$fit)^2) / sum((wind_data_comb_cal$MAR_DEC_Wind_Days - mean(wind_data_comb_ver$MAR_DEC_Wind_Days))^2)
RE_wind_days_2 $RE[2] <- RE_wind_days_2
split_period_wind
<- 1 - sum((wind_data_comb_cal$MAR_DEC_Wind_Days - pred_wind_days_ver_2$fit)^2) / sum((wind_data_comb_cal$MAR_DEC_Wind_Days - mean(wind_data_comb_cal$MAR_DEC_Wind_Days))^2)
CE_wind_days_2 $CE[2] <- CE_wind_days_2
split_period_wind
<- sqrt(mean((wind_data_comb_cal$MAR_DEC_Wind_Days - pred_wind_days_ver_2$fit)^2))
RMSEP_wind_days_2 $RMSE[2] <- paste(round(RMSEP_wind_days_2, digits = 3), " (", round(100 * RMSEP_wind_days_2 / (max(wind_data_comb$MAR_DEC_Wind_Days) - min(wind_data_comb$MAR_DEC_Wind_Days)), digits = 3), "%)")
split_period_wind
split_period_wind
# split period plot
<- as_tibble(as.data.frame(predict(gam_wind_days_MAR_DEC_ver, newdata = wind_data_comb, se.fit = TRUE, unconditional = TRUE)))
pred_wind_days_ver_3 <- bind_cols(wind_data_comb, pred_wind_days_ver_3)
pred_wind_days_ver_3 <- as_tibble(as.data.frame(predict(gam_wind_days_MAR_DEC_cal, newdata = wind_data_comb, se.fit = TRUE, unconditional = TRUE)))
pred_wind_days_cal_3 <- bind_cols(wind_data_comb, pred_wind_days_cal_3)
pred_wind_days_cal_3 theme_set(theme_bw())
<- ggplot(wind_data_comb, aes(x = varve_year, y = MAR_DEC_Wind_Days)) +
gam_wind_days_MAR_DEC_split geom_point() +
scale_x_continuous(breaks = seq(1965, 2020, 5), limits = c(1965, 2020)) +
geom_line(
data = pred_wind_days_MAR_DEC,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "black"
+
) geom_line(
data = pred_wind_days_ver_3,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "red"
+
) geom_line(
data = pred_wind_days_cal_3,
mapping = aes(y = fit, x = varve_year), inherit.aes = FALSE, size = 1, colour = "blue"
+
) labs(x = "Year", y = "# of days with mean wind speed > 7 m/s") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
gam_wind_days_MAR_DEC_split
"Annual resolution proxy data"
[1] "Annual resolution proxy data"
corrplot(cor(proxy_ann_all), addCoef.col = "black", number.cex = 10 / ncol(proxy_ann_all))
"Full resolution (60 um) scanning proxy data"
[1] "Full resolution (60 um) scanning proxy data"
corrplot(cor(HiRes_full[HiRes_full$varve_year >= 1966, 4:14]), addCoef.col = "black", number.cex = 10 / ncol(HiRes_full[, 4:14]))
"Seasonal meteo data"
[1] "Seasonal meteo data"
corrplot(cor(meteo_sub1), addCoef.col = "black", number.cex = 10 / ncol(meteo_sub1))
Sys.Date()
[1] "2021-08-17"
sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17134)
Matrix products: default
locale:
[1] LC_COLLATE=German_Switzerland.1252 LC_CTYPE=German_Switzerland.1252 LC_MONETARY=German_Switzerland.1252
[4] LC_NUMERIC=C LC_TIME=German_Switzerland.1252
attached base packages:
[1] grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] mvnormtest_0.1-9 cowplot_1.1.1 gamclass_0.62.3 gridExtra_2.3 mgcv_1.8-36 nlme_3.1-152 reshape2_1.4.4
[8] pracma_2.3.3 viridis_0.6.1 viridisLite_0.4.0 distantia_1.0.2 psych_2.1.6 corrplot_0.89 vegan_2.5-7
[15] lattice_0.20-44 permute_0.9-5 lineup_0.38-3 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
[22] readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 tidyverse_1.3.1 ggplot2_3.3.4 dtw_1.22-3 proxy_0.4-26
[29] data.table_1.14.0 pacman_0.5.1
loaded via a namespace (and not attached):
[1] fs_1.5.0 lubridate_1.7.10 RColorBrewer_1.1-2 doParallel_1.0.16 httr_1.4.2 tools_4.1.0
[7] backports_1.2.1 utf8_1.2.1 R6_2.5.0 rpart_4.1-15 DBI_1.1.1 colorspace_2.0-1
[13] withr_2.4.2 tidyselect_1.1.1 mnormt_2.0.2 arrangements_1.1.9 compiler_4.1.0 cli_2.5.0
[19] rvest_1.0.0 xml2_1.3.2 labeling_0.4.2 scales_1.1.1 randomForest_4.6-14 digest_0.6.27
[25] rmarkdown_2.9 jpeg_0.1-8.1 pkgconfig_2.0.3 htmltools_0.5.1.1 dbplyr_2.1.1 maps_3.3.0
[31] rlang_0.4.11 readxl_1.3.1 rstudioapi_0.13 farver_2.1.0 generics_0.1.0 jsonlite_1.7.2
[37] magrittr_2.0.1 dotCall64_1.0-1 Matrix_1.3-4 Rcpp_1.0.6 munsell_0.5.0 fansi_0.5.0
[43] lifecycle_1.0.0 stringi_1.6.1 yaml_2.2.1 MASS_7.3-54 plyr_1.8.6 parallel_4.1.0
[49] crayon_1.4.1 haven_2.4.1 splines_4.1.0 hms_1.1.0 tmvnsim_1.0-2 knitr_1.33
[55] pillar_1.6.1 codetools_0.2-18 reprex_2.0.0 glue_1.4.2 evaluate_0.14 latticeExtra_0.6-29
[61] modelr_0.1.8 png_0.1-7 vctrs_0.3.8 spam_2.6-0 foreach_1.5.1 cellranger_1.1.0
[67] gtable_0.3.0 assertthat_0.2.1 xfun_0.24 broom_0.7.7 iterators_1.0.13 tinytex_0.32
[73] fields_12.3 cluster_2.1.2 gmp_0.6-2 ellipsis_0.3.2