rm(list = ls())
library(caret)
library(caretEnsemble)
library(dplyr)
library(ranger)
library(ggtext)
library(cowplot)
library(forcats)
library(tidyr)
library(stringr)
library(corrplot)
Decomposing biodiversity change to processes of extinction, colonization, and persistence across scales
This document contains the code to reproduce the models fitting, the predictions and the figures of our manuscript. We modeled 1) species richness, 2) numbers and 3) probabilities of extinction, colonization and persistence. Models were fitted for the Atlas datasets (hereafter CzAtlas) on one hand and for the Czech Breeding Birds Survey on the other hand (hereafter CzBBS, or JPSP for Jednotný program sčítání ptáků) using repeated cross-validation with 3 different tree-based algorithms.
The data used (i.e. CzAtlas and CzBBS) are not open thus we provide them already processed into species richness, colonization, extinction… per unit of area. If you are interested in accessing the raw datasets (with species identities), feel free to contact the corresponding author.
Model fitting
Species richness
CzAtlas
## Load data
load("../data/data_model_fitting/atlas_cart_data.Rdata")
## We perform repeated (n = 3) cross-validation (k = 10) using the caret package
# First we split the dataset into training and validation datasets
# set.seed(333)
<- trainControl(method = "repeatedcv", number = 10, repeats = 3) traindata
## We use three tree-based algorithms in order to assess which one perform the best
## Create the list of models
<-
model_list_atlas caretList(
~ .,
sr data = atlas_cart_data,
trControl = traindata,
metric = "RMSE",
tuneList = list(
rf = caretModelSpec(method = "ranger",
importance = "permutation",
tuneGrid = data.frame(mtry = ceiling((ncol(atlas_cart_data)-1)/3),
splitrule = "variance",
min.node.size = 5)),
gbm = caretModelSpec(method = "gbm",
distribution = "poisson",
# keep.data = TRUE,
tuneGrid = data.frame(n.trees = 500,
interaction.depth = 1,
shrinkage = 0.1,
n.minobsinnode = 5)),
xgboost = caretModelSpec(method = "xgbTree",
tuneGrid = expand.grid(nrounds = 500,
max_depth = 5,
eta = 0.1,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1))
)
)
# saveRDS(model_list_atlas, "../models/sr_atlas.rds")
CzBBS (or JPSP)
# Load data
load("../data/data_model_fitting/JPSP_cart_data.Rdata")
# First, convert the date into a numeric format (here time stamp since 1970-01-01)
$Date <- as.numeric(JPSP_cart_data$Date)
JPSP_cart_data
## Reduce the time_span
<- JPSP_cart_data %>% dplyr::filter(time_span <= 5 & time_span >= 3) JPSP_cart_data
## Create the list of models
<-
model_list_JPSP caretList(
~ .,
sr data = JPSP_cart_data,
trControl = traindata,
metric = "RMSE",
tuneList = list(
rf = caretModelSpec(method = "ranger",
importance = "permutation",
tuneGrid = data.frame(mtry = ceiling((ncol(JPSP_cart_data)-1)/3),
splitrule = "variance",
min.node.size = 5)),
gbm = caretModelSpec(method = "gbm",
distribution = "poisson",
tuneGrid = data.frame(n.trees = 500,
interaction.depth = 1,
shrinkage = 0.1,
n.minobsinnode = 5)),
xgboost = caretModelSpec(method = "xgbTree",
tuneGrid = expand.grid(nrounds = 500,
max_depth = 5,
eta = 0.1,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1))
)
)
# saveRDS(model_list_JPSP, file = "../models/sr_bbs.rds")
Number of Colonization, Extinction and Persistence (CEP)
CzAtlas
## Load data for learning
<- readRDS("../data/data_model_fitting/CEP_atlas_cart_data.rds")
CEP_atlas_cart_data <- CEP_atlas_cart_data[!grepl("^proba", CEP_atlas_cart_data$metric),] CEP_atlas_cart_data
## For each metric, perform repeated cross-validation with the 3 algorithms
## Learn for each metric
for(THE_metric in unique(CEP_atlas_cart_data$metric)){
assign(
paste0("CEP_atlas_models_", THE_metric),
caretList(
~ .,
value data = CEP_atlas_cart_data %>% filter(metric == THE_metric) %>% dplyr::select(-quadrat, -metric, -trials),
trControl = traindata,
metric = "RMSE",
tuneList = list(
rf = caretModelSpec(method = "ranger",
importance = "permutation",
tuneGrid = data.frame(mtry = ceiling((ncol(CEP_atlas_cart_data)-2)/3),
splitrule = "variance",
min.node.size = 5)),
gbm = caretModelSpec(method = "gbm",
tuneGrid = data.frame(n.trees = 500,
interaction.depth = 1,
shrinkage = 0.1,
n.minobsinnode = 5)),
xgboost = caretModelSpec(method = "xgbTree",
tuneGrid = expand.grid(nrounds = 500,
max_depth = 5,
eta = 0.1,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1))
)
)
)
}### Merge all of them together
<- list()
CEP_models "col"]] <- CEP_atlas_models_col
CEP_models[["ext"]] <- CEP_atlas_models_ext
CEP_models[["pers"]] <- CEP_atlas_models_pers
CEP_models[[saveRDS(CEP_models, file = "../models/nCEP_atlas.rds")
CzBBS
<- readRDS("../data/data_model_fitting/CEP_JPSP_cart_data.rds")
CEP_JPSP_cart_data <- CEP_JPSP_cart_data[!grepl("^proba", CEP_JPSP_cart_data$metric),] CEP_JPSP_cart_data
for(THE_metric in unique(CEP_JPSP_cart_data$metric)){
assign(
paste0("CEP_JPSP_models_", THE_metric),
caretList(
~ .,
value data = CEP_JPSP_cart_data %>%
filter(metric == THE_metric) %>%
::select(-metric) %>%
dplyrfilter(!is.na(value)),
trControl = traindata,
metric = "RMSE",
tuneList = list(
rf = caretModelSpec(method = "ranger",
importance = "permutation",
tuneGrid = data.frame(mtry = ceiling((ncol(CEP_JPSP_cart_data)-2)/3),
splitrule = "variance",
min.node.size = 5)),
gbm = caretModelSpec(method = "gbm",
tuneGrid = data.frame(n.trees = 500,
interaction.depth = 1,
shrinkage = 0.1,
n.minobsinnode = 5)),
xgboost = caretModelSpec(method = "xgbTree",
tuneGrid = expand.grid(nrounds = 500,
max_depth = 5,
eta = 0.1,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1))
)
)
)
}### Merge all of them together
<- list()
CEP_models_JPSP "col"]] <- `CEP_JPSP_models_colonization`
CEP_models_JPSP[["ext"]] <- `CEP_JPSP_models_extinction`
CEP_models_JPSP[["pers"]] <- `CEP_JPSP_models_persistence`
CEP_models_JPSP[[saveRDS(CEP_models_JPSP, file = "../models/nCEP_JPSP.rds")
Probability of Colonization, Extinction and Persitence
CzAtlas
## Load data for learning
<- readRDS("../data/data_model_fitting/CEP_atlas_cart_data.rds")
CEP_atlas_cart_data <- CEP_atlas_cart_data[grepl("^proba", CEP_atlas_cart_data$metric),] CEP_atlas_cart_data
## For each metric, perform repeated cross-validation with the 3 algorithms
## Learn for each metric
for(THE_metric in unique(CEP_atlas_cart_data$metric)){
assign(
paste0("CEP_atlas_models_", THE_metric),
caretList(
~ .,
value data = CEP_atlas_cart_data %>% filter(metric == THE_metric) %>% dplyr::select(-quadrat, -metric, -trials),
trControl = traindata,
metric = "RMSE",
tuneList = list(
rf = caretModelSpec(method = "ranger",
importance = "permutation",
tuneGrid = data.frame(mtry = ceiling((ncol(CEP_atlas_cart_data)-2)/3),
splitrule = "variance",
min.node.size = 5)),
gbm = caretModelSpec(method = "gbm",
tuneGrid = data.frame(n.trees = 500,
interaction.depth = 1,
shrinkage = 0.1,
n.minobsinnode = 5)),
xgboost = caretModelSpec(method = "xgbTree",
tuneGrid = expand.grid(nrounds = 500,
max_depth = 5,
eta = 0.1,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1))
)
)
)
}### Merge all of them together
<- list()
CEP_models "col"]] <- CEP_atlas_models_proba_colonized
CEP_models[["ext"]] <- CEP_atlas_models_proba_ext
CEP_models[["pers"]] <- CEP_atlas_models_proba_pers
CEP_models[[saveRDS(CEP_models, file = "../models/probaCEP_atlas.rds")
CzBBS
<- readRDS("../data/data_model_fitting/CEP_JPSP_cart_data.rds")
CEP_JPSP_cart_data <- CEP_JPSP_cart_data[grepl("^proba", CEP_JPSP_cart_data$metric),] CEP_JPSP_cart_data
for(THE_metric in unique(CEP_JPSP_cart_data$metric)){
assign(
paste0("CEP_JPSP_models_", THE_metric),
caretList(
~ .,
value data = CEP_JPSP_cart_data %>%
filter(metric == THE_metric) %>%
::select(-metric) %>%
dplyrfilter(!is.na(value)),
trControl = traindata,
metric = "RMSE",
tuneList = list(
rf = caretModelSpec(method = "ranger",
importance = "permutation",
tuneGrid = data.frame(mtry = ceiling((ncol(CEP_JPSP_cart_data)-2)/3),
splitrule = "variance",
min.node.size = 5)),
gbm = caretModelSpec(method = "gbm",
tuneGrid = data.frame(n.trees = 500,
interaction.depth = 1,
shrinkage = 0.1,
n.minobsinnode = 5)),
xgboost = caretModelSpec(method = "xgbTree",
tuneGrid = expand.grid(nrounds = 500,
max_depth = 5,
eta = 0.1,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1))
)
)
)
}### Merge all of them together
<- list()
CEP_models_JPSP "col"]] <- `CEP_JPSP_models_proba colonized`
CEP_models_JPSP[["ext"]] <- `CEP_JPSP_models_proba extinction`
CEP_models_JPSP[["pers"]] <- `CEP_JPSP_models_proba persistence`
CEP_models_JPSP[[saveRDS(CEP_models_JPSP, file = "../models/probaCEP_JPSP.rds")
Predictions
Species richness
# Load the models
<- readRDS(file = "../models/sr_atlas.rds")
models_list_atlas <- readRDS(file = "../models/sr_bbs.rds") models_list_JPSP
CzAtlas
## Load the data used for predictions
load("../data/data_to_predict/atlas_data_topredict.Rdata")
## In order to set a fixed values of sampling effort for the different spatial scales, we fit a quadratic regression between area and sampling effort
<- lm(effort ~ poly(spatial_scale, 2), data = atlas_data_topredict) polynome
## Original grid cell ######
<-
pred130sqkm %>%
atlas_data_topredict filter(spatial_scale == 130) %>%
::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>%
dplyrmutate(time_span = 4,
effort = round(predict(polynome, newdata = data.frame(spatial_scale = 130))))
## Make the prediction
$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred130sqkm)))
pred130sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 130]
pred130sqkm## And compute the trend
### M2 --> M4
<-
slope_130_M2M4 %>%
pred130sqkm group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
})### M2 --> M3
<-
slope_130_M2M3 %>%
pred130sqkm filter(Date != 2015) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
})### M3 --> M4
<-
slope_130_M3M4 %>%
pred130sqkm filter(Date != 1987) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
})
## 2 by 2 grids ######
## Prepare the dataset
<-
pred530sqkm %>%
atlas_data_topredict filter(spatial_scale == 530) %>%
::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>%
dplyrmutate(time_span = 4,
effort = round(predict(polynome, newdata = data.frame(spatial_scale = 530))))
## Make the prediction
$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred530sqkm)))
pred530sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 530]
pred530sqkm## And compute the trend
### M2 --> M4
<-
slope_530_M2M4 %>%
pred530sqkm group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) ## take off the cells of different area which contains less than 4 subcells
filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){3}$)", ., perl = T)))
### M2 --> M3
<-
slope_530_M2M3 %>%
pred530sqkm filter(Date != 2015) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) ## take off the cells of different area which contains less than 4 subcells
filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){3}$)", ., perl = T)))
### M3 --> M4
<-
slope_530_M3M4 %>%
pred530sqkm filter(Date != 1987) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) ## take off the cells of different area which contains less than 4 subcells
filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){3}$)", ., perl = T)))
## 4 by 4 cells ######
## Prepare the dataset
<-
pred2150sqkm %>%
atlas_data_topredict filter(spatial_scale == 2150) %>%
::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>%
dplyrmutate(time_span = 4,
effort = round(predict(polynome, newdata = data.frame(spatial_scale = 2150))))
## Make the prediction
$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred2150sqkm)))
pred2150sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 2150]
pred2150sqkm## And compute the trend
## M2 --> M4
<-
slope_2150_M2M4 %>%
pred2150sqkm group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){15}$)", ., perl = T)))
## M2 --> M3
<-
slope_2150_M2M3 %>%
pred2150sqkm filter(Date != 2015) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){15}$)", ., perl = T)))
## M3 --> M4
<-
slope_2150_M3M4 %>%
pred2150sqkm filter(Date != 1987) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){15}$)", ., perl = T)))
## Entire Czechia ######
<-
predCR %>%
atlas_data_topredict filter(spatial_scale == 83900) %>%
::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>%
dplyrmutate(time_span = 4,
effort = round(predict(polynome, newdata = data.frame(spatial_scale = 83900))))
## Make the prediction
$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(predCR)))
predCR$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 83900]
predCR## And compute the trend
### M2 --> M4
<-
slope_CR_M2M4 %>%
predCR group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
})### M2 --> M3
<-
slope_CR_M2M3 %>%
predCR filter(Date != 2015) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
})### M3 --> M4
<-
slope_CR_M3M4 %>%
predCR filter(Date != 1987) %>%
group_by(KVADRAT) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
})
## Add the 100x100 km
load(file = "../data/data_to_predict/atlas_data_topredict_with10000sqkm.Rdata")
## Prepare the dataset
<-
pred10000sqkm %>%
atlas_data_topredict_with10000sqkm filter(spatial_scale == 10000) %>%
rbind(atlas_data_topredict_with10000sqkm %>%
filter(spatial_scale == 10000) %>%
mutate(Date = 2002)) %>%
rbind(atlas_data_topredict_with10000sqkm %>%
filter(spatial_scale == 10000) %>%
mutate(Date = 2015)) %>%
::select(-c(spatial_scale))
dplyr## Make the prediction
$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred10000sqkm)))
pred10000sqkm# pred10000sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 10000]
## And compute the trend
## M2 --> M4
<-
slope_10000_M2M4 %>%
pred10000sqkm group_by(elevation) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) filter(!is.na(elevation))
## M2 --> M3
<-
slope_10000_M2M3 %>%
pred10000sqkm filter(Date != 2015) %>%
group_by(elevation) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) filter(!is.na(elevation))
## M3 --> M4
<-
slope_2150_M3M4 %>%
pred2150sqkm filter(Date != 1987) %>%
group_by(elevation) %>%
do({
<- lm(pred_sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2])
%>%
}) filter(!is.na(elevation))
CzBBS
## Load data used for predictions
load("../data/data_to_predict/JPSP_topredict.Rdata")
## Create the predictions
<- predict(models_list_JPSP$rf$finalModel, data = JPSP_topredict %>% dplyr::select(-id))
JPSP_predictions ## Add them to the data
$sr <- round(JPSP_predictions$predictions)
JPSP_topredict## Now compute the trends
### M2 --> M4
<-
slope_JPSP_M2M4 %>%
JPSP_topredict group_by(id, AREA) %>%
do({
<- lm(sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2],
pval = (summary(mod)$coefficients[2,"Pr(>|t|)"]))
%>%
}) left_join(JPSP_topredict %>% dplyr::select(id, AREA), by = c("id", "AREA")) %>%
unique()
#### M2 --> M3
<-
slope_JPSP_M2M3 %>%
JPSP_topredict filter(Date != 2015) %>%
group_by(id, AREA) %>%
do({
<- lm(sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2],
pval = (summary(mod)$coefficients[2,"Pr(>|t|)"]))
%>%
}) left_join(JPSP_topredict %>% dplyr::select(id, AREA), by = c("id", "AREA")) %>%
unique()
#### M3 --> M4
<-
slope_JPSP_M3M4 %>%
JPSP_topredict filter(Date != 1987) %>%
group_by(id, AREA) %>%
do({
<- lm(sr ~ Date, data = .)
mod data.frame(slope = coef(mod)[2],
pval = (summary(mod)$coefficients[2,"Pr(>|t|)"]))
%>%
}) left_join(JPSP_topredict %>% dplyr::select(id, AREA), by = c("id", "AREA")) %>%
unique()
Number of Colonization, Extinction and Persistence
CzAtlas
<- readRDS("../models/nCEP_atlas.rds") nCEP_atlas_models
## Create the dataframe to predict
<-
nCEP_topredict %>%
atlas_data_topredict filter(AREA != 0) %>%
::select(-c(Date, time_span, sr, effort)) %>%
dplyrdistinct() %>%
mutate(effort_diff = 0,
timespan_diff = 0) %>%
## add the periods
crossing(period = c("M2M3", "M3M4", "M2M4"))
## And make predictions for each metric
<- data.frame()
predicted_nCEP_atlas ## predictions for each metric
for(THE_metric in names(nCEP_atlas_models)){
<- nCEP_atlas_models[[THE_metric]][["xgboost"]]
model <- predict(model,
pred newdata = nCEP_topredict)
<-
predicted_nCEP_atlas rbind(predicted_nCEP_atlas,
%>% cbind(pred = round(pred)) %>% mutate(metric = THE_metric))
nCEP_topredict
}## Also, compute jaccard index
## Compute turnover
<-
predicted_nCEP_atlas %>%
predicted_nCEP_atlas pivot_wider(id_cols = c(Lat, Long, AREA, polypoint_ratio, KVADRAT, elevation, spatial_scale, effort_diff, timespan_diff),
names_from = c(metric,period),
values_from = pred,
names_sep = "") %>%
group_by(Lat, Long, AREA, polypoint_ratio, KVADRAT, elevation, spatial_scale, effort_diff, timespan_diff) %>%
mutate(jaccardM2M3 = persM2M3/(persM2M3 + colM2M3 + extM2M3),
jaccardM3M4 = persM3M4/(persM3M4 + colM3M4 + extM3M4),
jaccardM2M4 = persM2M4/(persM2M4 + colM2M4 + extM2M4)) %>%
ungroup()
CzBBS
<- readRDS("../models/nCEP_JPSP.rds") nCEP_BBS_models
<- data.frame()
predicted_nCEP_BBS for(THE_metric in names(nCEP_BBS_models)){
<- nCEP_BBS_models[[THE_metric]][["rf"]]
model <-
pred predict(model,
newdata = JPSP_topredict %>%
::select(-time_span, -id, -sr, -Date, polypoint_ratio, Lat, Long, elevation, AREA) %>%
dplyrmutate(timespan_diff = 0) %>%
crossing(period = c("M2M3", "M3M4", "M2M4")))
<-
predicted_nCEP_BBS rbind(
predicted_nCEP_BBS,%>%
JPSP_topredict ::select(time_span, Lat, Long, elevation, AREA) %>%
dplyrmutate(metric = THE_metric) %>%
crossing(period = c("M2M3", "M3M4", "M2M4")) %>%
cbind(pred = round(pred))
)
}## Compute turnover
<-
predicted_nCEP_BBS %>%
predicted_nCEP_BBS pivot_wider(id_cols = c(Lat, Long, AREA, elevation, time_span),
names_from = c(metric,period),
values_from = pred,
names_sep = "") %>%
group_by(Lat, Long, AREA, elevation, time_span) %>%
mutate(jaccardM2M3 = persM2M3/(persM2M3 + colM2M3 + extM2M3),
jaccardM3M4 = persM3M4/(persM3M4 + colM3M4 + extM3M4),
jaccardM2M4 = persM2M4/(persM2M4 + colM2M4 + extM2M4)) %>%
ungroup()
Probability of Colonization, Extinction and Persistence
CzAtlas
<- readRDS("../models/probaCEP_atlas.rds") pCEP_atlas_models
## Create the dataframe to predict
<-
pCEP_topredict %>%
atlas_data_topredict filter(!(((KVADRAT == 6844 & Date == 2015 & effort != 5))|(KVADRAT == 7248 & Date == 2015 & effort != 5))) %>%
filter(AREA != 0) %>%
::select(-c(Date, time_span, sr, effort)) %>%
dplyrdistinct() %>%
mutate(effort_diff = 0,
timespan_diff = 0) %>%
## add the periods
crossing(period = c("M2M3", "M3M4", "M2M4"))
## And make predictions for each metric
<- data.frame()
predicted_pCEP_atlas ## predictions for each metric
for(THE_metric in names(pCEP_atlas_models)){
<- pCEP_atlas_models[[THE_metric]][["xgboost"]]
model <- predict(model,
pred newdata = pCEP_topredict)
<-
predicted_pCEP_atlas rbind(predicted_pCEP_atlas,
%>% cbind(pred) %>% mutate(metric = THE_metric))
pCEP_topredict }
CzBBS
<- readRDS("../models/probaCEP_JPSP.rds") pCEP_BBS_models
<- data.frame()
predicted_pCEP_BBS for(THE_metric in names(pCEP_BBS_models)){
<- pCEP_BBS_models[[THE_metric]][["rf"]]
model <-
pred predict(model,
newdata = JPSP_topredict %>%
::select(-time_span, -id, -sr, -Date, polypoint_ratio, Lat, Long, elevation, AREA) %>%
dplyrmutate(timespan_diff = 0) %>%
crossing(period = c("M2M3", "M3M4", "M2M4")))
<-
predicted_pCEP_BBS rbind(
predicted_pCEP_BBS,%>%
JPSP_topredict ::select(time_span, Lat, Long, elevation, AREA) %>%
dplyrmutate(metric = THE_metric) %>%
crossing(period = c("M2M3", "M3M4", "M2M4")) %>%
cbind(pred)
) }
Figures
Species richness
# M2M4 ####
pdf("../figures/sr_spatialscaling_M2M4.pdf",
height = 5, width = 5.5)
# Data management
%>%
slope_JPSP_M2M4 ::rename(area = AREA) %>%
dplyr::select(slope, area) %>%
dplyrmutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>%
rbind(
%>%
slope_130_M2M4 ::select(slope) %>%
dplyrmutate(area = 130) %>%
rbind(
%>%
slope_530_M2M4 ::select(slope) %>%
dplyrmutate(area = 530)
%>%
) rbind(
%>%
slope_2150_M2M4 ::select(slope) %>%
dplyrmutate(area = 2150)
%>%
) # rbind(
# slope_10000_M2M4 %>%
# dplyr::select(slope) %>%
# mutate(area = 10000)
# ) %>%
rbind(
%>%
slope_CR_M2M4 ::select(slope) %>%
dplyrmutate(area = 83900)
)%>%
) ## Plot
ggplot() +
geom_boxplot(aes(area, slope, group = area))+
# geom_boxplot(aes(area, slope, group = area), data = . %>% filter(area == 10000),
# color = "blue", width = 0.3, size = 1)+
scale_x_log10()+
xlab("Spatial grain (km²)")+
ylab("Species Richness change\nper year")+
ylim(-2,2)+
theme(panel.background = element_blank(),
panel.grid.major = element_line(colour = "lightgrey"),
axis.text.x = element_markdown())+
stat_summary(aes(area, slope), fun.y = median, geom = 'line', linetype = 2, size = 1, color = "blue")+
stat_summary(aes(area, slope), fun = mean,
geom = "point", col = "red", size = 1)+
stat_summary(aes(area, slope), fun.data = mean_se,
geom = "errorbar", size = .5, width = .1, color = "red")+
geom_hline(yintercept = 0)+
annotation_logticks(sides = "b")+
coord_cartesian(clip = "off")+
theme(text = element_text(size = 16))
dev.off()
png
2
# M2M3 ####
pdf("../figures/sr_spatialscaling_M2M3.pdf",
height = 5, width = 5.5)
%>%
slope_JPSP_M2M3 ::rename(area = AREA) %>%
dplyr::select(slope, area) %>%
dplyrmutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>%
rbind(
%>%
slope_130_M2M3 ::select(slope) %>%
dplyrmutate(area = 130) %>%
rbind(
%>%
slope_530_M2M3 ::select(slope) %>%
dplyrmutate(area = 530)
%>%
) rbind(
%>%
slope_2150_M2M3 ::select(slope) %>%
dplyrmutate(area = 2150)
%>%
) rbind(
%>%
slope_CR_M2M3 ::select(slope) %>%
dplyrmutate(area = 83900)
)%>%
) ggplot() +
geom_boxplot(aes(area, slope, group = area))+
scale_x_log10()+
xlab("Spatial grain (km²)")+
ylab("Species Richness change\nper year")+
ylim(-2,2)+
theme(panel.background = element_blank(),
panel.grid.major = element_line(colour = "lightgrey"),
axis.text.x = element_markdown())+
stat_summary(aes(area, slope), fun.y = median, geom = 'line', linetype = 2, size = 1, color = "blue")+
stat_summary(aes(area, slope), fun = mean,
geom = "point", col = "red", size = 1)+
stat_summary(aes(area, slope), fun.data = mean_se,
geom = "errorbar", size = .5, width = .1, color = "red")+
geom_hline(yintercept = 0)+
annotation_logticks(sides = "b")+
coord_cartesian(clip = "off")+
theme(text = element_text(size = 16))
dev.off()
png
2
# M3M4 ####
pdf("../figures/sr_spatialscaling_M3M4.pdf",
height = 5, width = 5.5)
%>%
slope_JPSP_M3M4 ::rename(area = AREA) %>%
dplyr::select(slope, area) %>%
dplyrmutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>%
rbind(
%>%
slope_130_M3M4 ::select(slope) %>%
dplyrmutate(area = 130) %>%
rbind(
%>%
slope_530_M3M4 ::select(slope) %>%
dplyrmutate(area = 530)
%>%
) rbind(
%>%
slope_2150_M3M4 ::select(slope) %>%
dplyrmutate(area = 2150)
%>%
) rbind(
%>%
slope_CR_M3M4 ::select(slope) %>%
dplyrmutate(area = 83900)
)%>%
) ggplot() +
geom_boxplot(aes(area, slope, group = area))+
scale_x_log10()+
xlab("Spatial grain (km²)")+
ylab("Species Richness change\nper year")+
ylim(-2,2)+
theme(panel.background = element_blank(),
panel.grid.major = element_line(colour = "lightgrey"),
axis.text.x = element_markdown())+
stat_summary(aes(area, slope), fun.y = median, geom = 'line', linetype = 2, size = 1, color = "blue")+
stat_summary(aes(area, slope), fun = mean,
geom = "point", col = "red", size = 1)+
stat_summary(aes(area, slope), fun.data = mean_se,
geom = "errorbar", size = .5, width = .1, color = "red")+
geom_hline(yintercept = 0)+
annotation_logticks(sides = "b")+
coord_cartesian(clip = "off")+
theme(text = element_text(size = 16))
dev.off()
png
2
## Figure 4
pdf("../figures/sr_2periods.pdf",
height = 5, width = 5.5)
%>%
slope_JPSP_M2M3 ::rename(area = AREA) %>%
dplyr::select(slope, area) %>%
dplyrmutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>%
rbind(
%>%
slope_130_M2M3 ::select(slope) %>%
dplyrmutate(area = 130) %>%
rbind(
%>%
slope_530_M2M3 ::select(slope) %>%
dplyrmutate(area = 530)
%>%
) rbind(
%>%
slope_2150_M2M3 ::select(slope) %>%
dplyrmutate(area = 2150)
%>%
) rbind(
%>%
slope_CR_M2M3 ::select(slope) %>%
dplyrmutate(area = 83900)
) %>% mutate(period = "M2M3") %>%
) rbind(
%>%
slope_JPSP_M3M4 ::rename(area = AREA) %>%
dplyr::select(slope, area) %>%
dplyrmutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>%
rbind(
%>%
slope_130_M3M4 ::select(slope) %>%
dplyrmutate(area = 130) %>%
rbind(
%>%
slope_530_M3M4 ::select(slope) %>%
dplyrmutate(area = 530)
%>%
) rbind(
%>%
slope_2150_M3M4 ::select(slope) %>%
dplyrmutate(area = 2150)
%>%
) rbind(
%>%
slope_CR_M3M4 ::select(slope) %>%
dplyrmutate(area = 83900)
) %>% mutate(period = "M3M4")
) %>%
) mutate(period = case_when(
== "M2M3" ~ "1985-2003",
period == "M3M4" ~ "2003-2017"
period %>%
)) mutate(period = fct_relevel(period, c("1985-2003", "2003-2017"))) %>%
ggplot()+
geom_boxplot(aes(area, slope, group = interaction(area, period), color = period))+
stat_summary(aes(area, slope, color = period), fun.y = median, geom = 'line', size = 1, linetype = 2)+
scale_x_log10()+
annotation_logticks(sides = "b")+
xlab("Spatial grain (km²)")+
ylab("Species Richness change\nper year")+
labs(color = "Period", shape = "Period", linetype = "Period")+
ylim(-2,2)+
theme_bw()+
theme(text = element_text(size = 16))+
theme(panel.background = element_blank(),
legend.position = "none",
panel.grid.major = element_line(colour = "lightgrey"),
axis.text.x = element_markdown())
dev.off()
png
2
pdf("../figures/trend_details.pdf",
height = 3, width = 11.297)
## Plot the detailed trends
%>%
JPSP_topredict mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51)) %>%
::select(sr, Date, AREA) %>%
dplyrrbind(
%>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 130) %>% dplyr::rename(sr = pred_sr),
pred130sqkm %>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 530) %>% dplyr::rename(sr = pred_sr),
pred530sqkm %>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 2150) %>% dplyr::rename(sr = pred_sr),
pred2150sqkm %>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 83900) %>% dplyr::rename(sr = pred_sr)
predCR %>%
) ggplot(aes(Date, sr))+
stat_summary(fun = median,
geom = "point", col = "red", size = 2)+
theme_bw()+
stat_summary(aes(group = 1), fun.y = median,
geom = "line", linetype = "dashed", size = 1, color = "blue")+
facet_wrap(~ AREA, nrow = 1)+
ylab("Species richness")+
xlab("Year")+
scale_x_continuous(breaks = c(1990,2000,2010))+
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
axis.text = element_text(size = 12),
axis.title = element_text(size = 16))
dev.off()
png
2
Number of Colonization, Extinction, Persistence and Turnover
pdf("../figures/nCEP_scaling.pdf",
height = 5, width = 5.5)
for(periods in c("M2M3", "M3M4", "M2M4")){
print(
%>% dplyr::select(spatial_scale, contains("M", ignore.case = F)) %>% dplyr::rename(AREA = spatial_scale) %>%
predicted_nCEP_atlas rbind(
%>% dplyr::select(contains("M", ignore.case = F), AREA) %>%
predicted_nCEP_BBS mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))
%>%
) pivot_longer(cols = -c("AREA"),
names_to = "period",
values_to = "value") %>%
separate(period, into = c("metric", "period"), sep = c(-4)) %>%
filter(metric == "col" | metric == "ext" | metric == "pers") %>%
filter(period == periods) %>%
ggplot()+
geom_boxplot(aes(AREA, value,
group = interaction(AREA,metric),
fill = metric),
outlier.shape = NA)+
scale_fill_brewer(palette = "Dark2")+
scale_color_brewer(palette = "Dark2")+
scale_x_log10()+
scale_y_log10(limits = c(4,210))+
annotation_logticks(sides = "bl")+
stat_summary(aes(AREA, value, color = metric), fun.y = median, geom = 'line', linetype = 2, size = 1)+
ylab("Count")+
xlab("Spatial grain\n(km²)")+
labs(fill = "Probability")+
theme_bw()+
theme(text = element_text(size = 16),
legend.position = "none")
)
}dev.off()
png
2
pdf("../figures/col_ext_jaccard_2periods.pdf",
height = 5, width = 5.5)
for(THE_metric in c("jaccard", "col", "ext")){
if(THE_metric == "jaccard"){
print(predicted_nCEP_atlas %>% dplyr::select(spatial_scale, contains("M", ignore.case = F)) %>%
::rename(AREA = spatial_scale) %>%
dplyrrbind(predicted_nCEP_BBS %>%
::select(contains("M", ignore.case = F), AREA) %>%
dplyrmutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))) %>%
::select(AREA, contains(THE_metric)) %>%
dplyrpivot_longer(cols = contains(THE_metric),
names_to = "period",
values_to = "value") %>%
mutate(period = str_remove(period, THE_metric)) %>%
mutate(period = case_when(
== "M2M4" ~ "1985-2017",
period == "M2M3" ~ "1985-2003",
period == "M3M4" ~ "2003-2017")) %>%
period mutate(period = fct_relevel(period, c("1985-2017", "1985-2003", "2003-2017"))) %>%
filter(period != "1985-2017") %>%
ggplot()+
geom_boxplot(aes(AREA, value, group = interaction(AREA, period), color = period))+
stat_summary(aes(AREA, value, color = period), fun.y = median, geom = 'line', size = 1, linetype = 2)+
scale_x_log10()+
annotation_logticks(sides = "b")+
xlab("Spatial grain (km²)")+
ylab(THE_metric)+
labs(color = "Period", shape = "Period", linetype = "Period")+
theme_bw()+
theme(text = element_text(size = 16),legend.position = "none"))
else{
}print(predicted_nCEP_atlas %>% dplyr::select(spatial_scale, contains("M", ignore.case = F)) %>%
::rename(AREA = spatial_scale) %>%
dplyrrbind(predicted_nCEP_BBS %>% dplyr::select(contains("M", ignore.case = F), AREA) %>%
mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))) %>%
::select(AREA, contains(THE_metric)) %>%
dplyrpivot_longer(cols = contains(THE_metric),
names_to = "period",
values_to = "value") %>%
mutate(period = str_remove(period, THE_metric)) %>%
mutate(period = case_when(
== "M2M4" ~ "1985-2017",
period == "M2M3" ~ "1985-2003",
period == "M3M4" ~ "2003-2017")) %>%
period mutate(period = fct_relevel(period, c("1985-2017", "1985-2003", "2003-2017"))) %>%
filter(period != "1985-2017") %>%
ggplot()+
geom_boxplot(aes(AREA, value, group = interaction(AREA, period), color = period))+
stat_summary(aes(AREA, value, color = period), fun.y = median, geom = 'line', size = 1, linetype = 2)+
scale_x_log10()+
annotation_logticks(sides = "b")+
xlab("Spatial grain (km²)")+
ylab(THE_metric)+
ylim(0,50)+
labs(color = "Period", shape = "Period", linetype = "Period")+
theme_bw()+
theme(text = element_text(size = 16),
legend.position = "none"))
}
}dev.off()
png
2
pdf("../figures/jaccard.pdf",
height = 5, width = 5.5)
for(period in c("jaccardM2M3","jaccardM3M4")){
print(predicted_nCEP_atlas %>%
::select(spatial_scale, contains("M", ignore.case = F)) %>%
dplyr::rename(AREA = spatial_scale) %>%
dplyrrbind(predicted_nCEP_BBS %>%
::select(contains("M", ignore.case = F), AREA) %>%
dplyrmutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))) %>%
::select(AREA, period) %>%
dplyr::rename(value = period) %>%
dplyrggplot(aes(AREA, value, group = AREA))+
annotation_logticks(sides = "b")+
geom_boxplot()+
scale_x_log10()+
xlab("Spatial grain\n(km²)")+
ylim(.35, 1)+
ylab("Jaccard")+
theme_bw()+
theme(strip.background = element_blank(),
strip.text.x = element_blank(),
text = element_text(size = 16))+
stat_summary(aes(group = 1), fun.y = median,
geom = "line", linetype = "dashed", size = 1, color = "blue"))
}dev.off()
png
2
Probability of Colonization, Extinction, Persistence
pdf("../figures/pCEP.pdf",
height = 5, width = 5.5)
for(time in c("M2M3", "M3M4", "M2M4")) {
print(
%>%
predicted_pCEP_BBS mutate(spatial_scale = ifelse(AREA < 0.2, 0.13, 2.51)) %>%
mutate(period = fct_relevel(period, c("M2M4", "M2M3", "M3M4"))) %>%
::select(spatial_scale, pred, period, metric) %>%
dplyrrbind(predicted_pCEP_atlas %>% dplyr::select(spatial_scale, pred, period, metric)) %>%
mutate(metric = case_when(
== "col" ~ "Colonization",
metric == "ext" ~ "Extinction",
metric == "pers" ~ "Persistence"
metric %>%
)) filter(period == time,
!= "Species pool\ncolonization") %>%
metric ggplot()+
geom_boxplot(aes(spatial_scale, pred,
group = interaction(spatial_scale,metric),
fill = metric),
outlier.shape = NA) +
scale_fill_brewer(palette = "Dark2")+
scale_color_brewer(palette = "Dark2")+
scale_x_log10()+
annotation_logticks(sides = "b")+
stat_summary(aes(spatial_scale, pred, color = metric), fun.y = median, geom = 'line', linetype = 2, size = 1)+
ylim(0,1)+
ylab("Probability")+
xlab("Spatial grain\n(km²)")+
labs(fill = "Probability")+
theme_bw()+
theme(text = element_text(size = 16),
legend.position = "none"))
}dev.off()
png
2
pdf("../figures/pCEP_2periods.pdf",
height = 5, width = 5.5)
for(THE_metric in c("col", "ext")){
print(predicted_pCEP_BBS %>%
mutate(spatial_scale = ifelse(AREA < 0.2, 0.13, 2.51)) %>%
mutate(period = fct_relevel(period, c("M2M4", "M2M3", "M3M4"))) %>%
::select(spatial_scale, pred, period, metric) %>%
dplyrrbind(predicted_pCEP_atlas %>% dplyr::select(spatial_scale, pred, period, metric)) %>%
filter(pred >= 0 & pred <= 1) %>%
filter(period != "M2M4") %>%
filter(metric == THE_metric) %>%
ggplot()+
geom_boxplot(aes(spatial_scale, pred, group = interaction(spatial_scale, period), color = period))+
stat_summary(aes(spatial_scale, pred, color = period, linetype = period), fun.y = median, geom = 'line', linetype = 2, size = 1)+
scale_x_log10()+
annotation_logticks(sides = "b")+
xlab("Spatial grain (km²)")+
ylab(THE_metric)+
ylim(0, .5)+
labs(color = "Period", shape = "Period", linetype = "Period")+
theme_bw()+
theme(text = element_text(size = 16))+
theme(panel.background = element_blank(),
legend.position = "none",
panel.grid.major = element_line(colour = "lightgrey"),
axis.text.x = element_markdown()))
}
dev.off()
png
2
Correlation and Variable importance (Appendix B)
<- readRDS("../data/data_model_fitting/CEP_atlas_cart_data.rds")
CEP_atlas_cart_data $metric[CEP_atlas_cart_data$metric == "proba_colonized"] <- "proba_col"
CEP_atlas_cart_data
<- readRDS("../data/data_model_fitting/CEP_JPSP_cart_data.rds")
CEP_JPSP_cart_data <- CEP_JPSP_cart_data %>%
CEP_JPSP_cart_data filter(metric != "proba colonization") %>%
mutate(metric = case_when(
== "colonization" ~ "col",
metric == "extinction" ~ "ext",
metric == "persistence" ~ "pers",
metric == "proba persistence" ~ "proba_pers",
metric == "proba extinction" ~ "proba_ext",
metric == "proba colonized" ~ "proba_col",
metric
))
pdf("../figures/correlations.pdf",
height = 8.27, width = 11.69)
## For Atlas model
corrplot(cor(atlas_cart_data[,names(atlas_cart_data) != "sr"]),
main = "Correlation - CzAtlas - species richness predictors",
type = "lower", mar=c(0,0,1,0))
ggplot(data = varImp(models_list_atlas$xgboost), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
geom_bar(stat = "identity", fill = "blue") +
labs(x = "Predictor Variables", y = "Importance") +
ggtitle("Variable Importance Plot - CzAtlas - XGBoost algorithm") +
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
%>% filter(metric == "ext") %>%
CEP_atlas_cart_data select(-quadrat, -metric, -trials, -value) %>%
plot(main = "Correlation - CzAtlas - colonization/extinction/persistence")
for(THE_metric in unique(CEP_atlas_cart_data$metric)){
if(!grepl("proba_", THE_metric)){
print(
ggplot(data = varImp(nCEP_atlas_models[[THE_metric]][["xgboost"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
geom_bar(stat = "identity", fill = "blue") +
labs(x = "Predictor Variables", y = "Importance") +
ggtitle(paste0("Variable Importance Plot - CzAtlas - ", THE_metric)) +
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
)else{
}print(
ggplot(data = varImp(pCEP_atlas_models[[gsub("proba_","",THE_metric)]][["xgboost"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
geom_bar(stat = "identity", fill = "blue") +
labs(x = "Predictor Variables", y = "Importance") +
ggtitle(paste0("Variable Importance Plot - CzAtlas - ", THE_metric)) +
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
)
}
}
## For CzBBS model
corrplot(cor(JPSP_cart_data[,names(JPSP_cart_data) != "sr"]),
main = "Correlation - CzBBS - species richness predictors",
type = "lower", mar=c(0,0,1,0))
ggplot(data = varImp(models_list_JPSP$rf), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
geom_bar(stat = "identity", fill = "blue") +
labs(x = "Predictor Variables", y = "Importance") +
ggtitle("Variable Importance Plot - CzBBS Data - random forest algorithm") +
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
%>%
CEP_JPSP_cart_data filter(metric == "ext") %>%
::select(-metric) %>%
dplyrfilter(!is.na(value)) %>%
plot(main = paste0("Correlation - CzBBS - colonization/extinction/persistence predictors"))
for(THE_metric in unique(CEP_JPSP_cart_data$metric)){
if(!grepl("proba_", THE_metric)){
print(
ggplot(data = varImp(nCEP_BBS_models[[THE_metric]][["rf"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
geom_bar(stat = "identity", fill = "blue") +
labs(x = "Predictor Variables", y = "Importance") +
ggtitle(paste0("Variable Importance Plot - CzBBS - ", THE_metric)) +
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
)else{
}print(
ggplot(data = varImp(pCEP_atlas_models[[gsub("proba_","",THE_metric)]][["xgboost"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
geom_bar(stat = "identity", fill = "blue") +
labs(x = "Predictor Variables", y = "Importance") +
ggtitle(paste0("Variable Importance Plot - CzBBS - ", THE_metric)) +
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
)
}
}
dev.off()
png
2