Decomposing biodiversity change to processes of extinction, colonization, and persistence across scales

This document contains the code to reproduce the models fitting, the predictions and the figures of our manuscript. We modeled 1) species richness, 2) numbers and 3) probabilities of extinction, colonization and persistence. Models were fitted for the Atlas datasets (hereafter CzAtlas) on one hand and for the Czech Breeding Birds Survey on the other hand (hereafter CzBBS, or JPSP for Jednotný program sčítání ptáků) using repeated cross-validation with 3 different tree-based algorithms.

The data used (i.e. CzAtlas and CzBBS) are not open thus we provide them already processed into species richness, colonization, extinction… per unit of area. If you are interested in accessing the raw datasets (with species identities), feel free to contact the corresponding author.

Model fitting

Species richness

CzAtlas

rm(list = ls())
library(caret)
library(caretEnsemble)
library(dplyr)
library(ranger)
library(ggtext)
library(cowplot)
library(forcats)
library(tidyr)
library(stringr)
library(corrplot)
## Load data
load("../data/data_model_fitting/atlas_cart_data.Rdata")

## We perform repeated (n = 3) cross-validation (k = 10) using the caret package
# First we split the dataset into training and validation datasets
# set.seed(333)
traindata <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
## We use three tree-based algorithms in order to assess which one perform the best
## Create the list of models
model_list_atlas <-
  caretList(
    sr ~ ., 
    data = atlas_cart_data,
    trControl = traindata,
    metric = "RMSE",
    tuneList = list(
      rf =  caretModelSpec(method = "ranger", 
                           importance = "permutation",
                           tuneGrid = data.frame(mtry = ceiling((ncol(atlas_cart_data)-1)/3),
                                                                    splitrule = "variance",
                                                                    min.node.size = 5)),
      gbm = caretModelSpec(method = "gbm",
                           distribution = "poisson",
                           # keep.data = TRUE,
                           tuneGrid = data.frame(n.trees = 500,
                                                 interaction.depth = 1,
                                                 shrinkage = 0.1,
                                                 n.minobsinnode = 5)),
      xgboost = caretModelSpec(method = "xgbTree",
                               tuneGrid = expand.grid(nrounds = 500,
                                                     max_depth = 5,
                                                     eta = 0.1,
                                                     gamma = 0,
                                                     colsample_bytree = 1,
                                                     min_child_weight = 1,
                                                     subsample = 1))
    )
  )

# saveRDS(model_list_atlas, "../models/sr_atlas.rds")

CzBBS (or JPSP)

# Load data
load("../data/data_model_fitting/JPSP_cart_data.Rdata")

# First, convert the date into a numeric format (here time stamp since 1970-01-01)
JPSP_cart_data$Date <- as.numeric(JPSP_cart_data$Date)

## Reduce the time_span
JPSP_cart_data <- JPSP_cart_data %>% dplyr::filter(time_span <= 5 & time_span >= 3)
## Create the list of models
model_list_JPSP <-
  caretList(
    sr ~ ., 
    data = JPSP_cart_data,
    trControl = traindata,
    metric = "RMSE",
    tuneList = list(
      rf =  caretModelSpec(method = "ranger",
                           importance = "permutation",
                           tuneGrid = data.frame(mtry = ceiling((ncol(JPSP_cart_data)-1)/3),
                                                                    splitrule = "variance",
                                                                    min.node.size = 5)),
      gbm = caretModelSpec(method = "gbm",
                           distribution = "poisson",
                           tuneGrid = data.frame(n.trees = 500,
                                                 interaction.depth = 1,
                                                 shrinkage = 0.1,
                                                 n.minobsinnode = 5)),
      xgboost = caretModelSpec(method = "xgbTree",
                               tuneGrid = expand.grid(nrounds = 500,
                                                     max_depth = 5,
                                                     eta = 0.1,
                                                     gamma = 0,
                                                     colsample_bytree = 1,
                                                     min_child_weight = 1,
                                                     subsample = 1))
    )
  )

# saveRDS(model_list_JPSP, file = "../models/sr_bbs.rds")

Number of Colonization, Extinction and Persistence (CEP)

CzAtlas

## Load data for learning
CEP_atlas_cart_data <- readRDS("../data/data_model_fitting/CEP_atlas_cart_data.rds")
CEP_atlas_cart_data <- CEP_atlas_cart_data[!grepl("^proba", CEP_atlas_cart_data$metric),]
## For each metric, perform repeated cross-validation with the 3 algorithms
## Learn for each metric
for(THE_metric in unique(CEP_atlas_cart_data$metric)){
  
  assign(
    paste0("CEP_atlas_models_", THE_metric),
    caretList(
      value ~ ., 
      data = CEP_atlas_cart_data %>% filter(metric == THE_metric) %>% dplyr::select(-quadrat, -metric, -trials),
      trControl = traindata,
      metric = "RMSE",
      tuneList = list(
        rf =  caretModelSpec(method = "ranger",
                             importance = "permutation",
                             tuneGrid = data.frame(mtry = ceiling((ncol(CEP_atlas_cart_data)-2)/3),
                                                                      splitrule = "variance",
                                                                      min.node.size = 5)),
        gbm = caretModelSpec(method = "gbm",
                             tuneGrid = data.frame(n.trees = 500,
                                                   interaction.depth = 1,
                                                   shrinkage = 0.1,
                                                   n.minobsinnode = 5)),
        xgboost = caretModelSpec(method = "xgbTree",
                                 tuneGrid = expand.grid(nrounds = 500,
                                                       max_depth = 5,
                                                       eta = 0.1,
                                                       gamma = 0,
                                                       colsample_bytree = 1,
                                                       min_child_weight = 1,
                                                       subsample = 1))
      )
    )
  )
}
### Merge all of them together
CEP_models <- list()
CEP_models[["col"]] <-  CEP_atlas_models_col
CEP_models[["ext"]] <-  CEP_atlas_models_ext
CEP_models[["pers"]] <-  CEP_atlas_models_pers
saveRDS(CEP_models, file = "../models/nCEP_atlas.rds")

CzBBS

CEP_JPSP_cart_data <- readRDS("../data/data_model_fitting/CEP_JPSP_cart_data.rds")
CEP_JPSP_cart_data <- CEP_JPSP_cart_data[!grepl("^proba", CEP_JPSP_cart_data$metric),]
for(THE_metric in unique(CEP_JPSP_cart_data$metric)){
  
  assign(
    paste0("CEP_JPSP_models_", THE_metric),
    caretList(
      value ~ ., 
      data = CEP_JPSP_cart_data %>% 
        filter(metric == THE_metric) %>%
        dplyr::select(-metric) %>% 
        filter(!is.na(value)),
      trControl = traindata,
      metric = "RMSE",
      tuneList = list(
        rf =  caretModelSpec(method = "ranger",
                             importance = "permutation",
                             tuneGrid = data.frame(mtry = ceiling((ncol(CEP_JPSP_cart_data)-2)/3),
                                                                      splitrule = "variance",
                                                                      min.node.size = 5)),
        gbm = caretModelSpec(method = "gbm",
                             tuneGrid = data.frame(n.trees = 500,
                                                   interaction.depth = 1,
                                                   shrinkage = 0.1,
                                                   n.minobsinnode = 5)),
        xgboost = caretModelSpec(method = "xgbTree",
                                 tuneGrid = expand.grid(nrounds = 500,
                                                       max_depth = 5,
                                                       eta = 0.1,
                                                       gamma = 0,
                                                       colsample_bytree = 1,
                                                       min_child_weight = 1,
                                                       subsample = 1))
      )
    )
  )
}
### Merge all of them together
CEP_models_JPSP <- list()
CEP_models_JPSP[["col"]] <-  `CEP_JPSP_models_colonization`
CEP_models_JPSP[["ext"]] <-  `CEP_JPSP_models_extinction`
CEP_models_JPSP[["pers"]] <-  `CEP_JPSP_models_persistence`
saveRDS(CEP_models_JPSP, file = "../models/nCEP_JPSP.rds")

Probability of Colonization, Extinction and Persitence

CzAtlas

## Load data for learning
CEP_atlas_cart_data <- readRDS("../data/data_model_fitting/CEP_atlas_cart_data.rds")
CEP_atlas_cart_data <- CEP_atlas_cart_data[grepl("^proba", CEP_atlas_cart_data$metric),]
## For each metric, perform repeated cross-validation with the 3 algorithms
## Learn for each metric
for(THE_metric in unique(CEP_atlas_cart_data$metric)){
  
  assign(
    paste0("CEP_atlas_models_", THE_metric),
    caretList(
      value ~ ., 
      data = CEP_atlas_cart_data %>% filter(metric == THE_metric) %>% dplyr::select(-quadrat, -metric, -trials),
      trControl = traindata,
      metric = "RMSE",
      tuneList = list(
        rf =  caretModelSpec(method = "ranger",
                             importance = "permutation",
                             tuneGrid = data.frame(mtry = ceiling((ncol(CEP_atlas_cart_data)-2)/3),
                                                                      splitrule = "variance",
                                                                      min.node.size = 5)),
        gbm = caretModelSpec(method = "gbm",
                             tuneGrid = data.frame(n.trees = 500,
                                                   interaction.depth = 1,
                                                   shrinkage = 0.1,
                                                   n.minobsinnode = 5)),
        xgboost = caretModelSpec(method = "xgbTree",
                                 tuneGrid = expand.grid(nrounds = 500,
                                                       max_depth = 5,
                                                       eta = 0.1,
                                                       gamma = 0,
                                                       colsample_bytree = 1,
                                                       min_child_weight = 1,
                                                       subsample = 1))
      )
    )
  )
}
### Merge all of them together
CEP_models <- list()
CEP_models[["col"]] <-  CEP_atlas_models_proba_colonized
CEP_models[["ext"]] <-  CEP_atlas_models_proba_ext
CEP_models[["pers"]] <-  CEP_atlas_models_proba_pers
saveRDS(CEP_models, file = "../models/probaCEP_atlas.rds")

CzBBS

CEP_JPSP_cart_data <- readRDS("../data/data_model_fitting/CEP_JPSP_cart_data.rds")
CEP_JPSP_cart_data <- CEP_JPSP_cart_data[grepl("^proba", CEP_JPSP_cart_data$metric),]
for(THE_metric in unique(CEP_JPSP_cart_data$metric)){
  
  assign(
    paste0("CEP_JPSP_models_", THE_metric),
    caretList(
      value ~ ., 
      data = CEP_JPSP_cart_data %>% 
        filter(metric == THE_metric) %>%
        dplyr::select(-metric) %>% 
        filter(!is.na(value)),
      trControl = traindata,
      metric = "RMSE",
      tuneList = list(
        rf =  caretModelSpec(method = "ranger",
                             importance = "permutation",
                             tuneGrid = data.frame(mtry = ceiling((ncol(CEP_JPSP_cart_data)-2)/3),
                                                                      splitrule = "variance",
                                                                      min.node.size = 5)),
        gbm = caretModelSpec(method = "gbm",
                             tuneGrid = data.frame(n.trees = 500,
                                                   interaction.depth = 1,
                                                   shrinkage = 0.1,
                                                   n.minobsinnode = 5)),
        xgboost = caretModelSpec(method = "xgbTree",
                                 tuneGrid = expand.grid(nrounds = 500,
                                                       max_depth = 5,
                                                       eta = 0.1,
                                                       gamma = 0,
                                                       colsample_bytree = 1,
                                                       min_child_weight = 1,
                                                       subsample = 1))
      )
    )
  )
}
### Merge all of them together
CEP_models_JPSP <- list()
CEP_models_JPSP[["col"]] <-  `CEP_JPSP_models_proba colonized`
CEP_models_JPSP[["ext"]] <-  `CEP_JPSP_models_proba extinction`
CEP_models_JPSP[["pers"]] <-  `CEP_JPSP_models_proba persistence`
saveRDS(CEP_models_JPSP, file = "../models/probaCEP_JPSP.rds")

Predictions

Species richness

# Load the models
models_list_atlas <- readRDS(file = "../models/sr_atlas.rds")
models_list_JPSP <- readRDS(file = "../models/sr_bbs.rds")

CzAtlas

## Load the data used for predictions
load("../data/data_to_predict/atlas_data_topredict.Rdata")
## In order to set a fixed values of sampling effort for the different spatial scales, we fit a quadratic regression between area and sampling effort
polynome <- lm(effort ~ poly(spatial_scale, 2), data = atlas_data_topredict)
## Original grid cell ######
pred130sqkm <-
  atlas_data_topredict %>%
  filter(spatial_scale == 130) %>% 
  dplyr::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>% 
  mutate(time_span = 4,
         effort = round(predict(polynome, newdata = data.frame(spatial_scale = 130))))
## Make the prediction
pred130sqkm$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred130sqkm)))
pred130sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 130]
## And compute the trend
### M2 --> M4 
slope_130_M2M4 <- 
pred130sqkm %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  })
### M2 --> M3 
slope_130_M2M3 <- 
pred130sqkm %>% 
  filter(Date != 2015) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  })
### M3 --> M4
slope_130_M3M4 <- 
pred130sqkm %>% 
  filter(Date != 1987) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  })

## 2 by 2 grids ######
## Prepare the dataset
pred530sqkm <-
  atlas_data_topredict %>%
  filter(spatial_scale == 530) %>% 
  dplyr::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>% 
  mutate(time_span = 4,
         effort = round(predict(polynome, newdata = data.frame(spatial_scale = 530))))
## Make the prediction
pred530sqkm$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred530sqkm)))
pred530sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 530]
## And compute the trend
### M2 --> M4
slope_530_M2M4 <-
pred530sqkm %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  ## take off the cells of different area which contains less than 4 subcells
  filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){3}$)", ., perl = T)))
### M2 --> M3
slope_530_M2M3 <-
pred530sqkm %>% 
  filter(Date != 2015) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  ## take off the cells of different area which contains less than 4 subcells
  filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){3}$)", ., perl = T)))
### M3 --> M4
slope_530_M3M4 <-
pred530sqkm %>% 
  filter(Date != 1987) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  ## take off the cells of different area which contains less than 4 subcells
  filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){3}$)", ., perl = T)))

## 4 by 4 cells ######
## Prepare the dataset
pred2150sqkm <-
  atlas_data_topredict %>%
  filter(spatial_scale == 2150) %>% 
  dplyr::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>% 
  mutate(time_span = 4,
         effort = round(predict(polynome, newdata = data.frame(spatial_scale = 2150))))
## Make the prediction
pred2150sqkm$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred2150sqkm)))
pred2150sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 2150]
## And compute the trend
## M2 --> M4 
slope_2150_M2M4 <-
pred2150sqkm %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){15}$)", ., perl = T)))
## M2 --> M3
slope_2150_M2M3 <-
pred2150sqkm %>% 
  filter(Date != 2015) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){15}$)", ., perl = T)))
## M3 --> M4
slope_2150_M3M4 <-
pred2150sqkm %>% 
  filter(Date != 1987) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  filter_at(vars(KVADRAT), any_vars(grepl("^[^,\n]*((,[^,\n]*){15}$)", ., perl = T)))

## Entire Czechia ######
predCR <- 
  atlas_data_topredict %>%
  filter(spatial_scale == 83900) %>% 
  dplyr::select(-c(KVADRAT, sr, time_span, effort, spatial_scale)) %>% 
  mutate(time_span = 4,
         effort = round(predict(polynome, newdata = data.frame(spatial_scale = 83900))))
## Make the prediction
predCR$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(predCR)))
predCR$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 83900]
## And compute the trend
### M2 --> M4
slope_CR_M2M4 <- 
predCR %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  })
### M2 --> M3
slope_CR_M2M3 <- 
predCR %>% 
  filter(Date != 2015) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  })
### M3 --> M4
slope_CR_M3M4 <- 
predCR %>% 
  filter(Date != 1987) %>% 
  group_by(KVADRAT) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  })
## Add the 100x100 km
load(file = "../data/data_to_predict/atlas_data_topredict_with10000sqkm.Rdata")
## Prepare the dataset
pred10000sqkm <-
  atlas_data_topredict_with10000sqkm %>%
  filter(spatial_scale == 10000) %>% 
  rbind(atlas_data_topredict_with10000sqkm %>%
          filter(spatial_scale == 10000) %>% 
          mutate(Date = 2002)) %>% 
    rbind(atlas_data_topredict_with10000sqkm %>%
          filter(spatial_scale == 10000) %>% 
          mutate(Date = 2015)) %>% 
  dplyr::select(-c(spatial_scale))
## Make the prediction
pred10000sqkm$pred_sr <- round(predict(models_list_atlas$xgboost$finalModel, newdata = as.matrix(pred10000sqkm)))
# pred10000sqkm$KVADRAT <- atlas_data_topredict$KVADRAT[atlas_data_topredict$spatial_scale == 10000]
## And compute the trend
## M2 --> M4 
slope_10000_M2M4 <-
pred10000sqkm %>% 
  group_by(elevation) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  filter(!is.na(elevation))
## M2 --> M3
slope_10000_M2M3 <-
pred10000sqkm %>% 
  filter(Date != 2015) %>% 
  group_by(elevation) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  filter(!is.na(elevation))
## M3 --> M4
slope_2150_M3M4 <-
pred2150sqkm %>% 
  filter(Date != 1987) %>% 
  group_by(elevation) %>% 
  do({
    mod <-  lm(pred_sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2])
  }) %>% 
  filter(!is.na(elevation))

CzBBS

## Load data used for predictions
load("../data/data_to_predict/JPSP_topredict.Rdata")
## Create the predictions
JPSP_predictions <- predict(models_list_JPSP$rf$finalModel, data = JPSP_topredict %>% dplyr::select(-id))
## Add them to the data
JPSP_topredict$sr <- round(JPSP_predictions$predictions)
## Now compute the trends
### M2 --> M4
slope_JPSP_M2M4 <- 
JPSP_topredict %>% 
  group_by(id, AREA) %>% 
  do({
    mod <-  lm(sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2],
               pval = (summary(mod)$coefficients[2,"Pr(>|t|)"]))
  }) %>% 
  left_join(JPSP_topredict %>% dplyr::select(id, AREA), by = c("id", "AREA")) %>% 
  unique()
#### M2 --> M3
slope_JPSP_M2M3 <-
JPSP_topredict %>% 
  filter(Date != 2015) %>% 
  group_by(id, AREA) %>% 
  do({
    mod <-  lm(sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2],
               pval = (summary(mod)$coefficients[2,"Pr(>|t|)"]))
  }) %>% 
  left_join(JPSP_topredict %>% dplyr::select(id, AREA), by = c("id", "AREA")) %>% 
  unique()
#### M3 --> M4
slope_JPSP_M3M4 <-
JPSP_topredict %>% 
  filter(Date != 1987) %>% 
  group_by(id, AREA) %>% 
  do({
    mod <-  lm(sr ~ Date, data = .)
    data.frame(slope = coef(mod)[2],
               pval = (summary(mod)$coefficients[2,"Pr(>|t|)"]))
  }) %>% 
  left_join(JPSP_topredict %>% dplyr::select(id, AREA), by = c("id", "AREA")) %>% 
  unique()

Number of Colonization, Extinction and Persistence

CzAtlas

nCEP_atlas_models <- readRDS("../models/nCEP_atlas.rds")
## Create the dataframe to predict
nCEP_topredict <-
atlas_data_topredict %>% 
  filter(AREA != 0) %>% 
  dplyr::select(-c(Date, time_span, sr, effort)) %>% 
  distinct() %>% 
  mutate(effort_diff = 0,
         timespan_diff = 0) %>% 
  ## add the periods
  crossing(period = c("M2M3", "M3M4", "M2M4"))

## And make predictions for each metric
predicted_nCEP_atlas <- data.frame()
## predictions for each metric
for(THE_metric in names(nCEP_atlas_models)){
  model <- nCEP_atlas_models[[THE_metric]][["xgboost"]]
  pred <- predict(model, 
                  newdata = nCEP_topredict)
  predicted_nCEP_atlas <- 
  rbind(predicted_nCEP_atlas,
        nCEP_topredict %>% cbind(pred = round(pred)) %>% mutate(metric = THE_metric))
}
## Also, compute jaccard index
## Compute turnover
predicted_nCEP_atlas <- 
predicted_nCEP_atlas %>% 
  pivot_wider(id_cols = c(Lat, Long, AREA, polypoint_ratio, KVADRAT, elevation, spatial_scale, effort_diff, timespan_diff),
              names_from = c(metric,period),
              values_from = pred,
              names_sep = "") %>% 
  group_by(Lat, Long, AREA, polypoint_ratio, KVADRAT, elevation, spatial_scale, effort_diff, timespan_diff) %>% 
  mutate(jaccardM2M3 = persM2M3/(persM2M3 + colM2M3 + extM2M3),
         jaccardM3M4 = persM3M4/(persM3M4 + colM3M4 + extM3M4),
         jaccardM2M4 = persM2M4/(persM2M4 + colM2M4 + extM2M4)) %>% 
  ungroup()

CzBBS

nCEP_BBS_models <- readRDS("../models/nCEP_JPSP.rds")
predicted_nCEP_BBS <- data.frame()
for(THE_metric in names(nCEP_BBS_models)){
  model <- nCEP_BBS_models[[THE_metric]][["rf"]]
  pred <- 
    predict(model, 
            newdata = JPSP_topredict %>% 
              dplyr::select(-time_span, -id, -sr, -Date, polypoint_ratio, Lat, Long, elevation, AREA) %>% 
              mutate(timespan_diff = 0) %>% 
              crossing(period = c("M2M3", "M3M4", "M2M4")))
  predicted_nCEP_BBS <- 
  rbind(
    predicted_nCEP_BBS,
    JPSP_topredict %>% 
    dplyr::select(time_span, Lat, Long, elevation, AREA) %>% 
    mutate(metric = THE_metric) %>% 
    crossing(period = c("M2M3", "M3M4", "M2M4")) %>% 
    cbind(pred = round(pred))
  )
}
## Compute turnover
predicted_nCEP_BBS <- 
predicted_nCEP_BBS %>% 
  pivot_wider(id_cols = c(Lat, Long, AREA, elevation, time_span),
              names_from = c(metric,period),
              values_from = pred,
              names_sep = "") %>% 
  group_by(Lat, Long, AREA, elevation, time_span) %>% 
  mutate(jaccardM2M3 = persM2M3/(persM2M3 + colM2M3 + extM2M3),
         jaccardM3M4 = persM3M4/(persM3M4 + colM3M4 + extM3M4),
         jaccardM2M4 = persM2M4/(persM2M4 + colM2M4 + extM2M4)) %>% 
  ungroup()

Probability of Colonization, Extinction and Persistence

CzAtlas

pCEP_atlas_models <- readRDS("../models/probaCEP_atlas.rds")
## Create the dataframe to predict
pCEP_topredict <-
atlas_data_topredict %>% 
  filter(!(((KVADRAT == 6844 & Date == 2015 & effort != 5))|(KVADRAT == 7248 & Date == 2015 & effort != 5))) %>% 
  filter(AREA != 0) %>% 
  dplyr::select(-c(Date, time_span, sr, effort)) %>% 
  distinct() %>% 
  mutate(effort_diff = 0,
         timespan_diff = 0) %>% 
  ## add the periods
  crossing(period = c("M2M3", "M3M4", "M2M4"))

## And make predictions for each metric
predicted_pCEP_atlas <- data.frame()
## predictions for each metric
for(THE_metric in names(pCEP_atlas_models)){
  model <- pCEP_atlas_models[[THE_metric]][["xgboost"]]
  pred <- predict(model, 
                  newdata = pCEP_topredict)
  predicted_pCEP_atlas <- 
  rbind(predicted_pCEP_atlas,
        pCEP_topredict %>% cbind(pred) %>% mutate(metric = THE_metric))
}

CzBBS

pCEP_BBS_models <- readRDS("../models/probaCEP_JPSP.rds")
predicted_pCEP_BBS <- data.frame()
for(THE_metric in names(pCEP_BBS_models)){
  model <- pCEP_BBS_models[[THE_metric]][["rf"]]
  pred <- 
    predict(model, 
            newdata = JPSP_topredict %>% 
              dplyr::select(-time_span, -id, -sr, -Date, polypoint_ratio, Lat, Long, elevation, AREA) %>% 
              mutate(timespan_diff = 0) %>% 
              crossing(period = c("M2M3", "M3M4", "M2M4")))
  predicted_pCEP_BBS <- 
  rbind(
    predicted_pCEP_BBS,
    JPSP_topredict %>% 
    dplyr::select(time_span, Lat, Long, elevation, AREA) %>% 
    mutate(metric = THE_metric) %>% 
    crossing(period = c("M2M3", "M3M4", "M2M4")) %>% 
    cbind(pred)
  )
}

Figures

Species richness

# M2M4 ####
pdf("../figures/sr_spatialscaling_M2M4.pdf",
    height = 5, width = 5.5)
# Data management
slope_JPSP_M2M4 %>% 
  dplyr::rename(area = AREA) %>% 
  dplyr::select(slope, area) %>% 
  mutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>% 
  rbind(
    slope_130_M2M4 %>% 
      dplyr::select(slope) %>% 
      mutate(area = 130) %>% 
      rbind(
        slope_530_M2M4 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 530)
      ) %>% 
      rbind(
        slope_2150_M2M4 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 2150)
      ) %>%
      # rbind(
      #   slope_10000_M2M4 %>%
      #     dplyr::select(slope) %>%
      #     mutate(area = 10000)
      # ) %>%
      rbind(
        slope_CR_M2M4 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 83900)
      )
  ) %>% 
  ## Plot
  ggplot() +
  geom_boxplot(aes(area, slope, group = area))+
  # geom_boxplot(aes(area, slope, group = area), data = . %>% filter(area == 10000),
  #              color = "blue", width = 0.3, size = 1)+
  scale_x_log10()+
  xlab("Spatial grain (km²)")+
  ylab("Species Richness change\nper year")+
  ylim(-2,2)+
    theme(panel.background = element_blank(),
        panel.grid.major = element_line(colour = "lightgrey"),
        axis.text.x = element_markdown())+
  stat_summary(aes(area, slope), fun.y = median, geom = 'line', linetype = 2, size = 1,  color = "blue")+
  stat_summary(aes(area, slope), fun = mean,
               geom = "point", col = "red", size = 1)+
  stat_summary(aes(area, slope), fun.data = mean_se,
               geom = "errorbar", size = .5, width = .1, color = "red")+
  geom_hline(yintercept = 0)+
  annotation_logticks(sides = "b")+
  coord_cartesian(clip = "off")+
  theme(text = element_text(size = 16))
dev.off()
png 
  2 
# M2M3 ####
pdf("../figures/sr_spatialscaling_M2M3.pdf",
    height = 5, width = 5.5)
slope_JPSP_M2M3 %>% 
  dplyr::rename(area = AREA) %>% 
  dplyr::select(slope, area) %>% 
  mutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>% 
  rbind(
    slope_130_M2M3 %>% 
      dplyr::select(slope) %>% 
      mutate(area = 130) %>% 
      rbind(
        slope_530_M2M3 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 530)
      ) %>% 
      rbind(
        slope_2150_M2M3 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 2150)
      ) %>% 
      rbind(
        slope_CR_M2M3 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 83900)
      )
  ) %>% 
  ggplot() +
  geom_boxplot(aes(area, slope, group = area))+
  scale_x_log10()+
  xlab("Spatial grain (km²)")+
  ylab("Species Richness change\nper year")+
  ylim(-2,2)+
    theme(panel.background = element_blank(),
        panel.grid.major = element_line(colour = "lightgrey"),
        axis.text.x = element_markdown())+
  stat_summary(aes(area, slope), fun.y = median, geom = 'line', linetype = 2, size = 1,  color = "blue")+
  stat_summary(aes(area, slope), fun = mean,
               geom = "point", col = "red", size = 1)+
  stat_summary(aes(area, slope), fun.data = mean_se,
               geom = "errorbar", size = .5, width = .1, color = "red")+
  geom_hline(yintercept = 0)+
  annotation_logticks(sides = "b")+
  coord_cartesian(clip = "off")+
  theme(text = element_text(size = 16))
dev.off()
png 
  2 
# M3M4 ####
pdf("../figures/sr_spatialscaling_M3M4.pdf",
    height = 5, width = 5.5)
slope_JPSP_M3M4 %>% 
  dplyr::rename(area = AREA) %>% 
  dplyr::select(slope, area) %>% 
  mutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>% 
  rbind(
    slope_130_M3M4 %>% 
      dplyr::select(slope) %>% 
      mutate(area = 130) %>% 
      rbind(
        slope_530_M3M4 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 530)
      ) %>% 
      rbind(
        slope_2150_M3M4 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 2150)
      ) %>% 
      rbind(
        slope_CR_M3M4 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 83900)
      )
  ) %>% 
  ggplot() +
  geom_boxplot(aes(area, slope, group = area))+
  scale_x_log10()+
  xlab("Spatial grain (km²)")+
  ylab("Species Richness change\nper year")+
  ylim(-2,2)+
    theme(panel.background = element_blank(),
        panel.grid.major = element_line(colour = "lightgrey"),
        axis.text.x = element_markdown())+
  stat_summary(aes(area, slope), fun.y = median, geom = 'line', linetype = 2, size = 1,  color = "blue")+
  stat_summary(aes(area, slope), fun = mean,
               geom = "point", col = "red", size = 1)+
  stat_summary(aes(area, slope), fun.data = mean_se,
               geom = "errorbar", size = .5, width = .1, color = "red")+
  geom_hline(yintercept = 0)+
  annotation_logticks(sides = "b")+
  coord_cartesian(clip = "off")+
  theme(text = element_text(size = 16))
dev.off()
png 
  2 
## Figure 4
pdf("../figures/sr_2periods.pdf",
    height = 5, width = 5.5)
slope_JPSP_M2M3 %>% 
  dplyr::rename(area = AREA) %>% 
  dplyr::select(slope, area) %>% 
  mutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>% 
  rbind(
    slope_130_M2M3 %>% 
      dplyr::select(slope) %>% 
      mutate(area = 130) %>% 
      rbind(
        slope_530_M2M3 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 530)
      ) %>% 
      rbind(
        slope_2150_M2M3 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 2150)
      ) %>% 
      rbind(
        slope_CR_M2M3 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 83900)
      ) 
  ) %>% mutate(period = "M2M3") %>% 
  rbind(
    slope_JPSP_M3M4 %>% 
      dplyr::rename(area = AREA) %>% 
      dplyr::select(slope, area) %>% 
      mutate(area = ifelse(area < 0.2, 0.13, 2.51)) %>% 
      rbind(
        slope_130_M3M4 %>% 
          dplyr::select(slope) %>% 
          mutate(area = 130) %>% 
          rbind(
            slope_530_M3M4 %>% 
              dplyr::select(slope) %>% 
              mutate(area = 530)
          ) %>% 
          rbind(
            slope_2150_M3M4 %>% 
              dplyr::select(slope) %>% 
              mutate(area = 2150)
          ) %>% 
          rbind(
            slope_CR_M3M4 %>% 
              dplyr::select(slope) %>% 
              mutate(area = 83900)
          ) 
       ) %>% mutate(period = "M3M4")
    ) %>% 
  mutate(period = case_when(
    period == "M2M3" ~ "1985-2003",
    period == "M3M4" ~ "2003-2017"
  )) %>% 
  mutate(period = fct_relevel(period, c("1985-2003", "2003-2017"))) %>% 
  ggplot()+
  geom_boxplot(aes(area, slope, group = interaction(area, period), color = period))+
  stat_summary(aes(area, slope, color = period), fun.y = median, geom = 'line', size = 1, linetype = 2)+
  scale_x_log10()+
  annotation_logticks(sides = "b")+
  xlab("Spatial grain (km²)")+
  ylab("Species Richness change\nper year")+
  labs(color = "Period", shape = "Period", linetype = "Period")+
  ylim(-2,2)+
  theme_bw()+
  theme(text = element_text(size = 16))+
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.grid.major = element_line(colour = "lightgrey"),
        axis.text.x = element_markdown())
dev.off()
png 
  2 
pdf("../figures/trend_details.pdf", 
    height = 3, width = 11.297)
## Plot the detailed trends
JPSP_topredict %>% 
  mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51)) %>% 
  dplyr::select(sr, Date, AREA) %>% 
  rbind(
    pred130sqkm %>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 130) %>% dplyr::rename(sr = pred_sr),
    pred530sqkm %>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 530) %>% dplyr::rename(sr = pred_sr),
    pred2150sqkm %>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 2150) %>% dplyr::rename(sr = pred_sr),
    predCR %>% dplyr::select(pred_sr, Date) %>% mutate(AREA = 83900) %>% dplyr::rename(sr = pred_sr)
  ) %>% 
  ggplot(aes(Date, sr))+
  stat_summary(fun = median, 
               geom = "point", col = "red", size = 2)+
  theme_bw()+
  stat_summary(aes(group = 1), fun.y = median,
               geom = "line", linetype = "dashed", size = 1, color = "blue")+
  facet_wrap(~ AREA, nrow = 1)+
  ylab("Species richness")+
  xlab("Year")+
  scale_x_continuous(breaks = c(1990,2000,2010))+
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 16))
dev.off()
png 
  2 

Number of Colonization, Extinction, Persistence and Turnover

pdf("../figures/nCEP_scaling.pdf",
    height = 5, width = 5.5)
for(periods in c("M2M3", "M3M4", "M2M4")){
  print(
    predicted_nCEP_atlas %>% dplyr::select(spatial_scale, contains("M", ignore.case = F)) %>% dplyr::rename(AREA = spatial_scale) %>% 
    rbind(
      predicted_nCEP_BBS %>% dplyr::select(contains("M", ignore.case = F), AREA) %>% 
        mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))
    ) %>% 
      pivot_longer(cols = -c("AREA"),
                 names_to = "period",
                 values_to = "value") %>% 
    separate(period, into = c("metric", "period"), sep = c(-4)) %>% 
    filter(metric == "col" | metric == "ext" | metric == "pers") %>% 
  filter(period == periods) %>% 
  ggplot()+
  geom_boxplot(aes(AREA, value,
                   group = interaction(AREA,metric),
                   fill = metric),
                   outlier.shape = NA)+
  scale_fill_brewer(palette = "Dark2")+
  scale_color_brewer(palette = "Dark2")+
  scale_x_log10()+
  scale_y_log10(limits = c(4,210))+
  annotation_logticks(sides = "bl")+
  stat_summary(aes(AREA, value, color = metric), fun.y = median, geom = 'line', linetype = 2, size = 1)+
  ylab("Count")+
  xlab("Spatial grain\n(km²)")+
  labs(fill = "Probability")+
  theme_bw()+
  theme(text = element_text(size = 16),
        legend.position = "none")
  )
}
dev.off()
png 
  2 
pdf("../figures/col_ext_jaccard_2periods.pdf",
    height = 5, width = 5.5)
for(THE_metric in c("jaccard", "col", "ext")){
  if(THE_metric == "jaccard"){
    print(predicted_nCEP_atlas %>% dplyr::select(spatial_scale, contains("M", ignore.case = F)) %>% 
            dplyr::rename(AREA = spatial_scale) %>% 
            rbind(predicted_nCEP_BBS %>% 
                    dplyr::select(contains("M", ignore.case = F), AREA) %>% 
                    mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))) %>% 
            dplyr::select(AREA, contains(THE_metric)) %>% 
            pivot_longer(cols = contains(THE_metric),
                         names_to = "period",
                         values_to = "value") %>% 
            mutate(period = str_remove(period, THE_metric)) %>% 
            mutate(period = case_when(
              period == "M2M4" ~ "1985-2017",
              period == "M2M3" ~ "1985-2003",
              period == "M3M4" ~ "2003-2017")) %>% 
            mutate(period = fct_relevel(period, c("1985-2017", "1985-2003", "2003-2017"))) %>%
            filter(period != "1985-2017") %>%
            ggplot()+
            geom_boxplot(aes(AREA, value, group = interaction(AREA, period), color = period))+
            stat_summary(aes(AREA, value, color = period), fun.y = median, geom = 'line', size = 1, linetype = 2)+
            scale_x_log10()+
            annotation_logticks(sides = "b")+
            xlab("Spatial grain (km²)")+
            ylab(THE_metric)+
            labs(color = "Period", shape = "Period", linetype = "Period")+
            theme_bw()+
            theme(text = element_text(size = 16),legend.position = "none"))
  }else{
    print(predicted_nCEP_atlas %>% dplyr::select(spatial_scale, contains("M", ignore.case = F)) %>%
        dplyr::rename(AREA = spatial_scale) %>% 
        rbind(predicted_nCEP_BBS %>% dplyr::select(contains("M", ignore.case = F), AREA) %>% 
                mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))) %>% 
        dplyr::select(AREA, contains(THE_metric)) %>% 
        pivot_longer(cols = contains(THE_metric),
                     names_to = "period",
                     values_to = "value") %>% 
        mutate(period = str_remove(period, THE_metric)) %>% 
          mutate(period = case_when(
            period == "M2M4" ~ "1985-2017",
            period == "M2M3" ~ "1985-2003",
            period == "M3M4" ~ "2003-2017")) %>% 
        mutate(period = fct_relevel(period, c("1985-2017", "1985-2003", "2003-2017"))) %>%
        filter(period != "1985-2017") %>%
        ggplot()+
        geom_boxplot(aes(AREA, value, group = interaction(AREA, period), color = period))+
        stat_summary(aes(AREA, value, color = period), fun.y = median, geom = 'line', size = 1, linetype = 2)+
        scale_x_log10()+
        annotation_logticks(sides = "b")+
        xlab("Spatial grain (km²)")+
        ylab(THE_metric)+
        ylim(0,50)+
        labs(color = "Period", shape = "Period", linetype = "Period")+
        theme_bw()+
        theme(text = element_text(size = 16),
              legend.position = "none"))
  }
}
dev.off()
png 
  2 
pdf("../figures/jaccard.pdf",
    height = 5, width = 5.5)

for(period in c("jaccardM2M3","jaccardM3M4")){
print(predicted_nCEP_atlas %>% 
        dplyr::select(spatial_scale, contains("M", ignore.case = F)) %>%
        dplyr::rename(AREA = spatial_scale) %>% 
        rbind(predicted_nCEP_BBS %>% 
        dplyr::select(contains("M", ignore.case = F), AREA) %>% 
        mutate(AREA = ifelse(AREA < 0.2, 0.13, 2.51))) %>% 
        dplyr::select(AREA, period) %>% 
        dplyr::rename(value = period) %>% 
        ggplot(aes(AREA, value, group = AREA))+
        annotation_logticks(sides = "b")+
        geom_boxplot()+
        scale_x_log10()+
        xlab("Spatial grain\n(km²)")+
        ylim(.35, 1)+
        ylab("Jaccard")+
        theme_bw()+
        theme(strip.background = element_blank(),
              strip.text.x = element_blank(),
              text = element_text(size = 16))+
        stat_summary(aes(group = 1), fun.y = median,
                     geom = "line", linetype = "dashed", size = 1, color = "blue"))
}
dev.off()
png 
  2 

Probability of Colonization, Extinction, Persistence

pdf("../figures/pCEP.pdf",
    height = 5, width = 5.5)

for(time in c("M2M3", "M3M4", "M2M4")) {
  print(
  predicted_pCEP_BBS %>%
  mutate(spatial_scale = ifelse(AREA < 0.2, 0.13, 2.51)) %>%
  mutate(period = fct_relevel(period, c("M2M4", "M2M3", "M3M4"))) %>%
  dplyr::select(spatial_scale, pred, period, metric) %>%
  rbind(predicted_pCEP_atlas %>% dplyr::select(spatial_scale, pred, period, metric)) %>%
  mutate(metric = case_when(
    metric == "col" ~ "Colonization",
    metric == "ext" ~ "Extinction",
    metric == "pers" ~ "Persistence"
  )) %>%
  filter(period == time,
         metric != "Species pool\ncolonization") %>%
  ggplot()+
  geom_boxplot(aes(spatial_scale, pred,
                   group = interaction(spatial_scale,metric),
                   fill = metric),
                   outlier.shape = NA) +
  scale_fill_brewer(palette = "Dark2")+
  scale_color_brewer(palette = "Dark2")+
  scale_x_log10()+
  annotation_logticks(sides = "b")+
  stat_summary(aes(spatial_scale, pred, color = metric), fun.y = median, geom = 'line', linetype = 2, size = 1)+
  ylim(0,1)+
  ylab("Probability")+
  xlab("Spatial grain\n(km²)")+
  labs(fill = "Probability")+
  theme_bw()+
  theme(text = element_text(size = 16),
        legend.position = "none"))
}
dev.off()
png 
  2 
pdf("../figures/pCEP_2periods.pdf",
    height = 5, width = 5.5)
for(THE_metric in c("col", "ext")){
  print(predicted_pCEP_BBS %>% 
  mutate(spatial_scale = ifelse(AREA < 0.2, 0.13, 2.51)) %>%
  mutate(period = fct_relevel(period, c("M2M4", "M2M3", "M3M4"))) %>% 
  dplyr::select(spatial_scale, pred, period, metric) %>% 
  rbind(predicted_pCEP_atlas %>% dplyr::select(spatial_scale, pred, period, metric)) %>% 
  filter(pred >= 0 & pred <= 1) %>% 
  filter(period != "M2M4") %>%
  filter(metric == THE_metric) %>%
  ggplot()+
  geom_boxplot(aes(spatial_scale, pred, group = interaction(spatial_scale, period), color = period))+
  stat_summary(aes(spatial_scale, pred, color = period, linetype = period), fun.y = median, geom = 'line', linetype = 2, size = 1)+
  scale_x_log10()+
  annotation_logticks(sides = "b")+
  xlab("Spatial grain (km²)")+
  ylab(THE_metric)+
  ylim(0, .5)+
  labs(color = "Period", shape = "Period", linetype = "Period")+
  theme_bw()+
  theme(text = element_text(size = 16))+
  theme(panel.background = element_blank(),
        legend.position = "none",
        panel.grid.major = element_line(colour = "lightgrey"),
        axis.text.x = element_markdown()))
}

dev.off()
png 
  2 

Correlation and Variable importance (Appendix B)

CEP_atlas_cart_data <- readRDS("../data/data_model_fitting/CEP_atlas_cart_data.rds")
CEP_atlas_cart_data$metric[CEP_atlas_cart_data$metric == "proba_colonized"] <- "proba_col"

CEP_JPSP_cart_data <- readRDS("../data/data_model_fitting/CEP_JPSP_cart_data.rds")
CEP_JPSP_cart_data <- CEP_JPSP_cart_data %>% 
  filter(metric != "proba colonization") %>% 
  mutate(metric = case_when(
    metric == "colonization" ~ "col",
    metric == "extinction" ~ "ext",
    metric == "persistence" ~ "pers",
    metric == "proba persistence" ~ "proba_pers",
    metric == "proba extinction" ~ "proba_ext",
    metric == "proba colonized" ~ "proba_col",
  ))

pdf("../figures/correlations.pdf",
    height = 8.27, width = 11.69)
## For Atlas model
corrplot(cor(atlas_cart_data[,names(atlas_cart_data) != "sr"]),
     main = "Correlation - CzAtlas - species richness predictors",
     type = "lower", mar=c(0,0,1,0))

ggplot(data = varImp(models_list_atlas$xgboost), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Predictor Variables", y = "Importance") +
  ggtitle("Variable Importance Plot - CzAtlas - XGBoost algorithm") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

CEP_atlas_cart_data %>% filter(metric == "ext") %>% 
    select(-quadrat, -metric, -trials, -value) %>% 
    plot(main = "Correlation - CzAtlas - colonization/extinction/persistence")

for(THE_metric in unique(CEP_atlas_cart_data$metric)){
  if(!grepl("proba_", THE_metric)){
    print(
      ggplot(data = varImp(nCEP_atlas_models[[THE_metric]][["xgboost"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
      geom_bar(stat = "identity", fill = "blue") +
      labs(x = "Predictor Variables", y = "Importance") +
      ggtitle(paste0("Variable Importance Plot - CzAtlas - ", THE_metric)) +
      theme_bw()+
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
    )
  }else{
    print(
      ggplot(data = varImp(pCEP_atlas_models[[gsub("proba_","",THE_metric)]][["xgboost"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
      geom_bar(stat = "identity", fill = "blue") +
      labs(x = "Predictor Variables", y = "Importance") +
      ggtitle(paste0("Variable Importance Plot - CzAtlas - ", THE_metric)) +
      theme_bw()+
      theme(axis.text.x = element_text(angle = 45, hjust = 1)) 
    )
  }
}

## For CzBBS model
corrplot(cor(JPSP_cart_data[,names(JPSP_cart_data) != "sr"]),
     main = "Correlation - CzBBS - species richness predictors",
     type = "lower", mar=c(0,0,1,0))

ggplot(data = varImp(models_list_JPSP$rf), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Predictor Variables", y = "Importance") +
  ggtitle("Variable Importance Plot - CzBBS Data - random forest algorithm") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

CEP_JPSP_cart_data %>% 
  filter(metric == "ext") %>%
  dplyr::select(-metric) %>% 
  filter(!is.na(value)) %>% 
  plot(main = paste0("Correlation - CzBBS - colonization/extinction/persistence predictors"))

for(THE_metric in unique(CEP_JPSP_cart_data$metric)){
  if(!grepl("proba_", THE_metric)){
    print(
      ggplot(data = varImp(nCEP_BBS_models[[THE_metric]][["rf"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
      geom_bar(stat = "identity", fill = "blue") +
      labs(x = "Predictor Variables", y = "Importance") +
      ggtitle(paste0("Variable Importance Plot - CzBBS - ", THE_metric)) +
      theme_bw()+
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
    )
  }else{
    print(
      ggplot(data = varImp(pCEP_atlas_models[[gsub("proba_","",THE_metric)]][["xgboost"]]), aes(x = reorder(rownames(importance), Overall), y = Overall)) +
      geom_bar(stat = "identity", fill = "blue") +
      labs(x = "Predictor Variables", y = "Importance") +
      ggtitle(paste0("Variable Importance Plot - CzBBS - ", THE_metric)) +
      theme_bw()+
      theme(axis.text.x = element_text(angle = 45, hjust = 1)) 
    )
  }
}

dev.off()
png 
  2