3. Zonal statistics with discrete data
The objective of this demo is to quantify the categories of herbaceous vegetation in San Diego city - USA. The dataset used is Copernicus Global Land Service (CGLS) which provides a series of bio-geophysical products on the status and evolution of land surface at global scale to a spatial resolution of 100m. More information click here
Information: |
- For this demo you need rgee , sf , tidyverse , mapview , raster , viridis and ggspatial packages previously installed. |
3.1 Requirements
library(rgee)
library(sf)
library(tidyverse)
library(mapview)
library(raster)
library(viridis)
library(ggspatial)
ee_Initialize()
── rgee 1.1.2 ────────────────────────── earthengine-api 0.1.292 ──
✓ user: not_defined
✓ Initializing Google Earth Engine: DONE!
✓ Earth Engine account: users/ambarja
───────────────────────────────────────────────────────────────────
3.2 Vector layer reading of your study area
sandiego <- st_read(
"https://github.com/healthinnovation/sdb-gpkg/raw/main/SanDiego_districts.gpkg"
) %>%
dplyr::select(PO_NAME)
glimpse(sandiego)
Rows: 108
Columns: 2
$ PO_NAME <chr> "Alpine", "Bonita", "Boulevard", "Campo", "Chula Vista", …
$ geom <MULTIPOLYGON [°]> MULTIPOLYGON (((-116.6075 3..., MULTIPOLYGON…
3.3 Processing data with rgee
dataset <- ee$Image("COPERNICUS/Landcover/100m/Proba-V-C3/Global/2019")$
select("discrete_classification")$
eq(30) # 30: Herbaceous vegetation
herbaceous <- dataset$updateMask(dataset)
3.4 A function that extracts the number of pixels of herbaceous vegetation for each polygon
ee_get_data <- function(img, region, scale = 1000) {
lista_histo <- list()
for (i in 1:nrow(region)) {
region_ee <- region[i, ] %>% sf_as_ee()
ee_histo <- img$reduceRegion(
reducer = ee$Reducer$frequencyHistogram(),
geometry = region_ee,
scale = scale
)
lista_histo[[i]] <- ee_histo$getInfo() %>%
map_df(., .f = as.data.frame) %>%
mutate(NAME = region[[i, 1]] %>% as.vector())
}
return(lista_histo)
}
3.5 Application of the function ee_get_data
# Time: ~ 2 min
rawdata <- ee_get_data(img = herbaceous, region = sandiego)
class(rawdata)
[1] "list"
3.6 Preprocesing rawdata
# Herbaceous area in Hectare Units
newdata <- rawdata %>%
map_df(.f = as_tibble) %>%
mutate_if(
is.numeric,
.funs = function(x) {x * 1000 * 1000 / 10000}
) %>%
rename(area_Ha = X1)
glimpse(newdata)
Rows: 66
Columns: 2
$ area_Ha <dbl> 800.000000, 1073.333333, 187.843137, 1300.000000, 59.6078…
$ NAME <chr> "Alpine", "Bonita", "Boulevard", "Campo", "Chula Vista", …
# Left join with spatial data
sandiego_herb <- left_join(
sandiego,
newdata,
by = c("PO_NAME"="NAME")
)
glimpse(sandiego_herb)
Rows: 694
Columns: 3
$ PO_NAME <chr> "Alpine", "Bonita", "Boulevard", "Campo", "Chula Vista", …
$ area_Ha <dbl> 800.00000, 1073.33333, 187.84314, 1300.00000, 59.60784, 1…
$ geom <MULTIPOLYGON [°]> MULTIPOLYGON (((-116.6075 3..., MULTIPOLYGON…
3.7 Exploration map
mapview(sandiego_herb,zcol="area_Ha", layer.name = "Herbaceous")