3. Zonal statistics with discrete data

The objective of this demo is to quantify the categories of herbaceous vegetation in San Diego city - USA. The dataset used is Copernicus Global Land Service (CGLS) which provides a series of bio-geophysical products on the status and evolution of land surface at global scale to a spatial resolution of 100m. More information click here

Information:
- For this demo you need rgee, sf, tidyverse, mapview, raster, viridis and ggspatial packages previously installed.

3.1 Requirements

library(rgee)
library(sf)
library(tidyverse)
library(mapview)
library(raster)
library(viridis)
library(ggspatial)
ee_Initialize()
── rgee 1.1.2 ────────────────────────── earthengine-api 0.1.292 ── 
 ✓ user: not_defined
 ✓ Initializing Google Earth Engine:  DONE!
 ✓ Earth Engine account: users/ambarja 
─────────────────────────────────────────────────────────────────── 

3.2 Vector layer reading of your study area

sandiego <- st_read(
  "https://github.com/healthinnovation/sdb-gpkg/raw/main/SanDiego_districts.gpkg"
) %>%
  dplyr::select(PO_NAME) 
glimpse(sandiego)
Rows: 108
Columns: 2
$ PO_NAME <chr> "Alpine", "Bonita", "Boulevard", "Campo", "Chula Vista", …
$ geom    <MULTIPOLYGON [°]> MULTIPOLYGON (((-116.6075 3..., MULTIPOLYGON…

3.3 Processing data with rgee

dataset <- ee$Image("COPERNICUS/Landcover/100m/Proba-V-C3/Global/2019")$
  select("discrete_classification")$
  eq(30) # 30: Herbaceous vegetation

herbaceous <- dataset$updateMask(dataset)

3.4 A function that extracts the number of pixels of herbaceous vegetation for each polygon

ee_get_data <- function(img, region, scale = 1000) {
  lista_histo <- list()
  for (i in 1:nrow(region)) {
    region_ee <- region[i, ] %>% sf_as_ee()
    ee_histo <- img$reduceRegion(
      reducer = ee$Reducer$frequencyHistogram(),
      geometry = region_ee,
      scale = scale
    )
    lista_histo[[i]] <- ee_histo$getInfo() %>%
      map_df(., .f = as.data.frame) %>%
      mutate(NAME = region[[i, 1]] %>% as.vector())
  }
  return(lista_histo)
}

3.5 Application of the function ee_get_data

# Time: ~ 2 min
rawdata <- ee_get_data(img = herbaceous, region = sandiego)
class(rawdata)
[1] "list"

3.6 Preprocesing rawdata

# Herbaceous area in Hectare Units 
newdata <- rawdata %>% 
  map_df(.f = as_tibble) %>% 
  mutate_if(
    is.numeric,
    .funs = function(x) {x * 1000 * 1000 / 10000}
    ) %>% 
  rename(area_Ha = X1)
glimpse(newdata)
Rows: 66
Columns: 2
$ area_Ha <dbl> 800.000000, 1073.333333, 187.843137, 1300.000000, 59.6078…
$ NAME    <chr> "Alpine", "Bonita", "Boulevard", "Campo", "Chula Vista", …
# Left join with spatial data
sandiego_herb <- left_join(
 sandiego,
 newdata,
 by = c("PO_NAME"="NAME")
)
glimpse(sandiego_herb)
Rows: 694
Columns: 3
$ PO_NAME <chr> "Alpine", "Bonita", "Boulevard", "Campo", "Chula Vista", …
$ area_Ha <dbl> 800.00000, 1073.33333, 187.84314, 1300.00000, 59.60784, 1…
$ geom    <MULTIPOLYGON [°]> MULTIPOLYGON (((-116.6075 3..., MULTIPOLYGON…

3.7 Exploration map

mapview(sandiego_herb,zcol="area_Ha", layer.name = "Herbaceous")

3.8 Final Map

ggplot() + 
  geom_sf(data = sandiego_herb) +
  annotation_map_tile(zoom = 10) + 
  geom_sf(data = sandiego_herb,aes(fill = area_Ha),lwd=0.05) + 
  scale_fill_viridis() + 
  theme_minimal() + 
  labs(title = "Herbaceous vegetation - San Diego",
       caption = "Source: Copernicus Global Land Service (CGLS)")