---
title: "Spatial data & GIS tools"
subtitle: "Lecture 21"
author: "Dr. Colin Rundel"
footer: "Sta 523 - Fall 2022"
format:
  revealjs:
    theme: slides.scss
    transition: fade
    slide-number: true
    self-contained: true
execute: 
  echo: true
---

```{r setup, message=FALSE, warning=FALSE, include=FALSE}
options(
  htmltools.dir.version = FALSE, # for blogdown
  width=50
)

knitr::opts_chunk$set(
  fig.align = "center", fig.retina = 2, dpi = 150,
  out.width = "100%"
)

library(tidyverse)
library(maptools)
library(sf)
library(patchwork)

ggplot2::theme_set(ggplot2::theme_bw())

cols = c("#7fc97f","#386cb0","#beaed4","#fdc086")
```


# Geospatial stuff is hard


## Projections

```{r echo=FALSE}
lat_lines  = map(seq(9.999, 89.999, length.out = 9), ~ cbind(seq(-179.999, -9.999, length.out=100), .))
long_lines = map(seq(-179.999, -9.999, length.out = 17), ~ cbind(., seq(9.999, 89.999, length.out=100))) 

lat_long = c(lat_lines, long_lines) %>% 
  st_multilinestring() %>%
  st_sfc() %>%
  st_set_crs("+proj=longlat +datum=WGS84 +no_defs")
```

```{r echo=FALSE, fig.align="center", message=FALSE, out.width="75%", fig.height=5}
data(wrld_simpl, package = "maptools")
world = st_as_sf(wrld_simpl)

NAm = world %>% filter(FIPS %in% c("CA","GL","MX","US"))
NAm_google = st_transform(NAm, st_crs(3857))


par(mar=c(3,2,2,1),mfrow=c(2,3))
plot(lat_long, col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Lat/Long (epsg:4326)")
plot(st_transform(NAm, st_crs(4326)) %>% st_geometry(), col="black", add=TRUE)

plot(st_transform(lat_long, st_crs(3857)), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Google / Web Mercator (epsg:3857)", ylim=c(0, 2e7))
plot(st_transform(NAm, st_crs(3857)) %>% st_geometry(), col="black", add=TRUE)

lcc = "+proj=lcc +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"
plot(st_transform(lat_long, lcc), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Lambert Conformal Conic:")
plot(st_transform(NAm, lcc) %>% st_geometry(), col="black", add=TRUE)

aea = "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"
plot(st_transform(lat_long, aea), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Alberts Equal Area")
plot(st_transform(NAm, aea) %>% st_geometry(), col="black", add=TRUE)

robinson = "+proj=robin +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
plot(st_transform(lat_long, robinson), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Robinson")
plot(st_transform(NAm, robinson) %>% st_geometry(), col="black", add=TRUE)

mollweide = "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
plot(st_transform(lat_long, mollweide), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Mollweide")
plot(st_transform(NAm, mollweide) %>% st_geometry(), col="black", add=TRUE)
```

::: {.aside}
EPSG is a public registry of coordinate reference systems and related data (name comes from the European Petroleum Survey Group)
:::


## Dateline

How long is the flight between the Western most and the Eastern most points in the US?

. . .

```{r echo=FALSE, fig.align="center", fig.height=4, fig.width=8, warning=FALSE, message=FALSE, out.width="50%", warning=FALSE}
par(mar=c(3,3,1,1), mfrow=c(2,1))

ak = st_read("data/ak/states.shp", quiet = TRUE, stringsAsFactors = FALSE)
ak_geom = st_geometry(ak)

west_hem = st_polygon(list(matrix(c(-180,90, -180,-90, 0,-90, 0,90, -180,90), ncol=2,byrow=TRUE))) %>% 
  st_sfc() %>%
  st_set_crs("+proj=longlat +datum=WGS84")

east_hem = st_polygon(list(matrix(c(180,90, 180,-90, 0,-90, 0,90, 180,90), ncol=2,byrow=TRUE))) %>% 
  st_sfc() %>%
  st_set_crs("+proj=longlat +datum=WGS84")

ak_west = st_intersection(ak_geom, west_hem)
ak_east = st_intersection(ak_geom, east_hem)

ak_east_shift = (ak_east - c(360,0)) %>% st_set_crs("+proj=longlat +datum=WGS84")

ak_shift = st_union(ak_east_shift, ak_west)

plot(ak_shift, axes=TRUE, col="black", border=NA, xlim=c(-190, -130))
points(c(-360+179.776,-179.146), c(51.952,51.273),col='red')
abline(v=-180,col='blue',lty=2)

plot(ak_shift, col="black", border=NA, xlim=c(-185, -170), ylim=c(50, 55))
points(c(-360+179.776,-179.146), c(51.952,51.273),col='red')
abline(v=-180,col='blue',lty=2)
box()
```


## Great circle distance

::: {.small}
```{r fig.align="center", fig.width=8, fig.height=4, out.width="80%"}
par(mar=c(0,0,0,0))
ak1 = c(179.776, 51.952)
ak2 = c(-179.146, 51.273)
inter = geosphere::gcIntermediate(ak1, ak2, n=50, addStartEnd=TRUE)
plot(st_geometry(world), col="black", ylim=c(-90,90), axes=TRUE)
lines(inter,col='red',lwd=2,lty=3)
```
:::


## Plotting is hard

```{r echo=FALSE, fig.align="center", fig.width=8, fig.height=6, out.width="60%"}
plot(st_geometry(NAm), col="black", border=NA, axes=TRUE)
```


## Relationships

![](imgs/taal_photo.jpg){fig-align="center" width="75%"}

##

![](imgs/taal_seq.png){fig-align="center" width="100%"}

##

![](imgs/taal_before_after.jpg){fig-align="center" width="100%"}

::: {.aside}
[Source](https://earthobservatory.nasa.gov/images/146444/an-ash-damaged-island-in-the-philippines)
:::


# Geospatial Data and R


## Packages for geospatial data in R

R has a rich package ecosystem for read/writing, manipulating, and analyzing geospatial data.

::: {.small}
* `sp` - core classes for handling spatial data, additional utility functions - **Deprecated**

* `rgdal` - R interface to `gdal` (Geospatial Data Abstraction Library) for reading and writing spatial data - **Deprecated**

* `rgeos` - R interface to `geos` (Geometry Engine Open Source) library for querying and manipulating spatial data. Reading and writing WKT. - **Deprecated**

* `sf` - Combines the functionality of `sp`, `rgdal`, and `rgeos` into a single package based on tidy simple features.

* `raster` - classes and tools for handling spatial raster data.

* `stars` - Reading, manipulating, writing and plotting spatiotemporal arrays (rasters)

See more - [Spatial task view](http://cran.r-project.org/web/views/Spatial.html)
:::

## Installing `sf`

This is the hardest part of using the `sf` package, difficulty comes from is dependence on several external libraries (`geos`, `gdal`, and `proj`).

* *Windows* - installing from source works when Rtools is installed (system requirements are downloaded from rwinlib)

* *MacOS* - install dependencies via homebrew: `gdal`, `geos`, `proj`, `udunits`.

* *Linux* - Install development pacakages for GDAL (>= 2.0.0), GEOS (>= 3.3.0), Proj4 (>= 4.8.0), udunits2 from your package manager of choice.


More specific details are included in the repo [README](https://github.com/r-spatial/sf) on github.



## Simple Features

```{r echo=FALSE}
pt = st_point(c(30, 10))
ls = st_linestring(matrix(c(30, 10, 10, 30, 40, 40), byrow=TRUE, ncol=2))
poly = st_polygon(list(matrix(c(30, 10, 40, 40, 20, 40, 10, 20, 30, 10), ncol=2, byrow=TRUE)))
poly_hole = st_polygon(
              list(
                matrix(c(35, 10, 45, 45, 15, 40, 10, 20, 35, 10), ncol=2, byrow=TRUE),
                matrix(c(20, 30, 35, 35, 30, 20, 20, 30), ncol=2, byrow=TRUE)
              )
            )

pts = st_multipoint(matrix(c(10, 40, 40, 30, 20, 20, 30, 10), ncol=2, byrow=TRUE))
lss = st_multilinestring(list( 
        matrix(c(10, 10, 20, 20, 10, 40), ncol=2, byrow=TRUE),
        matrix(c(40, 40, 30, 30, 40, 20, 30, 10), ncol=2, byrow=TRUE)
      ))
polys = st_multipolygon(list(
          list(matrix(c(30, 20, 45, 40, 10, 40, 30, 20), ncol=2, byrow=TRUE)),
          list(matrix(c(15, 5, 40, 10, 10, 20, 5, 10, 15, 5), ncol=2, byrow=TRUE))
        ))
polys_hole = st_multipolygon(list(
                list(matrix(c(40, 40, 20, 45, 45, 30, 40, 40), ncol=2, byrow=TRUE)),
                list(matrix(c(20, 35, 10, 30, 10, 10, 30, 5, 45, 20, 20, 35), ncol=2, byrow=TRUE),
                     matrix(c(30, 20, 20, 15, 20, 25, 30, 20), ncol=2, byrow=TRUE))
              ))
```

```{r echo=FALSE}
par(mar=c(1,1,2,1), mfrow=c(4,4))

plot(pt, axes=FALSE, main="Point", pch=16); box()
plot(ls, axes=FALSE, main="Linestring");    box()
plot(poly, axes=FALSE, col="lightgrey", main="Polygon");  box()
plot(poly_hole, axes=FALSE, col="lightgrey", main="Polygon w/ Hole(s)");  box()

plot(pts, axes=FALSE, main="Multipoint", pch=16); box()
plot(lss, axes=FALSE, main="Multilinestring");    box()
plot(polys, axes=FALSE, col="lightgrey", main="Multipolygon");  box()
plot(polys_hole, axes=FALSE, col="lightgrey", main="Multipolygon w/ Hole(s)");  box()
```




## Reading, writing, and converting simple features

* `st_read` / `st_write` - Shapefile, GeoJSON, KML, ...

* `read_sf` / `write_sf` - Same, suports tibbles ...

* `st_as_sfc` / `st_as_wkt` - WKT

* `st_as_sfc` / `st_as_binary` - WKB

* `st_as_sfc` / `as(x, "Spatial")` - sp

::: {.aside}
See [sf vignette #2 - Reading, Writing and Converting Simple Features](https://cran.r-project.org/web/packages/sf/vignettes/sf2.html).
:::

## Shapefiles

```{r}
fs::dir_info("data/gis/nc_counties/") %>% select(path:size)
```


## NC Counties

::: {.small}
```{r}
(st_read("data/gis/nc_counties/", quiet=FALSE))
```
:::


## sf tibbles

::: {.small}
```{r}
(nc = read_sf("data/gis/nc_counties/"))
```
:::


## `sf` classes

```{r}
str(nc, max.level=1)

class(nc)

class(nc$geometry)

class(nc$geometry[[1]])
```


## Plotting

```{r}
plot(nc)
```


## More Plotting

```{r echo=TRUE, fig.height=4}
plot(nc["AREA"])
```


## Graticules

```{r echo=TRUE, fig.height=4}
plot(nc["AREA"], graticule=TRUE, axes=TRUE, las=1)
```


## Geometries

```{r echo=TRUE, fig.height=4}
plot(st_geometry(nc), graticule=TRUE, axes=TRUE, las=1)
```


## ggplot2

```{r echo=TRUE, fig.height=4}
ggplot(nc, aes(fill=AREA)) +
  geom_sf()
```

## ggplot2 + palettes

```{r echo=TRUE, fig.height=4}
ggplot(nc, aes(fill=AREA)) +
  geom_sf() +
  scale_fill_viridis_c()
```


## leaflet

::: {.small}
```{r}
st_transform(nc, "+proj=longlat +datum=WGS84") %>%
leaflet::leaflet(width = 600, height = 400) %>%
  leaflet::addPolygons(
    weight = 1,
    popup = ~COUNTY,
    highlightOptions = leaflet::highlightOptions(color = "red", weight = 2, bringToFront = TRUE)
  )
```
:::


## leaflet + tiles

::: {.small}
```{r}
st_transform(nc, "+proj=longlat +datum=WGS84") %>%
leaflet::leaflet(width = 600, height = 400) %>%
  leaflet::addPolygons(
    weight = 1,
    popup = ~COUNTY,
    highlightOptions = leaflet::highlightOptions(color = "red", weight = 2, bringToFront = TRUE)
  ) %>%
  leaflet::addTiles()
```
:::



# GIS in R


## Geometry casting

```{r fig.height=3.5}
nc_pts = st_cast(nc, "MULTIPOINT")
ggplot() +
  geom_sf(data=nc) +
  geom_sf(data=nc_pts, size=0.5, color="blue")
```


## Joining

::: {.small}
```{r fig.height=3.5, out.width="66%"}
(nc_state = st_union(nc))

ggplot() + geom_sf(data=nc_state)
```
:::


## sf & dplyr

::: {.small}
```{r fig.height=3.5, out.width="66%"}
nc_cut = nc %>%
  mutate(
    ctr_x = st_centroid(nc) %>% st_coordinates() %>% .[,1],
    region = cut(ctr_x, breaks = 5)
  )

ggplot(nc_cut) +
  geom_sf(aes(fill=region)) +
  guides(fill = "none")
```
:::


## sf & dplyr (cont.)

::: {.small}
```{r fig.height=3.5, out.width="66%"}
nc_cut2 = nc_cut %>%
  group_by(region) %>%
  summarize(
    area = sum(AREA)
  )

ggplot() + geom_sf(data=nc_cut2, aes(fill=area))
```
:::


## Affine Transformations

```{r fig.height=4}
rotate = function(a) matrix(c(cos(a), sin(a), -sin(a), cos(a)), 2, 2)

(ggplot() + geom_sf(data=(nc_state) * rotate(-pi/4))) + 
(ggplot() + geom_sf(data=(nc_state) * rotate(pi/6)))
```


## Scaling + Translations

```{r fig.height=3.5}
ctrd = st_centroid(st_geometry(nc))
nc_scaled = (st_geometry(nc) - ctrd) * 0.66 + ctrd

ggplot() + geom_sf(data=nc_scaled)
```



## Some other data

```{r}
air = read_sf("data/gis/airports/", quiet=TRUE)
hwy = read_sf("data/gis/us_interstates/", quiet=TRUE)
```

```{r fig.height=2}
(ggplot(nc) + geom_sf()) +
(ggplot(air) + geom_sf(color = "blue")) +
(ggplot(hwy) + geom_sf(color = "red"))
```



## Overlays?

```{r}
plot(st_geometry(nc),  axes=TRUE, las=1)
plot(st_geometry(air), axes=TRUE, pch=16, col="blue", main="air", add=TRUE)
plot(st_geometry(hwy), axes=TRUE, col="red", add=TRUE)
```


## Overlays? (ggplot)

```{r}
ggplot() +
  geom_sf(data=nc) +
  geom_sf(data=air, color="blue") + 
  geom_sf(data=hwy, color="red")
```


## Projections

:::: {.columns .small}
::: {.column width='50%'}
```{r}
st_crs(nc)
```
:::

::: {.column width='50%'}
```{r}
st_crs(hwy)
```
:::
::::


## Aside - UTM Zones

![](imgs/UTM_Zones.png){fig-align="center" width="100%"}


## hwy -> Lat/Long

```{r}
hwy = st_transform(hwy, st_crs(nc))
```

```{r echo=FALSE}
plot(st_geometry(nc),  axes=TRUE, las=1)
plot(st_geometry(air), axes=TRUE, pch=16, col="blue", main="air", add=TRUE)
plot(st_geometry(hwy), axes=TRUE, col="red", add=TRUE)
```



# Airport Example


## NC Airports

```{r echo=FALSE}
plot(st_geometry(nc),  axes=TRUE, las=1)
plot(st_geometry(air), axes=TRUE, pch=16, col="blue", add=TRUE)
```


## Sparse Insections


::: {.small}
```{r}
st_intersects(nc[20:30,], air) %>% str()
```
:::


## Dense Insections

::: {.small}
```{r}
st_intersects(nc, air, sparse=FALSE) %>% str()
st_intersects(nc, air, sparse=FALSE) %>% .[20:30, 260:270]
```
:::


## Which counties have airports?

:::: {.columns .small}
::: {.column width='50%'}
```{r}
nc_air = nc %>% 
  mutate(
    n_air = map_int(
      st_intersects(nc, air), 
      length
    )    
  ) %>%
  filter(n_air > 0)

nc_air %>% pull(COUNTY)
```
:::

::: {.column width='50%' .fragment}
```{r}
air_nc = air %>%
  slice(
    st_intersects(nc, air) %>% 
      unlist() %>% 
      unique()
  )
air_nc %>% pull(AIRPT_NAME)
```
:::
::::

 

## Results

```{r fig.height=4}
ggplot() +
  geom_sf(data=nc) +
  geom_sf(data = nc_air, fill = "lightblue") + 
  geom_sf(data = air_nc, color = "red", size=2)
```



# Highway Example


## Highways

```{r fig.height = 5, out.width="60%"}
ggplot() +
  geom_sf(data=nc) +
  geom_sf(data=hwy, col='red')
```


## NC Interstate Highways

::: {.small}
```{r fig.height=3.5}
hwy_nc = st_intersection(hwy, nc)

ggplot() + 
  geom_sf(data=nc) +
  geom_sf(data=hwy_nc, col='red')
```
:::


## Counties near a highway (Projection)

::: {.small}
```{r fig.height=3.5}
nc_utm  = st_transform(nc, "+proj=utm +zone=17 +datum=NAD83 +units=m +no_defs")
hwy_utm  = st_transform(hwy, "+proj=utm +zone=17 +datum=NAD83 +units=m +no_defs")

hwy_nc = st_intersection(hwy_utm, nc_utm)

ggplot() + 
  geom_sf(data=nc_utm) +
  geom_sf(data=hwy_nc, col='red')
```
:::


## Counties near the interstate (Buffering)

::: {.small}
```{r fig.height=3.5}
hwy_nc_buffer = hwy_nc %>%
  st_buffer(10000)

ggplot() + 
  geom_sf(data=nc_utm) +
  geom_sf(data=hwy_nc, color='red') +
  geom_sf(data=hwy_nc_buffer, fill='red', alpha=0.3)
```
:::


## Counties near the interstate (Buffering + Union)

::: {.small}
```{r fig.height=3.5}
hwy_nc_buffer = hwy_nc %>%
  st_buffer(10000) %>%
  st_union() %>%
  st_sf()
  
ggplot() + 
  geom_sf(data=nc_utm) +
  geom_sf(data=hwy_nc, color='red') +
  geom_sf(data=hwy_nc_buffer, fill='red', alpha=0.3)
```
:::

## Counties near the interstate (Buffering + Union)

::: {.small}
```{r echo=FALSE}
hwy_nc_buffer = hwy_nc %>%
  st_buffer(10000) %>%
  st_union() %>%
  st_sf()

hwy_cty = nc_utm %>%
  slice(
    st_intersects(hwy_nc_buffer, nc_utm) %>% 
      unlist() %>% 
      unique()
  )
  
ggplot() + 
  geom_sf(data=nc_utm) +
  geom_sf(data=hwy_nc, color='red') +
  geom_sf(data=hwy_cty, fill='lightblue') +
  geom_sf(data=hwy_nc_buffer, fill='red', alpha=0.3) 
```
:::


# Gerrymandering Example

## NC House Districts - 112th Congress

::: {.medium}
```{r}
( nc_house = read_sf("data/gis/nc_districts112.gpkg", quiet = TRUE) |>
    select(ID, DISTRICT) )
```
:::

##

```{r}
nc_house = nc_house |>
  st_transform("+proj=utm +zone=17 +datum=NAD83 +units=km +no_defs")
plot(nc_house[,"DISTRICT"], axes=TRUE)
```

## Measuring Compactness - Reock Score

The Reock score is a measure of compactness that is calculated as the ratio of a shape's area to the area of its minimum bounding circle.

::: {.small}
```{r}
circs = nc_house |> 
  lwgeom::st_minimum_bounding_circle()
plot(circs |> filter(DISTRICT == 1) |> st_geometry(), axes=TRUE)
plot(nc_house |> select(DISTRICT) |> filter(DISTRICT == 1), add=TRUE)
```
:::

##

::: {.small}
```{r}
ggplot(mapping = aes(fill=DISTRICT)) +
  geom_sf(data=nc_house) +
  geom_sf(data=circs, alpha=0.15) +
  guides(color="none", fill="none")
```
:::

## Calculating Reock

```{r}
nc_house |>
  mutate(reock = (st_area(nc_house) / st_area(circs)) |> as.numeric()) |>
  ggplot(aes(fill = reock)) +
    geom_sf()

```

##

::: {.small}
```{r}
nc_house |>
  mutate(reock = st_area(nc_house) / st_area(circs)) |>
  arrange(reock) |>
  print(n=Inf)
```
:::
