Unlike vector data, a raster data consists of cells or pixels organized into rows and columns as a matrix where each cell contains a value representing geographical feature on the earth. The size of the area on the surface that each pixel covers is known as the spatial resolution of the image. For instance, an image that has a 1 m spatial resolution means that each pixel in the image represents a 1 m x 1 m area. There are two types of raster data: continuous and discrete. An example of discrete raster data is Land use raster. Data types are flexible, including discrete and categorical data, such as soil or land-use maps, or continuous data as in digital elevation models, precipitation gradient maps, or pollutant concentration maps, etc.
Generally, two types of raster use in GIS and remote sensing application: a single band, or layer measure of a single characteristic of spatial area and multiple bands raster contains multiple spatially corresponding matrices of cell values representing the same spatial area. An example of a single-band raster data set is a digital elevation model (DEM). Each cell in a DEM contains only one value representing elevation of earth surface. Most satellite imagery has multiple bands, typically containing values within band of the electromagnetic spectrum.
In this exercise, we will use SRTM 90 Digital Elevation Model of Onondaga County, New York State which was downloaded from CGIAR-CSI.
This exercise will cover following raster processing in python:
The data set use in this exercise could be found here.
import os
path= "E:/GitHub/geospatial-python-github.io/Lesson_06_working_with_raster_data/"
os.chdir(path)
print("Current Working Directory " , os.getcwd())
%matplotlib inline
import sys
from glob import glob
from osgeo import ogr, gdal
from osgeo import gdalconst
import subprocess
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.colors import ListedColormap, BoundaryNorm
import numpy as np
import seaborn as sns
import geopandas as gpd
import pycrs
import fiona
from fiona.crs import from_epsg
from shapely.geometry import box
from shapely.geometry import Point
import shapely.geometry as geoms
import rasterio as rio
from rasterio.plot import show
import rasterio.warp
import rasterio.shutil
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.plot import plotting_extent
from rasterio.plot import show_hist
from rasterio.mask import mask
from rasterio.merge import merge
from rasterio import Affine, MemoryFile
from rasterio.enums import Resampling
from rasterio import plot
import rasterstats as rs
import georasters as gr
from rastertodataframe import raster_to_dataframe
import earthpy.spatial as es
import earthpy.plot as ep
import earthpy as et
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate
print(f"Python {sys.version}")
print(f"geopandas {gpd.__version__}")
print(f"osgeo {gdal.__version__}")
print(f"rasterio {rio.__version__}")
sns.set_style("white")
sns.set(font_scale=1.5)
In this exercise, we will use SRTM 90 Digital Elevation Model of Onondaga County, New York State which was downloaded from CGIAR-CSI. We will use rasterio package to open and visualization raster in Python.
dem_path = "Data_06/RASTER/DEM/Onondaga_DEM.tif"
dem=rio.open(dem_path)
dem.crs
dem.width
# raster column
dem.height
dem.count
type(dem)
DEM_meta=dem.meta
print(DEM_meta)
show(dem,cmap='RdYlGn')
from rasterio.plot import show_hist
show_hist(dem, bins=20, lw=0.0, stacked=False, alpha=0.3,
histtype='stepfilled', title="Histogram")
You can also open a raster layer with gdal.Open()
raster = gdal.Open(dem_path)
type(raster)
gt =raster.GetGeoTransform()
print (gt)
pixelSizeX = gt[1]
pixelSizeY =-gt[5]
print (pixelSizeX)
print (pixelSizeY)
raster.GetProjection()
data = raster.GetRasterBand(1).ReadAsArray()
plt.imshow(data,cmap='gray')
plt.show()
We can export a raster file in python using the rasterio write() function.
Use rio.open() to create a new blank raster ‘template’. Then write the DEM numpy array to to that template using dst.write().
outTIF= "Data_06/RASTER/DEM/Onondaga_DEM_NEW.tif"
dem_meta=dem.profile
with rio.open(outTIF, 'w', **dem_meta) as dst:
dst.write(dem.read(1), 1)
src = rio.open("Data_06/RASTER/DEM/Onondaga_DEM_NEW.tif")
show(src)
Now I will show how to write raster with GDAL driver. We will use a method to save the data in a format provided by the user.
g = gdal.Open ("Data_06/RASTER/DEM/Onondaga_DEM_NEW.tif" )
# Get the projection information
g.GetProjection()
# Get the x, y and number of bands from the original file
x_size, y_size, n_bands = g.RasterXSize, g.RasterYSize, g.RasterCount
data = g.ReadAsArray ()
# Get a handler to a driver
driver = gdal.GetDriverByName ( "HFA" )
data
# Next line creates the output dataset with
# 1. The filename ("Onondaga_DEM.img")
# 2. The raster size (x_size, y_size)
# 3. The number of bands
# 4.The data type (in this case, Byte.
# Other typical values might be gdal.GDT_Int16
# or gdal.GDT_Float32)
dataset_out = driver.Create( "Data_06/RASTER/DEM/Onondaga_DEM_gdal.img", x_size, y_size, n_bands, \
gdal.GDT_Byte )
# Set the output geotransform by reading the input one
dataset_out.SetGeoTransform ( g.GetGeoTransform() )
# Set the output projection by reading the input one
dataset_out.SetProjection ( g.GetProjectionRef() )
# Now, get band # 1, and write our data array.
# Note that the data array needs to have the same type
# as the one specified for dataset_out
dataset_out.GetRasterBand ( 1 ).WriteArray ( data )
# This bit forces GDAL to close the file and write to it
dataset_out = None
!gdalinfo "Data_06/RASTER/DEM/Onondaga_DEM_gdal.img"
For loading multi-bands image in Python, we will use a sub-set of Landsat 8 multispectral image covering Onondaga county of New York state. This image was downloaded from USGS Earth Explore.
b2="Data_06/RASTER/LC08_20150706_subset/LC08_L1TP_015030_20150716_20170226_01_T1_B2.TIF"
b3="Data_06/RASTER/LC08_20150706_subset/LC08_L1TP_015030_20150716_20170226_01_T1_B3.TIF"
b4="Data_06/RASTER/LC08_20150706_subset/LC08_L1TP_015030_20150716_20170226_01_T1_B4.TIF"
b5="Data_06/RASTER/LC08_20150706_subset/LC08_L1TP_015030_20150716_20170226_01_T1_B5.TIF"
b2_r=rio.open(b2)
b3_r=rio.open(b3)
b4_r=rio.open(b4)
b5_r=rio.open(b5)
# Initialize subplots
import matplotlib.pyplot as plt
%matplotlib inline
fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1, figsize=(10, 4), sharey=True)
# Plot 4 bands
show(b2_r, ax=ax1)
show(b3_r, ax=ax2)
show(b4_r, ax=ax3)
show(b5_r, ax=ax4)
# Add titles
ax1.set_title("Band-2")
ax2.set_title("Band-3")
ax3.set_title("Band-4")
ax4.set_title("Band-5")
Some remote sensing datasets are stored with each band in a separate file. However, often you want to use all of the bands together in your analysis. For example you need all of the bands together in the same file or “stack” in order to plot a color RGB image. EarthPy has a stack() function that allows you to take a set of .tif files that are all in the same spatial extent, CRS and resolution and either export them together a single stacked .tif file or work with them in Python directly as a stacked numpy array.
To begin using the EarthPy stack() function, import the needed packages and create an array to be plotted. Below you plot the data as continuous with a colorbar using the plot_bands() function.
We will use es.stack() function of earthpy library to create a raster stack of multi-band raster. It need three steps:
You will use the glob function see the files in landsat data directory
glob ("Data_06/RASTER/LC08_20150706_subset/*")
Now we will create a list for band (2,3,4 and 5)
multi_bands=glob( "Data_06/RASTER/LC08_20150706_subset/*B[2:3:4:5]*.tif")
multi_bands
Now you have a list of all of the landsat bands in your landsat collections folder. You could chose to open each file individually using the rio.open (rasterio library) function.
Remember that Python uses 0 based indexing so band 3 is actually at index [2] not [3].
with rio.open(multi_bands[3]) as src:
landsat_band4 = src.read()
ep.plot_bands(landsat_band4[0],
title="Landsat NIR (Band-5)",
scale=False,
figsize=(5, 6))
plt.show()
landsat_multi_path = "Data_06/RASTER/LC08_20150706_subset/landsat_multi.tif"
land_stack, land_meta = es.stack(multi_bands,
landsat_multi_path)
Once we have stacked 4 bands, we can import it and work with it as we need to!
with rio.open(landsat_multi_path) as src:
landsat_multi = src.read()
band_titles = ["Blue", "Green", "Red", "NIR"]
ep.plot_bands(landsat_multi,
title=band_titles, cbar=False)
plt.show()
We can plot 3 band color composite images with Landsat bands. Below you will plot an RGB image using landsat. Refer to the landsat bands in the table at the top of this page to figure out the NR, Red and Blue
ep.plot_rgb(landsat_multi,
rgb=[3, 2, 1],
stretch=True,
figsize=(5, 6),
title="RGB Composite Image")
plt.show()
Now we will look at the values that are stored in the band. As the values of the bands are stored as numpy arrays, it is extremely easy to calculate basic statistics by using basic numpy functions.
# Open multiband-raster
fp = "Data_06/RASTER/LC08_20150706_subset/landsat_multi.tif"
# Open the file:
raster = rio.open(fp)
# Read all bands as np.array
array = raster.read()
# Calculate statistics for each band
stats = []
for band in array:
stats.append({
'min': band.min(),
'mean': band.mean(),
'median': np.median(band),
'max': band.max()})
print(stats)
ep.hist(array,
figsize=(8,8),
title=["Band 1", "Band 2", "Band 3", "Band 4"])
plt.show()
This exercise you will learn raster projection and projection transformation of raster data.
We will use SRTM 90 digital Elevation Model New York State which was downloaded from CGIAR-CSI. We will re-project it from WSG84 coordinate to Albers Equal Area Conic NAD83 projection system.
dem_path = "Data_06/RASTER/DEM/Onondaga_DEM.tif"
dem=rio.open(dem_path)
show(dem)
dem.crs
First, we will re-project one tile of DEM raster data from WGS 1984 CRS to Albers Equal Area Conic projection system. In Python, re-projection of raster little complicated. The code below reprojects between two arrays, using no pre-existing GIS datasets. The rasterio.warp.reproject() has two positional arguments: source and destination. The remaining keyword arguments parameterize the reprojection transform.
Reprojecting a GeoTIFF dataset from one coordinate reference system is a common use case. Rasterio provides a few utilities to make this even easier:
transform_bounds() transforms the bounding coordinates of the source raster to the target coordinate reference system, densifiying points along the edges to account for non-linear transformations of the edges.
calculate_default_transform() transforms bounds to target coordinate system, calculates resolution if not provided, and returns destination transform and dimensions.
inputTIF = 'Data_06/RASTER/DEM/Onondaga_DEM.tif'
projTIF = 'Data_06/RASTER/DEM/Onondaga_DEM_PROJ.tif'
# Define EPSG:5070 projection
dst_crs={'init': 'epsg:5070'}
with rio.open(inputTIF) as src:
transform, width, height = calculate_default_transform(
src.crs, dst_crs, src.width, src.height, *src.bounds)
kwargs = src.meta.copy()
kwargs.update({
'crs': dst_crs,
'transform': transform,
'width': width,
'height': height
})
with rio.open(projTIF, 'w', **kwargs) as dst:
for i in range(1, src.count + 1):
reproject(
source=rio.band(src, i),
destination=rio.band(dst, i),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=transform,
dst_crs=dst_crs,
resampling=Resampling.nearest)
with rio.open(projTIF) as src:
rasterio.plot.show(src, title='Albers Equal Area Conic', cmap='RdYlGn')
Below reproject_et function wrapps the rasterio reproject code with some addtional arguments that allow to define input path, the output path, and the new CRS components. These three argument only need to change we are going to cahnge project other raster layers. (Source)
def reproject_et(inpath, outpath, new_crs):
dst_crs = new_crs # CRS for web meractor
with rio.open(inpath) as src:
transform, width, height = calculate_default_transform(
src.crs, dst_crs, src.width, src.height, *src.bounds)
kwargs = src.meta.copy()
kwargs.update({
'crs': dst_crs,
'transform': transform,
'width': width,
'height': height
})
with rio.open(outpath, 'w', **kwargs) as dst:
for i in range(1, src.count + 1):
reproject(
source=rio.band(src, i),
destination=rio.band(dst, i),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=transform,
dst_crs=dst_crs,
resampling=Resampling.nearest)
reproject_et(inpath = 'Data_06/RASTER/DEM/Onondaga_DEM.tif',
outpath = 'Data_06/RASTER/DEM/Onondaga_DEM_PROJ_02.tif',
new_crs = "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs")
# Check projection with GDAL
import gdal
!gdalinfo "Data_06/RASTER/DEM/Onondaga_DEM_PROJ_02.tif"
# Load DEM
dem_path = "Data_06/RASTER/DEM/DEM.tif"
GP_DEM=rio.open(dem_path)
# Load CO state boundary file
fp_co="Data_06/CO_STATE_BD.shp"
CO_BD= gpd.GeoDataFrame.from_file(fp_co)
# Plot them
fig, ax = plt.subplots(figsize=(5, 15))
rasterio.plot.show(GP_DEM, ax=ax)
CO_BD.plot(ax=ax, facecolor='none', edgecolor='blue')
Before clipping, weneed to get the coordinates of the geometry in JESON format. This can be done with following function:
def getFeatures(gdf):
"""Function to parse features from GeoDataFrame in such a manner that rasterio wants them"""
import json
return [json.loads(gdf.to_json())['features'][0]['geometry']]
Get the geometry coordinates by using the function.
coords = getFeatures(CO_BD)
# print(coords)
Now we are ready to clip the raster with the polygon using the coords variable that we just created. Clipping the raster can be done easily with the mask function that we imported in the beginning from rasterio, and specifying clip=True.
# Clip the raster with Polygon
out_img, out_transform = mask(dataset=GP_DEM, shapes=coords, crop=True)
Next, we need to modify the metadata. Let’s start by copying the metadata from the original data file.
# Copy the metadata
out_meta = GP_DEM.meta.copy()
print(out_meta)
# Define EPSG:5070 projection
new_crs={'init': 'epsg:5070'}
Now we need to update the metadata with new dimensions, transform (affine) and CRS (as Proj4 text):
out_meta.update({"driver": "GTiff",
"height": out_img.shape[1],
"width": out_img.shape[2],
"transform": out_transform,
"crs": new_crs}
)
Finally, we can save the clipped raster to disk with following command:
out_tif =("Data_06/RASTER/DEM/CO_DEM.tif")
with rasterio.open(out_tif, "w", **out_meta) as dest:
dest.write(out_img)
We can use GDAL in order to re-open the file and read it in as a 2D array. Additionally, it is important to note that our DEM file has NaN values which will later cause Matplotlib to fail. Therefore, we’ll also mask these values out so that Matplotlib will be unaware of them and use imshow() function to create a plot with colorbar.
filename = "Data_06/RASTER/DEM/CO_DEM.tif"
gdal_data = gdal.Open(filename)
gdal_band = gdal_data.GetRasterBand(1)
nodataval = gdal_band.GetNoDataValue()
# convert to a numpy array
data_array = gdal_data.ReadAsArray().astype(np.float)
data_array
# replace missing values if necessary
if np.any(data_array == nodataval):
data_array[data_array == nodataval] = np.nan
data_array[data_array == nodataval] = np.nan
%matplotlib inline
fig = plt.figure(figsize = (7,6))
plt.imshow(data_array, cmap='terrain_r')
plt.colorbar(cax = fig.add_axes([1, 0.25, 0.05, 0.5]))
plt.title("Elevation (m)")
plt.show()
#Plot out data with Matplotlib's 'contour'
fig = plt.figure(figsize = (7,6))
plt.contour(data_array, cmap = "viridis",
levels = list(range(0, 4000, 100)))
plt.colorbar(cax = fig.add_axes([1, 0.25, 0.05, 0.5]))
plt.title("Elevation Contours")
plt.show()
#Plot our data with Matplotlib's 'contourf'
fig = plt.figure(figsize = (7, 6))
plt.contourf(data_array, cmap = "viridis",
levels = list(range(0, 4000,100)))
plt.colorbar(cax = fig.add_axes([1, 0.25, 0.05, 0.5]))
plt.title("Elevation (m)")
#plt.gca().set_aspect('equal', adjustable='box')
plt.show()
stack_band_paths = glob( "Data_06/RASTER/LC08_20150706_subset/*B[2:3:4:5]*.tif")
stack_band_paths.sort()
# Create output directory and the output path
output_dir ="Data_06/RASTER/LC08_20150706_subset_clip/"
raster_out_path = os.path.join(output_dir, "raster.tiff")
array, raster_prof = es.stack(stack_band_paths, out_path=raster_out_path)
extent = plotting_extent(array[0], raster_prof["transform"])
fig, ax = plt.subplots(figsize=(6, 6))
ep.plot_rgb(
array,
ax=ax,
rgb=[3, 2, 1],
stretch=True,
extent=extent,
str_clip=0.5,
title="RGB Image of Un-cropped Raster",
)
plt.show()
aoi = gpd.read_file('Data_06\AOI.shp')
The crop function won’t work properly if the data are in different Coordinate Reference Systems (CRS). To fix this, be sure to reproject the crop layer to match the CRS of your raster data. To reproject your data, first get the CRS of the raster from the rasterio profile object. Then use that to reproject using geopandas .to_crs method.
# Check current projection system
aoi.crs
with rio.open(stack_band_paths[0]) as raster_crs:
crop_raster_profile = raster_crs.profile
aoi_proj = aoi.to_crs(crop_raster_profile["crs"])
When you need to crop and stack a set of images, it is most efficient to first crop each image, and then stack it. es.crop_all() is an efficient way to crop all bands in an image quickly. The function will write out cropped rasters to a directory and return a list of file paths that can then be used with es.stack().
band_paths_list = es.crop_all(
stack_band_paths, output_dir, aoi_proj, overwrite=True
)
cropped_array, array_raster_profile = es.stack(band_paths_list, nodata=-9999)
crop_extent = plotting_extent(
cropped_array[0], array_raster_profile["transform"]
)
# Make subplots that are next to each other
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 8))
# Natural Color
aoi_proj.plot(ax=ax1, facecolor='none', edgecolor='red')
ep.plot_rgb(
cropped_array,
ax=ax1,
stretch=True,
extent=crop_extent,
title="Natural Color Cropped Raster \n and AOI Boundary",
)
# False color
aoi_proj.plot(ax=ax2, facecolor='none', edgecolor='blue')
ep.plot_rgb(
cropped_array,
ax=ax2,
rgb=[3, 2, 1],
stretch=True,
extent=crop_extent,
title="False Color Cropped Raster \n and AOI Boundary",
)
plt.show()
Raster mosaic can be done easily with the merge() function in Rasterio. Here, we will show you how to mosaic four DEM raster and create a seamless raster.
# List all dem files with glob() function
dirpath= "Data_06/RASTER/DEM/DEM_GP/"
out_fp = "Data_06/RASTER/DEM/DEM_Mosaic.tif"
# Make a search criteria to select the DEM files
search_criteria = "*_DEM*.tif"
q = os.path.join(dirpath, search_criteria)
print(q)
We will use glob() fuction to create list all rastetr in /DEM_GP folder.
# glob function can be used to list files from a directory with specific criteria
dem_fps = glob(q)
dem_fps
Let’s first create an empty list for the datafiles that will be part of the mosaic.
# List for the source files
src_files_to_mosaic = []
# Iterate over raster files and add them to source -list in 'read mode'
for fp in dem_fps:
src = rio.open(fp)
src_files_to_mosaic.append(src)
src_files_to_mosaic
Let’s plot them next to each other to see how they look like
# Create 4 plots next to each other
fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1, figsize=(12, 4))
# Plot first four files
show(src_files_to_mosaic[0], ax=ax1)
show(src_files_to_mosaic[1], ax=ax2)
show(src_files_to_mosaic[2], ax=ax3)
show(src_files_to_mosaic[3], ax=ax4)
# Do not show y-ticks values in last three axis
for ax in [ax2, ax3, ax4]:
ax.yaxis.set_visible(False)
As we can see we have multiple separate raster files that are actually located next to each other. Hence, we want to put them together into a single raster file that can by done by creating a raster mosaic.
Now as we have placed the individual raster files in read -mode into the source_files_to_mosaic -list, it is really easy to merge those together and create a mosaic with rasterio’s merge function:
We will use merge() function to create a mosaic raster, Merge function returns a single mosaic array and the transformation info
# Merge function returns a single mosaic array and the transformation info
mosaic, out_trans = merge(src_files_to_mosaic)
Let’s first update the metadata with new dimensions, transform and CRS:
# Copy the metadata
out_meta = src.meta.copy()
new_crs={'init': 'epsg:5070'}
# Update the metadata
out_meta.update({"driver": "GTiff",
"height": mosaic.shape[1],
"width": mosaic.shape[2],
"transform": out_trans,
"crs": new_crs
}
)
we can write mosaic DEM as geo-tif format
# Write the mosaic raster to disk
with rasterio.open(out_fp, "w", **out_meta) as dest:
dest.write(mosaic)
Now we will load and plot newly created raster again
mosaic_dem_fp = "Data_06/RASTER/DEM/DEM_Mosaic.tif"
MOSAIC_DEM=rio.open(mosaic_dem_fp)
show(MOSAIC_DEM)
Reclassification is the process of reassigning a value, a range of values, or a list of values in a raster to new output values.
# Open data and assign negative values to nan
with rio.open('Data_06/RASTER/DEM/CO_DEM.tif') as src:
co_dem_im = src.read(1, masked=True)
# View object dimensions
co_dem_im.shape
# Prettier plotting with seaborn
sns.set(font_scale=1.5, style="whitegrid")
# Histogram
ep.hist(co_dem_im, colors=['grey'],
figsize=(6,5),
title="Distribution of DEM Values",
xlabel='Elevation (meters)',
ylabel='Frequency',
bins=5)
plt.show()
# View min and max values in the data
print('DEM min value:', co_dem_im.min())
print('DEM max value:', co_dem_im.max())
xlim=[1000, 4000]
counts, bins = np.histogram(co_dem_im,
bins=3, range=xlim)
# Print histogram outputs
print("counts:", counts)
print("bins:", bins)
Next, customize your histogram with breaks that you think might make sense as breaks to use for your raster map. in the example below, breaks are added in 1000 meter increments using the bins = argument.
ep.hist(co_dem_im.ravel(),
figsize=(6,5),
bins=[1000, 2000, 3000, 4000],
title="Histogram with Custom Breaks",
xlabel="Height (m)", ylabel="Number of Pixels")
plt.show()
# Define bins that you want, and then classify the data
class_bins =[co_dem_im.min(), 2000, 3000, np.inf]
np.digitize
Numpy has a function called digitize that is useful for classifying the values in an array. It is similar to how histogram works, because it categorizes datapoints into bins. However, unlike histogram, it doesn’t aggregate/count the number of values within each bin.
# You'll classify the original image array, then unravel it again for plotting
co_dem_im_class = np.digitize(co_dem_im, class_bins)
# Note that you have an extra class in the data (0)
print(np.unique(co_dem_im_class))
type(co_dem_im_class)
After running the classification we have one extra class. This class - the first class - is your missing data value. Our classified array output is also a regular (not a masked) array. We can reassign the first class in our data to a mask using np.ma.masked_where().
Reassign all values that are classified as 0 to masked (no data value), This will prevent pixels that == 0 from being rendered on a map in matplotlib.
co_dem_class_ma = np.ma.masked_where(co_dem_im_class == 0,
co_dem_im_class,
copy=True)
co_dem_class_ma
# A cleaner seaborn style for raster plots
sns.set_style("white")
# Plot newly classified and masked raster
fig, ax = plt.subplots(figsize=(6,6))
ep.plot_bands(co_dem_class_ma,
ax=ax,
scale=False)
plt.show()
# Plot data using nicer colors
colors = ['linen', 'darkgreen', 'maroon']
cmap = ListedColormap(colors)
norm = BoundaryNorm(class_bins, len(colors))
fig, ax = plt.subplots(figsize=(6,6))
ep.plot_bands(co_dem_class_ma,
cmap=cmap,
title="Classified DEM",
scale=False,
ax=ax)
plt.show()
The plot looks OK but the legend doesn’t represent the data well. The legend is continuous - with a range between 0 and 3. However you want to plot the data using discrete bins.
Finally, clean up our plot legend. Given you have discrete values you will create a CUSTOM legend with the 3 categories that you created in our classification matrix.
# Create a list of labels to use for your legend
height_class_labels = ["< 2000 (Low)", "2000-3000 (Medium)", " > 3000 (High)"]
# A path is an object drawn by matplotlib. In this case a patch is a box draw on your legend
# Below you create a unique path or box with a unique color - one for each of the labels above
legend_patches = [Patch(color=icolor, label=label)
for icolor, label in zip(colors, height_class_labels)]
cmap = ListedColormap(colors)
fig, ax = plt.subplots(figsize=(6,6))
ep.plot_bands(co_dem_class_ma,
cmap=cmap,
ax=ax,
cbar=False)
ax.legend(handles=legend_patches,
facecolor="white",
edgecolor="white",
bbox_to_anchor=(1.65, 0.65)) # Place legend to the RIGHT of the map
ax.set_axis_off()
We can calculate statistic for each zone defined by a zone or polygon dataset, based on values from another dataset (a value raster). A single output value is computed for every zone in the input zone dataset.
In this excercise we will use zonal_stats() function of rasterstats to county level statistics of DEM data of 4 States of th Great Plain region.
with rio.open('Data_06/RASTER/DEM/DEM.tif') as DEM_src:
DEM_data = DEM_src.read(1, masked=True)
DEM_meta = DEM_src.profile
fig, ax = plt.subplots(figsize=(8, 8))
ax.hist(DEM_data.ravel(),
color="purple"
)
ax.set(xlabel=" ELevation (m)",
ylabel="Total Pixels",
title="Distribution of Pixel Values")
# Turn off scientific notation
ax.ticklabel_format(useOffset=False,
style='plain')
print('Mean:', DEM_data.mean())
print('Max:', DEM_data.max())
print('Min:', DEM_data.min())
DEM_data_no_na = DEM_data[~np.isnan(DEM_data)].ravel()
fp_county="Data_06/GP_county.shp"
county_BD= gpd.GeoDataFrame.from_file(fp_county)
county_BD.geom_type.head()
fig, ax = plt.subplots(figsize=(6, 6))
ep.plot_bands(DEM_data,
# Here you must set the spatial extent or else the data will not line up with your geopandas layer
extent=plotting_extent(DEM_src),
cmap='Greys',
title="GP DEM",
scale=False,
ax=ax)
county_BD.plot(ax=ax,
edgecolor='blue',
facecolor='none')
ax.set_axis_off()
plt.show()
ow we can summary statistic all of the pixels that fall within each county using the function zonal_stats() in the rasterstats library.
county_stat = rs.zonal_stats(county_BD,
DEM_data,
nodata=-999,
affine=DEM_meta['transform'],
geojson_out=True,
copy_properties=True,
stats="min mean max median")
# View object type
type(county_stat)
county_stat_gdf = gpd.GeoDataFrame.from_features(county_stat)
county_stat_gdf.to_file("Data_06/County_mean.shp")
# Write as CSV files
county_stat_df = pd.DataFrame(county_stat_gdf.drop(columns='geometry'))
county_stat_df.to_csv("Data_06/County_mean.csv")
county_stat_df.head()
fig, ax = plt.subplots(1, figsize=(7, 7))
county_stat_gdf.plot(column='median',
edgecolor="gray",
ax=ax,
cmap='Blues',
linewidth=0.5
)
ax.set_title("Medain DEM (m)");
ax.axis('off');
fig.colorbar(ax.collections[0], ax=ax)
Resampling refers to changing the cell values due to changes in the raster cell grid. This can occur during reprojection. Even if the projection is not changing, we may want to change the effective cell size of an existing dataset.
with rio.open('Data_06/RASTER/DEM/CO_DEM.tif') as DEM_src:
DEM_data = DEM_src.read(1, masked=True)
DEM_meta = DEM_src.profile
gt = DEM_meta["transform"]
pixelSizeX = gt[0]
pixelSizeY =-gt[4]
print(pixelSizeX)
print(pixelSizeY)
Upsampling refers to cases where we are converting to higher resolution/smaller cells. We will use gdal.GRA_Bilinear of gdal.Wrap() to create 2.5 x 2.5 raster.
inDs = gdal.Open('Data_06/RASTER/DEM/CO_DEM.tif')
dem_25=gdal.Warp('Data_06/RASTER/DEM/CO_DEM_25km.tif', inDs,
format = 'GTiff',
xRes = 2500, yRes = 2500,
resampleAlg = gdal.GRA_Bilinear)
gt_25 =dem_25.GetGeoTransform()
print (gt_25)
pixelSizeX = gt_25[1]
pixelSizeY = -gt_25[5]
print(pixelSizeX)
print(pixelSizeY)
Aggregate or downscaling creates a new Raster-layer with a lower resolution (larger cells). Aggregation groups rectangular areas to create larger cells. The value for the resulting cells is computed with a user-specified function. We will create 10 km x 10 km raster from 5 km x 5 kg raster using gdal.GRA_Average of gdal.Wrap().
dem_10=gdal.Warp('Data_06/RASTER/DEM/CO_DEM_10km.tif', inDs,
format = 'GTiff',
xRes = 10000, yRes = 10000,
resampleAlg = gdal.GRA_Bilinear)
gt_10 =dem_10.GetGeoTransform()
print (gt_10)
pixelSizeX = gt_10[1]
pixelSizeY = -gt_10[5]
print(pixelSizeX)
print(pixelSizeY)
Raster algebra in Pyhton is a set-based algebra for manipulating a raster object more raster layers (“maps”) of similar dimensions to produce a new raster layer (map) using algebraic operations. A simple sample of raster algebra in python is to convert unit of DEM raster from meter to feet (1 m = 3.28084 feet)
with rio.open('Data_06/RASTER/DEM/CO_DEM.tif') as DEM_src:
DEM_data = DEM_src.read(1, masked=True)
DEM_meta = DEM_src.profile
dem_feet=DEM_data * 3.28084
fig, ax = plt.subplots(figsize=(6, 6))
ep.plot_bands(dem_feet,
cmap='Greys',
title="DEM (feet)",
scale=False,
ax=ax)
ax.set_axis_off()
plt.show()
dem_feet_1000 = dem_feet/1000
fig, ax = plt.subplots(figsize=(6, 6))
ep.plot_bands(dem_feet_1000,
cmap='Greys',
title="DEM (feet x 1000)",
scale=False,
ax=ax)
ax.set_axis_off()
plt.show()
We also do some calculations between bands or raster. Here I will show you how to calculate the normalized difference vegetation index (NDVI) using Landsat 8 data.
with rio.open('Data_06/RASTER/LC08_20150706_subset_clip\\LC08_L1TP_015030_20150716_20170226_01_T1_B4_crop.TIF') as red_src:
red_data = red_src.read(1, masked=True)
red_meta = red_src.profile
with rio.open('Data_06/RASTER/LC08_20150706_subset_clip\\LC08_L1TP_015030_20150716_20170226_01_T1_B5_crop.TIF') as nir_src:
nir_data = nir_src.read(1, masked=True)
nir_meta = nir_src.profile
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
plot.show(red_data, ax=ax1, cmap='Blues') #red
plot.show(nir_data, ax=ax2, cmap='Blues') #nir
plt.tight_layout()
ndvi = np.zeros(red_data.shape, dtype=rasterio.float32)
ndvi = (nir_data.astype(float)-red_data.astype(float))/(nir_data+red_data)
meta_ndvi = red_meta
meta_ndvi.update(
dtype=rasterio.float32,
count=1,
compress='lzw')
with rasterio.open('Data_06/RASTER/NDVI/NDVI.tif', 'w', **meta_ndvi) as dst:
dst.write_band(1, ndvi.astype(rasterio.float32))
fig, ax = plt.subplots(figsize=(6, 6))
ep.plot_bands(ndvi,
cmap='RdYlGn',
title="NDVI",
scale=False,
#vmin=-1, vmax=1,
ax=ax)
ax.set_axis_off()
plt.show()
You can use High-level functions to manipulate multiple raster after creating create raster stack (collection of raster layers).
glist = glob( "Data_06/RASTER/GP_RASTER/*.tif")
glist
with rasterio.open(glist[0]) as src0:
meta = src0.meta
meta.update(count = len(glist))
with rasterio.open('Data_06/RASTER/raster_stack.tif', 'w', **meta) as dst:
for id, layer in enumerate(glist, start=1):
with rasterio.open(layer) as src1:
dst.write_band(id, src1.read(1))
with rio.open('Data_06/RASTER/raster_stack.tif') as stack_src:
stack_data = stack_src.read(masked=True)
stack_meta = stack_src.profile
stack_meta
band_titles=['DEM', 'MAP','MAT','NDVI','NLCD','Slope']
ep.plot_bands(stack_data,
cmap='RdYlGn',
scale=False,
title=band_titles)
plt.show()
we will use gdal.RasterizeLayer() from gdal.
shape_poly = 'Data_06/GP_STATE.shp'
poly_ndsm = 'Data_06/RASTER/GP_RASTER/DEM.tif'
poly_data = gdal.Open(poly_ndsm, gdalconst.GA_ReadOnly)
poly_output = 'Data_06/GP_BD_grid.tif'
geo_transform = poly_data.GetGeoTransform()
x_min = geo_transform[0]
y_max = geo_transform[3]
x_max = x_min + geo_transform[1] * poly_data.RasterXSize
y_min = y_max + geo_transform[5] * poly_data.RasterYSize
x_res = poly_data.RasterXSize
y_res = poly_data.RasterYSize
mb_v = ogr.Open(shape_poly)
mb_l = mb_v.GetLayer()
pixel_width = geo_transform[1]
target_ds = gdal.GetDriverByName('GTiff').Create(poly_output, x_res, y_res, 1, gdal.GDT_Int16)
target_ds.SetGeoTransform((x_min, pixel_width, 0, y_min, 0, pixel_width))
band = target_ds.GetRasterBand(1)
band.FlushCache()
gdal.RasterizeLayer(target_ds, [1], mb_l, options=["ATTRIBUTE=STATE_ID"])
target_ds = None
grid_raster = 'Data_06/GP_BD_grid.tif'
grid = gr.from_file(grid_raster)
grid_df = grid.to_pandas()
grid_df.head()
columns =['row','col' ]
grid_df = pd.DataFrame(grid_df.drop(columns=columns))
grid_df.rename(columns = {'value':'STATE_ID'}, inplace = True)
grid_df = grid_df[grid_df['STATE_ID'] > 0]
grid_df=grid_df[['STATE_ID','x','y']] # organize column
grid_df
grid_df.to_csv ('Data_06/gp_grid.csv', index = None, header=True)
shape_point = 'Data_06/RASTER/NDVI/NDVI.shp'
point_ndsm = 'Data_06/RASTER/NDVI/NDVI.tif'
point_data = gdal.Open(point_ndsm, gdalconst.GA_ReadOnly)
point_output = 'Data_06/RASTER/NDVI/NEW_NDVI.tif'
geo_transform = point_data.GetGeoTransform()
x_min = geo_transform[0]
y_max = geo_transform[3]
x_max = x_min + geo_transform[1] * point_data.RasterXSize
y_min = y_max + geo_transform[5] * point_data.RasterYSize
x_res = point_data.RasterXSize
y_res = point_data.RasterYSize
mb_v = ogr.Open(shape_point)
mb_l = mb_v.GetLayer()
pixel_width = geo_transform[1]
target_ds = gdal.GetDriverByName('GTiff').Create(point_output, x_res, y_res, 1, gdal.GDT_Float32)
target_ds.SetGeoTransform((x_min, pixel_width, 0, y_min, 0, pixel_width))
band = target_ds.GetRasterBand(1)
band.FlushCache()
gdal.RasterizeLayer(target_ds, [1], mb_l, options=["ATTRIBUTE=NDVI"])
target_ds = None
fp = 'Data_06/RASTER/NDVI/NEW_NDVI.tif'
new_ndvi = rio.open(fp)
show(new_ndvi,cmap='RdYlGn')
new_ndvi.meta
For convert raster data to spatial point data frame, we will use .to_pandas() function of GeoRasters package.
single_raster = 'Data_06/RASTER/GP_RASTER/DEM.tif'
single_data = gr.from_file(single_raster)
# Get some stats
single_data.mean()
single_df = single_data.to_pandas()
single_df.head()
columns =['row','col' ]
single_df = pd.DataFrame(single_df.drop(columns=columns))
single_df.rename(columns = {'value':'DEM'}, inplace = True)
single_df=single_df[['x','y','DEM']] # organize column
single_df.head()
single_df.to_csv ('Data_06/RASTER/gp_grid_dem.csv', index = None, header=True)
single_df_point = gpd.GeoDataFrame(
single_df, geometry=gpd.points_from_xy(single_df.x,single_df.y))
single_df_point.crs = {'init' :'epsg:32618'}
single_df_point.to_file("Data_06/dem_point.shp")
We will extract raster values to SPDF data frame the Characterize the sampling locations with a comprehensive set of environmental data.
values = pd.Series()
inputShape= 'Data_06/GP_POINT_SOC.shp'
inputRaster ='Data_06/RASTER/GP_RASTER/DEM.tif'
# Read input shapefile with fiona and iterate over each feature
with fiona.open(inputShape) as shp:
for feature in shp:
siteID = feature['properties']['SiteID']
coords = feature['geometry']['coordinates']
# Read pixel value at the given coordinates using Rasterio
# NB: `sample()` returns an iterable of ndarrays.
with rio.open(inputRaster) as src:
value = [v for v in src.sample([coords])][0][0]
# Update the pandas serie accordingly
values.loc[siteID] = value
values
df = pd.DataFrame(values)
df.columns = ['DEM']
df.index.name = 'SiteID'
df.head()
# Write records into a CSV file
df.to_csv('Data_06/GP_DEM.csv', header =True)
multi_values_points = pd.Series()
inputShape= 'Data_06/GP_POINT_SOC.shp'
# Read input shapefile with fiona and iterate over each feature
with fiona.open(inputShape) as shp:
for feature in shp:
siteID = feature['properties']['SiteID']
coords = feature['geometry']['coordinates']
# Read pixel value at the given coordinates using Rasterio
# NB: `sample()` returns an iterable of ndarrays.
with rio.open('Data_06/RASTER/raster_stack.tif') as stack_src:
value = [v for v in stack_src.sample([coords])]
# Update the pandas serie accordingly
multi_values_points.loc[siteID] = value
multi_values_points
df1 = pd.DataFrame(multi_values_points.values.tolist(), index=multi_values_points.index)
df1['SiteID'] = df1.index
df1
df2 = pd.DataFrame(df1[0].values.tolist(),
columns=['DEM','MAP','MAT','NDVI','NLCD','Slope'])
df2.head()
df2['SiteID'] = df1.index
df2
df3 =df2[['SiteID','DEM','Slope','MAP','MAT','NDVI','NLCD']] # organize column
# Write records into a CSV file
df3.to_csv('Data_06/gp_point_data.csv', header =True, index=False)
# Read GPS of soil sampling locations
f_grid = 'Data_06/GP_grid.csv'
grid_df = pd.read_csv(f_grid)
grid_df.head()
grid_df['GridID']=grid_df.index
grid_df
grid_point = gpd.GeoDataFrame(
grid_df, geometry=gpd.points_from_xy(grid_df.x, grid_df.y))
grid_point.to_file('Data_06/gp_grid.shp')
multi_values_grid = pd.Series()
inputGrid= 'Data_06/gp_grid.shp'
# Read input shapefile with fiona and iterate over each feature
with fiona.open(inputGrid) as shp:
for feature in shp:
gridID = feature['properties']['GridID']
coords = feature['geometry']['coordinates']
# Read pixel value at the given coordinates using Rasterio
# NB: `sample()` returns an iterable of ndarrays.
with rio.open('Data_06/RASTER/raster_stack.tif') as stack_src:
value = [v for v in stack_src.sample([coords])]
# Update the pandas serie accordingly
multi_values_grid.loc[gridID] = value
mf1 = pd.DataFrame(multi_values_grid.values.tolist(), index=multi_values_grid.index)
mf1['GridID'] = mf1.index
mf1
mf2 = pd.DataFrame(mf1[0].values.tolist(),
columns=['DEM','MAP','MAT','NDVI','NLCD','Slope'])
mf2
mf2['GridID'] = mf1.index
mf2['x'] = grid_df['x']
mf2['y'] = grid_df['y']
mf2
mf2 = mf2[mf2['NLCD'] > 0]
mf2
mf3 =mf2[['GridID','x','y','DEM','Slope','MAP','MAT','NDVI','NLCD']] # organize column
# Write records into a CSV file
mf3.to_csv('Data_06/gp_grid_data.csv', header =True, index=False)
http://geologyandpython.com/dem-processing.html
https://automating-gis-processes.github.io/CSC/notebooks/L4/spatial-join.html
https://geohackweek.github.io/raster/04-workingwithrasters/
https://www.e-education.psu.edu/geog489/node/2215
https://pcjericks.github.io/py-gdalogr-cookbook/raster_layers.html
https://automating-gis-processes.github.io/2016/Lesson7-read-raster.html https://www.neonscience.org/plot-neon-rgb-py