Interactive Data Exploration

This notebook demonstrates how the functions and techniques we covered in the first notebook can be combined to build interactive data exploration tools. The code in the cells below will generate two interactive panels. The first panel enables comparison of LIS output, SNODAS, and SNOTEL snow depth and snow water equivalent at SNOTEL site locations. The second panel enables exploration of LIS output using an interactive map.

Note: some cells below take several minutes to run and some aspects of the interactive widgets may not work in the rendered version on the Hackweek site.

Import Libraries

import numpy as np
import pandas as pd
import geopandas
import xarray as xr
import fsspec
import s3fs
from datetime import datetime as dt
from scipy.spatial import distance

import holoviews as hv, geoviews as gv
from geoviews import opts
from geoviews import tile_sources as gvts

from datashader.colors import viridis
import datashader
from holoviews.operation.datashader import datashade, shade, dynspread, spread, rasterize

from holoviews.streams import Selection1D, Params
import panel as pn
import param as pm
import hvplot.pandas 
import hvplot.xarray
# create S3 filesystem object
s3 = s3fs.S3FileSystem()

# define S3 bucket name
bucket = "s3://eis-dh-hydro/SNOWEX-HACKWEEK"

# set holoviews backend to Bokeh
gv.extension('bokeh')

Load Data

SNOTEL Sites info

# create dictionary linking state names and abbreviations
snotel = {"AZ" : "arizona",
          "CO" : "colorado",
          "ID" : "idaho",
          "MT" : "montana", 
          "NM" : "newmexico",
          "UT" : "utah",
          "WY" : "wyoming"}
# load SNOTEL site metadata for sites in the given state
def load_site(state):
    
    # define path to file
    key = f"SNOTEL/snotel_{state}.csv"
    
    # load csv into pandas DataFrame
    df = pd.read_csv(s3.open(f'{bucket}/{key}', mode='r'))
    
    return df 

SNOTEL Depth & SWE

def load_snotel_txt(state, var):
    
    # define path to file
    key = f"SNOTEL/snotel_{state}{var}_20162020.txt"

    # open text file
    fh = s3.open(f"{bucket}/{key}")
    
    # read each line and note those that begin with '#'
    lines = fh.readlines()
    skips = sum(1 for ln in lines if ln.decode('ascii').startswith('#'))
    
    # load txt file into pandas DataFrame (skipping lines beginning with '#')
    df = pd.read_csv(s3.open(f"{bucket}/{key}"), skiprows=skips)
    
    # convert Date column from str to pandas datetime objects
    df['Date'] = pd.to_datetime(df['Date'])
    return df
# load SNOTEL depth & swe into dictionaries

# define empty dicts
snotel_depth = {}
snotel_swe = {}

# loop over states and load SNOTEL data
for state in snotel.keys():
    print(f"Loading state {state}")
    snotel_depth[state] = load_snotel_txt(state, 'depth')
    snotel_swe[state] = load_snotel_txt(state, 'swe')
Loading state AZ
Loading state CO
Loading state ID
Loading state MT
Loading state NM
Loading state UT
Loading state WY

SNODAS Depth & SWE

Like the LIS output we have been working with, a sample of SNODAS data is available on our S3 bucket in Zarr format. We can therefore load the SNODAS just as we load the LIS data.

# load snodas depth data
key = "SNODAS/snodas_snowdepth_20161001_20200930.zarr"
snodas_depth = xr.open_zarr(s3.get_mapper(f"{bucket}/{key}"), consolidated=True)

# load snodas swe data
key = "SNODAS/snodas_swe_20161001_20200930.zarr"
snodas_swe = xr.open_zarr(s3.get_mapper(f"{bucket}/{key}"), consolidated=True)

LIS Outputs

Next we’ll load the LIS outputs. First, we’ll define the helper function we saw in the previous notebook that adds lat and lon as coordinate variables. We’ll use this immediately upon loading the data.

def add_latlon_coords(dataset: xr.Dataset)->xr.Dataset:
    """Adds lat/lon as dimensions and coordinates to an xarray.Dataset object."""
    
    # get attributes from dataset
    attrs = dataset.attrs
    
    # get x, y resolutions
    dx = round(float(attrs['DX']), 3)
    dy = round(float(attrs['DY']), 3)
    
    # get grid cells in x, y dimensions
    ew_len = len(dataset['east_west'])
    ns_len = len(dataset['north_south'])
    
    # get lower-left lat and lon
    ll_lat = round(float(attrs['SOUTH_WEST_CORNER_LAT']), 3)
    ll_lon = round(float(attrs['SOUTH_WEST_CORNER_LON']), 3)
    
    # calculate upper-right lat and lon
    ur_lat =  ll_lat + (dy * ns_len)
    ur_lon = ll_lon + (dx * ew_len)
    
    # define the new coordinates
    coords = {
        # create an arrays containing the lat/lon at each gridcell
        'lat': np.linspace(ll_lat, ur_lat, ns_len, dtype=np.float32, endpoint=False),
        'lon': np.linspace(ll_lon, ur_lon, ew_len, dtype=np.float32, endpoint=False)
    }
    
   
    # drop the original lat and lon variables
    dataset = dataset.rename({'lon': 'orig_lon', 'lat': 'orig_lat'})
    # rename the grid dimensions to lat and lon
    dataset = dataset.rename({'north_south': 'lat', 'east_west': 'lon'})
    # assign the coords above as coordinates
    dataset = dataset.assign_coords(coords)
    # reassign variable attributes
    dataset.lon.attrs = dataset.orig_lon.attrs
    dataset.lat.attrs = dataset.orig_lat.attrs
    
    return dataset

Load the LIS data and apply add_latlon_coords():

# LIS surfacemodel DA_10km
key = "DA_SNODAS/SURFACEMODEL/LIS_HIST.d01.zarr"

lis_sf = xr.open_zarr(s3.get_mapper(f"{bucket}/{key}"), consolidated=True)

# (optional for 10km simulation?)
lis_sf = add_latlon_coords(lis_sf)

# drop off irrelevant variables
drop_vars = ['_history', '_eis_source_path', 'orig_lat', 'orig_lon']
lis_sf = lis_sf.drop(drop_vars)
lis_sf
<xarray.Dataset>
Dimensions:           (time: 730, lat: 215, lon: 361, SoilMoist_profiles: 4)
Coordinates:
  * time              (time) datetime64[ns] 2016-10-01 2016-10-02 ... 2018-09-30
  * lat               (lat) float32 28.55 28.65 28.75 ... 49.75 49.85 49.95
  * lon               (lon) float32 -113.9 -113.8 -113.8 ... -78.05 -77.95
Dimensions without coordinates: SoilMoist_profiles
Data variables: (12/24)
    Albedo_tavg       (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    CanopInt_tavg     (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    ECanop_tavg       (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    ESoil_tavg        (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    GPP_tavg          (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    LAI_tavg          (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    ...                ...
    Snowcover_tavg    (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    SoilMoist_tavg    (time, SoilMoist_profiles, lat, lon) float32 dask.array<chunksize=(1, 4, 215, 361), meta=np.ndarray>
    Swnet_tavg        (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    TVeg_tavg         (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    TWS_tavg          (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
    TotalPrecip_tavg  (time, lat, lon) float32 dask.array<chunksize=(1, 215, 361), meta=np.ndarray>
Attributes: (12/14)
    DX:                      0.10000000149011612
    DY:                      0.10000000149011612
    MAP_PROJECTION:          EQUIDISTANT CYLINDRICAL
    NUM_SOIL_LAYERS:         4
    SOIL_LAYER_THICKNESSES:  [10.0, 30.000001907348633, 60.000003814697266, 1...
    SOUTH_WEST_CORNER_LAT:   28.549999237060547
    ...                      ...
    conventions:             CF-1.6
    institution:             NASA GSFC
    missing_value:           -9999.0
    references:              Kumar_etal_EMS_2006, Peters-Lidard_etal_ISSE_2007
    source:                  Noah-MP.4.0.1
    title:                   LIS land surface model output

Working with the full LIS output dataset can be slow and consume lots of memory. Here we temporally subset the data to a shorter window of time. The full dataset contains daily values from 10/1/2016 to 9/30/2018. Feel free to explore the full dataset by modifying the time_range variable below and re-running all cells that follow.

# subset LIS data for two years  
time_range = slice('2016-10-01', '2017-04-30')
lis_sf = lis_sf.sel(time=time_range)

In the next cell, we extract the data variable names and timesteps from the LIS outputs. These will be used to define the widget options.

# gather metadata from LIS

# get variable names:string
vnames = list(lis_sf.data_vars)
print(vnames)

# get time-stamps:string
tstamps = list(np.datetime_as_string(lis_sf.time.values, 'D'))
print(len(tstamps), tstamps[0], tstamps[-1])
['Albedo_tavg', 'CanopInt_tavg', 'ECanop_tavg', 'ESoil_tavg', 'GPP_tavg', 'LAI_tavg', 'LWdown_f_tavg', 'Lwnet_tavg', 'NEE_tavg', 'Qg_tavg', 'Qh_tavg', 'Qle_tavg', 'Qs_tavg', 'Qsb_tavg', 'RadT_tavg', 'SWE_tavg', 'SWdown_f_tavg', 'SnowDepth_tavg', 'Snowcover_tavg', 'SoilMoist_tavg', 'Swnet_tavg', 'TVeg_tavg', 'TWS_tavg', 'TotalPrecip_tavg']
212 2016-10-01 2017-04-30

By default, the holoviews plotting library automatically adjusts the range of plot colorbars based on the range of values in the data being plotted. This may not be ideal when comparing data on different timesteps. In the next cell we extract the upper and lower bounds for each data variable which we’ll later use to set a static colorbar range.

Note: this cell will take ~1m40s to run

%%time
# pre-load min/max range for LIS variables
def get_cmap_range(vns):
    vals = [(lis_sf[x].sel(time='2016-12').min(skipna=True).values.item(),
        lis_sf[x].sel(time='2016-12').max(skipna=True).values.item()) for x in vns]
    return dict(zip(vns, vals))

cmap_lims = get_cmap_range(vnames)
CPU times: user 11.8 s, sys: 1.2 s, total: 13 s
Wall time: 1min 1s

Interactive Widgets

SNOTEL Site Map and Timeseries

The two cells that follow will create an interactive panel for comparing LIS, SNODAS, and SNOTEL snow depth and snow water equivalent. The SNOTEL site locations are plotted as points on an interactive map for each state. Hover over the sites to view metadata and click on a site to generate a timeseries!

Note: it will take some time for the timeseries to display.

# get snotel depth
def get_depth(state, site, ts, te):
    df = snotel_depth[state]
    
    # subset between time range
    mask = (df['Date'] >= ts) & (df['Date'] <= te)
    df = df.loc[mask]
    
    # extract timeseries for the site
    return pd.concat([df.Date, df.filter(like=site)], axis=1).set_index('Date')

# get snotel swe
def get_swe(state, site, ts, te):
    df = snotel_swe[state]
    
    # subset between time range
    mask = (df['Date'] >= ts) & (df['Date'] <= te)
    df = df.loc[mask]
    
    # extract timeseries for the site
    return pd.concat([df.Date, df.filter(like=site)], axis=1).set_index('Date')

# co-locate site & LIS model cell
def nearest_grid(pt):
    # pt : input point, tuple (longtitude, latitude)
    # output:
    #        x_idx, y_idx 
    loc_valid = df_loc.dropna()
    pts = loc_valid[['lon', 'lat']].to_numpy()
    idx = distance.cdist([pt], pts).argmin()

    return loc_valid['east_west'].iloc[idx], loc_valid['north_south'].iloc[idx]

# get LIS variable 
def var_subset(dset, v, lon, lat, ts, te):
    return dset[v].sel(lon=lon, lat=lat, method="nearest").sel(time=slice(ts, te)).load()

# line plots
def line_callback(index, state, vname, ts_tag, te_tag):
    sites = load_site(snotel[state])
    row = sites.iloc[0]
    
    tmp = var_subset(lis_sf, vname, row.lon, row.lat, ts_tag, te_tag) 
    xr_sf = xr.zeros_like(tmp)
    
    xr_snodas = xr_sf
    
    ck = get_depth(state, row.site_name, ts_tag, te_tag).to_xarray().rename({'Date': 'time'})
    xr_snotel = xr.zeros_like(ck)
    
    if not index:
        title='Var: -- Lon: -- Lat: --'
        return (xr_sf.hvplot(title=title, color='blue', label='LIS') \
                * xr_snotel.hvplot(color='red', label='SNOTEL') \
                * xr_snodas.hvplot(color='green', label='SNODAS')).opts(legend_position='right')
        

    else:
        sites = load_site(snotel[state])
        first_index = index[0]
        row = sites.iloc[first_index]
    
        
        xr_sf = var_subset(lis_sf, vname, row.lon, row.lat, ts_tag, te_tag)
    
        vs = vname.split('_')[0]
        title=f'Var: {vs} Lon: {row.lon} Lat: {row.lat}'

        
        # update snotel data 
        if 'depth' in vname.lower():
            xr_snotel =  get_depth(state, row.site_name, ts_tag, te_tag).to_xarray().rename({'Date': 'time'})*0.01
            xr_snodas =  var_subset(snodas_depth, 'SNOWDEPTH', row.lon, row.lat, ts_tag, te_tag)*0.001
        
        if 'swe' in vname.lower():
            xr_snotel =  get_swe(state, row.site_name, ts_tag, te_tag).to_xarray().rename({'Date': 'time'})
            xr_snodas =  var_subset(snodas_swe, 'SWE', row.lon, row.lat, ts_tag, te_tag)

    
        return xr_sf.hvplot(title=title, color='blue', label='LIS') \
               * xr_snotel.hvplot(color='red', label='SNOTEL') \
               * xr_snodas.hvplot(color='green', label='SNODAS')
# sites on map
def plot_points(state):  
    # dataframe to hvplot obj Points
    sites=load_site(snotel[state])
    pts_opts=dict(size=12, nonselection_alpha=0.4,tools=['tap', 'hover'])
    site_points=sites.hvplot.points(x='lon', y='lat', c='elev', cmap='fire', geo=True, hover_cols=['site_name', 'ntwk', 'state', 'lon', 'lat']).opts(**pts_opts)  
    return site_points

# base map
tiles = gvts.OSM()

# state widget
state_select = pn.widgets.Select(options=list(snotel.keys()), name="State")
state_stream = Params(state_select, ['value'], rename={'value':'state'})

# variable widget
var_select = pn.widgets.Select(options=['SnowDepth_tavg', 'SWE_tavg'], name="LIS Variable List")
var_stream = Params(var_select, ['value'], rename={'value':'vname'})

# date range widget
date_fmt = '%Y-%m-%d'
sdate_input = pn.widgets.DatetimeInput(name='Start date', value=dt(2016,10,1),start=dt.strptime(tstamps[0], date_fmt), end=dt.strptime(tstamps[-1], date_fmt), format=date_fmt)
sdate_stream = Params(sdate_input, ['value'], rename={'value':'ts_tag'})
edate_input = pn.widgets.DatetimeInput(name='End date', value=dt(2017,3,31),start=dt.strptime(tstamps[0], date_fmt), end=dt.strptime(tstamps[-1], date_fmt),format=date_fmt)
edate_stream = Params(edate_input, ['value'], rename={'value':'te_tag'})

# generate site points as dynamic map
# plots points and calls plot_points() when user selects a site
site_dmap = hv.DynamicMap(plot_points, streams=[state_stream]).opts(height=400, width=600)
# pick site
select_stream = Selection1D(source=site_dmap)

# link widgets to callback function
line = hv.DynamicMap(line_callback, streams=[select_stream, state_stream, var_stream, sdate_stream, edate_stream])

# create panel layout
pn.Row(site_dmap*tiles, pn.Column(state_select, var_select, pn.Row(sdate_input, edate_input), line))

Interactive LIS Output Explorer

The cell below creates a panel layout for exploring LIS output rasters. Select a variable using the drop down and then use the date slider to scrub back and forth in time!

# date widget (slider & key in)
# start and end dates
date_fmt = '%Y-%m-%d'
b = dt.strptime('2016-10-01', date_fmt)
e = dt.strptime('2017-04-30', date_fmt)

# define date widgets
date_slider = pn.widgets.DateSlider(start=b, end=e, value=b, name="LIS Model Date")
dt_input = pn.widgets.DatetimeInput(name='LIS Model Date Input', value=b, format=date_fmt)
date_stream = Params(date_slider, ['value'], rename={'value':'date'})

# variable widget
var_select = pn.widgets.Select(options=vnames, name="LIS Variable List")
var_stream = Params(var_select, ['value'], rename={'value':'vname'})


# base map widget
map_layer= pn.widgets.RadioButtonGroup(
    name='Base map layer',
    options=['Open Street Map', 'Satellite Imagery'],
    value='Satellite Imagery',
    button_type='primary',
    background='#f307eb')

# lis output display callback function
# returns plot of LIS output when date/variable is changed
def var_layer(vname, date):
    t_stamp = dt.strftime(date, '%Y-%m-%d')
    dssm = lis_sf[vname].sel(time=t_stamp)

    image = dssm.hvplot(geo=True)
    clim = cmap_lims[vname]
    return image.opts(clim=clim)

# watches date widget for updates
@pn.depends(dt_input.param.value, watch=True)
def _update_date(dt_input):
    date_slider.value=dt_input

# updates basemap on widget change
def update_map(maps):
    tile = gvts.OSM if maps=='Open Street Map' else gvts.EsriImagery
    return tile.opts(alpha=0.7)

# link widgets to callback functions
streams = dict(vname=var_select.param.value, date=date_slider.param.value)        
dmap = hv.DynamicMap(var_layer, streams=streams)
dtile = hv.DynamicMap(update_map, streams=dict(maps=map_layer.param.value))

# create panel layout of widgets and plot
pn.Column(var_select, date_slider, dt_input, map_layer,
          dtile*rasterize(dmap, aggregator=datashader.mean()).opts(cmap=viridis,colorbar=True,width=800, height=600))

Fin

Thank you for joining us for this tutorial. We hope that you are now more familiar with NASA’s Land Information System and how to use Python to explore and use the model simulation output LIS generates. For more information please see the links under the “More information” dropdown on the introduction page of this tutorial.