ATMS 391: Geophysical Data Analysis

Homework 12


(1) Recreate Figure 1 from Horel and Wallace (1981) using the NCEP reanalysis data.

In [2]:
import pandas as pd
import numpy as np
from numpy import ma
from matplotlib import pyplot as plt
from mpl_toolkits.basemap import Basemap as bm
import xray
%matplotlib inline
In [3]:
sstdata=xray.open_dataset('sst_seas_ncep.nc')
sstdata
Out[3]:
<xray.Dataset>
Dimensions:  (lat: 94, lon: 192, time: 272)
Coordinates:
  * lat      (lat) float32 88.542 86.6531 84.7532 82.8508 80.9473 79.0435 ...
  * lon      (lon) float32 0.0 1.875 3.75 5.625 7.5 9.375 11.25 13.125 15.0 ...
  * time     (time) datetime64[ns] 1948-02-29 1948-05-31 1948-08-31 ...
Data variables:
    skt      (time, lat, lon) float32 -38.5333 -38.5388 -38.5811 -38.6714 ...
In [4]:
#find time series at point nearest to 10N and 95W
tser=sstdata.sel(lat=[10.],lon=[360.-95.],method='nearest')
tser

plt.figure()
plt.plot(tser['time'].values,tser['skt'].values.ravel())
#sst_resample
Out[4]:
[<matplotlib.lines.Line2D at 0x7f7e3e332a10>]
In [5]:
ls=xray.open_dataset('lsmask.19294.nc',decode_times=False)
ls['lsmask']
Out[5]:
<xray.DataArray 'lsmask' (time: 1, lat: 94, lon: 192)>
array([[[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [-1., -1., -1., ..., -1., -1., -1.],
        [-1., -1., -1., ..., -1., -1., -1.],
        [-1., -1., -1., ..., -1., -1., -1.]]])
Coordinates:
  * lat      (lat) float32 88.542 86.6531 84.7532 82.8508 80.9473 79.0435 ...
  * lon      (lon) float32 0.0 1.875 3.75 5.625 7.5 9.375 11.25 13.125 15.0 ...
  * time     (time) float64 -1.577e+07
Attributes:
    long_name: Surface land/sea mask
    valid_range: [-1.  1.]
    actual_range: [-1.  0.]
    units: none
    precision: 2
    var_desc: Land/Sea Mask
    level_desc: Surface
    statistic: Individual Obs
    parent_stat: Other
    dataset: NCEP Reanalysis Derived Products
In [6]:
plt.figure()
plt.imshow(ls['lsmask'].values[0,:,:])
plt.colorbar
Out[6]:
<function matplotlib.pyplot.colorbar>
In [7]:
mask3d=ls['lsmask'].values.repeat(272,0)
np.shape(mask3d)
Out[7]:
(272, 94, 192)
In [8]:
sstdata
Out[8]:
<xray.Dataset>
Dimensions:  (lat: 94, lon: 192, time: 272)
Coordinates:
  * lat      (lat) float32 88.542 86.6531 84.7532 82.8508 80.9473 79.0435 ...
  * lon      (lon) float32 0.0 1.875 3.75 5.625 7.5 9.375 11.25 13.125 15.0 ...
  * time     (time) datetime64[ns] 1948-02-29 1948-05-31 1948-08-31 ...
Data variables:
    skt      (time, lat, lon) float32 -38.5333 -38.5388 -38.5811 -38.6714 ...
In [9]:
sstdata['skt'].values=np.ma.masked_where(mask3d == -1, sstdata['skt'].values)
plt.figure()
plt.pcolormesh(sstdata['lon'].values,sstdata['lat'].values,sstdata['skt'].values[0,:,:])
plt.clim([-20,40])
plt.colorbar()
Out[9]:
<matplotlib.colorbar.Colorbar instance at 0x7f7e0e950f80>
In [10]:
#calculate seasonal climatology

sstseas=sstdata.groupby('time.season').mean(dim='time')


for i in np.arange(4):
    plt.figure()
    plt.pcolormesh(sstseas['lon'].values,sstseas['lat'].values,sstseas['skt'].values[i,:,:])
    plt.clim([-20,40])
    plt.title(sstseas['season'].values[i])
    plt.colorbar()
In [11]:
import numpy as np
In [16]:
fixed_sstseas=xray.concat([sstseas.sel(season='DJF'),sstseas.sel(season='MAM'),sstseas.sel(season='JJA'),sstseas.sel(season='SON')],'season')

#resize (tile ) to be same shape as data
meansstseas=np.tile(fixed_sstseas['skt'].values,(68,1,1))

anomsst=sstdata['skt'].values-meansstseas

plt.pcolormesh(sstdata['lon'].values,sstdata['lat'].values,anomsst[4,:,:])
plt.clim([-10,10])
plt.colorbar()
Out[16]:
<matplotlib.colorbar.Colorbar instance at 0x7f7e0e2b0680>
In [18]:
def seas_departure(inputarr,field):
    seas=inputarr.groupby('time.season').mean(dim='time')
    fixed_seas=xray.concat([seas.sel(season='DJF'),seas.sel(season='MAM'),\
             seas.sel(season='JJA'),seas.sel(season='SON')], 'season')
    #calculates the seasonal climatology but np.tile gives you back a numpy array
    mean_seas=np.tile(fixed_seas[field].values,(68,1,1))
    #put it back into an xray dataset
    #foo = xray.DataArray(data, coords=[times, locs], dims=['time', 'space'])
    mean_seas=xray.DataArray(mean_seas,coords=[inputarr['time'].values,inputarr['lat'].values,\
                                             inputarr['lon'].values],dims=['time','lat','lon'])
    anom = inputarr[field].values-mean_seas
    return anom

anomsst=seas_departure(sstdata,'skt')
anomsst
Out[18]:
<xray.DataArray (time: 272, lat: 94, lon: 192)>
array([[[-4.44317245, -4.42129898, -4.42863083, ..., -4.36367035,
         -4.38543701, -4.41463852],
        [-3.66640091, -3.62452698, -3.5991745 , ..., -3.80857468,
         -3.74745941, -3.69694901],
        [-2.69404984, -2.68140411, -2.68823242, ..., -2.91536331,
         -2.82406235, -2.74878693],
        ..., 
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan]],

       [[ 0.24821091,  0.236166  ,  0.22953606, ...,  0.2504406 ,
          0.23942375,  0.23883057],
        [-0.18886185, -0.17407227, -0.17473221, ..., -0.18044281,
         -0.18090248, -0.18618202],
        [-0.53155708, -0.53410149, -0.55817604, ..., -0.47854424,
         -0.48344994, -0.49978828],
        ..., 
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan]],

       [[ 0.1597655 ,  0.1592178 ,  0.15766378, ...,  0.15960911,
          0.16175385,  0.16141357],
        [ 0.08877152,  0.08609134,  0.08308157, ...,  0.09800881,
          0.09455049,  0.0925636 ],
        [ 0.08442968,  0.07982728,  0.07721651, ...,  0.07750982,
          0.07400101,  0.07632336],
        ..., 
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan]],

       ..., 
       [[ 2.27892876,  2.31128883,  2.3406601 , ...,  2.24229813,
          2.25616646,  2.26855087],
        [ 1.16251183,  1.19216156,  1.21855164, ...,  1.09598541,
          1.11626434,  1.14008904],
        [ 0.4730587 ,  0.49069405,  0.51582336, ...,  0.38220024,
          0.4081707 ,  0.43649483],
        ..., 
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan]],

       [[-0.06331694, -0.06201929, -0.06022108, ..., -0.06985188,
         -0.06650722, -0.0647417 ],
        [-0.09672892, -0.09461612, -0.09230238, ..., -0.0998579 ,
         -0.10013518, -0.09952298],
        [-0.14694038, -0.14691013, -0.14581865, ..., -0.15556499,
         -0.15461025, -0.15049592],
        ..., 
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan]],

       [[ 7.78704834,  7.74666405,  7.72187042, ...,  7.8416481 ,
          7.81916046,  7.79504013],
        [ 8.52528572,  8.4527874 ,  8.36215687, ...,  8.60693169,
          8.59012032,  8.56120872],
        [ 8.34811211,  8.25792885,  8.17016029, ...,  8.36702538,
          8.37867928,  8.35907364],
        ..., 
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan],
        [        nan,         nan,         nan, ...,         nan,
                 nan,         nan]]], dtype=float32)
Coordinates:
  * time     (time) datetime64[ns] 1948-02-29 1948-05-31 1948-08-31 ...
  * lat      (lat) float32 88.542 86.6531 84.7532 82.8508 80.9473 79.0435 ...
  * lon      (lon) float32 0.0 1.875 3.75 5.625 7.5 9.375 11.25 13.125 15.0 ...
In [19]:
dsub = sstdata.sel(lat=slice(40,-40), lon=slice(120,290))
lat = dsub['lat'].values
lon = dsub['lon'].values




#calculate sst climatology
meansst=dsub['skt'].mean(axis=0)


plt.figure()
plt.pcolormesh(meansst.values) 
plt.clim([-20,40])
plt.colorbar

#resize to be the same shape as the data
meansst=meansst.values.reshape(1,np.shape(meansst)[0],np.shape(meansst)[1]).repeat(272,0)

#anomalies
anomsst=dsub['skt'].values-meansst
lons,lats = np.meshgrid(lon,lat)

plt.figure()
plt.pcolormesh(anomsst[2,:,:])  #summer in NH
plt.clim([-5,5])
plt.colorbar
Out[19]:
<function matplotlib.pyplot.colorbar>
In [20]:
X = np.reshape(anomsst, (anomsst.shape[0], len(lat) * len(lon)), order='F')
X.shape
Out[20]:
(272, 3822)
In [21]:
X = ma.masked_array(X, np.isnan(X))
type(X)
land = X.sum(0).mask
ocean=-land
X = X[:,ocean]
X.shape
Out[21]:
(272, 3356)
In [22]:
from sklearn import preprocessing
scaler  = preprocessing.StandardScaler()
scaler_sst = scaler.fit(X)
X = scaler_sst.transform(X)
from sklearn.decomposition import pca
skpca = pca.PCA()
skpca.fit(X)
Out[22]:
PCA(copy=True, n_components=None, whiten=False)
In [23]:
f, ax = plt.subplots(figsize=(5,5))
ax.plot(skpca.explained_variance_ratio_[0:10]*100)
ax.plot(skpca.explained_variance_ratio_[0:10]*100,'ro')
ax.set_title("% of variance explained", fontsize=14)
ax.grid()
In [24]:
PCs = skpca.transform(X)
EOFs = skpca.components_

ipc=272
EOF_recons = np.ones((ipc, len(lat) * len(lon))) * -999.

for i in range(ipc): 
    EOF_recons[i,ocean] = EOFs[i,:]
EOF_recons = ma.masked_values(np.reshape(EOF_recons, (ipc, len(lat), len(lon)), order='F'), -999.)
EOF_recons.shape
Out[24]:
(272, 42, 91)
In [25]:
def plot_field(m, X, lats, lons, vmin, vmax, step, cmap=plt.get_cmap('jet'), \
               ax=False, title=False, grid=False):
    if not ax: 
        f, ax = plt.subplots(figsize=(8, (X.shape[0] / float(X.shape[1])) * 8))
    m.ax = ax
    im = m.contourf(lons, lats, X, np.arange(vmin, vmax+step, step), \
                    latlon=True, cmap=cmap, extend='both', ax=ax)
    m.drawcoastlines()
    if grid: 
        m.drawmeridians(np.arange(0, 360, 30), labels=[0,0,0,1])
        m.drawparallels(np.arange(-80, 80, 20), labels=[1,0,0,0])
    m.colorbar(im)
    if title: 
        ax.set_title(title)
        
m = bm(projection='cyl',llcrnrlat=lat.min(),urcrnrlat=lat.max(),\
            llcrnrlon=lon.min(),urcrnrlon=lon.max(),\
            lat_ts=0,resolution='c')

plot_field(m, EOF_recons[1,:,:], lats,lons, -.1 ,.1, .01, grid =True, cmap=plt.get_cmap('RdBu_r'))
In [26]:
from sklearn.preprocessing import StandardScaler
scaler_PCs = StandardScaler()
scaler_PCs.fit(PCs)
PCs_std = scaler_PCs.transform(PCs)
PCdf = pd.DataFrame(PCs_std, index = dsub['time'], \
                    columns = ["EOF%s" % (x) for x in range(1, PCs_std.shape[1] +1)])
from scipy.signal import detrend
f, ax = plt.subplots(figsize=(10,4))
PCdf.ix[:,1].plot(ax=ax, color='k', label='PC1')
ax.axhline(0, c='0.8')
#ax.set_xlabel('period', fontsize=18)
ax.plot(PCdf.index, detrend(PCdf.ix[:,1].values), 'r',  label='PC1 (trend removed)')
ax.grid('off')
ax.legend(loc=1); 

(2) Create normalized index values (mean=0, sd=1) of Sea Level Pressure between Tahiti and Darwin similar to Fig. 4. Invert the index (multiply by -1, as in the paper). Plot the time series.

In [28]:
slp=xray.open_dataset('slp_seas_ncep.nc')
slp
tahitimslp = slp.sel(lat=-15., lon=360-149.,method='nearest')
tahitimslp

darwinmslp = slp.sel(lat=-12., lon=131.,method='nearest')
darwinmslp
plt.figure()
plt.plot(tahitimslp['time'].values,-1*(tahitimslp['slp'].values-darwinmslp['slp'].values))
plt.title('-1*Tahiti-Darwin mslp')
plt.ylabel('MSLP (hPa)')
plt.xlabel('Time (years)')

#calculate normalized index, with mean=0 and std=1
diff=-1*(tahitimslp['slp']-darwinmslp['slp'])
diff=(diff-np.mean(diff))
diff=diff/np.std(diff)
diff

plt.figure()
plt.plot(diff['time'].values,diff)
plt.title('normalized Tahiti-Darwin mslp index')
plt.ylabel('index')
plt.xlabel('Time (years)')
Out[28]:
<matplotlib.text.Text at 0x7f7e05212490>

(3) Create normalized index values of 200 hPa height at the stations listed in Fig 6. Plot the time series.

In [29]:
h200=xray.open_dataset('h200_seas_ncep.nc')

#west(lon) and south(lat) negative
sites=       ['NAI' ,'DAR' , 'NAN' ,'HIL'  , 'SAJ']
lat=np.array([-1.2, -12.4, -17.5,   19.7,   18.0])
lon=np.array([36.8, 130.0,  177.0, -155.0, -66.0])

def grab_h200(h200,lat,lon):
    return h200.sel(lat=lat,lon=360-lon, method='nearest')

all_h200={}
for i in np.arange(len(lat)):
    all_h200[sites[i]]=grab_h200(h200,lat[i],lon[i])
    
    
z200index=0.2*all_h200['NAI']+0.23*all_h200['DAR']+0.23*all_h200['NAN']+0.25*all_h200['HIL']+0.27*all_h200['SAJ']



plt.plot(z200index['time'].values,z200index['hgt'].values)

def standardize(var):
    var=var-np.mean(var)
    return var/np.std(var)

z200index_std=standardize(z200index)

(4) Compute the PNA and WP indices as on p. 823 of the paper. Plot the time series.

In [ ]:
 

(5) Compute standardized rainfall values at the locations in Fig. 3. Plot the time series.

In [ ]:
 

(6) Reproduce Fig 8 with the top 4 and bottom 4 values of PNA index (that you find in part 4), except contour the values you have rather than plot points as in the paper.

In [30]:
#200 hPa - 700 hPa correlation map

z200index_std['hgt']

h700=xray.open_dataset('h700_seas_ncep.nc')
print(np.shape(h700['hgt'].values))

def spatial_corr(index,mapvals):
    sz=np.shape(mapvals)
    
    outmap=np.zeros([sz[1],sz[2]])
    for i in np.arange(sz[1]):
        for j in np.arange(sz[2]):
            maptser=np.squeeze(mapvals[:,i,j])
            #print(np.shape(maptser))
            #print(np.shape(index))
            df_bis = pd.DataFrame({'maptser': maptser,
                       'index': np.squeeze(index)})
            outmap[i,j]=df_bis.corr().values[1,0]
            
    return outmap
(272, 73, 144)
In [31]:
outmap=spatial_corr(z200index_std['hgt'].values,h700['hgt'].values)
In [32]:
from mpl_toolkits.basemap import Basemap
import numpy as np
import matplotlib.pyplot as plt
# setup north polar stereographic basemap.
# The longitude lon_0 is at 6-o'clock, and the
# latitude circle boundinglat is tangent to the edge
# of the map at lon_0. Default value of lat_ts
# (latitude of true scale) is pole.
m = Basemap(lon_0=180)

m.pcolormesh(h700['lon'].values,h700['lat'].values,outmap)
m.drawcoastlines()
plt.colorbar()
Out[32]:
<matplotlib.colorbar.Colorbar instance at 0x7f7e04463638>

(7) Recreate Figure 9 using your data.

In [ ]: