Get data sample for HadCRUT5-20CRv3 comparison
It’s important to take the sample from the difference field, rather than differencing two independently-taken samples. So we take a field from both datasets at the same time, put them on the same grid, take the difference, and extract a sample from that difference.
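As a toy illustration of why the order matters (an assumed example in plain numpy, not part of the pipeline): when two fields are strongly correlated, sampling their paired difference gives a much smaller, and more accurate, spread than differencing two independently-drawn samples.

import numpy

rng = numpy.random.default_rng(42)
a = rng.normal(0, 1, 100000)        # sample of field A
b = a + rng.normal(0, 0.1, 100000)  # field B: A plus a small discrepancy
print(numpy.std(a - b))                   # paired difference: about 0.1
print(numpy.std(a - rng.permutation(b)))  # unpaired samples: about 1.4

That process can be slow (and take a lot of memory), so we’ll do it in one-year batches: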
#!/usr/bin/env python

# Make a latitude slice comparing 20CRv3 and HadCRUT5 - for a given month,
#  sampling across longitude and ensemble.
# Actually, do this for every month in a year - that makes a more
#  reasonable unit of work.

import os
import iris
import numpy
import pickle
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--year", help="Year",
                    type=int, required=True)
parser.add_argument("--opdir", help="Directory for output files",
                    default="%s/EUSTACE/derived/HadCRUT5_20CRv3_comparison" %
                            os.getenv('SCRATCH'),
                    type=str, required=False)
args = parser.parse_args()
if not os.path.isdir(args.opdir):
    os.makedirs(args.opdir)

# Fix dask SPICE bug
import dask
dask.config.set(scheduler='single-threaded')

# Get a EUSTACE field to provide the target grid
egrid = iris.load_cube(("%s/EUSTACE/1.0/1969/" +
                        "tas_global_eustace_0_19690312.nc") %
                       os.getenv('SCRATCH'),
                       iris.Constraint(cube_func=(lambda c: c.var_name == 'tas')))

# Make the 20CRv3 monthly climatology (1961-1990)
n = []
for m in range(1, 13):
    mc = iris.Constraint(time=lambda cell: cell.point.month == m and
                                           cell.point.year > 1960 and
                                           cell.point.year < 1991)
    h = iris.load_cube('%s/20CR/version_3/monthly_means/air.2m.mon.mean.nc' %
                       os.getenv('SCRATCH'),
                       iris.Constraint(name='air_temperature') & mc)
    n.append(h.collapsed('time', iris.analysis.MEAN))

for month in range(1, 13):

    # Array to store the sample in - one value per EUSTACE latitude
    ndata = numpy.ma.array(numpy.zeros((1, 720)), mask=False)

    # Load the 20CRv3 data, anomalise, and regrid to the EUSTACE grid
    mc = iris.Constraint(time=lambda cell: cell.point.month == month and
                                           cell.point.year == args.year)
    e = iris.load_cube('%s/20CR/version_3/monthly_means/air.2m.mon.mean.nc' %
                       os.getenv('SCRATCH'),
                       iris.Constraint(name='air_temperature') & mc)
    e = e - n[month-1]
    e = e.regrid(egrid, iris.analysis.Nearest())

    # Load the HadCRUT5 ensemble members and regrid to the EUSTACE grid
    members = (1, 2, 3, 4, 5, 56, 57, 58, 59, 60)
    h = []
    for member in members:
        m = iris.load_cube("/scratch/hadcc/hadcrut5/build/HadCRUT5/analysis/" +
                           "HadCRUT.5.0.0.0.analysis.anomalies.%d.nc" % member,
                           iris.Constraint(time=lambda cell:
                               cell.point.year == args.year and
                               cell.point.month == month))
        m = m.regrid(egrid, iris.analysis.Nearest())
        h.append(m)

    # Make the slice: at each latitude, take the difference at one
    #  random longitude, from one random HadCRUT5 member
    for lat in range(720):
        m_h = numpy.random.randint(10)          # one of the 10 members
        rand_l = numpy.random.randint(0, 1440)  # one of the 1440 longitudes
        ndata[0, lat] = h[m_h].data[lat, rand_l] - e.data[lat, rand_l]

    # Store
    dfile = "%s/%04d%02d.pkl" % (args.opdir, args.year, month)
    pickle.dump(ndata, open(dfile, "wb"))
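Each run of this script writes twelve slice files into the output directory, one per month, named YYYYMM.pkl.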
Script to print the commands that make the sample batch for each year:
#!/usr/bin/env python

# Print the command to make the slices for each year
#  (each command does all twelve months).

for year in range(1850, 2016):
    print("./make_comparison_slice.py --year=%d" % year)
Script to combine the annual batches into a complete sample:
#!/usr/bin/env python

# Get the sample cube for the HadCRUT5-20CRv3 comparison.
# Monthly, resolved in latitude, sampling in longitude, sampling the ensembles.
# Delegates making the slices to the parallelisable per-year script above -
#  this script only puts the slices together.

import os
import numpy
import datetime
import pickle

def get_sample_cube():
    slice_dir = "%s/EUSTACE/derived/HadCRUT5_20CRv3_comparison" % \
                os.getenv('SCRATCH')
    # Array to store the sample in - one row per month, one column per latitude
    ndata = numpy.ma.array(numpy.zeros(((2016-1850)*12, 720)), mask=False)
    dts = []
    # Assemble the sample slice by slice
    for year in range(1850, 2016):
        for month in range(1, 13):
            t = (year-1850)*12 + month - 1
            dfile = "%s/%04d%02d.pkl" % (slice_dir, year, month)
            with open(dfile, "rb") as f:
                dslice = pickle.load(f)
            ndata[t, :] = dslice[0, :]
            dts.append(datetime.datetime(year, month, 15))
    return (ndata, dts)

(ndata, dts) = get_sample_cube()
pickle.dump((ndata, dts), open("comparison.pkl", "wb"))
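A minimal sketch of reading the result back (assuming comparison.pkl is in the current directory):

import pickle
import numpy

with open("comparison.pkl", "rb") as f:
    (ndata, dts) = pickle.load(f)

print(ndata.shape)      # (1992, 720) - one row per month 1850-2015,
                        #  one column per EUSTACE latitude
print(dts[0], dts[-1])  # 1850-01-15 ... 2015-12-15
print(numpy.ma.mean(ndata, axis=0))  # mean difference at each latitude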