Get data sample for HadCRUT5-EUSTACE comparison

It’s important to take a sample from the difference field, rather than differencing two independently-drawn samples. So we take a field from both datasets at the same time, put them on the same grid, take the difference, and extract a sample from that difference field. This process is slow (and uses a lot of memory), so we do it in one-year batches.
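To see why the order of operations matters, here is a toy illustration (not part of the pipeline): differencing two independently-drawn samples inflates the spread with each field's own variability, while sampling the difference field at common points measures only the disagreement between the fields.

import numpy

# Two synthetic 'fields' that differ only by a constant offset
field_a = numpy.random.normal(0, 1, 10000)
field_b = field_a + 0.1

# Sample the difference field at common points:
#  the spread reflects only the difference between the fields
idx = numpy.random.randint(0, 10000, 1000)
print((field_b - field_a)[idx].std())            # ~0: constant offset

# Difference two independently-drawn samples:
#  the spread is inflated by each field's own variability
idx_a = numpy.random.randint(0, 10000, 1000)
idx_b = numpy.random.randint(0, 10000, 1000)
print((field_b[idx_b] - field_a[idx_a]).std())   # ~1.4: spuriously large

The script that does the real sampling, one year per run: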

#!/usr/bin/env python

# Make a latitude slice comparing EUSTACE and HadCRUT5 - for a given month,
#  sampling across longitude and ensemble.

# Actually, do this for every month in a year - makes a more
#  reasonable unit of work

import os
import iris
import numpy
import pickle

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--year", help="Year",
                    type=int,required=True)
parser.add_argument("--opdir", help="Directory for output files",
                    default="%s/EUSTACE/derived/HadCRUT5_comparison" % \
                                           os.getenv('SCRATCH'),
                    type=str,required=False)
args = parser.parse_args()
if not os.path.isdir(args.opdir):
    os.makedirs(args.opdir)

# Fix dask SPICE bug
import dask
dask.config.set(scheduler='single-threaded')

for month in range(1,13):

    # Load the EUSTACE monthly normals (1961-1990 climatology)
    n=iris.load_cube("%s/EUSTACE/1.0/monthly/climatology_1961_1990/%02d.nc" % 
                      (os.getenv('SCRATCH'),month),
                     iris.Constraint(cube_func=(lambda cell: cell.var_name == 'tas')))

    # Array to store the sample in - one difference per latitude
    ndata=numpy.ma.array(numpy.zeros((1,720)),mask=False)

    # Load the EUSTACE ensemble (and convert to anomalies)
    e=[]
    for member in range(10):
        m=iris.load_cube("%s/EUSTACE/1.0/monthly/%04d/%02d.nc" % 
                          (os.getenv('SCRATCH'),args.year,month),
                         iris.Constraint(cube_func=(lambda cell: \
                             cell.var_name == 'tasensemble_%d' % member)))
        m = m-n # to anomaly
        e.append(m)

    # Load the HadCRUT5 ensemble and regrid to match EUSTACE
    #  (using 10 of the analysis ensemble members)
    members = (1,2,3,4,5,56,57,58,59,60)
    h=[]
    for member in members:
        m = iris.load_cube("/scratch/hadcc/hadcrut5/build/HadCRUT5/analysis/"+
                           "HadCRUT.5.0.0.0.analysis.anomalies.%d.nc" % member,
                           iris.Constraint(time=lambda cell: \
                              cell.point.year==args.year and cell.point.month==month))
        m = m.regrid(e[0],iris.analysis.Nearest())
        h.append(m)

    # Make the slice: for each latitude, pick a random longitude and a
    #  random member from each ensemble, and store the difference
    for lat in range(720):
        m_e = numpy.random.randint(10)         # random EUSTACE member
        m_h = numpy.random.randint(10)         # random HadCRUT5 member
        rand_l = numpy.random.randint(0,1440)  # random longitude
        ndata[0,lat]=h[m_h].data[lat,rand_l]-e[m_e].data[0,lat,rand_l]

    # Store the slice for this month
    dfile = "%s/%04d%02d.pkl" % (args.opdir,args.year,month)
    with open(dfile, "wb") as f:
        pickle.dump(ndata, f)
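Each run writes one pickle file per month, containing a masked array with one sampled difference per latitude. A quick sanity check on one month's output (a sketch, assuming the script above has already been run for 1979):

import os
import pickle

dfile = "%s/EUSTACE/derived/HadCRUT5_comparison/197901.pkl" % \
                                       os.getenv('SCRATCH')
with open(dfile, "rb") as f:
    ndata = pickle.load(f)
print(ndata.shape)   # (1, 720): one sampled difference per latitude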

Script to generate the commands that make the sample batch, one for each year:

#!/usr/bin/env python

# Print the commands to make the slices for every year
#  (each command covers all 12 months of that year)

for year in range(1850,2016):
    print("./make_comparison_slice.py --year=%d" % year)

Script to combine the annual batches into a complete sample:

#!/usr/bin/env python

# Get the sample cube for HadCRUT5-EUSTACE comparison
# Monthly, resolved in latitude, sampling in longitude, sampling the ensembles.

# Delegates making the slices to the parallelisable make_slice script.
# This script only puts the slices together.

import os
import numpy
import datetime
import pickle

# Period for which the monthly slices have been made
start=datetime.datetime(1850,1,1,0,0)
end=datetime.datetime(2015,12,31,23,59)

def get_sample_cube(start=datetime.datetime(1850,1,1,0,0),
                    end=datetime.datetime(2015,12,31,23,59)):

    slice_dir = "%s/EUSTACE/derived/HadCRUT5_comparison" % \
                                    os.getenv('SCRATCH')

    # Array to store the sample in - one row per month, one column per latitude
    n_months=(end.year-start.year+1)*12
    ndata=numpy.ma.array(numpy.zeros((n_months,720)),mask=False)
    dts=[]
    # Assemble the sample slice by slice
    for year in range(start.year,end.year+1):
        for month in range(1,13):
            t=(year-start.year)*12+month-1
            dfile = "%s/%04d%02d.pkl" % (slice_dir,year,month)
            with open(dfile, "rb") as f:
                dslice = pickle.load(f)
            ndata[t,:]=dslice[0,:]
            dts.append(datetime.datetime(year,month,15))

    return (ndata,dts)

(ndata,dts) = get_sample_cube(start,end)

with open("comparison.pkl", "wb") as f:
    pickle.dump((ndata,dts), f)
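Downstream analyses can then load the combined sample (a sketch, assuming comparison.pkl is in the working directory):

import pickle

with open("comparison.pkl", "rb") as f:
    (ndata, dts) = pickle.load(f)

print(ndata.shape)       # (n_months, 720): months x latitudes
print(dts[0], dts[-1])   # first and last month sampled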