Get data sample for HadCRUT-20CRv3 comparison

It’s important to take a sample from the difference field, rather than differencing the two samples. So we take a field from both datasets at the same time, put them on the same grid, take the difference and extract a sample. That process can be slow (and take a lot of memory), so we’ll do it in one-year batches:

#!/usr/bin/env python

# Make a latitude slice comparing 20CRv3 and HadCRUT5 - for a given month,
#  sampling across longitude and ensemble.

# Actually, do this for every month in a year - makes a more
#  reasonable unit of work

import os
import iris
import numpy
import pickle

import argparse

# Command-line options: which year to process, and where to put the
#  output pickle files.
parser = argparse.ArgumentParser()
parser.add_argument("--year", help="Year",
                    type=int, required=True)
parser.add_argument("--opdir", help="Directory for output files",
                    default="%s/EUSTACE/derived/HadCRUT5_20CRv3_comparison" % \
                                           os.getenv('SCRATCH'),
                    type=str, required=False)
args = parser.parse_args()
# exist_ok=True avoids the check-then-create race of the original
#  "if not isdir(): makedirs()" pattern (safe if several year-jobs
#  start at once).
os.makedirs(args.opdir, exist_ok=True)

# Fix dask SPICE bug
# (iris loads data lazily via dask; forcing the single-threaded
#  scheduler works around a problem seen on the SPICE cluster -
#  presumably a threading issue, exact bug not documented here).
import dask
dask.config.set(scheduler='single-threaded')

# Get EUSTACE field for grid dimensions
egrid = iris.load_cube(("%s/EUSTACE/1.0/1969/"+
                        "tas_global_eustace_0_19690312.nc") % 
                        os.getenv('SCRATCH'),
            iris.Constraint(cube_func=(lambda c: c.var_name == 'tas')))

# Make the climatology: one 1961-1990 mean 20CRv3 2m-temperature field
#  for each calendar month (n[0] = January, ..., n[11] = December).
n = []
for m in range(1, 13):
    # Bind m as a default argument: if iris evaluates the constraint
    #  lazily, a plain closure over the loop variable would see only
    #  its final value.
    mc = iris.Constraint(time=lambda cell, m=m:
                         cell.point.month == m and
                         cell.point.year > 1960 and
                         cell.point.year < 1991)
    h = iris.load_cube('%s/20CR/version_3/monthly_means/air.2m.mon.mean.nc' %
                                                       os.getenv('SCRATCH'),
                       iris.Constraint(name='air_temperature') & mc)
    # load_cube has already applied mc, so the original's second
    #  h.extract(mc) was redundant and has been dropped.
    n.append(h.collapsed('time', iris.analysis.MEAN))


for month in range(1, 13):

    # Array to store the sample in: one value per latitude row of the
    #  720x1440 EUSTACE grid (ranges below imply 720 lats, 1440 lons).
    ndata = numpy.ma.array(numpy.zeros((1, 720)), mask=False)

    # Load the 20CR data, anomalise, and regrid to EUSTACE grid.
    #  (month bound as a default argument in case iris evaluates the
    #  constraint lazily - see climatology loop above.)
    mc = iris.Constraint(time=lambda cell, month=month:
                         cell.point.month == month and
                         cell.point.year == args.year)
    e = iris.load_cube('%s/20CR/version_3/monthly_means/air.2m.mon.mean.nc' %
                                                   os.getenv('SCRATCH'),
                       iris.Constraint(name='air_temperature') & mc)
    e = e - n[month-1]  # anomaly w.r.t. the 1961-90 climatology
    e = e.regrid(egrid, iris.analysis.Nearest())

    # Load the HadCRUT5 ensemble and regrid to EUSTACE grid.
    #  Ten members only - presumably an arbitrary subset of the full
    #  ensemble; confirm the choice is deliberate.
    members = (1, 2, 3, 4, 5, 56, 57, 58, 59, 60)
    h = []
    for member in members:
        m = iris.load_cube("/scratch/hadcc/hadcrut5/build/HadCRUT5/analysis/" +
                           "HadCRUT.5.0.0.0.analysis.anomalies.%d.nc" % member,
                           iris.Constraint(time=lambda cell, month=month:
                                           cell.point.year == args.year and
                                           cell.point.month == month))
        m = m.regrid(egrid, iris.analysis.Nearest())
        h.append(m)

    # Make the slice: for each latitude row pick a random ensemble
    #  member and a random longitude, and take (HadCRUT5 - 20CRv3)
    #  at that point.
    for lat in range(720):
        m_h = numpy.random.randint(10)          # ensemble member index
        rand_l = numpy.random.randint(0, 1440)  # longitude index
        ndata[0, lat] = h[m_h].data[lat, rand_l] - e.data[lat, rand_l]

    # Store. Use a context manager so the file handle is closed
    #  promptly - the original's pickle.dump(..., open(...)) leaked it
    #  to the garbage collector.
    dfile = "%s/%04d%02d.pkl" % (args.opdir, args.year, month)
    with open(dfile, "wb") as opf:
        pickle.dump(ndata, opf)

Script to print the commands that make the sample batch for each year (one command per line, for batch submission):

#!/usr/bin/env python

# Print the make_comparison_slice.py command for every year in the
#  comparison period (1850-2015 inclusive) - one command per line,
#  suitable for piping to a batch scheduler.
# (The original's unused 'os' and 'datetime' imports have been removed -
#  this script is complete and uses neither.)

for year in range(1850, 2016):
    print("./make_comparison_slice.py --year=%d" % year)

Script to combine the annual batches into a complete sample:

#!/usr/bin/env python

# Get the sample cube for HadCRUT5-EUSTACE comparison
# Monthly, resolved in latitude, sampling in longitude, sampling the ensembles.

# Delegates making the slices to the parallelisable make_slice script.
# This script only puts the slices together.

import os
import iris
import numpy
import datetime
import pickle

# Period of interest, passed to get_sample_cube below.
# NOTE(review): get_sample_cube currently ignores these arguments and
#  always assembles 1850-2015 - confirm which range is intended.
start=datetime.datetime(1851,1,1,0,0)
end=datetime.datetime(2018,12,31,23,59)

def get_sample_cube(start=datetime.datetime(1851,1,1,0,0),
                    end=datetime.datetime(2018,12,31,23,59)):
    """Assemble the per-month sample slices into one array.

    Reads the monthly pickle files written by make_comparison_slice.py
    from $SCRATCH and stacks them into a ((2016-1850)*12, 720) masked
    array, one row per month since January 1850.

    NOTE(review): the start and end arguments are currently ignored -
    the loop below always covers 1850-2015 inclusive. Confirm whether
    they should be honoured (the 1851-2018 defaults would require
    slice files that the batch script does not generate).

    Returns:
        (ndata, dts): ndata is the stacked masked array; dts is a
        matching list of datetime.datetime objects (15th of each month).
    """

    slice_dir = "%s/EUSTACE/derived/HadCRUT5_20CRv3_comparison" % \
                                    os.getenv('SCRATCH')

    # Array to store the sample in
    ndata=numpy.ma.array(numpy.zeros(((2016-1850)*12,720)),mask=False)
    dts=[]
    # Assemble the sample slice by slice
    for year in range(1850,2016):
        for month in range(1,13):
            # Row index: number of months since January 1850
            t=(year-1850)*12+month-1
            dfile = "%s/%04d%02d.pkl" % (slice_dir,year,month)
            with open(dfile, "rb") as f:
                dslice = pickle.load(f)
            ndata[t,:]=dslice[0,:]
            dts.append(datetime.datetime(year,month,15))

    return (ndata,dts)

# Build the full sample and save it in the current directory.
(ndata,dts) = get_sample_cube(start,end)

# Use a context manager so the output file is flushed and closed
#  promptly - the original pickle.dump(..., open(...)) left the
#  handle to the garbage collector.
with open("comparison.pkl", "wb") as opf:
    pickle.dump((ndata, dts), opf)