Make monthly averages from daily EUSTACE dataΒΆ
The only complication in this process is in handling missing data. I chose (arbitrarily) that a monthly average would be the mean of all the available daily averages in that month if at least 20 daily values were available (and missing otherwise).
166 years of daily data is a lot of averaging, so parallelise it by writing a script to do one month
#!/usr/bin/env python
# Make monthly means from daily EUSTACE1.0 data.
import os
import iris
from calendar import monthrange
# Fix dask SPICE bug
import dask
dask.config.set(scheduler='single-threaded')
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--month", help="Integer month",
type=int,required=True)
parser.add_argument("--year", help="Integer year",
type=int,required=True)
args = parser.parse_args()
root_dir="%s/EUSTACE/1.0/" % os.getenv('SCRATCH')
op_dir = "%s/monthly/%04d" % (root_dir,args.year)
if not os.path.isdir(op_dir):
os.makedirs(op_dir)
op_file = "%s/%02d.nc" % (op_dir,args.month)
def average_over_month(variable):
accum = None
for dy in range (1,monthrange(args.year,args.month)[1]+1):
inst = iris.load_cube("%s/%04d/tas_global_eustace_0_%04d%02d%02d.nc" %
(root_dir,args.year,args.year,
args.month,dy),
iris.Constraint(cube_func=(lambda c: c.var_name == variable)))
if accum is None:
accum = inst
count = inst.copy()
count.data.mask = False
count.data *= 0
count.data[accum.data.mask==False] += 1
else:
accum.data.mask[inst.data.mask==False] = False
accum.data[accum.data<0] *= 0
accum.data = accum.data + inst.data
count.data[inst.data.mask==False] += 1
accum.data[count.data>=20] /= count.data[count.data>=20]
accum.data.mask[count.data<20] = True
return(accum)
avgs=[]
avgs.append(average_over_month('tas'))
for member in range(10):
avgs.append(average_over_month('tasensemble_%d' % member))
iris.save(avgs,op_file,
fill_value=-32768)
and then making a task to run that script once for each month in the period,
#!/usr/bin/env python
# Scripts to make means for every month
import os
import datetime
for year in range (1850,2016):
for month in range(1,13):
opf = "%s/EUSTACE/1.0/monthly/%04d/%02d.nc" %\
(os.getenv('SCRATCH'),year,month)
if not os.path.isfile(opf):
print("./make_means_by_month.py --year=%d --month=%d" %
(year,month))
and then running those tasks in parallel.