Making Tensors of rotated and scaled fields

We already have a script that makes the analysis fields into tensors, so to make the improved version we can use the same process, except that we rotate each field onto the modified Cassini projection and re-grid it to a resolution of 79x159 (dimensions that stride-2 convolutions can both shrink and reconstruct exactly; see the sketch below).
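
A quick check of that dimension claim (this snippet is illustrative only: the kernel size of 3, 'valid' padding, and channel counts are assumptions, not part of the pipeline):

import tensorflow as tf
tf.enable_eager_execution()

# With kernel 3, stride 2, and 'valid' padding, 79x159 nests cleanly
encoder = tf.keras.Sequential([
    tf.keras.layers.Conv2D( 8, 3, strides=2, padding='valid'), # 79x159 -> 39x79
    tf.keras.layers.Conv2D(16, 3, strides=2, padding='valid'), # 39x79  -> 19x39
])
decoder = tf.keras.Sequential([
    tf.keras.layers.Conv2DTranspose(8, 3, strides=2,
                                    padding='valid'),          # 19x39  -> 39x79
    tf.keras.layers.Conv2DTranspose(1, 3, strides=2,
                                    padding='valid'),          # 39x79  -> 79x159
])

field  = tf.zeros([1, 79, 159, 1])
latent = encoder(field)
print(latent.shape)           # (1, 19, 39, 16)
print(decoder(latent).shape)  # (1, 79, 159, 1)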

#!/usr/bin/env python

# Read in a field from 20CR as an Iris cube.
# Rescale it and rotate the pole.
# Convert it into a TensorFlow tensor.
# Serialise it and store it on $SCRATCH.

# Rescale makes the dimensions easy to generate with strided convolutions
# Rotation moves the equator to the boundary - to reduce the problems
#  with boundary conditions.

import tensorflow as tf
tf.enable_eager_execution()
import numpy

import IRData.twcr as twcr
import iris
import datetime
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("--year", help="Year",
                    type=int,required=True)
parser.add_argument("--month", help="Integer month",
                    type=int,required=True)
parser.add_argument("--day", help="Day of month",
                    type=int,required=True)
parser.add_argument("--hour", help="Hour of day (0 to 23)",
                    type=int,required=True)
parser.add_argument("--member", help="Ensemble member",
                    default=1,type=int,required=False)
parser.add_argument("--source", help="Data source",
                    default='20CR2c',type=str,required=False)
parser.add_argument("--variable", help="variable name",
                    default='prmsl',type=str,required=False)
parser.add_argument("--test", help="test data, not training",
                    action="store_true")
parser.add_argument("--opfile", help="tf data file name",
                    default=None,
                    type=str,required=False)
args = parser.parse_args()
if args.opfile is None:
    purpose='training'
    if args.test: purpose='test'
    args.opfile=("%s/Machine-Learning-experiments/datasets/rotated_pole/%s/%s/%s/%04d-%02d-%02d:%02d.tfd" %
                       (os.getenv('SCRATCH'),args.source,args.variable,purpose,
                        args.year,args.month,args.day,args.hour))

if not os.path.isdir(os.path.dirname(args.opfile)):
    os.makedirs(os.path.dirname(args.opfile))
    
# Function to resize and rotate pole
def rr_cube(cbe):
    # Use the Cassini projection (boundary is the equator)
    cs=iris.coord_systems.RotatedGeogCS(0.0,60.0,270.0)
    # Latitudes cover -90 to 90 with 79 values
    lat_values=numpy.arange(-90,91,180/78)
    latitude = iris.coords.DimCoord(lat_values,
                                    standard_name='latitude',
                                    units='degrees_north',
                                    coord_system=cs)
    # Longitudes cover -180 to 180 with 159 values
    lon_values=numpy.arange(-180,181,360/158)
    longitude = iris.coords.DimCoord(lon_values,
                                     standard_name='longitude',
                                     units='degrees_east',
                                     coord_system=cs)
    dummy_data = numpy.zeros((len(lat_values), len(lon_values)))
    dummy_cube = iris.cube.Cube(dummy_data,
                               dim_coords_and_dims=[(latitude, 0),
                                                    (longitude, 1)])
    n_cube=cbe.regrid(dummy_cube,iris.analysis.Linear())
    return n_cube


if args.source=='20CR2c':
    ic=twcr.load(args.variable,datetime.datetime(args.year,args.month,
                                                args.day,args.hour)+
                               datetime.timedelta(hours=24),
                 version='2c')
    ic=ic.extract(iris.Constraint(member=args.member))
    ic=rr_cube(ic)
    # Normalise to mean=0, sd=1 (approx)
    if args.variable=='prmsl':
        ic.data -= 101325
        ic.data /= 3000
    elif args.variable=='air.2m':
        ic.data -= 280
        ic.data /= 50
    elif args.variable=='z500':
        ic.data -= 5300
        ic.data /= 600
    elif args.variable=='prate':
        # Precipitation rates are very skewed, with a spike at 0, so
        #  normalise with a log transform (the offset keeps the
        #  argument positive).
        # (An earlier attempt, kept for reference, mixed in the
        #  humidity to smooth out the spike at 0:)
        #h=twcr.load('rh9950',datetime.datetime(args.year,args.month,
        #                                        args.day,args.hour)+
        #                       datetime.timedelta(hours=24),
        #                     version='2c')
        #h=h.extract(iris.Constraint(member=args.member))
        #h=rr_cube(h)
        #s=h.shape
        #ic.data=(ic.data*1000+h.data/100+
        #         numpy.random.normal(0,0.05,len(h.data.flatten())).reshape(s))
        #ic.data -= 1
        ic.data=ic.data*1000+1.001
        ic.data=numpy.log(ic.data)
    
else:
    raise ValueError('Source %s is not supported' % args.source)

# Convert to Tensor
ict=tf.convert_to_tensor(ic.data, numpy.float32)

# Write the serialised tensor to a file
sict=tf.serialize_tensor(ict)
tf.write_file(args.opfile,sict)
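
To check a generated file, the serialised tensor can be read back in the same TF 1.x eager style (the file name here is just an illustrative example):

import os
import numpy
import tensorflow as tf
tf.enable_eager_execution()

# Illustrative example - any of the generated files will do
file_name = ("%s/Machine-Learning-experiments/datasets/rotated_pole/"
             "20CR2c/prmsl/training/1969-01-01:00.tfd") % os.getenv('SCRATCH')

sict = tf.read_file(file_name)               # serialised tensor
ict  = tf.parse_tensor(sict, numpy.float32)  # restore the tensor
print(ict.shape)                             # (79, 159)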

If we do that at regular intervals over an extended period, we make a large batch of training data, and it will be possible to match each target field with an obs tensor. We actually need two such datasets: a large one for training models, and a smaller one for testing them. This script makes the list of commands needed to make all the training and test data (one field every 5 days + 6 hours, so that successive samples are mostly independent, with 1/10 of them held back as test data), and these commands can be run in parallel.

#!/usr/bin/env python

# Make a list of the commands needed to make a few hundred tf data files
#  for training the autoencoder.

# Get one data file every 5 days+6 hours over the selected years 
#  They should be far enough apart to be mostly independent.

# Partition off 1/10 of them to be test data

import os
import datetime

# Function to check if the job is already done for this timepoint
def is_done(year,month,day,hour,group):
    op_file_name=("%s/Machine-Learning-experiments/datasets/rotated_pole/"+
                  "20CR2c/prmsl/" +
                  "%s/%04d-%02d-%02d:%02d.tfd") % (
                            os.getenv('SCRATCH'),group,
                            year,month,day,hour)
    return os.path.isfile(op_file_name)

f=open("run.txt","w+")

count=1

start_day=datetime.datetime(1969,  1,  1,  0)
end_day  =datetime.datetime(1989, 12, 31, 23)

current_day=start_day
while current_day<=end_day:
    if count%10==0:
        if not is_done(current_day.year,current_day.month,
                       current_day.day,current_day.hour,'test'):
            cmd=("./make_training_tensor.py --year=%d --month=%d" +
                " --day=%d --hour=%d --variable=prmsl --test \n") % (
                   current_day.year,current_day.month,
                   current_day.day,current_day.hour)
            f.write(cmd)
    else:
        if not is_done(current_day.year,current_day.month,
                       current_day.day,current_day.hour,'training'):
            cmd=("./make_training_tensor.py --year=%d --month=%d" +
                " --day=%d --hour=%d --variable=prmsl \n") % (
                   current_day.year,current_day.month,
                   current_day.day,current_day.hour)
            f.write(cmd)
    current_day=current_day+datetime.timedelta(hours=126)
    count += 1
    
f.close()
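
Run the commands in run.txt (in parallel, if resources permit) to build both datasets. As a sketch of how the generated files might then be consumed for training (an assumption about the training side, not part of this script), the usual tf.data pattern applies:

import os
import glob
import numpy
import tensorflow as tf
tf.enable_eager_execution()

# Hypothetical consumer: stream the training files as a tf.data.Dataset
input_file_dir = (("%s/Machine-Learning-experiments/datasets/rotated_pole/"+
                   "20CR2c/prmsl/training") % os.getenv('SCRATCH'))
file_names = glob.glob("%s/*.tfd" % input_file_dir)

def load_tensor(file_name):
    sict = tf.read_file(file_name)
    ict  = tf.parse_tensor(sict, numpy.float32)
    ict  = tf.reshape(ict, [79, 159, 1])  # set the static shape for the map
    return ict

dataset = tf.data.Dataset.from_tensor_slices(tf.constant(file_names))
dataset = dataset.map(load_tensor)
dataset = dataset.shuffle(buffer_size=100).batch(32)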