Making Tensors of pseudo-observations of precipitation rate

This is analogous to the same process for prmsl observations, except that it uses a different data source and normalisation (the precipitation rate is left un-normalised at this stage).

Script to make a single tensor:

#!/usr/bin/env python

# Read in a field from 20CR as an Iris cube.
# Extract the values at the locations of DWR stations
# Convert them into a TensorFlow tensor.
# Serialise that and store it on $SCRATCH.

import tensorflow as tf
tf.enable_eager_execution()
import numpy

import IRData.twcr as twcr
import iris
import datetime
import argparse
import os

# Get the obs locations for March 12, 1916
obs=twcr.load_observations_fortime(datetime.datetime(1916,3,12,6),
                                   version='2c')

parser = argparse.ArgumentParser()
parser.add_argument("--year", help="Year",
                    type=int,required=True)
parser.add_argument("--month", help="Integer month",
                    type=int,required=True)
parser.add_argument("--day", help="Day of month",
                    type=int,required=True)
parser.add_argument("--hour", help="Hour of day (0 to 23)",
                    type=int,required=True)
parser.add_argument("--member", help="Ensemble member",
                    default=1,type=int,required=False)
parser.add_argument("--source", help="Data source",
                    default='20CR2c',type=str,required=False)
parser.add_argument("--variable", help="variable name",
                    default='prmsl',type=str,required=False)
parser.add_argument("--test", help="test data, not training",
                    action="store_true")
parser.add_argument("--opfile", help="tf data file name",
                    default=None,
                    type=str,required=False)
args = parser.parse_args()
if args.opfile is None:
    purpose='training'
    if args.test: purpose='test'
    args.opfile=(("%s/Machine-Learning-experiments/datasets/"+
                  "obs_1916/%s/%s/%s/%04d-%02d-%02d:%02d.tfd") %
                       (os.getenv('SCRATCH'),args.source,args.variable,purpose,
                        args.year,args.month,args.day,args.hour))

if not os.path.isdir(os.path.dirname(args.opfile)):
    os.makedirs(os.path.dirname(args.opfile))

if args.source=='20CR2c':
    ic=twcr.load(args.variable,datetime.datetime(args.year,args.month,
                                                args.day,args.hour),
                 version='2c')
    ic=ic.extract(iris.Constraint(member=args.member))

    #interpolator = iris.analysis.Linear().interpolator(ic, 
    #                               ['latitude', 'longitude'])
    ensemble=[]
    for index, row in obs.iterrows():
        ensemble.append(ic.interpolate(
                        [('latitude',row['Latitude']),
                         ('longitude',row['Longitude'])],
                        iris.analysis.Linear()).data.item())
    ensemble = numpy.array(ensemble, dtype=numpy.float32)
 
    # Normalise to mean=0, sd=1 (approx)
    if args.variable=='prmsl':
        ensemble -= 101325
        ensemble /= 3000
    elif args.variable=='air.2m':
        ensemble -= 280
        ensemble /= 50
    elif args.variable=='prate':
        pass
        # Don't normalise prate until later

else:
    raise ValueError('Source %s is not supported' % args.source)

# Convert to Tensor
ict=tf.convert_to_tensor(ensemble, numpy.float32)

# Write to tfrecord file
sict=tf.serialize_tensor(ict)
tf.write_file(args.opfile,sict)
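
For reference, a tensor serialised in this way can be read back with the matching TF 1.x calls. This is a minimal sketch, assuming the same eager-execution environment as above; the file name is just a placeholder:

#!/usr/bin/env python

# Read a serialised observations tensor back from disc (sketch).

import tensorflow as tf
tf.enable_eager_execution()
import numpy

file_name='obs.tfd'  # placeholder - path to a .tfd file written by the script above

sict=tf.read_file(file_name)              # serialised tensor, as a string
ict=tf.parse_tensor(sict,numpy.float32)   # back to a float32 tensor
print(ict.shape)                          # one value per DWR station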

Script to make the commands needed to generate all the required tensors (the commands can then be run in parallel):

#!/usr/bin/env python

# Make a list of the commands needed to make a few hundred tf data files
#  for training the autoencoder.

# Get one data file every 6 hours over the selected years 

# Do the same for 2009, to be used as test data

import os
import datetime

# Function to check if the job is already done for this timepoint
def is_done(year,month,day,hour,group):
    op_file_name=("%s/Machine-Learning-experiments/datasets/" +
                  "obs_1916/20CR2c/prate/" +
                  "%s/%04d-%02d-%02d:%02d.tfd") % (
                            os.getenv('SCRATCH'),group,
                            year,month,day,hour)
    if os.path.isfile(op_file_name):
        return True
    return False

f=open("run.prate.txt","w+")

start_day=datetime.datetime(1969,  1,  1,  0)
end_day  =datetime.datetime(1989, 12, 31, 23)

current_day=start_day
while current_day<=end_day:
    if not is_done(current_day.year,current_day.month,
                   current_day.day,current_day.hour,'training'):
        cmd=("./make_training_tensor.py --year=%d --month=%d" +
            " --day=%d --hour=%d --variable=prate \n") % (
               current_day.year,current_day.month,
               current_day.day,current_day.hour)
        f.write(cmd)
    current_day=current_day+datetime.timedelta(hours=6)
 
start_day=datetime.datetime(2009,  1,  1,  0)
end_day  =datetime.datetime(2009, 12, 31, 23)

current_day=start_day
while current_day<=end_day:
    if not is_done(current_day.year,current_day.month,
                   current_day.day,current_day.hour,'test'):
        cmd=("./make_training_tensor.py --year=%d --month=%d" +
            " --day=%d --hour=%d --variable=prate --test \n") % (
               current_day.year,current_day.month,
               current_day.day,current_day.hour)
        f.write(cmd)
    current_day=current_day+datetime.timedelta(hours=6)
   
f.close()
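
The file run.prate.txt is just a list of shell commands, one per line; how they are run in parallel is left open (a batch scheduler or GNU parallel would both do). A minimal sketch of running them from Python with the standard library, assuming a single machine with a few spare cores:

#!/usr/bin/env python

# Run the commands in run.prate.txt in parallel (sketch).

import subprocess
import multiprocessing

def run_command(cmd):
    # Each line of run.prate.txt is a complete shell command
    subprocess.run(cmd,shell=True,check=True)

if __name__ == '__main__':
    with open('run.prate.txt') as f:
        commands=[line.strip() for line in f if line.strip()]
    # Modest pool size - each command fetches and interpolates a 20CR field
    with multiprocessing.Pool(8) as pool:
        pool.map(run_command,commands)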