Mean-sea-level pressure - convert the data into tf.Tensors

Script to make a tensor from a single hour’s data:

#!/usr/bin/env python

# Read in a field from 20CR as an Iris cube.
# Rescale it and move UK to the centre of the field.
# Convert it into a TensorFlow tensor.
# Serialise it and store it on $SCRATCH.

import tensorflow as tf
import numpy as np

# Going to do external parallelism - run this on one core
tf.config.threading.set_inter_op_parallelism_threads(1)
import dask
dask.config.set(scheduler='single-threaded')


import IRData.twcr as twcr
import iris
import datetime
import argparse
import os
import sys

sys.path.append("%s/../../lib/" % os.path.dirname(__file__))
from normalise import normalise_t2m
from normalise import normalise_wind
from normalise import normalise_prmsl
from geometry import to_analysis_grid

import argparse

# Command-line interface: the timepoint to extract (year, month, day, hour)
#  is mandatory; member, source, variable and output file have defaults.
parser = argparse.ArgumentParser()
parser.add_argument("--year", help="Year", type=int, required=True)
parser.add_argument("--month", help="Integer month", type=int, required=True)
parser.add_argument("--day", help="Day of month", type=int, required=True)
parser.add_argument("--hour", help="Hour of day (0 to 23)", type=int, required=True)
parser.add_argument(
    "--member", help="Ensemble member", default=1, type=int, required=False
)
parser.add_argument(
    "--source", help="Data source", default="20CR2c", type=str, required=False
)
parser.add_argument(
    "--variable", help="variable name", default="prmsl", type=str, required=False
)
parser.add_argument("--test", help="test data, not training", action="store_true")
parser.add_argument(
    "--opfile", help="tf data file name", default=None, type=str, required=False
)
args = parser.parse_args()

# If no output file was given, build the default one under $SCRATCH:
#  $SCRATCH/Proxy_20CR/datasets/<source>/<variable>/<purpose>/YYYY-MM-DD:HH.tfd
#  where <purpose> is 'test' if --test was given and 'training' otherwise.
if args.opfile is None:
    purpose = "test" if args.test else "training"
    args.opfile = ("%s/Proxy_20CR/datasets/" + "%s/%s/%s/%04d-%02d-%02d:%02d.tfd") % (
        os.getenv("SCRATCH"),
        args.source,
        args.variable,
        purpose,
        args.year,
        args.month,
        args.day,
        args.hour,
    )

# Make sure the output directory exists.
#  exist_ok=True: this script is meant to be run as many independent
#  processes at once, and two of them can try to create the same directory
#  at the same moment - that must not be an error.
#  The dirname is empty when --opfile is a bare file name (current
#  directory); there is nothing to create in that case.
opdir = os.path.dirname(args.opfile)
if opdir:
    os.makedirs(opdir, exist_ok=True)

# Load and standardise data
# Only the 20CR2c source is handled - reject anything else straight away.
if args.source != "20CR2c":
    raise ValueError("Source %s is not supported" % args.source)

# Read the requested variable at the requested hour (20CR version 2c),
#  keep only the selected ensemble member, and pass it through
#  to_analysis_grid.
ic = twcr.load(
    args.variable,
    datetime.datetime(args.year, args.month, args.day, args.hour),
    version="2c",
)
ic = to_analysis_grid(ic.extract(iris.Constraint(member=args.member)))

# Each supported variable has its own normalisation function
#  (both 10m wind components share one).
normalisers = {
    "uwnd.10m": normalise_wind,
    "vwnd.10m": normalise_wind,
    "air.2m": normalise_t2m,
    "prmsl": normalise_prmsl,
}
if args.variable not in normalisers:
    raise ValueError("Variable %s is not supported" % args.variable)
ic.data = normalisers[args.variable](ic.data)

# Convert the normalised field to a float32 tensor, serialise it,
#  and write the serialised form to the output file.
tf.io.write_file(
    args.opfile,
    tf.io.serialize_tensor(tf.convert_to_tensor(ic.data, dtype=np.float32)),
)

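Script to make tensors at regular intervals (one every 30 hours) over the period 1969–2009. It does not run the above script itself: it writes a list of commands (to 'run.txt'), each of which runs the above script for one timepoint: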

#!/usr/bin/env python

# Make a few hundred tf data files
#  for training the GCM models.

# Get one data file every 30 hours (1 day + 6 hours) over the selected years
#  They should be far enough apart to be mostly independent.

# Partition off 1/10 of them to be test data

# This script does not run the commands - it makes a list of commands
#  (in the file 'run.txt') which can be run in parallel.

import os
import datetime

# Function to check if the job is already done for this timepoint
def is_done(variable, year, month, day, hour, group):
    """Report whether the output file for this timepoint already exists.

    The file looked for is
    $SCRATCH/Proxy_20CR/datasets/20CR2c/<variable>/<group>/YYYY-MM-DD:HH.tfd

    Args:
        variable: Variable name used in the path (e.g. 'prmsl').
        year, month, day, hour: Integers identifying the timepoint.
        group: Sub-directory name - the callers use 'training' or 'test'.

    Returns:
        True if that path is an existing regular file, False otherwise.
    """
    path_template = (
        "%s/Proxy_20CR/datasets/20CR2c/%s/" + "%s/%04d-%02d-%02d:%02d.tfd"
    )
    fields = (os.getenv("SCRATCH"), variable, group, year, month, day, hour)
    return os.path.isfile(path_template % fields)


# First and last instants (inclusive) of the period to sample.
start_day = datetime.datetime(1969, 1, 1, 0)
end_day = datetime.datetime(2009, 12, 31, 23)

# Interval between successive samples.
#  30 hours = 1 day + 6 hours, so starting from hour 0 the samples step
#  through the hours 0, 6, 12 and 18 as they move through the calendar.
sample_step = datetime.timedelta(hours=30)

# Write one command line to run.txt for every timepoint whose output file
#  does not exist yet. The 'with' block makes sure the file is flushed and
#  closed even if something fails part-way through.
with open("run.txt", "w") as f:
    for variable in ["prmsl"]:
        current_day = start_day
        count = 1
        while current_day <= end_day:
            # Every 10th timepoint is set aside as test data,
            #  the other nine are training data.
            is_test = count % 10 == 0
            group = "test" if is_test else "training"
            if not is_done(
                variable,
                current_day.year,
                current_day.month,
                current_day.day,
                current_day.hour,
                group,
            ):
                cmd = (
                    "./make_training_tensor.py --year=%d --month=%d"
                    + " --day=%d --hour=%d --variable=%s"
                ) % (
                    current_day.year,
                    current_day.month,
                    current_day.day,
                    current_day.hour,
                    variable,
                )
                # Only the test commands carry the --test flag; the line
                #  endings match in both cases (trailing space + newline).
                cmd += " --test \n" if is_test else " \n"
                f.write(cmd)
            current_day = current_day + sample_step
            count += 1