Source code for ML_Utilities.prepare_data
# (C) British Crown Copyright 2019, Met Office
#
# This code is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This code is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# Prepare a piece of tf data from 20CR - convert to tensor, normalise, save as tf.load-able file.
import os
import iris
import tensorflow as tf
import IRData.twcr as twcr
from .normalise import get_normalise_function
[docs]def prepare_data(date,purpose='training',source='20CR2c',variable='prmsl',
member=1,normalise=None,opfile=None):
"""Make tf.load-able files, suitably normalised for training ML models
Data will be stored in directory $SCRATCH/Machine-Learning-experiments.
Args:
date (obj:`datetime.datetime`): datetime to get data for.
purpose (:obj:`str`): 'training' (default) or 'test'.
source (:obj:`str`): Where to get the data from - at the moment, only '20CR2c' is supported .
variable (:obj:`str`): Variable to use (e.g. 'prmsl')
normalise: (:obj:`func`): Function to normalise the data (to mean=0, sd=1). Function must take an :obj:`iris.cube.cube' as argument and returns a normalised cube as result. If None (default) use a standard normalisation function (see :func:`normalise`.
Returns:
Nothing, but creates, as side effect, a tf.load-able file with the normalised data for the given source, variable, and date.
Raises:
ValueError: Unsupported source, or can't load the original data, or normalisation failed.
|
"""
if opfile is None:
opfile=("%s/Machine-Learning-experiments/datasets/%s/%s/%s" %
(os.getenv('SCRATCH'),source,variable,purpose))
if not os.path.isdir(os.path.dirname(opfile)):
os.makedirs(os.path.dirname(opfile))
ic=twcr.load(variable,datetime.datetime(date.year,date.month,
date.day,date.hour),
version=args.version)
# Reduce to selected ensemble member
ic=ic.extract(iris.Constraint(member=args.member))
# Normalise (to mean=0, sd=1)
if normalise is None:
normalise=get_normalise_function(source,variable)
ic.data=normalise(ic.data)
# Convert to Tensor
ict=tf.convert_to_tensor(ic.data, numpy.float32)
# Write to tfrecord file
sict=tf.serialize_tensor(ict)
tf.write_file(opfile,sict)