"""
Functions that package all the benchmark tensors as tf.data.Datasets.

Call these functions in the ML model fitting script - they output TensorFlow
Datasets which can be used directly in model fitting.
"""
# Make tf.data.Datasets from the ATB2 fake image tensors and numbers tensors
import os
import tensorflow as tf
import numpy
# Load an image tensor from a file
def load_image_tensor(file_name):
    """Read a serialised image tensor from disk and restore its shape.

    Returns a float32 tensor of shape [1024, 768, 3].
    """
    serialised = tf.io.read_file(file_name)
    image = tf.io.parse_tensor(serialised, numpy.float32)
    return tf.reshape(image, [1024, 768, 3])
# Load a numbers tensor from a file
def load_numbers_tensor(file_name):
    """Read a serialised numbers tensor from disk and restore its shape.

    Returns a float32 tensor of shape [436, 10].
    """
    serialised = tf.io.read_file(file_name)
    numbers = tf.io.parse_tensor(serialised, numpy.float32)
    return tf.reshape(numbers, [436, 10])
# Get an image tensors dataset - for 'training' or 'test'.
# Optionally specify how many images to use.
def getImageDataset(purpose="training", nImages=None):
    """Build a tf.data.Dataset of image tensors.

    Args:
        purpose: "training" (first 90% of the files) or "test" (last 10%).
        nImages: optional cap on the number of images to include.

    Returns:
        A tf.data.Dataset yielding [1024, 768, 3] float32 image tensors.

    Raises:
        ValueError: if nImages exceeds the number of available files.
    """
    baseDir = "%s/ML_ATB2/tensors/images" % os.getenv("SCRATCH")
    # Sort the file names: os.listdir returns them in arbitrary order, so an
    # unsorted 90/10 split would differ between runs/machines and could leak
    # files between the training and test subsets.
    inFiles = sorted(os.listdir(baseDir))
    splitI = int(len(inFiles) * 0.9)
    if purpose == "training":
        inFiles = inFiles[:splitI]
    if purpose == "test":
        inFiles = inFiles[splitI:]
    if nImages is not None:
        if len(inFiles) >= nImages:
            inFiles = inFiles[0:nImages]
        else:
            raise ValueError(
                "Only %d images available, can't provide %d" % (len(inFiles), nImages)
            )
    # Create TensorFlow Dataset object from the file namelist
    inFiles = ["%s/%s" % (baseDir, x) for x in inFiles]
    tr_data = tf.data.Dataset.from_tensor_slices(tf.constant(inFiles))
    # Convert the Dataset from file names to file contents
    tr_data = tr_data.map(
        load_image_tensor, num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
    # Overlap file loading with downstream consumption
    tr_data = tr_data.prefetch(tf.data.experimental.AUTOTUNE)
    return tr_data
# Get a numbers tensors dataset - for 'training' or 'test'.
# Optionally specify how many numbers tensors to use.
def getNumbersDataset(purpose="training", nImages=None):
    """Build a tf.data.Dataset of numbers tensors.

    Args:
        purpose: "training" (first 90% of the files) or "test" (last 10%).
        nImages: optional cap on the number of tensors to include.

    Returns:
        A tf.data.Dataset yielding [436, 10] float32 numbers tensors.

    Raises:
        ValueError: if nImages exceeds the number of available files.
    """
    baseDir = "%s/ML_ATB2/tensors/numbers" % os.getenv("SCRATCH")
    # Sort the file names: os.listdir returns them in arbitrary order, so an
    # unsorted 90/10 split would differ between runs/machines and could leak
    # files between the training and test subsets.
    inFiles = sorted(os.listdir(baseDir))
    splitI = int(len(inFiles) * 0.9)
    if purpose == "training":
        inFiles = inFiles[:splitI]
    if purpose == "test":
        inFiles = inFiles[splitI:]
    if nImages is not None:
        if len(inFiles) >= nImages:
            inFiles = inFiles[0:nImages]
        else:
            raise ValueError(
                "Only %d numbers available, can't provide %d" % (len(inFiles), nImages)
            )
    # Create TensorFlow Dataset object from the file namelist
    inFiles = ["%s/%s" % (baseDir, x) for x in inFiles]
    tr_data = tf.data.Dataset.from_tensor_slices(tf.constant(inFiles))
    # Convert the Dataset from file names to file contents
    tr_data = tr_data.map(
        load_numbers_tensor, num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
    # Overlap file loading with downstream consumption
    tr_data = tr_data.prefetch(tf.data.experimental.AUTOTUNE)
    return tr_data