ERA5 to HadUK-Grid - get the training dataΒΆ

Get the HadUK-Grid data as described in the documentation for that dataset.

Get the ERA5 data as described in the documentation for that ERA5 T2m, except that you will want daily Tmax instead of daily T2m. (It will still work with T2m, but not as well).

Utility script for ERA5 data access (imported in most other scripts using this data):

# Load daily Tmax data from ERA5

import os
import iris

coord_s = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS)


def ERA5_roll_longitude(f):
    f1 = f.extract(iris.Constraint(longitude=lambda cell: cell > 180))
    longs = f1._dim_coords_and_dims[1][0].points - 360
    f1._dim_coords_and_dims[1][0].points = longs
    f2 = f.extract(iris.Constraint(longitude=lambda cell: cell <= 180))
    f = iris.cube.CubeList([f1, f2]).concatenate_cube()
    return f


def ERA5_trim(f, x=1440, y=720):
    xmin = f.coord("longitude").points[-x]
    cx = iris.Constraint(longitude=lambda cell: cell >= xmin)
    ymin = f.coord("latitude").points[y - 1]
    cy = iris.Constraint(latitude=lambda cell: cell >= ymin)
    return f.extract(cx & cy)


def ERA5_load_LS_mask():
    fname = (
        "%s/Proxy_20CR/datasets/ERA5/daily_SST/"
        + "era5_daily_0m_sea_surface_temperature_19790101to20210831.nc"
    ) % os.getenv("SCRATCH")
    ddata = iris.load_cube(
        fname,
        iris.Constraint(
            time=lambda cell: cell.point.year == 1979
            and cell.point.month == 1
            and cell.point.day == 1
        ),
    )
    ddata.data = ddata.data.data  # Remove mask
    ddata.data[ddata.data < 10000] = 0
    ddata.data[ddata.data > 0] = 1
    return ddata


def ERA5_load_Tmax(year, month, day):
    fname = (
        "%s/Proxy_20CR/datasets/ERA5/daily_Tmax/"
        + "era5_daily_2m_maximum_temperature_19790101to20200831.nc"
    ) % os.getenv("SCRATCH")
    ddata = iris.load_cube(
        fname,
        iris.Constraint(
            time=lambda cell: cell.point.year == year
            and cell.point.month == month
            and cell.point.day == day
        ),
    )
    ddata.coord("latitude").coord_system = coord_s
    ddata.coord("longitude").coord_system = coord_s
    return ddata


def ERA5_load_Tmax_climatology(year, month, day):
    if month == 2 and day == 29:
        day = 28
    fname = ("%s/Proxy_20CR/datasets/ERA5/daily_Tmax/" + "climatology/%02d/%02d.nc") % (
        os.getenv("SCRATCH"),
        month,
        day,
    )
    ddata = iris.load_cube(fname)
    ddata.coord("latitude").coord_system = coord_s
    ddata.coord("longitude").coord_system = coord_s
    return ddata


def ERA5_load_Tmax_variability_climatology(year, month, day):
    if month == 2 and day == 29:
        day = 28
    fname = (
        "%s/Proxy_20CR/datasets/ERA5/daily_Tmax/"
        + "variability_climatology/%02d/%02d.nc"
    ) % (
        os.getenv("SCRATCH"),
        month,
        day,
    )
    ddata = iris.load_cube(fname)
    return ddata

Utility script for HadUK-Grid data access (imported in most other scripts using this data):

# Load daily tmax data from HadUKGrid

import os
import iris
import netCDF4
import calendar
import datetime


def HUKG_trim(f, x=896, y=1440):
    xmin = f.coord("projection_x_coordinate").points[-x]
    cx = iris.Constraint(projection_x_coordinate=lambda cell: cell >= xmin)
    ymin = f.coord("projection_y_coordinate").points[-y]
    cy = iris.Constraint(projection_y_coordinate=lambda cell: cell >= ymin)
    return f.extract(cx & cy)


def HUKG_load_tmax(year, month, day):
    if year > 2018:
        dirname = (
            "%s/Proxy_20CR/datasets/haduk-grid/"
            + "series_archive_provisional/grid/daily_maxtemp/%04d/%02d"
        ) % (os.getenv("SCRATCH"), year, month)
    else:
        dirname = (
            "%s/Proxy_20CR/datasets/haduk-grid/"
            + "v1.0.3.0/grid/daily_maxtemp/%04d/%02d"
        ) % (os.getenv("SCRATCH"), year, month)
    filename = "%02d.nc" % day
    hdata = iris.load_cube("%s/%s" % (dirname, filename))
    return hdata


def HUKG_load_observations(year, month, day):
    if year > 2018:
        dirname = (
            "%s/Proxy_20CR/datasets/haduk-grid/"
            + "series_archive_provisional/station/daily_maxtemp/%04d/%02d"
        ) % (os.getenv("SCRATCH"), year, month)
    else:
        dirname = (
            "%s/Proxy_20CR/datasets/haduk-grid/"
            + "v1.0.3.0/station/daily_maxtemp/%04d/%02d"
        ) % (os.getenv("SCRATCH"), year, month)
    filename = "%02d.nc" % day
    nc = netCDF4.Dataset("%s/%s" % (dirname, filename))
    return {
        "projection_x_coordinate": nc.variables["projection_x_coordinate"][:].data,
        "projection_y_coordinate": nc.variables["projection_y_coordinate"][:].data,
        "Tmax": nc.variables["daily_maxtemp"][:].data,
    }


# Get an approximate daily climatology from the HadUKGrid monthlies
def HUKG_load_tmax_climatology(year, month, day):
    dirname = (
        "%s/Proxy_20CR/datasets/haduk-grid/v1.0.3.0/"
        + "monthly_maxtemp_climatology/"
        + "1981-2010/"
    ) % os.getenv("SCRATCH")
    if day == 15:
        filename = "%s.nc" % calendar.month_abbr[month].lower()
        hdata = iris.load_cube("%s/%s" % (dirname, filename))
    elif day < 15:
        dte = datetime.date(year, month, day)
        dt2 = datetime.date(year, month, 15)
        dt1 = dt2 - datetime.timedelta(days=30)
        dt1 = datetime.date(dt1.year, dt1.month, 15)
        fn1 = "%s.nc" % calendar.month_abbr[dt1.month].lower()
        hdata = iris.load_cube("%s/%s" % (dirname, fn1))
        fn2 = "%s.nc" % calendar.month_abbr[dt2.month].lower()
        hd2 = iris.load_cube("%s/%s" % (dirname, fn2))
        weight = (dt2 - dte).total_seconds() / (dt2 - dt1).total_seconds()
        hdata.data = hdata.data * weight + hd2.data * (1 - weight)
    else:
        dte = datetime.date(year, month, day)
        dt1 = datetime.date(year, month, 15)
        dt2 = dt1 + datetime.timedelta(days=30)
        dt2 = datetime.date(dt2.year, dt2.month, 15)
        fn1 = "%s.nc" % calendar.month_abbr[month].lower()
        hdata = iris.load_cube("%s/%s" % (dirname, fn1))
        fn2 = "%s.nc" % calendar.month_abbr[dt2.month].lower()
        hd2 = iris.load_cube("%s/%s" % (dirname, fn2))
        weight = (dt2 - dte).total_seconds() / (dt2 - dt1).total_seconds()
        hdata.data = hdata.data * weight + hd2.data * (1 - weight)
    return hdata