ERA5 to HadUK-Grid - get the training dataΒΆ
Get the HadUK-Grid data as described in the documentation for that dataset.
Get the ERA5 data as described in the documentation for that ERA5 T2m, except that you will want daily Tmax instead of daily T2m. (It will still work with T2m, but not as well).
Utility script for ERA5 data access (imported in most other scripts using this data):
# Load daily Tmax data from ERA5
import os
import iris
coord_s = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS)
def ERA5_roll_longitude(f):
f1 = f.extract(iris.Constraint(longitude=lambda cell: cell > 180))
longs = f1._dim_coords_and_dims[1][0].points - 360
f1._dim_coords_and_dims[1][0].points = longs
f2 = f.extract(iris.Constraint(longitude=lambda cell: cell <= 180))
f = iris.cube.CubeList([f1, f2]).concatenate_cube()
return f
def ERA5_trim(f, x=1440, y=720):
xmin = f.coord("longitude").points[-x]
cx = iris.Constraint(longitude=lambda cell: cell >= xmin)
ymin = f.coord("latitude").points[y - 1]
cy = iris.Constraint(latitude=lambda cell: cell >= ymin)
return f.extract(cx & cy)
def ERA5_load_LS_mask():
fname = (
"%s/Proxy_20CR/datasets/ERA5/daily_SST/"
+ "era5_daily_0m_sea_surface_temperature_19790101to20210831.nc"
) % os.getenv("SCRATCH")
ddata = iris.load_cube(
fname,
iris.Constraint(
time=lambda cell: cell.point.year == 1979
and cell.point.month == 1
and cell.point.day == 1
),
)
ddata.data = ddata.data.data # Remove mask
ddata.data[ddata.data < 10000] = 0
ddata.data[ddata.data > 0] = 1
return ddata
def ERA5_load_Tmax(year, month, day):
fname = (
"%s/Proxy_20CR/datasets/ERA5/daily_Tmax/"
+ "era5_daily_2m_maximum_temperature_19790101to20200831.nc"
) % os.getenv("SCRATCH")
ddata = iris.load_cube(
fname,
iris.Constraint(
time=lambda cell: cell.point.year == year
and cell.point.month == month
and cell.point.day == day
),
)
ddata.coord("latitude").coord_system = coord_s
ddata.coord("longitude").coord_system = coord_s
return ddata
def ERA5_load_Tmax_climatology(year, month, day):
if month == 2 and day == 29:
day = 28
fname = ("%s/Proxy_20CR/datasets/ERA5/daily_Tmax/" + "climatology/%02d/%02d.nc") % (
os.getenv("SCRATCH"),
month,
day,
)
ddata = iris.load_cube(fname)
ddata.coord("latitude").coord_system = coord_s
ddata.coord("longitude").coord_system = coord_s
return ddata
def ERA5_load_Tmax_variability_climatology(year, month, day):
if month == 2 and day == 29:
day = 28
fname = (
"%s/Proxy_20CR/datasets/ERA5/daily_Tmax/"
+ "variability_climatology/%02d/%02d.nc"
) % (
os.getenv("SCRATCH"),
month,
day,
)
ddata = iris.load_cube(fname)
return ddata
Utility script for HadUK-Grid data access (imported in most other scripts using this data):
# Load daily tmax data from HadUKGrid
import os
import iris
import netCDF4
import calendar
import datetime
def HUKG_trim(f, x=896, y=1440):
xmin = f.coord("projection_x_coordinate").points[-x]
cx = iris.Constraint(projection_x_coordinate=lambda cell: cell >= xmin)
ymin = f.coord("projection_y_coordinate").points[-y]
cy = iris.Constraint(projection_y_coordinate=lambda cell: cell >= ymin)
return f.extract(cx & cy)
def HUKG_load_tmax(year, month, day):
if year > 2018:
dirname = (
"%s/Proxy_20CR/datasets/haduk-grid/"
+ "series_archive_provisional/grid/daily_maxtemp/%04d/%02d"
) % (os.getenv("SCRATCH"), year, month)
else:
dirname = (
"%s/Proxy_20CR/datasets/haduk-grid/"
+ "v1.0.3.0/grid/daily_maxtemp/%04d/%02d"
) % (os.getenv("SCRATCH"), year, month)
filename = "%02d.nc" % day
hdata = iris.load_cube("%s/%s" % (dirname, filename))
return hdata
def HUKG_load_observations(year, month, day):
if year > 2018:
dirname = (
"%s/Proxy_20CR/datasets/haduk-grid/"
+ "series_archive_provisional/station/daily_maxtemp/%04d/%02d"
) % (os.getenv("SCRATCH"), year, month)
else:
dirname = (
"%s/Proxy_20CR/datasets/haduk-grid/"
+ "v1.0.3.0/station/daily_maxtemp/%04d/%02d"
) % (os.getenv("SCRATCH"), year, month)
filename = "%02d.nc" % day
nc = netCDF4.Dataset("%s/%s" % (dirname, filename))
return {
"projection_x_coordinate": nc.variables["projection_x_coordinate"][:].data,
"projection_y_coordinate": nc.variables["projection_y_coordinate"][:].data,
"Tmax": nc.variables["daily_maxtemp"][:].data,
}
# Get an approximate daily climatology from the HadUKGrid monthlies
def HUKG_load_tmax_climatology(year, month, day):
dirname = (
"%s/Proxy_20CR/datasets/haduk-grid/v1.0.3.0/"
+ "monthly_maxtemp_climatology/"
+ "1981-2010/"
) % os.getenv("SCRATCH")
if day == 15:
filename = "%s.nc" % calendar.month_abbr[month].lower()
hdata = iris.load_cube("%s/%s" % (dirname, filename))
elif day < 15:
dte = datetime.date(year, month, day)
dt2 = datetime.date(year, month, 15)
dt1 = dt2 - datetime.timedelta(days=30)
dt1 = datetime.date(dt1.year, dt1.month, 15)
fn1 = "%s.nc" % calendar.month_abbr[dt1.month].lower()
hdata = iris.load_cube("%s/%s" % (dirname, fn1))
fn2 = "%s.nc" % calendar.month_abbr[dt2.month].lower()
hd2 = iris.load_cube("%s/%s" % (dirname, fn2))
weight = (dt2 - dte).total_seconds() / (dt2 - dt1).total_seconds()
hdata.data = hdata.data * weight + hd2.data * (1 - weight)
else:
dte = datetime.date(year, month, day)
dt1 = datetime.date(year, month, 15)
dt2 = dt1 + datetime.timedelta(days=30)
dt2 = datetime.date(dt2.year, dt2.month, 15)
fn1 = "%s.nc" % calendar.month_abbr[month].lower()
hdata = iris.load_cube("%s/%s" % (dirname, fn1))
fn2 = "%s.nc" % calendar.month_abbr[dt2.month].lower()
hd2 = iris.load_cube("%s/%s" % (dirname, fn2))
weight = (dt2 - dte).total_seconds() / (dt2 - dt1).total_seconds()
hdata.data = hdata.data * weight + hd2.data * (1 - weight)
return hdata