Assemble data for the ERA5 videoΒΆ

We will download the data from the Copernicus Climate Data Store. This requires installing the CDS API (see the environment for this repository) and registering for an account on the CDS website.

Script to download one variable for one year:

#!/usr/bin/env python

# Retrieve ERA5 hourly_values.
#  Every month in one year

import os
import cdsapi
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--variable", help="Variable name", type=str, required=True)
parser.add_argument("--year", help="Year", type=int, required=True)
parser.add_argument(
    "--opdir",
    help="Directory for output files",
    default="%s/ERA5/hourly/reanalysis" % os.getenv("SCRATCH"),
)
args = parser.parse_args()
args.opdir += "/%04d" % args.year
if not os.path.isdir(args.opdir):
    os.makedirs(args.opdir, exist_ok=True)

c = cdsapi.Client()

dataset = "reanalysis-era5-single-levels"
for month in range(1, 13):
    os.makedirs("%s/%02d" % (args.opdir, month), exist_ok=True)
    opfile = "%s/%02d/%s.nc" % (args.opdir, month, args.variable)
    if os.path.isfile(opfile):
        print("File %s already exists, skipping download" % opfile)
        continue
    request = {
        "product_type": ["reanalysis"],
        "variable": [args.variable],
        "year": [args.year],
        "month": [month],
        "day": [
            "01",
            "02",
            "03",
            "04",
            "05",
            "06",
            "07",
            "08",
            "09",
            "10",
            "11",
            "12",
            "13",
            "14",
            "15",
            "16",
            "17",
            "18",
            "19",
            "20",
            "21",
            "22",
            "23",
            "24",
            "25",
            "26",
            "27",
            "28",
            "29",
            "30",
            "31",
        ],
        "time": [
            "00:00",
            "01:00",
            "02:00",
            "03:00",
            "04:00",
            "05:00",
            "06:00",
            "07:00",
            "08:00",
            "09:00",
            "10:00",
            "11:00",
            "12:00",
            "13:00",
            "14:00",
            "15:00",
            "16:00",
            "17:00",
            "18:00",
            "19:00",
            "20:00",
            "21:00",
            "22:00",
            "23:00",
        ],
        "data_format": "netcdf",
        "download_format": "unarchived",
    }

    print("Downloading %s for %04d-%02d" % (args.variable, args.year, month))
    c.retrieve(dataset, request).download(opfile)

Run the script above repeatedly to get all the variables for the whole period.

#!/usr/bin/env python

# Get monthly ERA5 data for several years, and store on SCRATCH.

import os
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--startyear", type=int, required=False, default=1940)
parser.add_argument("--endyear", type=int, required=False, default=2024)
args = parser.parse_args()

for year in range(args.startyear, args.endyear + 1):
    for var in [
        "2m_temperature",
        "mean_sea_level_pressure",
        "total_precipitation",
        "10m_u_component_of_wind",
        "10m_v_component_of_wind"
    ]:
        opfile = "%s/ERA5/hourly/reanalysis/%04d/%s.nc" % (
            os.getenv("SCRATCH"),
            year,
            var,
        )
        if not os.path.isfile(opfile):
            print(
                ("./get_year_of_hourlies.py --year=%d --variable=%s")
                % (
                    year,
                    var,
                )
            )

As well as the weather data, the plot script needs a land-mask file to plot the continents. This script retrieves that.

#!/usr/bin/env python

# Retrieve a soil temperature file from ERA5-land

# This is just an easy way to get a high-resolution land mask for plotting

import os
import cdsapi

opdir = "%s/ERA5/" % os.getenv("SCRATCH")
if not os.path.isdir(opdir):
    os.makedirs(opdir, exist_ok=True)

if not os.path.isfile("%s/land_mask.nc" % opdir):  # Only bother if we don't have it

    c = cdsapi.Client() 

    # Variable and date are arbitrary
    # Just want something that is only defined in land grid-cells.

    dataset = "reanalysis-era5-land-monthly-means"
    request = {
        "product_type": ["monthly_averaged_reanalysis"],
        "variable": ["soil_temperature_level_1"],
        "year": ["2024"],
        "month": ["01"],
        "time": ["00:00"],
        "data_format": "netcdf",
        "download_format": "unarchived"
    }

    c.retrieve(dataset, request).download("%s/%s.nc" % (opdir, "land_mask"))

    # Use system call to run ncks on output file
    # To remove 'expver' variable (otherwise iris won't load it)
    # os.system(
    #     "ncks -O -C -x -v expver %s/%s.nc %s/%s.nc"
    #     % (opdir, "land_mask", opdir, "land_mask")
    # ) test

As well as downloading the data, we want some convenience functions to load it into a convenient format (iris cubes). This script does that for the ERA5 data.:

# Functions to load ERA5 Hourly data

import os
import iris
from iris.util import squeeze
import iris.coord_systems
import numpy as np

# Don't really understand this, but it gets rid of the error messages.
iris.FUTURE.datum_support = True

# ERA5 data does not have explicit coordinate systems
# Specify one to add on load so the cubes work properly with iris.
cs_ERA5 = iris.coord_systems.RotatedGeogCS(90, 180, 0)


# And a function to add the coord system to a cube (in-place)
def add_coord_system(cbe):
    cbe.coord("latitude").coord_system = cs_ERA5
    cbe.coord("longitude").coord_system = cs_ERA5


def get_land_mask():
    # Load the land mask
    fname = "%s/ERA5/land_mask.nc" % os.getenv("SCRATCH")
    if not os.path.isfile(fname):
        raise Exception("No data file %s" % fname)
    land_mask = squeeze(iris.load_cube(fname))
    # Set data to 1 for land, 0 for sea
    land_mask.data.data[land_mask.data.mask] = 0
    land_mask.data.data[~land_mask.data.mask] = 1
    add_coord_system(land_mask)
    return land_mask


def load(
    variable="total_precipitation",
    year=None,
    month=None,
    day=None,
    hour=None,
    constraint=None,
    grid=None,
):
    if year is None or month is None or day is None or hour is None:
        raise Exception("Year, month, day, and hour must be specified")
    fname = "%s/ERA5/hourly/reanalysis/%04d/%02d/%s.nc" % (
        os.getenv("SCRATCH"),
        year,
        month,
        variable,
    )
    if not os.path.isfile(fname):
        raise Exception("No data file %s" % fname)
    ftt = iris.Constraint(
        time=lambda cell: cell.point.day == day and cell.point.hour == hour
    )
    varC = iris.load_cube(fname, ftt)
    # Get rid of unnecessary height dimensions
    if len(varC.data.shape) == 3:
        varC = varC.extract(iris.Constraint(expver=1))
    add_coord_system(varC)
    varC.long_name = variable
    if grid is not None:
        varC = varC.regrid(grid, iris.analysis.Nearest())
    if constraint is not None:
        varC = varC.extract(constraint)
    return varC