Assemble data for the Met Office Global Analysis video

Note: To get these data you will need access to the Met Office operational MASS archive, and such access is not generally available outside the Met Office. The same data are distributed through the Met Office Data Portal, but only a few days' data are online at any one time.

The analysis is run once every 6 hours, but we want data at a higher time resolution than that, so we collect hourly data (the highest time resolution output): each analysis, plus the first 5 hours of the subsequent forecast.
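Each analysis plus its first 5 forecast hours tiles the day with 24 hourly fields. A quick way to see the coverage (a sketch with an arbitrary example date - not part of the retrieval code):

import datetime

day = datetime.datetime(2019, 1, 1)  # example date
for analysis_hour in (0, 6, 12, 18):
    for lead in range(6):  # the analysis (T+0) plus forecast hours T+1 to T+5
        validity = day + datetime.timedelta(hours=analysis_hour + lead)
        print("Analysis %02dZ T+%d -> valid %s" % (analysis_hour, lead, validity))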

Script to collect the variables shown for all the analyses run in one calendar day:

#!/usr/bin/env python

# Retrieve surface weather variables from the global analysis
#  for one day.

import os
from tempfile import NamedTemporaryFile
import subprocess

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--year", help="Year",
                    type=int,required=True)
parser.add_argument("--month", help="Integer month",
                    type=int,required=True)
parser.add_argument("--day", help="Day of month",
                    type=int,required=True)
parser.add_argument("--opdir", help="Directory for output files",
                    default="%s/opfc/glm" % os.getenv('SCRATCH'))
args = parser.parse_args()
opdir="%s/%04d/%02d" % (args.opdir,args.year,args.month)
if not os.path.isdir(opdir):
    os.makedirs(opdir)

print("%04d-%02d-%02d" % (args.year,args.month,args.day))

# Mass directory to use
mass_dir="moose:/opfc/atm/global/prods/%04d.pp" % args.year

# Files to retrieve from
flist=[]
for hour in (0,6,12,18):
    for fcst in (0,3,6):
        flist.append("prods_op_gl-mn_%04d%02d%02d_%02d_%03d.pp" % (
                       args.year,args.month,args.day,hour,fcst))
        flist.append("prods_op_gl-mn_%04d%02d%02d_%02d_%03d.calc.pp" % (
                       args.year,args.month,args.day,hour,fcst))
        flist.append("prods_op_gl-up_%04d%02d%02d_%02d_%03d.calc.pp" % (
                       args.year,args.month,args.day,hour,fcst))

# Create the query file
qfile=NamedTemporaryFile(mode='w+',delete=False)
qfile.write("begin_global\n")
qfile.write("   pp_file = (")
for ppfl in flist:
    qfile.write("\"%s\"" % ppfl)
    if ppfl != flist[-1]:
        qfile.write(",")
    else:
        qfile.write(")\n")
qfile.write("end_global\n")
qfile.write("begin\n")
qfile.write("    stash = (16222,24,3236,31,3225,3226)\n")
qfile.write("    lbproc = 0\n")
qfile.write("end\n")
qfile.write("begin\n")
qfile.write("    stash = (5216,5206,5205,4203,4204)\n")
qfile.write("    lbproc = 128\n")
qfile.write("end\n")
qfile.close()

# Run the query
opfile="%s/%02d.pp" % (opdir,args.day)
subprocess.call("moo select -C %s %s %s" % (qfile.name,mass_dir,opfile),
                 shell=True)

os.remove(qfile.name)
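For reference, the query file this builds looks like this (for 2019-01-01; most of the 36 file names elided):

begin_global
   pp_file = ("prods_op_gl-mn_20190101_00_000.pp","prods_op_gl-mn_20190101_00_000.calc.pp","prods_op_gl-up_20190101_00_000.calc.pp", ... ,"prods_op_gl-up_20190101_18_006.calc.pp")
end_global
begin
    stash = (16222,24,3236,31,3225,3226)
    lbproc = 0
end
begin
    stash = (5216,5206,5205,4203,4204)
    lbproc = 128
end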

This script must be run repeatedly to gather data for all the days shown in the video.

#!/usr/bin/env python

# Extract the data for a range of days

import os
import subprocess
import datetime


# Function to check if the job is already done for this timepoint
def is_done(year,month,day):
    # Path must match the output of get_data_for_day.py
    #  (default --opdir is $SCRATCH/opfc/glm)
    op_file_name="%s/opfc/glm/%04d/%02d/%02d.pp" % (
                            os.getenv('SCRATCH'),
                            year,month,day)
    return os.path.isfile(op_file_name)

f=open("fetch2.txt","w+")

start_day=datetime.datetime(2019,  1,  1,  0)
end_day  =datetime.datetime(2018,  9, 25, 23)
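# Note that start_day is later than end_day - the loop counts backwards in time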

current_day=start_day
while current_day>=end_day:
    if is_done(current_day.year,current_day.month,
                   current_day.day):
        current_day=current_day-datetime.timedelta(days=1)
        continue
    cmd=("./get_data_for_day.py --year=%d --month=%d " +
         "--day=%d"+
         "\n") % (
           current_day.year,current_day.month,
             current_day.day)
    f.write(cmd)
    current_day=current_day-datetime.timedelta(days=1)
f.close()

This produces a list of data retrieval commands - one for each day. Running these in parallel would overload the MASS system, so just run them in series - it’ll take a few days to get all the data from tape.
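Each line of fetch2.txt is one retrieval command, most recent day first:

./get_data_for_day.py --year=2019 --month=1 --day=1
./get_data_for_day.py --year=2018 --month=12 --day=31
...

One way to run them in series is a plain shell loop over the file; a minimal Python equivalent (a sketch, not part of the original scripts) is:

#!/usr/bin/env python

# Run the fetch commands from fetch2.txt one at a time

import subprocess

with open("fetch2.txt") as cmds:
    for cmd in cmds:
        cmd = cmd.strip()
        if cmd:  # skip blank lines
            # Wait for each retrieval to finish before starting the next
            subprocess.check_call(cmd, shell=True)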

As well as the weather data, the plot script needs a land-mask file to plot the continents. This script retrieves that.

#!/usr/bin/env python

# Download the land mask needed for the plots.
# Store it on $SCRATCH

import os
import urllib.request

# Land mask for plots

srce=("https://s3-eu-west-1.amazonaws.com/"+
      "philip.brohan.org.ml-gcm/opfc_global_2019.nc")
dst="%s/fixed_fields/land_mask/opfc_global_2019.nc" % os.getenv('SCRATCH')
if not os.path.exists(dst):
    if not os.path.isdir(os.path.dirname(dst)):
        os.makedirs(os.path.dirname(dst))
    urllib.request.urlretrieve(srce, dst)
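To check the download, the file can be read with iris (a sketch - it assumes iris is available, and that the netCDF file contains a single cube):

#!/usr/bin/env python

# Quick check that the land-mask file downloaded correctly

import os
import iris

mask_file = "%s/fixed_fields/land_mask/opfc_global_2019.nc" % os.getenv('SCRATCH')
land_mask = iris.load_cube(mask_file)  # assumes exactly one cube in the file
print(land_mask.summary(shorten=True))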