Argentine Daily Weather Reports 1902: Comparing with 20CR

It’s helpful to separate the comparison of station data with 20CR into two steps:

  • Get the 20CRv3 data at the time and location of the station observations
  • Make the comparison with the new observations.

This page documents the first step. It uses the IRData library to handle the 20CR data.

Get the 20CRv3 data at one time-point

Extracts 20CRv3 data at one point in time and pickles comparators at the location of each station.

#!/usr/bin/env python

# Extract data from 20CRv3 for all the Argentinian stations.

import IRData.twcr as twcr
import iris
import iris.analysis
import pandas
import numpy
import datetime
import pickle
import os
import sys

# Fix dask SPICE bug
# NOTE(review): forces the single-threaded scheduler to work around a
# dask failure seen on the SPICE cluster — presumably a thread-related
# issue; confirm whether this workaround is still needed with current dask.
import dask
dask.config.set(scheduler='single-threaded')

import argparse

# Command-line arguments: the validity time (year/month/day/hour) and
# variable to extract, plus an optional output directory root.
parser = argparse.ArgumentParser()
parser.add_argument("--year", help="year",
                    type=int, required=True)
parser.add_argument("--month", help="month",
                    type=int, required=True)
parser.add_argument("--day", help="day",
                    type=int, required=True)
parser.add_argument("--hour", help="hour",
                    type=int, required=True)
parser.add_argument("--var", help="Variable to extract",
                    type=str, required=True)
parser.add_argument("--opdir", help="Directory for output files",
                    default=("%s/sef_comparators/Argentinian_DWR/" %
                               os.getenv('SCRATCH')),
                    type=str, required=False)

args = parser.parse_args()

# Output files live in a per-timepoint subdirectory of opdir
args.opdir = "%s/%04d/%02d/%02d/%02d" % (args.opdir, args.year,
                                         args.month, args.day,
                                         args.hour)
# exist_ok=True avoids a race when several of these extraction jobs run
# in parallel (as the companion script arranges) and try to create the
# same directory simultaneously.
os.makedirs(args.opdir, exist_ok=True)

# Locate the directory holding this script: __file__ is defined when
# run as a script; in an interactive session it isn't, so fall back
# to the current directory.
try:
    script_dir = os.path.dirname(__file__)
except NameError:
    script_dir = '.'
else:
    script_dir = os.path.abspath(script_dir)

# Station metadata (names and locations), kept in the repo's raw_data
metadata_file = "%s/../../raw_data/Positions.csv" % script_dir
station_names = pandas.read_csv(metadata_file,
                                encoding='utf-8',
                                quotechar="'",
                                skipinitialspace=True)

# Load the 20CRv3 ensemble field for the requested variable and time
rdata = twcr.load(args.var,
                  datetime.datetime(args.year, args.month,
                                    args.day, args.hour),
                  version='4.5.1')
# Build the lat/lon interpolator once and reuse it for every station
interpolator = iris.analysis.Linear().interpolator(rdata,
                                ['latitude', 'longitude'])

# Pickle the interpolated ensemble at each station's location
for station in station_names.SEF_ID.values:
    sn = station_names[station_names.SEF_ID == station]
    # Skip stations with no position in the metadata.
    # Index [0] so we test a scalar — passing the whole array to 'if'
    # only works for size-1 arrays and is deprecated.
    if numpy.isnan(sn['lat'].values[0]):
        continue
    ensemble = interpolator([numpy.array(sn['lat'].values[0]),
                             numpy.array(sn['lon'].values[0])]).data
    opfile = "%s/%s_%s.pkl" % (args.opdir, station, args.var)
    # 'with' guarantees the file handle is closed even if dump raises
    with open(opfile, 'wb') as fh:
        pickle.dump(ensemble, fh)

Run the script above for all time-points in a year

#!/usr/bin/env python

# Extract data from 20CRv3 for each station at a range of timepoints

# Generates a set of jobs to be run in parallel on SPICE

import os
import sys
import subprocess
import datetime

# SPICE queue limit.
# NOTE(review): not referenced anywhere in this script — throttling is
# presumably done at submission time; confirm or remove.
max_jobs_in_queue = 500
# Where to put the slurm job output files
opdir = "%s/slurm_output" % os.getenv('SCRATCH')
# exist_ok=True: creating an already-existing directory is not an error
os.makedirs(opdir, exist_ok=True)

# Function to check if the job is already done for this timepoint
def is_done(year,month,day,hour,var):
    op_file_name=("%s/sef_comparators/Argentinian_DWR/" +
                  "%04d/%02d/%02d/%02d/DWR_Zarate_%s.pkl") % (
                            os.getenv('SCRATCH'),
                            year,month,day,hour,var)
    if os.path.isfile(op_file_name):
        return True
    return False


# Range of validity times to extract (the 1902 DWRs start in February)
start_day = datetime.datetime(1902,  2,  1,  0)
end_day   = datetime.datetime(1902, 12, 31, 21)

# Write one extraction command per not-yet-done timepoint and variable.
# 'with' (and plain 'w' rather than the unnecessary read/write 'w+')
# guarantees run.txt is flushed and closed even if is_done raises.
with open("run.txt", "w") as f:
    current_day = start_day
    while current_day <= end_day:
        # 20CRv3 fields are available 3-hourly
        for hour in (0, 3, 6, 9, 12, 15, 18, 21):
            for var in ('prmsl', 'air.2m'):
                if not is_done(current_day.year, current_day.month,
                               current_day.day, hour, var):
                    cmd = ("./get_comparators.py --year=%d --month=%d" +
                           " --day=%d --hour=%d --var=%s\n") % (
                              current_day.year, current_day.month,
                              current_day.day, hour, var)
                    f.write(cmd)
        current_day = current_day + datetime.timedelta(days=1)

This script makes the list of commands needed to do all the extractions, which can be run in parallel.