Argentine Daily Weather Reports 1902: Comparing with 20CR
It’s helpful to separate comparing station data with 20CR into two steps:
- Get the 20CRv3 data at the time and location of the station observations.
- Make the comparison with the new obs.
This page documents the first step. It uses the IRData library to handle the 20CR data.
Get the 20CRv3 data at one time-point
Extracts 20CRv3 data at one point in time and pickles comparators at the location of each station.
#!/usr/bin/env python

# Extract data from 20CRv3 for all the Argentinian stations.

import IRData.twcr as twcr
import iris
import iris.analysis

import pandas
import numpy

import datetime
import pickle
import os

# Fix dask SPICE bug
import dask
dask.config.set(scheduler='single-threaded')

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--year", help="year",
                    type=int, required=True)
parser.add_argument("--month", help="month",
                    type=int, required=True)
parser.add_argument("--day", help="day",
                    type=int, required=True)
parser.add_argument("--hour", help="hour",
                    type=int, required=True)
parser.add_argument("--var", help="Variable to extract",
                    type=str, required=True)
parser.add_argument("--opdir", help="Directory for output files",
                    default=("%s/sef_comparators/Argentinian_DWR/" %
                             os.getenv('SCRATCH')),
                    type=str, required=False)
args = parser.parse_args()

# One output directory per time-point
args.opdir = "%s/%04d/%02d/%02d/%02d" % (args.opdir, args.year,
                                         args.month, args.day,
                                         args.hour)
if not os.path.isdir(args.opdir):
    os.makedirs(args.opdir)

# Find the directory this script is in
try:
    bindir = os.path.abspath(os.path.dirname(__file__))
except NameError:
    bindir = '.'

# Get the station metadata (names and locations)
station_names = pandas.read_csv("%s/../../raw_data/Positions.csv" % bindir,
                                skipinitialspace=True, quotechar="'",
                                encoding='utf-8')

# Load the 20CRv3 ensemble field for the chosen variable and time
rdata = twcr.load(args.var,
                  datetime.datetime(args.year, args.month,
                                    args.day, args.hour),
                  version='4.5.1')

# Make an interpolator once, so each station is just a point lookup
interpolator = iris.analysis.Linear().interpolator(rdata,
                                                   ['latitude', 'longitude'])

# Pickle the ensemble at each station
for station in station_names.SEF_ID.values:
    sn = station_names[station_names.SEF_ID == station]
    # Skip stations with no recorded position
    if numpy.isnan(sn['lat'].values[0]):
        continue
    ensemble = interpolator([numpy.array(sn['lat'].values[0]),
                             numpy.array(sn['lon'].values[0])]).data
    opfile = "%s/%s_%s.pkl" % (args.opdir, station, args.var)
    fh = open(opfile, 'wb')
    pickle.dump(ensemble, fh)
    fh.close()
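Each output file holds the 20CRv3 ensemble for one station and variable as a plain numpy array. A minimal sketch of reading a comparator back, assuming the script has already been run for that time-point (the date, station ID, and path here are illustrative):

# Read back one pickled comparator. The date and station ID are
#  illustrative - any file produced by the extraction script will do,
#  e.g. after running it with
#  --year=1902 --month=2 --day=1 --hour=6 --var=prmsl
import os
import pickle

opfile = ("%s/sef_comparators/Argentinian_DWR/1902/02/01/06/" +
          "DWR_Zarate_prmsl.pkl") % os.getenv('SCRATCH')
with open(opfile, 'rb') as fh:
    ensemble = pickle.load(fh)

# One value per 20CRv3 ensemble member (80 members), giving the
#  reanalysis spread at the station location.
print(ensemble.shape)
print(ensemble.mean())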
Run the script above for all time-points in a year
#!/usr/bin/env python

# Extract data from 20CRv3 for each station at a range of timepoints.
# Generates a list of jobs to be run in parallel on SPICE.

import os
import datetime

# Where to put the job output files
opdir = "%s/slurm_output" % os.getenv('SCRATCH')
if not os.path.isdir(opdir):
    os.makedirs(opdir)

# Check if the extraction is already done for this timepoint,
#  using one station (Zarate) as a proxy for the whole set.
def is_done(year, month, day, hour, var):
    op_file_name = ("%s/sef_comparators/Argentinian_DWR/" +
                    "%04d/%02d/%02d/%02d/DWR_Zarate_%s.pkl") % (
                        os.getenv('SCRATCH'),
                        year, month, day, hour, var)
    if os.path.isfile(op_file_name):
        return True
    return False

start_day = datetime.datetime(1902, 2, 1, 0)
end_day = datetime.datetime(1902, 12, 31, 21)

# Write one extraction command per variable and 3-hourly timepoint
f = open("run.txt", "w+")
current_day = start_day
while current_day <= end_day:
    for hour in (0, 3, 6, 9, 12, 15, 18, 21):
        for var in ('prmsl', 'air.2m'):
            if not is_done(current_day.year, current_day.month,
                           current_day.day, hour, var):
                cmd = ("./get_comparators.py --year=%d --month=%d" +
                       " --day=%d --hour=%d --var=%s\n") % (
                           current_day.year, current_day.month,
                           current_day.day, hour, var)
                f.write(cmd)
    current_day = current_day + datetime.timedelta(days=1)
f.close()
This script writes run.txt, the list of commands needed to do all the extractions. The commands are independent of one another, so they can be run in parallel.
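If you’re not submitting the list to SPICE, any parallel runner will do. A minimal sketch using a local process pool, assuming run.txt is in the current directory (the pool size of 8 is arbitrary):

#!/usr/bin/env python
# Run the commands in run.txt in parallel with a local process pool -
#  a simple stand-in for the SPICE/slurm queue.
import subprocess
from multiprocessing import Pool

def run(cmd):
    # Each command is one independent single-timepoint extraction
    return subprocess.call(cmd, shell=True)

if __name__ == "__main__":
    with open("run.txt") as f:
        cmds = [line.strip() for line in f if line.strip()]
    with Pool(8) as pool:
        pool.map(run, cmds)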