Argentine Daily Weather Reports 1902: Comparing with 20CR
It’s helpful to separate comparing station data with 20CR into two steps:
- Get the 20CRv3 data at the time and location of the station observations.
- Make the comparison with the new obs.
This page documents the first step. It uses the IRData library to handle the 20CR data.
Get the 20CRv3 data at one time-point
Extracts 20CRv3 data at one point in time and pickles comparators at the location of each station.
#!/usr/bin/env python

# Extract data from 20CRv3 for all the Argentinian stations.

import IRData.twcr as twcr
import iris
import iris.analysis

import pandas
import numpy

import datetime
import pickle
import os

# Fix dask SPICE bug
import dask
dask.config.set(scheduler='single-threaded')

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--year", help="year",
                    type=int, required=True)
parser.add_argument("--month", help="month",
                    type=int, required=True)
parser.add_argument("--day", help="day",
                    type=int, required=True)
parser.add_argument("--hour", help="hour",
                    type=int, required=True)
parser.add_argument("--var", help="Variable to extract",
                    type=str, required=True)
parser.add_argument("--opdir", help="Directory for output files",
                    default=("%s/sef_comparators/Argentinian_DWR/" %
                             os.getenv('SCRATCH')),
                    type=str, required=False)
args = parser.parse_args()

# One output directory per time-point
args.opdir = "%s/%04d/%02d/%02d/%02d" % (args.opdir, args.year,
                                         args.month, args.day,
                                         args.hour)
if not os.path.isdir(args.opdir):
    os.makedirs(args.opdir)

# Find the directory this script is in
try:
    bindir = os.path.abspath(os.path.dirname(__file__))
except NameError:
    bindir = '.'

# Get the station metadata (names and locations)
station_names = pandas.read_csv("%s/../../raw_data/Positions.csv" % bindir,
                                skipinitialspace=True, quotechar="'",
                                encoding='utf-8')

# Load the 20CRv3 ensemble field for the chosen variable and time
rdata = twcr.load(args.var,
                  datetime.datetime(args.year, args.month,
                                    args.day, args.hour),
                  version='4.5.1')

# Make an interpolator once, so each station is just a point lookup
interpolator = iris.analysis.Linear().interpolator(rdata,
                                                   ['latitude', 'longitude'])

# Pickle the ensemble at each station
for station in station_names.SEF_ID.values:
    sn = station_names[station_names.SEF_ID == station]
    # Skip stations with no recorded position
    if numpy.isnan(sn['lat'].values[0]):
        continue
    ensemble = interpolator([numpy.array(sn['lat'].values[0]),
                             numpy.array(sn['lon'].values[0])]).data
    opfile = "%s/%s_%s.pkl" % (args.opdir, station, args.var)
    fh = open(opfile, 'wb')
    pickle.dump(ensemble, fh)
    fh.close()
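Each output file holds the 20CRv3 ensemble for one station and variable as a plain numpy array. A minimal sketch of reading a comparator back, assuming the script has already been run for that time-point (the date, station ID, and path here are illustrative):

# Read back one pickled comparator. The date and station ID are
#  illustrative - any file produced by the extraction script will do,
#  e.g. after running it with
#  --year=1902 --month=2 --day=1 --hour=6 --var=prmsl
import os
import pickle

opfile = ("%s/sef_comparators/Argentinian_DWR/1902/02/01/06/" +
          "DWR_Zarate_prmsl.pkl") % os.getenv('SCRATCH')
with open(opfile, 'rb') as fh:
    ensemble = pickle.load(fh)

# One value per 20CRv3 ensemble member (80 members), giving the
#  reanalysis spread at the station location.
print(ensemble.shape)
print(ensemble.mean())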
Run the script above for all time-points in a year
#!/usr/bin/env python

# Extract data from 20CRv3 for each station at a range of timepoints.
# Generates a list of jobs to be run in parallel on SPICE.

import os
import datetime

# Where to put the job output files
opdir = "%s/slurm_output" % os.getenv('SCRATCH')
if not os.path.isdir(opdir):
    os.makedirs(opdir)

# Check if the extraction is already done for this timepoint,
#  using one station (Zarate) as a proxy for the whole set.
def is_done(year, month, day, hour, var):
    op_file_name = ("%s/sef_comparators/Argentinian_DWR/" +
                    "%04d/%02d/%02d/%02d/DWR_Zarate_%s.pkl") % (
                        os.getenv('SCRATCH'),
                        year, month, day, hour, var)
    if os.path.isfile(op_file_name):
        return True
    return False

start_day = datetime.datetime(1902, 2, 1, 0)
end_day = datetime.datetime(1902, 12, 31, 21)

# Write one extraction command per variable and 3-hourly timepoint
f = open("run.txt", "w+")
current_day = start_day
while current_day <= end_day:
    for hour in (0, 3, 6, 9, 12, 15, 18, 21):
        for var in ('prmsl', 'air.2m'):
            if not is_done(current_day.year, current_day.month,
                           current_day.day, hour, var):
                cmd = ("./get_comparators.py --year=%d --month=%d" +
                       " --day=%d --hour=%d --var=%s\n") % (
                           current_day.year, current_day.month,
                           current_day.day, hour, var)
                f.write(cmd)
    current_day = current_day + datetime.timedelta(days=1)
f.close()
This script writes run.txt, the list of commands needed to do all the extractions. The commands are independent of one another, so they can be run in parallel.
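If you’re not submitting the list to SPICE, any parallel runner will do. A minimal sketch using a local process pool, assuming run.txt is in the current directory (the pool size of 8 is arbitrary):

#!/usr/bin/env python
# Run the commands in run.txt in parallel with a local process pool -
#  a simple stand-in for the SPICE/slurm queue.
import subprocess
from multiprocessing import Pool

def run(cmd):
    # Each command is one independent single-timepoint extraction
    return subprocess.call(cmd, shell=True)

if __name__ == "__main__":
    with open("run.txt") as f:
        cmds = [line.strip() for line in f if line.strip()]
    with Pool(8) as pool:
        pool.map(run, cmds)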