Pretty-print the entries
This script will search for a particular set of records, and print them out in a way that shows the record structure clearly. (See some example output)
#!/usr/bin/env python
# Take a subset of the TNA catalog (selected by record group, series, and string match)
# parse and pretty-print.
import os
import sys
import json
import argparse
# Command-line interface. --rg (the record group number) is mandatory; the
# remaining options narrow the selection or limit which of the selected
# lines are printed.
parser = argparse.ArgumentParser()
_option_table = (
    # (flag, help text, value type, required?, default)
    ("--rg", "Record group", int, True, None),
    ("--subgroup", "Record group file", int, False, None),
    ("--match", "Filter", str, False, None),
    ("--startl", "First line to print", int, False, 0),
    ("--endl", "Last line to print", int, False, None),
)
for _flag, _text, _kind, _needed, _fallback in _option_table:
    parser.add_argument(
        _flag, help=_text, type=_kind, required=_needed, default=_fallback
    )
args = parser.parse_args()
# Write to stdout
fw = sys.stdout

# Input files: the per-record-group JSON files live under
# $SCRATCH/WW2_US_logs/US_TNA_Catalog/record-groups/rg_NNN/
scratch = os.getenv("SCRATCH")
if scratch is None:
    # Without this check the path would silently start with the text "None/".
    raise SystemExit(
        "SCRATCH environment variable is not set; cannot locate the catalog files"
    )
filed = "%s/WW2_US_logs/US_TNA_Catalog/record-groups/rg_%03d/" % (
    scratch,
    args.rg,
)
if args.subgroup is not None:
    # A single file was requested with --subgroup.
    fileN = [os.path.join(filed, "rg_%03d-%03d.json" % (args.rg, args.subgroup))]
else:
    # Every file in the record-group directory. os.listdir() returns names in
    # arbitrary order, so sort them: the --startl/--endl line window then
    # selects the same records on every run.
    fileN = [os.path.join(filed, fn) for fn in sorted(os.listdir(filed))]
# Walk the input files line by line, pretty-printing each selected record.
# `count` numbers the selected lines (those passing the --match filter and
# not starting with ",") across all files; --startl/--endl refer to it.
count = 0
past_end = False
for filen in fileN:
    # The context manager closes the file on every exit path.
    with open(filen, "r") as fd:
        for line in fd:
            if args.match is not None and args.match not in line:
                continue
            # Lines starting with a comma are skipped and not counted.
            if line[0] == ",":
                continue
            count += 1
            if count < args.startl:
                continue
            if args.endl is not None and count > args.endl:
                # Nothing further can be printed from this or any later file.
                past_end = True
                break
            # Drop the newline first so a closing "]}" is also recognised on
            # a final line that has no trailing newline.
            record = line.rstrip("\n")
            # Remove the "{[" / "]}" wrapper text from the first/last record.
            if record[:2] == "{[":
                record = record[2:]
            if record[-2:] == "]}":
                record = record[:-2]
            try:
                fj = json.loads(record)
            except ValueError:
                # json.JSONDecodeError is a ValueError. Show the offending
                # text and give up on the rest of this file.
                print(record)
                break
            # Newline-terminate each record so consecutive ones do not run
            # together as "}{".
            fw.write(json.dumps(fj, indent=4) + "\n")
    if past_end:
        break