Script to run Textract
#!/usr/bin/env python
# Run Textract on the selected image
import argparse
import pickle
import boto3
# Command-line options: which image to analyse and where to store the result.
parser = argparse.ArgumentParser()
parser.add_argument("--source", help="Image file name",
                    type=str, default='modified.jpg')
parser.add_argument("--opfile", help="Output file name",
                    default="detection.pkl",
                    type=str, required=False)
args = parser.parse_args()

# Load the image as raw bytes; they are passed inline in the request below.
with open(args.source, 'rb') as image_file:
    ie = image_file.read()

# Analyze the document
client = boto3.client('textract')
response = client.detect_document_text(Document={'Bytes': ie})

# Save the response object as a pickle (not JSON). Using a context manager
# ensures the output file is flushed and closed deterministically instead of
# relying on garbage collection of an anonymous file handle.
with open(args.opfile, "wb") as output_file:
    pickle.dump(response, output_file)