Full page extraction - 10-year monthlies

The test on the daily-rainfall page suggested that Gemini worked almost perfectly, but struggled with the missing entries in a table. To check this, I’m going to try a test with a page that has no missing data - the 10-year monthly totals for a station. (Image)

The code is almost the same - I’ve just changed the metadata names to match the image, and tweaked the data structure for monthly totals rather than daily.

#!/usr/bin/env python3

# Get all required data from the page
# This version does a sample from the 10-year monthlies

import os
import PIL.Image
import google.generativeai as genai
import typing_extensions as typing

# You will need an API key - get it from https://ai.google.dev/gemini-api/docs/api-key

# I keep my API key in the .gemini_api file in my home directory.
# expanduser("~") locates the home directory even when $HOME is not set;
# os.getenv("HOME") would return None there and produce a bogus path.
with open(os.path.join(os.path.expanduser("~"), ".gemini_api"), "r") as file:
    api_key = file.read().strip()

# Default protocol is 'GRPC' - but that is blocked by the Office firewall.
#  Use 'REST' instead.
genai.configure(api_key=api_key, transport="rest")


# Specify a structure for the station metadata.
# Used as the `response_schema` of the "List the station metadata" request,
# whose reply is written to metadata.json.
# NOTE(review): the sample metadata output shown after the script has no
# "Year" key - confirm whether that field applies to a 10-year page.
class MetaData(typing.TypedDict):
    Year: int
    StationNumber: int
    Location: str
    County: str
    River_basin: str
    Type_of_gauge: str
    Observer: str


# Specify a structure for the monthly observations.
# One entry per month: the month name and that month's rainfall value.
# The rainfall value is declared as a string rather than a number.
class Monthly(typing.TypedDict):
    Month: str
    rainfall: str


# One year of observations: the year and a list of Monthly entries.
class Annual(typing.TypedDict):
    Year: int
    rainfall: list[Monthly]


# Top-level structure for the monthly observations: a list of Annual entries.
# Used as the `response_schema` of the request whose reply is written to
# monthly.json.
class Decadal(typing.TypedDict):
    rainfall: list[Annual]


# Structure for the annual totals: a list of values, each held as a string.
# Used as the `response_schema` of the request whose reply is written to
# totals.json.
class Totals(typing.TypedDict):
    Totals: list[str]


# Load the sample image
img = PIL.Image.open("../../images/monthlies/TYRain_1941-1950_25_pt1-10.jpg")

# Pick an AI to use - this one is the latest as of 2025-01-29
model = genai.GenerativeModel("gemini-2.0-flash-exp")


def _extract_to_files(prompt, schema, stem):
    """Ask the model one question about the page image and save the reply.

    Sends the image and `prompt` to the model, requesting a JSON reply that
    follows `schema`. Two files are written to the current directory:

    * ``<stem>.json`` - the reply text (the structured data as JSON)
    * ``<stem>.txt``  - the string form of the whole response object

    Args:
        prompt: Instruction text sent alongside the image.
        schema: TypedDict class passed as the `response_schema`.
        stem: Base name (without extension) for the two output files.

    Returns:
        The response object returned by `model.generate_content`.
    """
    response = model.generate_content(
        [img, "\n\n", prompt],
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json", response_schema=schema
        ),
    )
    # Write as UTF-8 explicitly so the output does not depend on the
    # platform's default text encoding.
    with open(f"{stem}.json", "w", encoding="utf-8") as file:
        file.write(response.text)
    with open(f"{stem}.txt", "w", encoding="utf-8") as file:
        file.write(str(response))
    return response


# Get metadata from the image
result = _extract_to_files("List the station metadata", MetaData, "metadata")

# Get the monthly observations from the image
result = _extract_to_files("List the monthly observations. ", Decadal, "monthly")

# Get the annual totals
result = _extract_to_files("List the annual totals.", Totals, "totals")

Producing three JSON output files:

{
  "County": "Monmouth",
  "Location": "Tredegar Laboratory",
  "Observer": "Tredegar Iron & Coal Co. Ltd.\nNational Coal Board.",
  "River_basin": "Ebbw-2",
  "StationNumber": 4007,
  "Type_of_gauge": "Howard"
}
{
  "rainfall": [
    {
      "Year": 1941,
      "rainfall": [
        {
          "Month": "January",
          "rainfall": "4.72"
        },
        {
          "Month": "February",
          "rainfall": "6.12"
        },
        {
          "Month": "March",
          "rainfall": "4.16"
        },
        {
          "Month": "April",
          "rainfall": "1.19"
        },
        {
          "Month": "May",
          "rainfall": "3.72"
        },
        {
          "Month": "June",
          "rainfall": "2.06"
        },
        {
          "Month": "July",
          "rainfall": "2.89"
        },
         {
          "Month": "August",
          "rainfall": "7.00"
        },
        {
          "Month": "September",
           "rainfall": "0.82"
         },
        {
          "Month": "October",
           "rainfall": "6.93"
         },
        {
          "Month": "November",
           "rainfall": "4.18"
         },
        {
          "Month":"December",
          "rainfall":"4.16"
        }
      ]
    },
    {
      "Year": 1942,
      "rainfall": [
        {
          "Month": "January",
          "rainfall": "5.42"
        },
        {
          "Month": "February",
          "rainfall": "0.55"
        },
         {
          "Month": "March",
          "rainfall": "4.33"
        },
        {
          "Month": "April",
          "rainfall": "3.59"
        },
        {
          "Month": "May",
          "rainfall": "8.98"
        },
        {
          "Month":"June",
          "rainfall":"0.31"
        },
         {
          "Month": "July",
          "rainfall":"3.42"
         },
         {
          "Month":"August",
          "rainfall":"6.54"
         },
        {
          "Month": "September",
          "rainfall": "3.95"
         },
        {
          "Month":"October",
          "rainfall":"4.52"
        },
        {
           "Month":"November",
           "rainfall":"0.78"
        },
       {
          "Month":"December",
          "rainfall":"9.00"
       }
      ]
    },
    {
      "Year": 1943,
      "rainfall": [
       {
          "Month": "January",
          "rainfall": "11.82"
        },
        {
          "Month": "February",
          "rainfall": "4.44"
        },
         {
          "Month": "March",
          "rainfall": "1.22"
        },
        {
          "Month": "April",
          "rainfall": "1.68"
        },
        {
          "Month": "May",
          "rainfall": "5.92"
        },
        {
          "Month":"June",
          "rainfall":"4.44"
        },
         {
          "Month": "July",
          "rainfall":"3.43"
         },
         {
          "Month":"August",
          "rainfall":"5.30"
         },
        {
          "Month": "September",
          "rainfall":"5.30"
         },
        {
          "Month":"October",
          "rainfall":"5.21"
        },
        {
           "Month":"November",
           "rainfall":"4.21"
        },
       {
          "Month":"December",
          "rainfall":"3.30"
       }
      ]
    },
    {
      "Year": 1944,
      "rainfall": [
         {
          "Month": "January",
          "rainfall": "6.68"
        },
        {
          "Month": "February",
          "rainfall": "1.19"
        },
        {
          "Month": "March",
          "rainfall": "0.38"
         },
        {
          "Month": "April",
          "rainfall": "2.96"
        },
        {
          "Month": "May",
          "rainfall": "2.11"
        },
         {
          "Month":"June",
          "rainfall":"3.41"
        },
         {
          "Month":"July",
          "rainfall":"3.56"
        },
         {
          "Month":"August",
          "rainfall":"4.40"
        },
        {
          "Month":"September",
          "rainfall":"5.55"
         },
        {
          "Month":"October",
          "rainfall":"8.79"
        },
        {
           "Month":"November",
           "rainfall":"9.75"
        },
       {
          "Month":"December",
          "rainfall":"5.31"
        }
      ]
    },
    {
      "Year": 1945,
      "rainfall": [
         {
          "Month": "January",
           "rainfall": "4.71"
        },
        {
          "Month": "February",
          "rainfall": "7.64"
        },
         {
          "Month": "March",
          "rainfall": "2.63"
         },
        {
          "Month": "April",
          "rainfall": "2.81"
        },
        {
          "Month": "May",
          "rainfall":"3.76"
        },
         {
          "Month":"June",
          "rainfall":"5.86"
        },
         {
          "Month":"July",
          "rainfall":"3.95"
        },
         {
          "Month":"August",
          "rainfall":"3.83"
        },
        {
           "Month":"September",
           "rainfall":"4.58"
        },
         {
          "Month":"October",
          "rainfall":"7.54"
         },
         {
          "Month":"November",
          "rainfall":"0.80"
         },
        {
          "Month":"December",
          "rainfall":"10.19"
        }
      ]
    },
    {
      "Year": 1946,
      "rainfall": [
        {
          "Month": "January",
          "rainfall": "8.55"
        },
        {
          "Month": "February",
          "rainfall": "6.00"
        },
        {
          "Month": "March",
          "rainfall":"2.48"
         },
        {
          "Month": "April",
          "rainfall": "1.96"
        },
        {
          "Month": "May",
           "rainfall":"5.14"
         },
         {
          "Month":"June",
          "rainfall":"5.68"
        },
        {
          "Month":"July",
          "rainfall":"2.66"
         },
        {
          "Month":"August",
           "rainfall":"9.30"
        },
        {
          "Month":"September",
          "rainfall":"9.23"
        },
        {
          "Month":"October",
          "rainfall":"1.63"
        },
        {
           "Month":"November",
           "rainfall":"14.24"
         },
        {
          "Month":"December",
          "rainfall":"7.17"
        }
      ]
    },
    {
      "Year": 1947,
       "rainfall": [
         {
          "Month": "January",
          "rainfall": "5.91"
        },
        {
          "Month": "February",
          "rainfall": "2.16"
        },
        {
          "Month":"March",
          "rainfall":"12.37"
        },
        {
          "Month": "April",
          "rainfall": "6.67"
        },
        {
          "Month":"May",
          "rainfall":"3.54"
        },
         {
          "Month":"June",
          "rainfall":"3.30"
        },
        {
          "Month":"July",
          "rainfall":"3.77"
         },
         {
          "Month":"August",
          "rainfall":"0.55"
         },
        {
          "Month":"September",
          "rainfall":"2.87"
         },
        {
          "Month":"October",
          "rainfall":"1.71"
         },
         {
           "Month":"November",
           "rainfall":"6.37"
         },
        {
           "Month":"December",
           "rainfall":"5.64"
        }
      ]
    },
    {
      "Year": 1948,
      "rainfall": [
        {
          "Month": "January",
           "rainfall": "14.19"
        },
        {
           "Month":"February",
          "rainfall":"3.56"
        },
         {
          "Month":"March",
          "rainfall":"3.58"
         },
        {
          "Month":"April",
          "rainfall":"3.75"
        },
        {
          "Month":"May",
          "rainfall":"4.79"
        },
         {
          "Month":"June",
          "rainfall":"6.05"
        },
         {
          "Month":"July",
          "rainfall":"1.83"
         },
        {
          "Month":"August",
          "rainfall":"0.662"
        },
        {
          "Month":"September",
          "rainfall":"5.27"
        },
        {
          "Month":"October",
           "rainfall":"6.08"
        },
        {
           "Month":"November",
           "rainfall":"3.10"
         },
       {
          "Month":"December",
           "rainfall":"11.89"
         }
      ]
    },
    {
      "Year": 1949,
       "rainfall": [
         {
          "Month": "January",
          "rainfall": "2.02"
        },
        {
          "Month": "February",
          "rainfall": "3.33"
        },
         {
          "Month":"March",
          "rainfall":"2.63"
        },
         {
          "Month":"April",
          "rainfall":"5.45"
        },
        {
          "Month":"May",
          "rainfall":"4.45"
        },
         {
          "Month":"June",
          "rainfall":"1.09"
        },
        {
          "Month":"July",
          "rainfall":"1.05"
         },
         {
          "Month":"August",
           "rainfall":"3.57"
        },
         {
           "Month":"September",
           "rainfall":"2.95"
        },
         {
          "Month":"October",
           "rainfall":"12.28"
        },
        {
           "Month":"November",
           "rainfall":"7.71"
         },
        {
          "Month":"December",
          "rainfall":"6.44"
        }
      ]
    },
    {
      "Year": 1950,
       "rainfall": [
          {
          "Month": "January",
           "rainfall": "1.81"
        },
        {
           "Month":"February",
           "rainfall":"10.92"
        },
         {
           "Month":"March",
          "rainfall":"4.19"
         },
        {
          "Month":"April",
          "rainfall":"4.67"
        },
        {
          "Month":"May",
           "rainfall":"2.91"
        },
         {
          "Month":"June",
           "rainfall":"2.34"
        },
        {
          "Month":"July",
           "rainfall":"7.58"
         },
        {
           "Month":"August",
           "rainfall":"8.69"
        },
        {
          "Month":"September",
          "rainfall":"7.02"
        },
        {
          "Month":"October",
          "rainfall":"3.18"
        },
        {
           "Month":"November",
           "rainfall":"7.62"
        },
         {
           "Month":"December",
          "rainfall":"3.63"
         }
      ]
    }
  ]
}
{
"Totals": ["47.95","51.39","56.57","54.09","58.30","74.04","54.86","70.71","52.97","64.56","58544"]
}

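And this works pretty much perfectly. The only visible slips in the output above appear to be two misplaced decimal points: "0.662" for August 1948 (the other eleven months sum to 64.09 against an annual total of 70.71, which implies 6.62), and "58544" for the final total (the ten annual totals sum to 585.44).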

Image showing extracted data