#!/usr/bin/env python3
"""Parse rate card text extracted from PDF into structured JSON."""
import json
import re
import pdfplumber

# Input PDF and output JSON locations, resolved against the working directory.
PDF_PATH = "rate_card_scan.pdf"
OUTPUT_PATH = "rate_card_prices.json"

# One rate-card row: item code, free-text description, unit of sale, and a
# dollar price with optional thousands separators. Compiled once so the
# per-line loop does not repeat the pattern lookup.
ITEM_LINE_RE = re.compile(
    r"(MS-\d+)\s+(.+?)\s+(Gallon|Liter|Box|Each|Pack/10|Case)\s+\$([0-9,]+\.\d{2})"
)


def parse_items(text):
    """Return the item rows found in *text* as a list of dicts.

    Each line is matched from its first character against ``ITEM_LINE_RE``;
    lines that do not match (headers, footers, blank lines) are skipped.

    Args:
        text: Text extracted from the rate card page. ``None`` or an empty
            string is accepted and yields an empty list, so a page without
            extractable text does not raise.

    Returns:
        A list of dicts with keys ``item_code``, ``description``, ``unit``
        (all ``str``) and ``price`` (``float``, thousands separators removed).
    """
    if not text:
        return []

    items = []
    for line in text.strip().split("\n"):
        m = ITEM_LINE_RE.match(line)
        if m is None:
            continue
        code, description, unit, price = m.groups()
        items.append({
            "item_code": code,
            "description": description.strip(),
            "unit": unit,
            # "1,234.50" -> 1234.5
            "price": float(price.replace(",", "")),
        })
    return items


def build_rate_card(items):
    """Wrap parsed *items* in the fixed vendor, discount and terms metadata.

    The metadata values are hard-coded constants; only ``items`` varies.
    """
    return {
        "vendor": "Meridian Supply Co.",
        "rate_card_year": 2026,
        "effective_date": "2026-01-01",
        "items": items,
        "volume_discounts": {
            "10-49_units": "5%",
            "50-99_units": "10%",
            "100+_units": "15%",
        },
        "terms": {
            "payment": "Net 30",
            "shipping": "FOB Origin",
            "minimum_order": 250.00,
        },
    }


def main():
    """Parse the first page of ``PDF_PATH``, then write and print the JSON.

    The result is written to ``OUTPUT_PATH`` and echoed to stdout. A warning
    goes to stderr when no item rows were recognised, so an empty rate card
    is not produced silently.
    """
    import sys

    with pdfplumber.open(PDF_PATH) as pdf:
        # NOTE(review): depending on the pdfplumber version, extract_text()
        # may give None or "" for a page with no text layer (e.g. an image
        # scan) - confirm; parse_items() tolerates both.
        text = pdf.pages[0].extract_text()

    items = parse_items(text)
    if not items:
        print(
            f"warning: no item rows recognised on page 1 of {PDF_PATH}",
            file=sys.stderr,
        )

    result = build_rate_card(items)

    # Explicit encoding keeps the output independent of the platform default.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
