#!/usr/bin/env python3
import csv
import sys
from collections import Counter, defaultdict
from urllib.parse import urlparse

# Values of 'additionalProperties/inStock' that count as "in stock".
_IN_STOCK_VALUES = frozenset({'true', '1'})


def _field(row, key):
    """Return ``row[key]`` as a string, mapping missing or ``None`` cells to ''."""
    # csv.DictReader fills cells absent from a short row with None, so a plain
    # ``row.get(key, '')`` is not enough to guarantee a string.
    return row.get(key) or ''


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is zero."""
    return 100 * part / whole if whole else 0.0


def analyze_csv(filepath):
    """Print a summary report for the product CSV at ``filepath``.

    The file is read with ``csv.DictReader`` and the following columns are
    consulted when present (absent columns are treated as empty):

    * ``url`` -- a row counts as Amazon/Walmart when the URL contains
      ``amazon.com`` / ``walmart.com``.
    * ``offers/price`` -- parsed with ``float``; empty or non-numeric values
      are left out of the price statistics.
    * ``additionalProperties/sellerName``, falling back to
      ``additionalProperties/seller/name`` -- seller frequency.
    * ``additionalProperties/inStock`` -- ``'true'`` or ``'1'`` means in stock.
    * ``additionalProperties/inStockText`` and
      ``additionalProperties/availability`` -- tallied as availability flags.

    The report (totals, price bands, top five sellers, stock ratios and the
    five most common availability flags) is written to standard output.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        None.

    Raises:
        OSError: If the file cannot be opened.
    """
    # 'utf-8-sig' strips a leading BOM (common in spreadsheet exports) that
    # would otherwise become part of the first header name; newline='' is what
    # the csv module expects so quoted embedded newlines are parsed correctly.
    with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
        products = list(csv.DictReader(f))

    total = len(products)

    # Retailer membership is decided once and reused for the stock breakdown.
    amazon_rows = [p for p in products if 'amazon.com' in _field(p, 'url')]
    walmart_rows = [p for p in products if 'walmart.com' in _field(p, 'url')]
    amazon_count = len(amazon_rows)
    walmart_count = len(walmart_rows)

    # Price analysis
    prices = []
    for p in products:
        price_str = _field(p, 'offers/price')
        if not price_str:
            continue
        try:
            prices.append(float(price_str))
        except ValueError:
            # Best effort: non-numeric prices are skipped, not fatal.
            continue

    entry_tier = sum(1 for price in prices if price < 50)
    mid_tier = sum(1 for price in prices if 50 <= price < 150)
    premium_tier = sum(1 for price in prices if price >= 150)

    # Seller analysis
    seller_counts = Counter()
    for p in products:
        seller = (_field(p, 'additionalProperties/sellerName')
                  or _field(p, 'additionalProperties/seller/name'))
        if seller:
            seller_counts[seller] += 1

    # Stock analysis
    def is_in_stock(row):
        return _field(row, 'additionalProperties/inStock') in _IN_STOCK_VALUES

    in_stock_count = sum(1 for p in products if is_in_stock(p))
    amazon_in_stock = sum(1 for p in amazon_rows if is_in_stock(p))
    walmart_in_stock = sum(1 for p in walmart_rows if is_in_stock(p))

    # Per row, the stock text is tallied before the availability value; this
    # insertion order decides how Counter.most_common breaks ties.
    availability_counts = Counter()
    for p in products:
        for key in ('additionalProperties/inStockText',
                    'additionalProperties/availability'):
            flag = _field(p, key)
            if flag:
                availability_counts[flag] += 1

    # Print results
    print(f"Total products: {total}")
    print(f"Amazon: {amazon_count}")
    print(f"Walmart: {walmart_count}")
    print()
    print(f"Price bands (n={len(prices)}):")
    print(f"  Entry (<$50): {entry_tier}")
    print(f"  Mid ($50-$149): {mid_tier}")
    print(f"  Premium ($150+): {premium_tier}")
    if prices:
        print(f"  Avg price: ${sum(prices)/len(prices):.2f}")
        print(f"  Min: ${min(prices):.2f}, Max: ${max(prices):.2f}")
    print()
    print("Top sellers:")
    for seller, count in seller_counts.most_common(5):
        print(f"  {seller}: {count} products")
    print()
    print("Stock status:")
    # _percent avoids a ZeroDivisionError when the file has no data rows.
    print(f"  Total in-stock: {in_stock_count}/{total} ({_percent(in_stock_count, total):.1f}%)")
    if amazon_count > 0:
        print(f"  Amazon in-stock: {amazon_in_stock}/{amazon_count} ({_percent(amazon_in_stock, amazon_count):.1f}%)")
    if walmart_count > 0:
        print(f"  Walmart in-stock: {walmart_in_stock}/{walmart_count} ({_percent(walmart_in_stock, walmart_count):.1f}%)")
    print()
    print("Top availability flags:")
    for flag, count in availability_counts.most_common(5):
        print(f"  '{flag}': {count}")

if __name__ == '__main__':
    # The CSV path is the first positional argument. Without it, print a
    # usage line and exit with status 2 instead of an IndexError traceback.
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <products.csv>", file=sys.stderr)
        sys.exit(2)
    analyze_csv(sys.argv[1])
