#!/usr/bin/env python3
import csv
import statistics
import sys
from collections import Counter

def _pct(part, whole):
    """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
    return 100 * part / whole if whole else 0.0


def _field(row, key):
    """Return ``row[key]``, mapping a missing or ``None`` value to ''.

    ``csv.DictReader`` fills the trailing columns of a short row with ``None``,
    so ``row.get(key, '')`` alone can still hand back ``None``.
    """
    return row.get(key) or ''


def _print_header(title):
    """Print a section title framed by two 60-character rules."""
    rule = "=" * 60
    print(rule)
    print(title)
    print(rule)


def analyze_csv(filepath):
    """Print a summary report for a CSV of scraped product listings.

    The file is read as UTF-8 with a header row. The columns consulted are
    ``url``, ``name``, ``offers/price``, ``additionalProperties/sellerName``,
    ``additionalProperties/seller/name``, ``additionalProperties/inStock``,
    ``additionalProperties/inStockText`` and
    ``additionalProperties/availability``; any of them may be absent or empty.

    The report is written to stdout in five sections: coverage per retailer,
    price bands, seller patterns, stock/availability, and top brands (the
    first word of each product name).

    Args:
        filepath: Path of the CSV file to analyze.

    Returns:
        None. The report is printed.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' lets the csv module handle quoted fields with embedded newlines.
    with open(filepath, 'r', encoding='utf-8', newline='') as f:
        products = list(csv.DictReader(f))

    total = len(products)
    in_stock_values = ('true', '1', 'True')

    amazon_count = 0
    walmart_count = 0
    prices = []
    price_by_retailer = {'amazon': [], 'walmart': []}
    sellers = []
    amazon_direct = 0
    in_stock_count = 0
    amazon_in_stock = 0
    walmart_in_stock = 0
    availability_flags = []
    brands = []

    for p in products:
        url = _field(p, 'url')
        is_amazon = 'amazon.com' in url
        is_walmart = 'walmart.com' in url
        amazon_count += is_amazon
        walmart_count += is_walmart

        # Price: unparseable values are skipped, as are values outside the
        # open interval (5, 2000), which are treated as outliers.
        price_str = _field(p, 'offers/price')
        if price_str:
            try:
                price = float(price_str)
            except ValueError:
                price = None
            if price is not None and 5 < price < 2000:
                prices.append(price)
                if is_amazon:
                    price_by_retailer['amazon'].append(price)
                elif is_walmart:
                    price_by_retailer['walmart'].append(price)

        # Seller: the first non-empty of the two seller-name columns.
        seller = (_field(p, 'additionalProperties/sellerName')
                  or _field(p, 'additionalProperties/seller/name'))
        if seller:
            sellers.append(seller)
            if is_amazon and seller == 'Amazon.com':
                amazon_direct += 1

        # Stock flag and free-text availability signal.
        if _field(p, 'additionalProperties/inStock') in in_stock_values:
            in_stock_count += 1
            amazon_in_stock += is_amazon
            walmart_in_stock += is_walmart

        signal = (_field(p, 'additionalProperties/inStockText')
                  or _field(p, 'additionalProperties/availability'))
        if signal:
            availability_flags.append(signal)

        # Brand: first whitespace-separated word of the product name.
        words = _field(p, 'name').split()
        if words:
            brands.append(words[0])

    entry_tier = sum(1 for price in prices if price < 50)
    mid_tier = sum(1 for price in prices if 50 <= price < 150)
    premium_tier = sum(1 for price in prices if price >= 150)

    seller_counts = Counter(sellers)
    availability_counts = Counter(availability_flags)
    brand_counts = Counter(brands)

    # Print results
    _print_header("COVERAGE")
    print(f"Total products captured: {total}")
    print(f"  Amazon: {amazon_count} ({_pct(amazon_count, total):.1f}%)")
    print(f"  Walmart: {walmart_count} ({_pct(walmart_count, total):.1f}%)")
    print()

    _print_header("PRICE BANDS (USD)")
    print(f"Valid prices: {len(prices)}/{total}")
    if prices:
        n_prices = len(prices)
        print(f"  Entry tier (<$50): {entry_tier} products ({_pct(entry_tier, n_prices):.1f}%)")
        print(f"  Mid tier ($50-$149): {mid_tier} products ({_pct(mid_tier, n_prices):.1f}%)")
        print(f"  Premium tier ($150+): {premium_tier} products ({_pct(premium_tier, n_prices):.1f}%)")
        print(f"\n  Average: ${sum(prices)/n_prices:.2f}")
        # statistics.median averages the two middle values for even counts.
        print(f"  Median: ${statistics.median(prices):.2f}")
        print(f"  Range: ${min(prices):.2f} - ${max(prices):.2f}")

        amazon_prices = price_by_retailer['amazon']
        walmart_prices = price_by_retailer['walmart']
        if amazon_prices:
            print(f"\n  Amazon avg: ${sum(amazon_prices)/len(amazon_prices):.2f}")
        if walmart_prices:
            print(f"  Walmart avg: ${sum(walmart_prices)/len(walmart_prices):.2f}")
    print()

    _print_header("SELLER PATTERNS")
    # Every Amazon listing whose seller is not exactly 'Amazon.com' -- including
    # listings with no seller recorded -- is counted as third-party here.
    third_party = amazon_count - amazon_direct
    print(f"Amazon.com (1P): {amazon_direct}/{amazon_count} ({_pct(amazon_direct, amazon_count):.1f}%)")
    print(f"Third-party sellers: {third_party}/{amazon_count} ({_pct(third_party, amazon_count):.1f}%)")
    print("\nTop sellers across listings:")
    for seller, count in seller_counts.most_common(8):
        print(f"  {seller}: {count} products")
    print()

    _print_header("STOCK / AVAILABILITY")
    print(f"Overall in-stock: {in_stock_count}/{total} ({_pct(in_stock_count, total):.1f}%)")
    if amazon_count > 0:
        print(f"  Amazon: {amazon_in_stock}/{amazon_count} ({_pct(amazon_in_stock, amazon_count):.1f}%)")
    if walmart_count > 0:
        print(f"  Walmart: {walmart_in_stock}/{walmart_count} ({_pct(walmart_in_stock, walmart_count):.1f}%)")

    print("\nCommon availability signals:")
    for flag, count in availability_counts.most_common(6):
        # Very long strings are skipped to keep the report readable.
        if len(flag) < 100:
            print(f"  '{flag}': {count}")
    print()

    _print_header("TOP BRANDS (by first word in product name)")
    for brand, count in brand_counts.most_common(10):
        # First words of one or two characters are not shown as brands.
        if len(brand) > 2:
            print(f"  {brand}: {count}")

# Script entry point: expects the CSV path as the first command-line argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # instead of surfacing a bare IndexError traceback.
        sys.exit(f"Usage: {sys.argv[0]} <products.csv>")
    analyze_csv(sys.argv[1])