#!/usr/bin/env python3
"""
Convert apr_data.py to use abbreviated team names from CSV
"""
import csv
import re

# Minimum length ratio (shorter / longer) a substring match must exceed
# before it is accepted as a fuzzy match.
FUZZY_MATCH_THRESHOLD = 0.5


def build_name_to_abbrev(rows):
    """Build a mapping from team names to abbreviated names.

    Args:
        rows: Iterable of dict-like CSV rows carrying the columns
            ``family_name``, ``flair_name`` and ``flair_abbreviated``.

    Returns:
        Dict mapping each FBS ``flair_name`` to its ``flair_abbreviated``
        value, plus a secondary entry keyed by the flair name with its last
        word removed (the school name without a one-word mascot). A
        secondary entry never replaces an existing key.
    """
    name_to_abbrev = {}
    for row in rows:
        # Only rows whose family_name is exactly 'FBS' are considered.
        if row.get('family_name') != 'FBS':
            continue

        # DictReader fills missing cells of short rows with None, so guard
        # before calling str methods.
        flair_name = (row.get('flair_name') or '').strip()
        abbrev = (row.get('flair_abbreviated') or '').strip()

        # Skip incomplete rows and non-team entries (like /r/CFB).
        if not flair_name or not abbrev or flair_name.startswith('/'):
            continue

        # Store primary mapping (always wins over a derived key).
        name_to_abbrev[flair_name] = abbrev

        # Also map school name without mascot (last word dropped).
        parts = flair_name.split()
        if len(parts) >= 2:
            name_to_abbrev.setdefault(' '.join(parts[:-1]), abbrev)

    return name_to_abbrev


def load_name_to_abbrev(csv_path='cfb_mastersheet.csv'):
    """Read *csv_path* and return the mapping built by build_name_to_abbrev."""
    # newline='' is what the csv module asks for when opening files.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        return build_name_to_abbrev(csv.DictReader(f))


def parse_apr_entries(content):
    """Extract ``(team_name, score_text)`` pairs from apr_data.py source text.

    Matches every ``'name': digits`` occurrence in *content*; the score is
    returned as the matched digit string.
    """
    return re.findall(r"'([^']+)':\s*(\d+)", content)


def find_abbrev(full_name, name_to_abbrev):
    """Return the abbreviation for *full_name*, or None when nothing matches.

    Lookup order:
      1. exact key match;
      2. case-insensitive exact match;
      3. substring match in either direction, choosing the candidate with
         the highest shorter/longer length ratio, accepted only when that
         ratio exceeds FUZZY_MATCH_THRESHOLD.
    """
    if full_name in name_to_abbrev:
        return name_to_abbrev[full_name]

    full_lower = full_name.lower()
    best_match = None
    best_ratio = 0.0

    for csv_name, csv_abbrev in name_to_abbrev.items():
        csv_lower = csv_name.lower()

        if full_lower == csv_lower:
            return csv_abbrev

        if full_lower in csv_lower or csv_lower in full_lower:
            longer = max(len(full_lower), len(csv_lower))
            ratio = min(len(full_lower), len(csv_lower)) / longer
            if ratio > best_ratio:
                best_ratio = ratio
                best_match = csv_abbrev

    if best_match and best_ratio > FUZZY_MATCH_THRESHOLD:
        return best_match
    return None


def match_teams(teams, name_to_abbrev):
    """Map parsed APR entries onto abbreviations.

    Args:
        teams: Iterable of ``(team_name, score_text)`` pairs.
        name_to_abbrev: Mapping produced by build_name_to_abbrev.

    Returns:
        ``(abbrev_to_apr, unmatched)`` where ``abbrev_to_apr`` maps each
        abbreviation to its integer APR score and ``unmatched`` lists the
        ``(team_name, score_text)`` pairs that found no abbreviation.
        If several names resolve to the same abbreviation, the last one
        in *teams* wins.
    """
    abbrev_to_apr = {}
    unmatched = []

    for full_name, score in teams:
        # The similarity ratio lives inside find_abbrev, so it can never
        # overwrite the APR score being stored here.
        abbrev = find_abbrev(full_name, name_to_abbrev)
        if abbrev:
            abbrev_to_apr[abbrev] = int(score)
        else:
            unmatched.append((full_name, score))

    return abbrev_to_apr, unmatched


def format_apr_module(abbrev_to_apr):
    """Return the source text of the regenerated apr_data.py.

    Entries are ordered by APR score, highest first; ties keep the
    insertion order of *abbrev_to_apr*.
    """
    lines = [
        '# APR Rankings for 5-7 teams\n',
        '# Format: {team_abbreviation: apr_score}\n',
        '# Higher APR scores are better\n',
        '# Data sourced from collegefootballnews.com (2023-2024 APR)\n',
        '# Team names use flair_abbreviated from cfb_mastersheet.csv\n\n',
        'APR_RANKINGS = {\n',
    ]

    # Sort by APR score (descending)
    sorted_apr = sorted(abbrev_to_apr.items(), key=lambda x: x[1], reverse=True)
    for abbrev, apr_score in sorted_apr:
        # !r emits a valid Python literal even when the abbreviation
        # contains an apostrophe or backslash; plain names still come out
        # single-quoted.
        lines.append(f"    {abbrev!r}: {apr_score},\n")

    lines.append('}\n')
    return ''.join(lines)


def main():
    """Rewrite apr_data.py in place so it is keyed by abbreviated names."""
    name_to_abbrev = load_name_to_abbrev('cfb_mastersheet.csv')
    print(f"Loaded {len(name_to_abbrev)} team name mappings from CSV")

    # Read current apr_data.py
    with open('apr_data.py', 'r', encoding='utf-8') as f:
        content = f.read()
    teams = parse_apr_entries(content)
    print(f"Found {len(teams)} teams in apr_data.py")

    # Map each team to its abbreviation
    abbrev_to_apr, unmatched = match_teams(teams, name_to_abbrev)

    print(f"\nMatched {len(abbrev_to_apr)} teams to abbreviations")
    if unmatched:
        print(f"Unmatched {len(unmatched)} teams:")
        for name, score in unmatched[:10]:
            print(f"  {name}: {score}")

    # Generate new apr_data.py with abbreviations. UTF-8 matches the CSV
    # encoding so non-ASCII abbreviations are written consistently.
    with open('apr_data.py', 'w', encoding='utf-8') as f:
        f.write(format_apr_module(abbrev_to_apr))

    print(f"\n✓ Successfully created apr_data.py with {len(abbrev_to_apr)} teams using abbreviated names")

    if unmatched:
        print(f"\n⚠ Note: {len(unmatched)} teams could not be matched and were excluded.")
        print("You may need to manually add them to apr_data.py")


if __name__ == "__main__":
    main()