#!/usr/bin/env python3
"""
Script to create APR data mapping from CSV team names.
This script helps map APR scores to team names using the cfb_mastersheet.csv.
You can manually add APR scores or paste data from the website.
"""
import csv
import re

def load_team_mappings(csv_path='cfb_mastersheet.csv'):
    """Load team name mappings from the master CSV sheet.

    Only rows whose ``image_source`` starts with ``cfb/`` and whose
    ``flair_name`` has at least two words are used. The last word of
    ``flair_name`` is treated as the mascot and everything before it as the
    school name (so a multi-word mascot leaves part of the mascot in the
    school name).

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``cfb_mastersheet.csv`` in the current working directory.

    Returns:
        A ``(mappings, team_variations)`` tuple. ``mappings`` maps each
        ``flair_name`` to a dict with ``filename`` (``image_source`` without
        the ``cfb/`` prefix), ``school_name`` and ``abbreviated``.
        ``team_variations`` maps the school name, the abbreviation (when
        present) and the full flair name back to the ``flair_name``. When two
        rows share a variation, the later row wins.
    """
    prefix = 'cfb/'
    mappings = {}
    team_variations = {}  # Every known spelling -> canonical flair name

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # DictReader fills columns missing from a short row with None,
            # so fall back to '' before calling str methods.
            flair_name = (row.get('flair_name') or '').strip()
            image_source = (row.get('image_source') or '').strip()
            abbreviated = (row.get('flair_abbreviated') or '').strip()

            if not flair_name or not image_source.startswith(prefix):
                continue

            parts = flair_name.split()
            if len(parts) < 2:
                continue

            # Drop only the leading prefix; a later 'cfb/' inside the path
            # is part of the filename and must be kept.
            filename = image_source[len(prefix):]
            # Everything except the last word (mascot).
            school_name = ' '.join(parts[:-1])

            mappings[flair_name] = {
                'filename': filename,
                'school_name': school_name,
                'abbreviated': abbreviated,
            }

            # Store variations for matching; the full name is written last
            # so it takes precedence if it collides with another spelling.
            team_variations[school_name] = flair_name
            if abbreviated:
                team_variations[abbreviated] = flair_name
            team_variations[flair_name] = flair_name

    return mappings, team_variations

def parse_apr_text(text):
    """Parse APR data from pasted text.

    Recognised layouts (a paste may mix them and contain many entries):
        "1. Notre Dame 990", "1) Notre Dame 990", "Notre Dame - 990",
        "Notre Dame (990)" and "Notre Dame 990".

    Args:
        text: Raw text pasted by the user.

    Returns:
        Dict mapping each cleaned team name to its integer APR score. Only
        scores from 900 to 1000 inclusive are kept. If the same name is found
        more than once, the last match processed wins.
    """
    # A team name starts with a capital letter. Only horizontal whitespace is
    # allowed inside it, so a header line without a number cannot be glued
    # onto the team on the following line.
    name = r'([A-Z][A-Za-z \t&\-\.]+?)'
    # The score must not be the leading digits of a longer number
    # (otherwise "10001" would be read as 1000).
    score = r'(\d{3,4})(?!\d)'

    # Every pattern captures exactly (name, score); a leading rank is matched
    # but not captured so it can never be mistaken for the score.
    patterns = [
        r'\d+\.\s*' + name + r'\s+' + score,   # "1. Notre Dame 990"
        r'\d+\)\s+' + name + r'\s+' + score,   # "1) Notre Dame 990"
        name + r'\s+[-–]\s+' + score,          # "Notre Dame - 990"
        name + r'\s+\(' + score + r'\)',       # "Notre Dame (990)"
        name + r'\s+' + score,                 # "Notre Dame 990"
    ]

    apr_data = {}
    for pattern in patterns:
        for match in re.finditer(pattern, text):
            raw_name, raw_score = match.groups()

            apr_score = int(raw_score)
            if not 900 <= apr_score <= 1000:
                continue

            # Collapse runs of whitespace, then drop a trailing separator:
            # the generic pattern reads "Notre Dame - 990" as "Notre Dame -".
            team_name = ' '.join(raw_name.split()).rstrip(' -')

            # Two characters is the shortest plausible name; three-letter
            # schools such as LSU or UCF must be accepted.
            if len(team_name) >= 2:
                apr_data[team_name] = apr_score

    return apr_data

def _normalize_for_fuzzy(name):
    """Lower-case *name*, drop periods and collapse whitespace."""
    return ' '.join(name.lower().replace('.', '').split())


def _occurs_at_word_start(needle, haystack):
    """Return True if *needle* occurs in *haystack* starting at a word boundary."""
    if not needle:
        # '' is a substring of everything; never treat that as a match.
        return False
    index = haystack.find(needle)
    while index != -1:
        if index == 0 or not haystack[index - 1].isalnum():
            return True
        index = haystack.find(needle, index + 1)
    return False


def _best_fuzzy_match(apr_team_name, team_mappings):
    """Return the team whose name overlaps *apr_team_name* the most, or None."""
    apr_norm = _normalize_for_fuzzy(apr_team_name)
    best_team = None
    best_overlap = 0
    for team_name, data in team_mappings.items():
        for known in (_normalize_for_fuzzy(team_name),
                      _normalize_for_fuzzy(data['school_name'])):
            if (_occurs_at_word_start(apr_norm, known)
                    or _occurs_at_word_start(known, apr_norm)):
                # The contained string is the shorter of the two.
                overlap = min(len(apr_norm), len(known))
                # Strict '>' keeps the earliest team on ties.
                if overlap > best_overlap:
                    best_team = team_name
                    best_overlap = overlap
    return best_team


def match_apr_to_teams(apr_data, team_mappings, team_variations):
    """Match APR data to team names.

    Matching runs in two passes so reliable matches are never displaced by
    guesses:

    1. Exact lookup in ``team_mappings`` / ``team_variations``, then the same
       lookup ignoring case.
    2. Fuzzy lookup for the remaining names: one name must contain the other
       starting at a word boundary (so "kansas" does not match inside
       "arkansas"), compared case-insensitively with periods removed. The
       candidate with the longest contained string wins.

    A fuzzy match is rejected when its team already holds a different score;
    that entry is reported as unmatched instead of overwriting the score.

    Args:
        apr_data: Dict mapping APR team name to score.
        team_mappings: Dict mapping canonical team name to a dict that
            contains at least ``school_name``.
        team_variations: Dict mapping alternative spellings to canonical
            team names.

    Returns:
        A ``(matched, unmatched)`` tuple: ``matched`` maps canonical team
        name to score, ``unmatched`` is a list of ``(apr_team_name, score)``
        tuples in input order.
    """
    # Case-insensitive index; setdefault lets canonical names take priority
    # over variations that differ only by case.
    folded = {}
    for team_name in team_mappings:
        folded.setdefault(team_name.lower(), team_name)
    for variation, team_name in team_variations.items():
        folded.setdefault(variation.lower(), team_name)

    matched = {}
    pending = []

    # Pass 1: exact matches.
    for apr_team_name, apr_score in apr_data.items():
        if apr_team_name in team_mappings:
            matched_team = apr_team_name
        elif apr_team_name in team_variations:
            matched_team = team_variations[apr_team_name]
        else:
            matched_team = folded.get(apr_team_name.strip().lower())

        if matched_team:
            matched[matched_team] = apr_score
        else:
            pending.append((apr_team_name, apr_score))

    # Pass 2: fuzzy matches for whatever is left.
    unmatched = []
    for apr_team_name, apr_score in pending:
        matched_team = _best_fuzzy_match(apr_team_name, team_mappings)
        if matched_team is None:
            unmatched.append((apr_team_name, apr_score))
        elif matched.get(matched_team, apr_score) != apr_score:
            # The team already has a different score; do not overwrite it.
            unmatched.append((apr_team_name, apr_score))
        else:
            matched[matched_team] = apr_score

    return matched, unmatched

def write_apr_module(matched, path='apr_data.py'):
    """Write matched APR scores to a Python module defining ``APR_RANKINGS``.

    Args:
        matched: Dict mapping team name to APR score.
        path: Output file path; any existing file is overwritten.
    """
    # Sort by APR score (descending)
    sorted_apr = sorted(matched.items(), key=lambda x: x[1], reverse=True)

    # Explicit UTF-8 so non-ASCII team names are written identically on every
    # platform (Python reads source files as UTF-8 by default).
    with open(path, 'w', encoding='utf-8') as f:
        f.write('# APR Rankings for 5-7 teams\n')
        f.write('# Format: {team_name: apr_score}\n')
        f.write('# Higher APR scores are better\n')
        f.write('# Data sourced from collegefootballnews.com\n\n')
        f.write('APR_RANKINGS = {\n')
        for team_name, apr_score in sorted_apr:
            # !r emits a correctly quoted and escaped literal, so names that
            # contain an apostrophe still produce valid Python.
            f.write(f"    {team_name!r}: {apr_score},\n")
        f.write('}\n')


def _read_pasted_lines():
    """Read non-blank lines from stdin until a blank line follows data, or EOF."""
    lines = []
    while True:
        try:
            line = input()
        except EOFError:
            break
        if line.strip():
            lines.append(line)
        elif lines:
            # A blank line ends the paste only once something was entered.
            break
    return lines


def main():
    """Interactively turn pasted APR rankings into ``apr_data.py``."""
    print("Loading team mappings from CSV...")
    team_mappings, team_variations = load_team_mappings()
    print(f"Loaded {len(team_mappings)} team mappings\n")

    print("=" * 60)
    print("APR Data Entry")
    print("=" * 60)
    print("\nPlease paste the APR rankings from the website.")
    print("You can paste data in formats like:")
    print("  1. Notre Dame 990")
    print("  2. Alabama 985")
    print("  ...")
    print("\nOr paste the entire list and press Enter twice when done:\n")

    text = '\n'.join(_read_pasted_lines())

    print("\nParsing APR data...")
    apr_data = parse_apr_text(text)
    print(f"Found {len(apr_data)} APR entries\n")

    if apr_data:
        print("Sample entries found:")
        for team, score in list(apr_data.items())[:5]:
            print(f"  {team}: {score}")
        print()

    print("Matching to team names...")
    matched, unmatched = match_apr_to_teams(apr_data, team_mappings, team_variations)
    print(f"Matched {len(matched)} teams")

    if unmatched:
        print(f"\n⚠ {len(unmatched)} teams could not be automatically matched:")
        for team, score in unmatched[:10]:
            print(f"  {team}: {score}")
        if len(unmatched) > 10:
            print(f"  ... and {len(unmatched) - 10} more")

    # Generate apr_data.py
    print("\nGenerating apr_data.py...")
    write_apr_module(matched)

    print(f"\n✓ Successfully created apr_data.py with {len(matched)} teams")

    if unmatched:
        print(f"\nNote: {len(unmatched)} teams need manual mapping.")
        print("You can manually add them to apr_data.py using the team names from cfb_mastersheet.csv")


if __name__ == '__main__':
    main()

