#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Phone Number Standardizer
Processes Excel file with phone numbers and standardizes them according to country code rules.
"""

import pandas as pd
import re
import sys
from pathlib import Path

def clean_phone_number(phone):
    """
    Clean and standardize a phone number according to the specified rules.
    
    Rules:
    - If first digit is 6 or 9, add +34
    - If starts with 00, change to +
    - If starts with 47, add +
    - If starts with 46, add +
    - If starts with 45, add +
    - If starts with 49, add +
    - If starts with 352, add +
    - If starts with 353, add +
    """
    if pd.isna(phone) or phone == '' or phone is None:
        return phone
    
    # Convert to string and strip whitespace
    phone_str = str(phone).strip()
    
    # Remove any existing + at the beginning for processing
    if phone_str.startswith('+'):
        phone_str = phone_str[1:]
    
    # Remove any non-digit characters except + at the beginning
    phone_str = re.sub(r'[^\d+]', '', phone_str)
    
    # If empty after cleaning, return original
    if not phone_str:
        return phone
    
    # Apply standardization rules
    if len(phone_str) > 0:
        first_char = phone_str[0]
        first_two = phone_str[:2] if len(phone_str) >= 2 else ''
        first_three = phone_str[:3] if len(phone_str) >= 3 else ''
        
        # Rule 1: If first digit is 6 or 9, add +34
        if first_char in ['6', '9']:
            return "+34" + phone_str
        
        # Rule 2: If starts with 00, change to +
        elif phone_str.startswith('00'):
            return "+" + phone_str[2:]
        
        # Rule 3: If starts with 47, add +
        elif first_two == '47':
            return "+" + phone_str
        
        # Rule 4: If starts with 46, add +
        elif first_two == '46':
            return "+" + phone_str
        
        # Rule 5: If starts with 45, add +
        elif first_two == '45':
            return "+" + phone_str
        
        # Rule 6: If starts with 49, add +
        elif first_two == '49':
            return "+" + phone_str
        
        # Rule 7: If starts with 352, add +
        elif first_three == '352':
            return "+" + phone_str
        
        # Rule 8: If starts with 353, add +
        elif first_three == '353':
            return "+" + phone_str
        
        # If no rules match, return as is (or add + if it doesn't start with +)
        else:
            if not phone_str.startswith('+'):
                return "+" + phone_str
            return phone_str
    
    return phone

def process_excel_file(input_file, output_file=None):
    """
    Process the Excel file and standardize phone numbers.
    """
    try:
        # Read the Excel file
        print("Reading file: " + str(input_file))
        df = pd.read_excel(input_file)
        
        print("Found " + str(len(df)) + " rows and " + str(len(df.columns)) + " columns")
        print("Columns: " + str(list(df.columns)))
        
        # Find phone columns (case-insensitive search)
        phone_columns = []
        for col in df.columns:
            col_lower = str(col).lower()
            if any(phone_keyword in col_lower for phone_keyword in ['phone', 'tel', 'mobile', 'cell']):
                phone_columns.append(col)
        
        print("Found phone columns: " + str(phone_columns))
        
        # If no phone columns found, ask user to specify
        if not phone_columns:
            print("\nNo phone columns automatically detected.")
            print("Available columns:")
            for i, col in enumerate(df.columns):
                print(str(i) + ": " + str(col))
            
            # Try to find columns that might contain phone numbers
            potential_phone_cols = []
            for col in df.columns:
                sample_values = df[col].dropna().head(5).astype(str)
                if any(re.search(r'\d{6,}', str(val)) for val in sample_values):
                    potential_phone_cols.append(col)
            
            if potential_phone_cols:
                print("\nPotential phone columns found: " + str(potential_phone_cols))
                phone_columns = potential_phone_cols
            else:
                print("Please specify which columns contain phone numbers.")
                return
        
        # Process each phone column
        for col in phone_columns:
            print("\nProcessing column: " + str(col))
            original_count = df[col].notna().sum()
            print("Original non-empty values: " + str(original_count))
            
            # Apply standardization
            df[col] = df[col].apply(clean_phone_number)
            
            # Show some examples
            sample_values = df[col].dropna().head(10)
            print("Sample standardized values:")
            for i, val in enumerate(sample_values):
                print("  " + str(i+1) + ": " + str(val))
        
        # Generate output filename if not provided
        if output_file is None:
            input_path = Path(input_file)
            output_file = input_path.parent / (input_path.stem + "_standardized" + input_path.suffix)
        
        # Save the processed file
        print("\nSaving processed file to: " + str(output_file))
        df.to_excel(output_file, index=False)
        
        print("\n✅ Processing complete!")
        print("Original file: " + str(input_file))
        print("Processed file: " + str(output_file))
        print("Rows processed: " + str(len(df)))
        
        return output_file
        
    except Exception as e:
        print("❌ Error processing file: " + str(e))
        return None

def main():
    """
    Main function to run the phone number standardizer.
    """
    input_file = "aa phone numbers.xlsx"
    
    # Check if file exists
    if not Path(input_file).exists():
        print("❌ File not found: " + str(input_file))
        print("Please make sure the file exists in the current directory.")
        return
    
    print("🔧 Phone Number Standardizer")
    print("=" * 50)
    
    # Process the file
    output_file = process_excel_file(input_file)
    
    if output_file:
        print("\n📁 Output saved to: " + str(output_file))
        print("\n📋 Summary of changes:")
        print("- Numbers starting with 6 or 9: Added +34")
        print("- Numbers starting with 00: Changed to +")
        print("- Numbers starting with 47, 46, 45, 49: Added +")
        print("- Numbers starting with 352, 353: Added +")
        print("- Other numbers: Added + if not present")
    else:
        print("❌ Processing failed. Please check the error messages above.")

if __name__ == "__main__":
    main()
