#!/usr/bin/env python3
"""
Convert Excel formulas to Google Sheets compatible formulas.
Preserves all formatting while only modifying formula content.
"""

import openpyxl
from openpyxl.utils import get_column_letter
import re
import sys
import os

try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except ImportError:
    PANDAS_AVAILABLE = False

def convert_excel_formula_to_googlesheets(formula, sheet_names):
    """Convert Excel formula syntax to Google Sheets compatible syntax."""
    if not formula or not isinstance(formula, str):
        return formula

    converted = formula

    # Fix sheet references with [index] notation (e.g., '[1]Members ' -> 'Members'!)
    if sheet_names:
        # Pattern to match '[index]SheetName'! (with quotes) or [index]SheetName! (without quotes)
        pattern = r"'(?:\[(\d+)\])?([^']+?)'!"  # Matches '[1]Sheet Name' or 'Sheet Name'
        pattern_unquoted = r"\[(\d+)\]([A-Za-z0-9_\s]+)!" # Matches [1]SheetName!

        def replace_sheet_ref(match):
            # For quoted pattern
            if match.group(1): # If index is present
                index = int(match.group(1))
                if 0 <= index < len(sheet_names):
                    actual_name = sheet_names[index]
                    return f"'{actual_name}'!"
            else: # If no index, just use the name found
                name_in_formula = match.group(2).strip()
                if name_in_formula in sheet_names: # Check if it's a valid sheet name
                    return f"'{name_in_formula}'!"
            return match.group(0) # Return original if no match or invalid index

        def replace_sheet_ref_unquoted(match):
            index = int(match.group(1))
            if 0 <= index < len(sheet_names):
                actual_name = sheet_names[index]
                # Always quote sheet names for Google Sheets compatibility
                return f"'{actual_name}'!"
            return match.group(0)

        converted = re.sub(pattern, replace_sheet_ref, converted)
        converted = re.sub(pattern_unquoted, replace_sheet_ref_unquoted, converted)

    # Handle array formulas - this is the main incompatibility
    if converted.strip().startswith('{') and converted.strip().endswith('}'):
        inner = converted.strip()[1:-1].strip()
        if inner.startswith('='):
            inner = inner[1:].strip()
        return '=ARRAYFORMULA(' + inner + ')'

    return converted

def convert_workbook_formulas(input_file, output_file):
    """Convert all formulas in an Excel workbook to Google Sheets compatible format."""
    print(f"Loading workbook: {input_file}")

    wb = None
    try:
        # Load workbook - don't use keep_vba/keep_links as they can cause corruption
        # We only need to modify formulas, not preserve VBA or external links
        wb = openpyxl.load_workbook(input_file, data_only=False)
    except Exception as e:
        error_msg = str(e)
        print(f"Error loading workbook with openpyxl: {e}")

        if 'does not support file format' in error_msg or 'openpyxl does not support' in error_msg:
            print("Attempting to convert file format using pandas...")
            if PANDAS_AVAILABLE:
                try:
                    temp_converted = output_file.replace('.xlsx', '_temp_pandas_converted.xlsx')
                    excel_data = pd.read_excel(input_file, sheet_name=None, engine=None)
                    with pd.ExcelWriter(temp_converted, engine='openpyxl') as writer:
                        for sheet_name, df in excel_data.items():
                            df.to_excel(writer, sheet_name=sheet_name, index=False)
                    if os.path.exists(temp_converted):
                        print(f"File converted successfully to: {temp_converted}")
                        try:
                            wb = openpyxl.load_workbook(temp_converted, data_only=False)
                            input_file = temp_converted
                            print("Note: Formulas may not be preserved in format conversion. Values have been converted.")
                        except Exception as e2:
                            print(f"Error loading pandas-converted file: {e2}")
                            return False
                    else:
                        print("Pandas conversion failed - file not created")
                        return False
                except Exception as e_pandas:
                    print(f"Pandas conversion failed: {e_pandas}")
                    return False
            else:
                print("Pandas not available. Cannot convert file format.")
                return False
        else:
            return False

    print(f"Found {len(wb.sheetnames)} sheets: {wb.sheetnames}")

    total_converted = 0
    sheet_names_list = wb.sheetnames # Get actual sheet names once

    for sheet_name in wb.sheetnames:
        ws = wb[sheet_name]
        print(f"\nProcessing sheet: {sheet_name}")

        converted_count = 0

        for row in ws.iter_rows():
            for cell in row:
                if cell.data_type == 'f':
                    if cell.value and isinstance(cell.value, str):
                        original = cell.value
                        try:
                            converted = convert_excel_formula_to_googlesheets(original, sheet_names_list)
                            
                            if converted != original:
                                # Only update if conversion is valid
                                cell.value = converted
                                converted_count += 1
                                print(f"  Converted formula at {cell.coordinate}: {original[:50]}... -> {converted[:50]}...")
                        except Exception as e_formula:
                            print(f"  Warning: Could not convert formula at {cell.coordinate}: {e_formula}")
                            # Keep original formula if conversion fails

        print(f"  Converted {converted_count} formulas in sheet '{sheet_name}'")
        total_converted += converted_count

    print(f"\nSaving converted workbook to: {output_file}")
    try:
        # Ensure output directory exists
        output_dir = os.path.dirname(output_file)
        if output_dir and not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
        
        # Close any open file handles before saving
        if hasattr(wb, '_archive'):
            # Ensure workbook is ready to save
            pass
        
        # Save the workbook
        wb.save(output_file)
        
        # Close the workbook explicitly
        wb.close()
        
        # Verify the file was created and is valid
        if not os.path.exists(output_file):
            print(f"Error: Output file was not created: {output_file}")
            return False
        
        file_size = os.path.getsize(output_file)
        if file_size == 0:
            print(f"Error: Output file is empty: {output_file}")
            return False
        
        # Try to validate by loading it back (read-only to avoid locking)
        try:
            test_wb = openpyxl.load_workbook(output_file, read_only=True)
            test_wb.close()
            print(f"✓ Workbook converted successfully! ({total_converted} formulas converted)")
            print(f"✓ File size: {file_size} bytes")
            print(f"✓ File validated - ready for upload")
            return True
        except Exception as e_validate:
            print(f"Warning: Saved file validation failed: {e_validate}")
            # Still return True if file exists and has size, as it might be a validation issue
            if file_size > 0:
                print(f"✓ File saved ({file_size} bytes) - validation warning but file may be usable")
                return True
            return False
            
    except Exception as e:
        print(f"Error saving workbook: {e}")
        import traceback
        traceback.print_exc()
        # Make sure to close workbook even on error
        try:
            wb.close()
        except:
            pass
        return False

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python3 convert_excel_to_googlesheets.py <input_file.xlsx> [output_file.xlsx]")
        print("If output_file is not specified, it will be created as <input_file>_googlesheets.xlsx")
        sys.exit(1)

    input_file = sys.argv[1]

    if len(sys.argv) > 2:
        output_file = sys.argv[2]
    else:
        base, ext = os.path.splitext(input_file)
        output_file = f"{base}_googlesheets{ext}"

    if not os.path.exists(input_file):
        print(f"Error: File not found: {input_file}")
        sys.exit(1)

    success = convert_workbook_formulas(input_file, output_file)
    sys.exit(0 if success else 1)

