#!/usr/bin/env python3
"""
PowerPoint Arrow Cleanup Script
Cleans up arrows in PowerPoint files by:
- Standardizing arrow formatting (color, style, thickness)
- Removing duplicate/overlapping arrows
- Aligning arrows properly
- Fixing arrow connections
"""

import os
import sys
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx.dml.color import RGBColor
from pptx.util import Inches, Pt

# Integer values of the python-pptx enum members this check relies on.  The
# enum members compare equal to these integers, so plain numbers keep the
# check independent of which member names a python-pptx release exposes.
_LINE_SHAPE_TYPE = 9        # MSO_SHAPE_TYPE.LINE (connectors report this too)
_FREEFORM_SHAPE_TYPE = 5    # MSO_SHAPE_TYPE.FREEFORM
_FIRST_BLOCK_ARROW = 33     # MSO_AUTO_SHAPE_TYPE.RIGHT_ARROW
_LAST_BLOCK_ARROW = 50      # MSO_AUTO_SHAPE_TYPE.NOTCHED_RIGHT_ARROW
_LINE_LIKE_MAX_RATIO = 0.1  # short side / long side below this is "line-like"


def _shape_attr(shape, attr):
    """Return ``shape.<attr>``, or None when the shape cannot provide it."""
    try:
        return getattr(shape, attr)
    except (AttributeError, ValueError, NotImplementedError):
        # python-pptx raises ValueError from ``auto_shape_type`` on shapes
        # that are not auto shapes; some releases raise NotImplementedError
        # from ``shape_type`` on shapes they do not model.
        return None


def is_arrow_shape(shape, *, include_heuristics=False):
    """Check if a shape is an arrow or connector line.

    A shape counts as an arrow when any of these holds:

    * its name contains "arrow" or "connector" (case-insensitive);
    * its ``shape_type`` is LINE (python-pptx reports connectors as LINE;
      ``MSO_SHAPE_TYPE`` has no ``CONNECTOR`` member);
    * it is an auto shape whose ``auto_shape_type`` is one of the block
      arrows (RIGHT_ARROW .. NOTCHED_RIGHT_ARROW, values 33-50).

    Args:
        shape: A python-pptx shape, or any object exposing a subset of
            ``name``, ``shape_type``, ``auto_shape_type``, ``width`` and
            ``height``.  Missing or unsupported attributes are treated as
            "no match" rather than as an error.
        include_heuristics: When True, additionally treat every freeform
            and every very thin shape (short side under 10% of the long
            side) as an arrow.  Off by default because these guesses also
            match titles, banner pictures and custom drawings, and the
            callers reformat and may delete whatever is reported here.

    Returns:
        True if the shape looks like an arrow or connector, else False.
    """
    # Name check first: PowerPoint's default names make it the most reliable.
    name = _shape_attr(shape, 'name')
    if isinstance(name, str):
        lowered = name.lower()
        if 'arrow' in lowered or 'connector' in lowered:
            return True

    shape_type = _shape_attr(shape, 'shape_type')
    if shape_type == _LINE_SHAPE_TYPE:
        return True

    # Block arrows that were renamed are still recognisable by preset geometry.
    auto_type = _shape_attr(shape, 'auto_shape_type')
    if isinstance(auto_type, int) and _FIRST_BLOCK_ARROW <= auto_type <= _LAST_BLOCK_ARROW:
        return True

    if not include_heuristics:
        return False

    if shape_type == _FREEFORM_SHAPE_TYPE:
        return True

    # A shape that is much longer than it is thick may be a hand-drawn line.
    width = _shape_attr(shape, 'width')
    height = _shape_attr(shape, 'height')
    if (
        isinstance(width, (int, float))
        and isinstance(height, (int, float))
        and width > 0
        and height > 0
    ):
        return min(width, height) / max(width, height) < _LINE_LIKE_MAX_RATIO

    return False

def _covers_enough(overlap, size1, size2, threshold):
    """Return True when *overlap* exceeds *threshold* of either size."""
    if overlap <= 0:
        return False
    share1 = overlap / size1 if size1 > 0 else 0
    share2 = overlap / size2 if size2 > 0 else 0
    return share1 > threshold or share2 > threshold


def shapes_overlap(shape1, shape2, threshold=0.1):
    """Check if two shapes overlap significantly.

    The comparison uses the shapes' bounding boxes (``left``, ``top``,
    ``width``, ``height``).  Two shapes overlap when the shared region is
    more than ``threshold`` of *either* shape's own extent.

    Perfectly horizontal or vertical lines have a zero-area bounding box.
    Two such lines are compared by length instead of area, and only when
    they lie on the same row (equal ``top``) or column (equal ``left``),
    so that identical straight connectors are recognised as duplicates.
    A zero-thickness line is never reported as overlapping a 2-D shape.

    Args:
        shape1: First shape; must expose left/top/width/height.
        shape2: Second shape; must expose left/top/width/height.
        threshold: Fraction (0-1) of a shape's own extent that must be
            covered for the overlap to count.

    Returns:
        True if the overlap is significant, otherwise False.  Shapes with
        missing or non-numeric geometry yield False.
    """
    try:
        left1, top1 = shape1.left, shape1.top
        width1, height1 = shape1.width, shape1.height
        left2, top2 = shape2.left, shape2.top
        width2, height2 = shape2.width, shape2.height

        # Extent of the shared region along each axis (<= 0 means disjoint).
        x_overlap = min(left1 + width1, left2 + width2) - max(left1, left2)
        y_overlap = min(top1 + height1, top2 + height2) - max(top1, top2)

        if height1 == 0 and height2 == 0:
            # Two horizontal lines: only collinear ones can coincide.
            return top1 == top2 and _covers_enough(
                x_overlap, width1, width2, threshold
            )
        if width1 == 0 and width2 == 0:
            # Two vertical lines: only collinear ones can coincide.
            return left1 == left2 and _covers_enough(
                y_overlap, height1, height2, threshold
            )

        if x_overlap <= 0 or y_overlap <= 0:
            return False

        return _covers_enough(
            x_overlap * y_overlap,
            width1 * height1,
            width2 * height2,
            threshold,
        )
    except (AttributeError, TypeError):
        # Geometry is missing (attribute absent or None) or not numeric.
        return False

_ARROW_LINE_RGB = (0, 51, 102)     # dark blue (common for org charts)
_ARROW_LINE_WIDTH_PT = 2.25        # width applied when the current one is unusable
_ARROW_MIN_WIDTH_PT = 1            # existing widths inside [min, max] are kept
_ARROW_MAX_WIDTH_PT = 5
_FALLBACK_FILL_RGB = (255, 255, 255)

# Errors a shape may raise when it does not support a formatting property.
_FORMATTING_ERRORS = (AttributeError, TypeError, ValueError, NotImplementedError)


def _attempt(step, target):
    """Run one best-effort formatting step; report whether it completed."""
    try:
        step(target)
    except _FORMATTING_ERRORS:
        return False
    return True


def _paint_line_fill(line):
    """Give the line a solid fill in the standard arrow color."""
    line.fill.solid()
    line.fill.fore_color.rgb = RGBColor(*_ARROW_LINE_RGB)


def _color_line(line):
    """Set the line color, falling back to the line's fill API."""
    try:
        line.color.rgb = RGBColor(*_ARROW_LINE_RGB)
    except AttributeError:
        _paint_line_fill(line)


def _normalize_line_width(line):
    """Keep a width between 1 pt and 5 pt; otherwise apply 2.25 pt."""
    try:
        current = line.width
        keep = (
            current is not None
            and Pt(_ARROW_MIN_WIDTH_PT) <= current <= Pt(_ARROW_MAX_WIDTH_PT)
        )
    except _FORMATTING_ERRORS:
        # Width could not be read or compared: just overwrite it.
        keep = False
    if not keep:
        line.width = Pt(_ARROW_LINE_WIDTH_PT)


def _ensure_line_visible(line):
    """Paint the line when it still has no explicit fill type."""
    if getattr(line.fill, 'type', None) is None:
        _paint_line_fill(line)


def _remove_fill(shape):
    """Make the shape's interior unfilled."""
    shape.fill.background()


def _fill_white(shape):
    """Fallback when the fill cannot be removed: solid white interior."""
    shape.fill.solid()
    shape.fill.fore_color.rgb = RGBColor(*_FALLBACK_FILL_RGB)


def standardize_arrow_formatting(shape):
    """Standardize arrow formatting.

    Applies, each on a best-effort basis:

    * line color -> dark blue ``RGB(0, 51, 102)``;
    * line width -> 2.25 pt, unless the current width is already between
      1 pt and 5 pt;
    * interior fill -> none (solid white if the fill cannot be removed).

    A step the shape does not support is skipped silently.  Any other,
    unexpected error is reported with a warning instead of being hidden.

    Args:
        shape: Shape to format.  ``line`` and ``fill`` are each used only
            if the shape has that attribute.

    Returns:
        True when processing finished (even if individual steps were
        skipped), False when an unexpected error stopped it.
    """
    try:
        if hasattr(shape, 'line'):
            line = shape.line
            for step in (_color_line, _normalize_line_width, _ensure_line_visible):
                _attempt(step, line)

        if hasattr(shape, 'fill'):
            if not _attempt(_remove_fill, shape):
                _attempt(_fill_white, shape)

        return True
    except Exception as e:
        print(f"  Warning: Could not standardize arrow formatting: {e}")
        return False

def cleanup_arrows_in_slide(slide):
    """Clean up arrows in a single slide.

    Arrows are found with ``is_arrow_shape``.  The first arrow of every
    group of heavily overlapping arrows (``shapes_overlap`` with
    ``threshold=0.8``) is kept; each later arrow that overlaps a kept
    arrow is removed from the slide.  Every arrow left on the slide is
    then passed to ``standardize_arrow_formatting``.

    Args:
        slide: Slide whose ``shapes`` are inspected and modified in place.

    Returns:
        Tuple ``(cleaned_count, removed_count)``: arrows whose formatting
        was standardized, and duplicate arrows actually removed.
    """
    arrows = [shape for shape in slide.shapes if is_arrow_shape(shape)]
    print(f"  Found {len(arrows)} arrow(s)")

    # Identify duplicates.  ``arrows`` keeps every object alive, so id() is a
    # stable key.  An arrow already marked as a duplicate is neither marked
    # again nor used as the reference that gets another arrow removed.
    duplicates = []
    duplicate_ids = set()
    for index, keeper in enumerate(arrows):
        if id(keeper) in duplicate_ids:
            continue
        for candidate in arrows[index + 1:]:
            if id(candidate) in duplicate_ids:
                continue
            if shapes_overlap(keeper, candidate, threshold=0.8):
                print("  Found overlapping arrows, removing duplicate")
                duplicates.append(candidate)
                duplicate_ids.add(id(candidate))

    # Remove the duplicates by detaching their XML element from the slide.
    removed_ids = set()
    for shape in duplicates:
        try:
            element = shape._element
            element.getparent().remove(element)
        except (AttributeError, ValueError) as exc:
            # No element, already detached, or not a child of its parent.
            print(f"  Warning: Could not remove duplicate arrow: {exc}")
        else:
            removed_ids.add(id(shape))

    # Standardize every arrow that is still on the slide.
    cleaned_count = 0
    for arrow in arrows:
        if id(arrow) in removed_ids:
            continue
        if standardize_arrow_formatting(arrow):
            cleaned_count += 1

    return cleaned_count, len(removed_ids)

def cleanup_powerpoint_arrows(pptx_path, output_path=None):
    """Clean the arrows on every slide of a presentation and save a copy.

    Args:
        pptx_path: Path of the presentation to read.
        output_path: Where to save the result.  When None, the input path
            with its extension replaced by ``_cleaned.pptx`` is used.

    Returns:
        True when the presentation was processed and saved, False when the
        input file does not exist or any error occurred (the error and its
        traceback are printed).
    """
    input_missing = not os.path.exists(pptx_path)
    if input_missing:
        print(f"Error: File '{pptx_path}' not found!")
        return False

    try:
        print(f"Loading presentation: {pptx_path}")
        deck = Presentation(pptx_path)

        slide_count = len(deck.slides)
        print(f"Processing {slide_count} slides...")

        # One (cleaned, removed) pair per slide; summed for the report below.
        per_slide = []
        for position, slide in enumerate(deck.slides, start=1):
            print(f"\nSlide {position}:")
            cleaned, removed = cleanup_arrows_in_slide(slide)
            per_slide.append((cleaned, removed))
            if not (cleaned <= 0 and removed <= 0):
                print(f"  Cleaned: {cleaned}, Removed: {removed}")

        cleaned_sum = sum(pair[0] for pair in per_slide)
        removed_sum = sum(pair[1] for pair in per_slide)

        # Derive the default output name next to the input file.
        if output_path is None:
            stem = os.path.splitext(pptx_path)[0]
            output_path = "{}_cleaned.pptx".format(stem)

        print(f"\nSaving cleaned presentation to: {output_path}")
        deck.save(output_path)

        print("\nSummary:")
        print(f"Total slides processed: {slide_count}")
        print(f"Total arrows cleaned: {cleaned_sum}")
        print(f"Total duplicate arrows removed: {removed_sum}")
        print(f"Cleaned presentation saved as: {output_path}")

        return True

    except Exception as err:
        print("Error processing presentation: " + str(err))
        import traceback
        traceback.print_exc()
        return False

def main():
    """Command-line entry point.

    Reads the input path (required) and the output path (optional) from
    ``sys.argv`` and runs ``cleanup_powerpoint_arrows`` on them.

    Returns:
        Process exit status: 0 on success, 1 when the cleanup failed,
        2 when no input file was given (usage is printed).
    """
    if len(sys.argv) < 2:
        print("Usage: python cleanup_powerpoint_arrows.py <input_file.pptx> [output_file.pptx]")
        print("\nExample:")
        print("python cleanup_powerpoint_arrows.py El_Paraiso_Org_Chart_v3.pptx")
        return 2

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None

    # No dependency check or "pip install" here: python-pptx is imported at
    # the top of this module, so it is already available whenever main() runs.
    if cleanup_powerpoint_arrows(input_file, output_file):
        print("\n✅ Arrow cleanup completed successfully!")
        return 0

    print("\n❌ Arrow cleanup failed!")
    return 1


if __name__ == "__main__":
    # Propagate the result so shells and CI can detect a failed run.
    sys.exit(main())

