#!/usr/bin/env python3
"""
Download all files from a web directory recursively
Usage: python3 download_directory.py <URL> [-o OUTPUT_DIR] [-u USERNAME] [-p PASSWORD]
"""
import os
import sys
import requests
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
import argparse
import time

def download_file(url, local_path, auth=None, timeout=30):
    """Download a single file from *url* to *local_path*.

    Args:
        url: Fully-qualified URL of the file to fetch.
        local_path: Filesystem path to write to; missing parent
            directories are created.
        auth: Optional requests-compatible auth object (e.g. HTTPBasicAuth).
        timeout: Per-request timeout in seconds.

    Returns:
        True if the file was written successfully, False otherwise
        (the error is printed, never raised).
    """
    try:
        # requests treats auth=None as "no authentication", so one call covers
        # both cases; the `with` block releases the connection back to the pool.
        with requests.get(url, stream=True, timeout=timeout, auth=auth) as response:
            response.raise_for_status()

            # os.makedirs('') raises FileNotFoundError, so only create a
            # parent directory when the path actually has one.
            parent = os.path.dirname(local_path)
            if parent:
                os.makedirs(parent, exist_ok=True)

            file_size = 0
            with open(local_path, 'wb') as f:
                # Stream in chunks so large files never sit fully in memory.
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        file_size += len(chunk)

        print(f"✓ Downloaded: {local_path} ({file_size:,} bytes)")
        return True
    except requests.exceptions.RequestException as e:
        print(f"✗ Failed to download {url}: {e}")
        return False
    except Exception as e:
        print(f"✗ Error downloading {url}: {e}")
        return False

def get_directory_listing(url, auth=None, timeout=30):
    """Scrape an HTML index page and return its entries.

    Args:
        url: URL of the directory listing (expected to end with '/').
        auth: Optional requests-compatible auth object.
        timeout: Per-request timeout in seconds.

    Returns:
        A list of (full_url, link_text, is_dir) tuples; empty list on any
        error (the error is printed, never raised).
    """
    try:
        # requests treats auth=None as "no authentication", so one call suffices.
        response = requests.get(url, auth=auth, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        links = []

        # Works with Apache/nginx-style autoindex pages and most web servers.
        for link in soup.find_all('a', href=True):
            href = link['href']
            text = link.text.strip()

            # Skip relative parent- and current-directory links
            if href in ['../', './', '/']:
                continue

            # Skip query strings and anchors (e.g. autoindex column-sort links)
            if '?' in href or '#' in href:
                continue

            # Build full URL
            full_url = urljoin(url, href)

            # Some servers emit the parent directory (and header/footer nav)
            # as absolute links; anything that does not stay strictly under
            # *url* would make a crawler ascend or wander off-site — drop it.
            if full_url == url or not full_url.startswith(url):
                continue

            # Directory entries conventionally end with '/'
            is_dir = href.endswith('/') or text.endswith('/')

            links.append((full_url, text, is_dir))

        return links
    except requests.exceptions.RequestException as e:
        print(f"✗ Failed to list directory {url}: {e}")
        return []
    except Exception as e:
        print(f"✗ Error listing directory {url}: {e}")
        return []

def download_recursive(base_url, output_dir, auth=None, visited=None, max_depth=100, current_depth=0):
    """Recursively mirror the directory listing at *base_url* into *output_dir*.

    Args:
        base_url: URL of the directory to crawl (expected to end with '/').
        output_dir: Local directory files from *base_url* are written into;
            subdirectories are mirrored beneath it.
        auth: Optional requests-compatible auth object.
        visited: Set of already-processed directory URLs (cycle guard);
            created on the first call and shared down the recursion.
        max_depth: Maximum recursion depth before giving up.
        current_depth: Depth of the current call (internal).
    """
    if visited is None:
        visited = set()

    if current_depth > max_depth:
        print(f"⚠ Maximum depth reached, stopping at: {base_url}")
        return

    # Cycle guard: listings often link back to directories already seen.
    if base_url in visited:
        return

    visited.add(base_url)
    print(f"\n📁 Processing directory: {base_url}")

    links = get_directory_listing(base_url, auth)

    if not links:
        print(f"  (No files or subdirectories found)")
        return

    files_downloaded = 0
    dirs_processed = 0
    base_path = urlparse(base_url).path

    for url, name, is_dir in links:
        if is_dir:
            dirs_processed += 1
            # Mirror the server's hierarchy: recurse into a matching local
            # subdirectory instead of flattening every file into output_dir
            # (which lost structure and caused name collisions).
            subdir = os.path.basename(urlparse(url).path.rstrip('/')) or name.strip('/')
            if subdir in ('', '.', '..'):
                continue  # refuse degenerate / path-traversal names
            download_recursive(url, os.path.join(output_dir, subdir), auth,
                               visited, max_depth, current_depth + 1)
        else:
            # It's a file: derive its path relative to this listing.
            parsed = urlparse(url)
            # Strip the base path as a *prefix* only; str.replace would also
            # delete later occurrences of the same substring
            # (e.g. /files/sub/files/x with base /files/ became sub/x).
            if parsed.path.startswith(base_path):
                relative_path = parsed.path[len(base_path):].lstrip('/')
            else:
                relative_path = os.path.basename(parsed.path)

            if not relative_path:
                relative_path = name

            local_path = os.path.join(output_dir, relative_path)

            # Skip if file already exists (optional - remove this check to re-download)
            if os.path.exists(local_path):
                print(f"⊘ Skipped (exists): {local_path}")
            else:
                if download_file(url, local_path, auth):
                    files_downloaded += 1
                time.sleep(0.1)  # Small delay to avoid overwhelming server

    if files_downloaded > 0 or dirs_processed > 0:
        print(f"  ✓ Directory complete: {files_downloaded} files, {dirs_processed} subdirectories")

def main():
    """Command-line entry point: parse arguments and kick off the crawl."""
    arg_parser = argparse.ArgumentParser(
        description='Download directory recursively from web',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 download_directory.py https://your-golf-club.com/files/ -o ./downloads
  python3 download_directory.py https://your-golf-club.com/files/ -o ./downloads -u username -p password
  python3 download_directory.py https://your-golf-club.com/files/ -o ./downloads --max-depth 5
        """
    )
    arg_parser.add_argument('url', help='URL of the directory to download')
    arg_parser.add_argument('-o', '--output', default='./downloads',
                            help='Output directory (default: ./downloads)')
    arg_parser.add_argument('-u', '--username', help='Username for HTTP authentication')
    arg_parser.add_argument('-p', '--password', help='Password for HTTP authentication')
    arg_parser.add_argument('--max-depth', type=int, default=100,
                            help='Maximum directory depth to recurse (default: 100)')
    arg_parser.add_argument('--timeout', type=int, default=30,
                            help='Request timeout in seconds (default: 30)')

    opts = arg_parser.parse_args()

    # Only http(s) URLs make sense here; bail out early on anything else.
    if not opts.url.startswith(('http://', 'https://')):
        print("Error: URL must start with http:// or https://")
        sys.exit(1)

    # Directory URLs need a trailing slash so urljoin resolves children correctly.
    if not opts.url.endswith('/'):
        opts.url += '/'

    # Basic auth is enabled only when both credentials are supplied.
    auth = None
    if opts.username and opts.password:
        from requests.auth import HTTPBasicAuth
        auth = HTTPBasicAuth(opts.username, opts.password)
        print(f"Using authentication for user: {opts.username}")
    elif opts.username or opts.password:
        print("Warning: Both username and password are required for authentication")

    os.makedirs(opts.output, exist_ok=True)
    print(f"Output directory: {os.path.abspath(opts.output)}")
    print(f"Starting download from: {opts.url}\n")

    # NOTE(review): --timeout is parsed but never forwarded to the download
    # helpers, so their 30s defaults always apply — confirm/thread it through.
    try:
        download_recursive(opts.url, opts.output, auth, max_depth=opts.max_depth)
        print(f"\n✅ Download complete! Files saved to: {os.path.abspath(opts.output)}")
    except KeyboardInterrupt:
        print("\n\n⚠ Download interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n✗ Error: {e}")
        sys.exit(1)

# Run only when executed as a script, so importing this module has no side effects.
if __name__ == '__main__':
    main()




