#!/usr/bin/env python3
"""
Ultra-fast S3 image swapping script optimized for EC2
Maximizes parallelism and uses AWS CLI with optimal settings
"""

import pandas as pd
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import os
import sys

# Configure AWS CLI for maximum performance
def setup_aws_cli():
    """Configure AWS CLI for optimal performance on EC2"""
    config_commands = [
        "aws configure set default.s3.max_concurrent_requests 20",
        "aws configure set default.s3.max_bandwidth 1000MB/s", 
        "aws configure set default.s3.multipart_threshold 64MB",
        "aws configure set default.s3.multipart_chunksize 16MB",
        "aws configure set default.s3.max_queue_size 10000"
    ]
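    # These settings persist to ~/.aws/config and apply to every future
    # `aws s3` run on this machine. They mainly tune transfers within a
    # single CLI invocation; with one small object per command, most of the
    # speedup here comes from the thread-level parallelism below.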
    
    print("Configuring AWS CLI for maximum performance...")
    for cmd in config_commands:
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        if result.returncode != 0:
            print(f"Warning: could not apply setting: {cmd}")
    print("AWS CLI configured for high performance")

def execute_commands_fast(commands):
    """Execute commands in order, aborting on the first failure"""
    for cmd in commands:
        try:
            # Capture output so failures can be reported without cluttering
            # the console; each command gets its own timeout
            result = subprocess.run(
                cmd,
                shell=True,
                capture_output=True,
                text=True,
                timeout=30  # 30-second timeout per command
            )
            if result.returncode != 0:
                print(f"Command failed: {cmd[:50]}... Error: {result.stderr.strip()}")
                # Stop immediately: running the later cross-copies after a
                # failed backup copy could overwrite an original and lose it
                return False
        except subprocess.TimeoutExpired:
            print(f"Command timed out: {cmd[:50]}...")
            return False
        except Exception as e:
            print(f"Command exception: {cmd[:50]}... Error: {e}")
            return False

    return True

def swap_single_pair_ultra_fast(row_data):
    """Swap a single pair of images with maximum speed"""
    index, row = row_data
    image1 = row['image1']
    image2 = row['image2']
    
    # Use timestamp + index for unique temp names to avoid conflicts
    timestamp = int(time.time() * 1000000)  # microseconds
    temp1 = f"ultratemp_{timestamp}_{index}_1"
    temp2 = f"ultratemp_{timestamp}_{index}_2"
    
    # Copy both originals aside, cross-copy them back, then delete the temps.
    # The spreadsheet paths are expected to begin with "/", so "media{image1}"
    # resolves to a key like "media/catalog/....jpg"
    commands = [
        f"aws s3 cp s3://darveys-static/media{image1} s3://darveys-static/media/{temp1} --only-show-errors",
        f"aws s3 cp s3://darveys-static/media{image2} s3://darveys-static/media/{temp2} --only-show-errors",
        f"aws s3 cp s3://darveys-static/media/{temp2} s3://darveys-static/media{image1} --acl public-read --only-show-errors",
        f"aws s3 cp s3://darveys-static/media/{temp1} s3://darveys-static/media{image2} --acl public-read --only-show-errors",
        f"aws s3 rm s3://darveys-static/media/{temp1} --only-show-errors",
        f"aws s3 rm s3://darveys-static/media/{temp2} --only-show-errors"
    ]
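    # Note: S3 has no atomic rename or swap, so there is a brief window during
    # the cross-copies in which both keys can serve the same image. Harmless for
    # a one-off migration, but worth knowing if the bucket serves live traffic.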
    
    success = execute_commands_fast(commands)
    
    if not success:
        print(f"Failed to swap pair {index + 1}")
    return index, success

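# Possible alternative (untested sketch, assumes `import boto3` and a client
# from boto3.client("s3")): boto3 reuses one HTTPS session per thread instead
# of spawning six `aws` CLI processes per swap, which should cut per-pair
# overhead considerably. copy_object/delete_object are standard boto3 S3 calls;
# the sequence mirrors swap_single_pair_ultra_fast above.
def swap_single_pair_boto3(s3_client, row_data, bucket="darveys-static"):
    """Sketch: same copy-aside / cross-copy / cleanup sequence via boto3"""
    index, row = row_data
    key1 = f"media{row['image1']}"
    key2 = f"media{row['image2']}"
    stamp = int(time.time() * 1000000)
    temp1 = f"media/ultratemp_{stamp}_{index}_1"
    temp2 = f"media/ultratemp_{stamp}_{index}_2"
    try:
        # Copy both originals aside
        s3_client.copy_object(Bucket=bucket, Key=temp1,
                              CopySource={"Bucket": bucket, "Key": key1})
        s3_client.copy_object(Bucket=bucket, Key=temp2,
                              CopySource={"Bucket": bucket, "Key": key2})
        # Cross-copy the temps back over the originals
        s3_client.copy_object(Bucket=bucket, Key=key1, ACL="public-read",
                              CopySource={"Bucket": bucket, "Key": temp2})
        s3_client.copy_object(Bucket=bucket, Key=key2, ACL="public-read",
                              CopySource={"Bucket": bucket, "Key": temp1})
        # Clean up the temps
        s3_client.delete_object(Bucket=bucket, Key=temp1)
        s3_client.delete_object(Bucket=bucket, Key=temp2)
        return index, True
    except Exception as e:
        print(f"boto3 swap failed for pair {index + 1}: {e}")
        return index, False
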
def process_mega_batch(df_chunk, chunk_id, max_workers=50):
    """Process a chunk with maximum parallelism"""
    print(f"Starting chunk {chunk_id} with {len(df_chunk)} pairs using {max_workers} workers")
    start_time = time.time()
    
    successful = 0
    failed = 0
    
    # Threads (not processes) are fine here: each worker spends its time
    # blocked on `aws` subprocesses, so the GIL is not a bottleneck
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks
        future_to_index = {
            executor.submit(swap_single_pair_ultra_fast, row_data): row_data[0]
            for row_data in df_chunk.iterrows()
        }
        
        # Collect results as they complete
        for future in as_completed(future_to_index):
            try:
                index, success = future.result()
                if success:
                    successful += 1
                else:
                    failed += 1
                    
                # Progress update every 100 completions
                if (successful + failed) % 100 == 0:
                    print(f"Chunk {chunk_id}: {successful + failed}/{len(df_chunk)} completed")
                    
            except Exception as e:
                failed += 1
                print(f"Exception in chunk {chunk_id}: {e}")
    
    end_time = time.time()
    chunk_time = end_time - start_time
    
    print(f"Chunk {chunk_id} completed: {successful} success, {failed} failed in {chunk_time:.1f}s")
    print(f"Chunk {chunk_id} average: {chunk_time/len(df_chunk):.3f}s per pair")
    
    return successful, failed, chunk_time

def ultra_fast_parallel_swap(df, chunk_size=2000, max_workers_per_chunk=50):
    """
    Process with maximum possible parallelism
    Uses multiple chunks processed simultaneously with high worker counts
    """
    total_pairs = len(df)
    print(f"\n=== ULTRA FAST MODE ===")
    print(f"Processing {total_pairs} image pairs")
    print(f"Chunk size: {chunk_size}")
    print(f"Workers per chunk: {max_workers_per_chunk}")
    
    # Split into chunks
    chunks = [df[i:i + chunk_size] for i in range(0, len(df), chunk_size)]
    num_chunks = len(chunks)
    
    print(f"Split into {num_chunks} chunks")
    
    overall_start = time.time()
    total_successful = 0
    total_failed = 0
    
    # Process chunks one at a time to avoid overwhelming EC2
    for i, chunk in enumerate(chunks):
        # Re-index each chunk from 0 so temp names stay unique within it;
        # assigning the result (rather than inplace=True) avoids pandas'
        # SettingWithCopyWarning on a slice
        chunk = chunk.reset_index(drop=True)
        
        successful, failed, chunk_time = process_mega_batch(
            chunk, 
            i + 1, 
            max_workers_per_chunk
        )
        
        total_successful += successful
        total_failed += failed
        
        # Show progress
        pairs_completed = min((i + 1) * chunk_size, total_pairs)
        
        progress_pct = (pairs_completed / total_pairs) * 100
        elapsed = time.time() - overall_start
        
        if i < num_chunks - 1:  # Not the last chunk
            avg_time_per_chunk = elapsed / (i + 1)
            remaining_chunks = num_chunks - (i + 1)
            eta_minutes = (remaining_chunks * avg_time_per_chunk) / 60
            
            print(f"\n--- PROGRESS UPDATE ---")
            print(f"Completed: {pairs_completed}/{total_pairs} pairs ({progress_pct:.1f}%)")
            print(f"Success rate: {(total_successful/(total_successful+total_failed))*100:.1f}%")
            print(f"Elapsed time: {elapsed/60:.1f} minutes")
            print(f"ETA: {eta_minutes:.1f} minutes remaining")
            print(f"Current speed: {pairs_completed/elapsed:.1f} pairs/second\n")
    
    overall_end = time.time()
    total_time = overall_end - overall_start
    
    print(f"\n=== FINAL RESULTS ===")
    print(f"Total time: {total_time:.1f} seconds ({total_time/60:.1f} minutes)")
    print(f"Successful swaps: {total_successful}")
    print(f"Failed swaps: {total_failed}")
    print(f"Success rate: {(total_successful/(total_successful+total_failed))*100:.1f}%")
    print(f"Average speed: {total_pairs/total_time:.2f} pairs/second")
    print(f"Average time per pair: {total_time/total_pairs:.3f} seconds")

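# Optional verification sketch (hypothetical helper, not called anywhere):
# `aws s3api head-object` exits 0 when a key exists, so this confirms neither
# key of a pair was lost. It checks existence only, not that contents swapped.
def verify_pair_exists(image1, image2):
    """Sketch: head both keys of a swapped pair to confirm neither is missing"""
    for path in (image1, image2):
        result = subprocess.run(
            f"aws s3api head-object --bucket darveys-static --key media{path}",
            shell=True, capture_output=True, text=True, timeout=30
        )
        if result.returncode != 0:
            print(f"⚠ Missing after swap: media{path}")
            return False
    return True
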
def check_ec2_setup():
    """Check if running on EC2 and verify setup"""
    print("Checking EC2 setup...")
    
    # Query the instance metadata service (assumes IMDSv1 is reachable; on
    # IMDSv2-only instances this falls through to the warning below)
    try:
        result = subprocess.run(
            "curl -sf --max-time 2 http://169.254.169.254/latest/meta-data/instance-id",
            shell=True,
            capture_output=True,
            text=True,
            timeout=5
        )
        if result.returncode == 0:
            instance_id = result.stdout.strip()
            print(f"✓ Running on EC2 instance: {instance_id}")
        else:
            print("⚠ Not running on EC2 - performance may be slower")
    except Exception:
        print("⚠ Could not verify EC2 status")
    
    # Check AWS CLI (v1 prints its version to stderr, v2 to stdout)
    try:
        result = subprocess.run("aws --version", shell=True, capture_output=True, text=True)
        if result.returncode == 0:
            version = (result.stdout or result.stderr).strip()
            print(f"✓ AWS CLI available: {version}")
        else:
            print("✗ AWS CLI not found")
            sys.exit(1)
    # `except Exception` (not a bare except) so SystemExit from sys.exit propagates
    except Exception:
        print("✗ AWS CLI not available")
        sys.exit(1)

    # Check credentials
    try:
        result = subprocess.run("aws sts get-caller-identity", shell=True, capture_output=True, text=True)
        if result.returncode == 0:
            print("✓ AWS credentials configured")
        else:
            print("✗ AWS credentials not configured")
            sys.exit(1)
    except Exception:
        print("✗ Could not verify AWS credentials")
        sys.exit(1)

def main():
    print("🚀 ULTRA-FAST S3 IMAGE SWAPPER 🚀")
    print("Optimized for maximum speed on EC2\n")
    
    # Setup checks
    check_ec2_setup()
    setup_aws_cli()
    
    # Load data
    if not os.path.exists('images.xlsx'):
        print("✗ images.xlsx not found in current directory")
        sys.exit(1)
    
    print("Loading image pairs...")
    df = pd.read_excel('images.xlsx')
    print(f"Loaded {len(df)} image pairs")
    
    # Ask for confirmation
    print(f"\nThis will make approximately {len(df) * 6:,} S3 API calls")
    # Rough cost estimate: 4 COPY requests per pair at ~$0.005 per 1,000
    # (us-east-1 standard-tier pricing); the 2 DELETE requests are free
    print(f"Estimated cost: ~${len(df) * 4 * 0.005 / 1000:.3f}")
    
    if len(df) > 1000:
        confirm = input(f"\nProceed with {len(df)} image swaps? (y/N): ")
        if confirm.lower() != 'y':
            print("Aborted")
            sys.exit(0)
    
    # Process with maximum speed
    ultra_fast_parallel_swap(
        df, 
        chunk_size=2000,  # Optimize chunk size based on your dataset
        max_workers_per_chunk=50  # High parallelism
    )

if __name__ == "__main__":
    main()
