# Asset-Frameworker/monitor.py
# Rusfort ce26d54a5d Pre-Codebase-review commit :3
# Codebase deduplication and Cleanup refactor
#
# Documentation updated as well
#
# Preferences update
#
# Removed testfiles from repository
# 2025-05-03 13:19:25 +02:00
#
# 273 lines
# 13 KiB
# Python
# monitor.py
import logging
import os
import re
import shutil
import sys
import tempfile # For potential temporary workspace if needed directly
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Optional

from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from watchdog.observers.polling import PollingObserver as Observer # Use polling for better compatibility
# --- Import from local modules ---
# Assuming standard project structure
from configuration import load_config, ConfigurationError # Assuming load_config is here
from processing_engine import ProcessingEngine, ProcessingError # Assuming ProcessingError exists
from rule_structure import SourceRule # Assuming SourceRule is here
# Assuming workspace utils exist - adjust path if necessary
try:
from utils.workspace_utils import prepare_processing_workspace, WorkspaceError
except ImportError:
log = logging.getLogger(__name__) # Need logger early for this message
log.warning("Could not import workspace_utils. Workspace preparation/cleanup might fail.")
# Define dummy functions/exceptions if import fails to avoid NameErrors later,
# but log prominently.
def prepare_processing_workspace(archive_path: Path) -> Path:
log.error("prepare_processing_workspace is not available!")
# Create a dummy temp dir to allow code flow, but it won't be the real one
return Path(tempfile.mkdtemp(prefix="dummy_workspace_"))
class WorkspaceError(Exception): pass
from utils.prediction_utils import generate_source_rule_from_archive, PredictionError
# --- Configuration ---
# Read from environment variables with defaults
INPUT_DIR = Path(os.environ.get('INPUT_DIR', '/data/input'))
OUTPUT_DIR = Path(os.environ.get('OUTPUT_DIR', '/data/output'))
PROCESSED_DIR = Path(os.environ.get('PROCESSED_DIR', '/data/processed'))
ERROR_DIR = Path(os.environ.get('ERROR_DIR', '/data/error'))
LOG_LEVEL_STR = os.environ.get('LOG_LEVEL', 'INFO').upper()
POLL_INTERVAL = int(os.environ.get('POLL_INTERVAL', '5'))
PROCESS_DELAY = int(os.environ.get('PROCESS_DELAY', '2'))
# Default workers for monitor - can be overridden if needed via env var
DEFAULT_WORKERS = max(1, os.cpu_count() // 2 if os.cpu_count() else 1)
NUM_WORKERS = int(os.environ.get('NUM_WORKERS', str(DEFAULT_WORKERS)))
# --- Logging Setup ---
# Configure logging (ensure logger is available before potential import errors)
log_level = getattr(logging, LOG_LEVEL_STR, logging.INFO)
log_format = '%(asctime)s [%(levelname)-8s] %(name)s: %(message)s'
date_format = '%Y-%m-%d %H:%M:%S'
logging.basicConfig(level=log_level, format=log_format, datefmt=date_format, handlers=[logging.StreamHandler(sys.stdout)])
log = logging.getLogger("monitor") # Define logger after basicConfig
# Log configuration values after logger is set up
log.info(f"Logging level set to: {logging.getLevelName(log_level)}")
log.info(f"Monitoring Input Directory: {INPUT_DIR}")
log.info(f"Output Directory: {OUTPUT_DIR}")
log.info(f"Processed Files Directory: {PROCESSED_DIR}")
log.info(f"Error Files Directory: {ERROR_DIR}")
log.info(f"Polling Interval: {POLL_INTERVAL}s")
log.info(f"Processing Delay: {PROCESS_DELAY}s")
log.info(f"Max Workers: {NUM_WORKERS}")
# --- Preset Validation ---
# --- Constants ---
SUPPORTED_SUFFIXES = ['.zip', '.rar', '.7z']
# --- Watchdog Event Handler ---
class ZipHandler(FileSystemEventHandler):
"""Handles file system events for new ZIP files."""
def __init__(self, input_dir: Path, output_dir: Path, processed_dir: Path, error_dir: Path):
self.input_dir = input_dir.resolve()
self.output_dir = output_dir.resolve()
self.processed_dir = processed_dir.resolve()
self.error_dir = error_dir.resolve()
# Ensure target directories exist
self.output_dir.mkdir(parents=True, exist_ok=True)
self.processed_dir.mkdir(parents=True, exist_ok=True)
self.error_dir.mkdir(parents=True, exist_ok=True)
# Initialize ThreadPoolExecutor
self.executor = ThreadPoolExecutor(max_workers=NUM_WORKERS)
log.info(f"Handler initialized, target directories ensured. ThreadPoolExecutor started with {NUM_WORKERS} workers.")
def on_created(self, event: FileCreatedEvent):
"""Called when a file or directory is created. Submits task to executor."""
if event.is_directory:
return
src_path = Path(event.src_path)
log.debug(f"File creation event detected: {src_path}")
# Check if the file has a supported archive extension
if src_path.suffix.lower() not in SUPPORTED_SUFFIXES:
log.debug(f"Ignoring file with unsupported extension: {src_path.name}")
return
log.info(f"Detected new archive: {src_path.name}. Waiting {PROCESS_DELAY}s before queueing...")
time.sleep(PROCESS_DELAY) # Wait for file write to complete
# Re-check if file still exists (might have been temporary or moved quickly)
if not src_path.exists():
log.warning(f"File disappeared after delay: {src_path.name}")
return
log.info(f"Queueing processing task for: {src_path.name}")
# Submit the processing task to the thread pool
# Pass necessary context like directories
self.executor.submit(
_process_archive_task,
archive_path=src_path,
output_dir=self.output_dir,
processed_dir=self.processed_dir,
error_dir=self.error_dir
)
def shutdown(self):
"""Shuts down the thread pool executor."""
log.info("Shutting down thread pool executor...")
self.executor.shutdown(wait=True)
log.info("Executor shut down.")
# move_file remains largely the same, but called from _process_archive_task now
# We make it static or move it outside the class if _process_archive_task is outside
@staticmethod
def move_file(src: Path, dest_dir: Path, reason: str):
"""Safely moves a file, handling potential name collisions."""
if not src.exists():
log.warning(f"Source file {src} does not exist, cannot move for reason: {reason}.")
return
try:
dest_path = dest_dir / src.name
# Handle potential name collision in destination
counter = 1
while dest_path.exists():
dest_path = dest_dir / f"{src.stem}_{counter}{src.suffix}"
counter += 1
if counter > 100: # Safety break
log.error(f"Could not find unique name for {src.name} in {dest_dir} after 100 attempts. Aborting move.")
return
log.info(f"Moving '{src.name}' to '{dest_dir.name}/' directory (Reason: {reason}). Final path: {dest_path.name}")
shutil.move(str(src), str(dest_path))
except Exception as e:
log.exception(f"Failed to move file {src.name} to {dest_dir}: {e}")
# --- Processing Task Function ---
def _process_archive_task(archive_path: Path, output_dir: Path, processed_dir: Path, error_dir: Path):
"""
Task executed by the ThreadPoolExecutor to process a single archive file.
"""
log.info(f"[Task:{archive_path.name}] Starting processing.")
temp_workspace_path: Optional[Path] = None
config = None
source_rule = None
move_reason = "unknown_error" # Default reason if early exit
try:
# --- a. Load Configuration ---
log.debug(f"[Task:{archive_path.name}] Loading configuration...")
# Assuming load_config() loads the main app config (e.g., app_settings.json)
# and potentially merges preset defaults or paths. Adjust if needed.
config = load_config() # Might need path argument depending on implementation
if not config:
raise ConfigurationError("Failed to load application configuration.")
log.debug(f"[Task:{archive_path.name}] Configuration loaded.")
# --- b. Generate Prediction (SourceRule) ---
log.debug(f"[Task:{archive_path.name}] Generating source rule prediction...")
# This function now handles preset extraction and validation internally
source_rule = generate_source_rule_from_archive(archive_path, config)
log.info(f"[Task:{archive_path.name}] SourceRule generated successfully.")
# --- c. Prepare Workspace ---
log.debug(f"[Task:{archive_path.name}] Preparing processing workspace...")
# This utility should handle extraction and return the temp dir path
temp_workspace_path = prepare_processing_workspace(archive_path)
log.info(f"[Task:{archive_path.name}] Workspace prepared at: {temp_workspace_path}")
# --- d. Run Processing Engine ---
log.debug(f"[Task:{archive_path.name}] Initializing Processing Engine...")
# Pass necessary parts of the config to the engine
engine = ProcessingEngine(config=config, output_base_dir=output_dir)
log.info(f"[Task:{archive_path.name}] Running Processing Engine...")
# The engine uses the source_rule to guide processing on the workspace files
engine.run(workspace_path=temp_workspace_path, source_rule=source_rule)
log.info(f"[Task:{archive_path.name}] Processing Engine finished successfully.")
move_reason = "processed" # Set success reason
# --- e. Handle Results & Move File (Implicit success if no exception) ---
# If engine.run completes without exception, assume success for now.
# More granular results could be returned by engine.run if needed.
# Moving is handled outside the main try block based on move_reason
# --- f. Blender Integration (Placeholder) ---
# TODO: Add call to utils.blender_utils.run_blender_script if needed later
# if config.get('blender', {}).get('run_script_after_processing'):
# log.info(f"[Task:{archive_path.name}] Running Blender script (placeholder)...")
# # blender_utils.run_blender_script(output_dir / source_rule.name, config)
except FileNotFoundError as e:
log.error(f"[Task:{archive_path.name}] Prerequisite file not found: {e}")
move_reason = "file_not_found"
except (ConfigurationError, PredictionError, WorkspaceError, ProcessingError) as e:
log.error(f"[Task:{archive_path.name}] Processing failed: {e}", exc_info=True)
move_reason = f"{type(e).__name__.lower()}" # e.g., "predictionerror"
except Exception as e:
log.exception(f"[Task:{archive_path.name}] An unexpected error occurred during processing: {e}")
move_reason = "unexpected_exception"
finally:
# --- Move Original Archive ---
log.debug(f"[Task:{archive_path.name}] Moving original archive based on outcome: {move_reason}")
dest_dir = processed_dir if move_reason == "processed" else error_dir
try:
# Use the static method from the handler class
ZipHandler.move_file(archive_path, dest_dir, move_reason)
except Exception as move_err:
log.exception(f"[Task:{archive_path.name}] CRITICAL: Failed to move archive file {archive_path} after processing: {move_err}")
# --- g. Cleanup Workspace ---
if temp_workspace_path and temp_workspace_path.exists():
log.debug(f"[Task:{archive_path.name}] Cleaning up workspace: {temp_workspace_path}")
try:
shutil.rmtree(temp_workspace_path)
log.info(f"[Task:{archive_path.name}] Workspace cleaned up successfully.")
except OSError as e:
log.error(f"[Task:{archive_path.name}] Error removing temporary workspace {temp_workspace_path}: {e}", exc_info=True)
elif temp_workspace_path:
log.warning(f"[Task:{archive_path.name}] Temporary workspace path recorded but not found for cleanup: {temp_workspace_path}")
log.info(f"[Task:{archive_path.name}] Processing task finished with status: {move_reason}")
# --- Main Monitor Loop ---
if __name__ == "__main__":
# Ensure input directory exists
if not INPUT_DIR.is_dir():
log.error(f"Input directory does not exist or is not a directory: {INPUT_DIR}")
log.error("Please create the directory or mount a volume correctly.")
sys.exit(1)
event_handler = ZipHandler(INPUT_DIR, OUTPUT_DIR, PROCESSED_DIR, ERROR_DIR)
observer = Observer()
observer.schedule(event_handler, str(INPUT_DIR), recursive=False) # Don't watch subdirectories
log.info("Starting file system monitor...")
observer.start()
log.info("Monitor started. Press Ctrl+C to stop.")
try:
while True:
# Keep the main thread alive, observer runs in background thread
time.sleep(1)
except KeyboardInterrupt:
log.info("Keyboard interrupt received, stopping monitor and executor...")
observer.stop()
event_handler.shutdown() # Gracefully shutdown the executor
except Exception as e:
log.exception(f"An unexpected error occurred in the main loop: {e}")
observer.stop()
event_handler.shutdown() # Ensure shutdown on other exceptions too
observer.join()
log.info("Monitor stopped.")