# Asset-Frameworker/monitor.py
# Rusfort ce26d54a5d Pre-Codebase-review commit :3
# Codebase deduplication and Cleanup refactor
#
# Documentation updated as well
#
# Preferences update
#
# Removed testfiles from repository
# 2025-05-03 13:19:25 +02:00
#
# 273 lines
# 13 KiB
# Python
# monitor.py
import logging
import os
import re
import shutil
import sys
import tempfile # For potential temporary workspace if needed directly
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Optional

from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from watchdog.observers.polling import PollingObserver as Observer # Use polling for better compatibility
# --- Import from local modules ---
# Assuming standard project structure
from configuration import load_config, ConfigurationError # Assuming load_config is here
from processing_engine import ProcessingEngine, ProcessingError # Assuming ProcessingError exists
from rule_structure import SourceRule # Assuming SourceRule is here
# Assuming workspace utils exist - adjust path if necessary
try:
from utils.workspace_utils import prepare_processing_workspace, WorkspaceError
except ImportError:
log = logging.getLogger(__name__) # Need logger early for this message
log.warning("Could not import workspace_utils. Workspace preparation/cleanup might fail.")
# Define dummy functions/exceptions if import fails to avoid NameErrors later,
# but log prominently.
def prepare_processing_workspace(archive_path: Path) -> Path:
log.error("prepare_processing_workspace is not available!")
# Create a dummy temp dir to allow code flow, but it won't be the real one
return Path(tempfile.mkdtemp(prefix="dummy_workspace_"))
class WorkspaceError(Exception): pass
from utils.prediction_utils import generate_source_rule_from_archive, PredictionError
# --- Configuration ---
# Read from environment variables with defaults
INPUT_DIR = Path(os.environ.get('INPUT_DIR', '/data/input'))
OUTPUT_DIR = Path(os.environ.get('OUTPUT_DIR', '/data/output'))
PROCESSED_DIR = Path(os.environ.get('PROCESSED_DIR', '/data/processed'))
ERROR_DIR = Path(os.environ.get('ERROR_DIR', '/data/error'))
LOG_LEVEL_STR = os.environ.get('LOG_LEVEL', 'INFO').upper()
POLL_INTERVAL = int(os.environ.get('POLL_INTERVAL', '5'))
PROCESS_DELAY = int(os.environ.get('PROCESS_DELAY', '2'))
# Default workers for monitor - can be overridden if needed via env var
DEFAULT_WORKERS = max(1, os.cpu_count() // 2 if os.cpu_count() else 1)
NUM_WORKERS = int(os.environ.get('NUM_WORKERS', str(DEFAULT_WORKERS)))
# --- Logging Setup ---
# Configure logging (ensure logger is available before potential import errors)
log_level = getattr(logging, LOG_LEVEL_STR, logging.INFO)
log_format = '%(asctime)s [%(levelname)-8s] %(name)s: %(message)s'
date_format = '%Y-%m-%d %H:%M:%S'
logging.basicConfig(level=log_level, format=log_format, datefmt=date_format, handlers=[logging.StreamHandler(sys.stdout)])
log = logging.getLogger("monitor") # Define logger after basicConfig
# Log configuration values after logger is set up
log.info(f"Logging level set to: {logging.getLevelName(log_level)}")
log.info(f"Monitoring Input Directory: {INPUT_DIR}")
log.info(f"Output Directory: {OUTPUT_DIR}")
log.info(f"Processed Files Directory: {PROCESSED_DIR}")
log.info(f"Error Files Directory: {ERROR_DIR}")
log.info(f"Polling Interval: {POLL_INTERVAL}s")
log.info(f"Processing Delay: {PROCESS_DELAY}s")
log.info(f"Max Workers: {NUM_WORKERS}")
# --- Preset Validation ---
# --- Constants ---
SUPPORTED_SUFFIXES = ['.zip', '.rar', '.7z']
# --- Watchdog Event Handler ---
class ZipHandler(FileSystemEventHandler):
"""Handles file system events for new ZIP files."""
def __init__(self, input_dir: Path, output_dir: Path, processed_dir: Path, error_dir: Path):
self.input_dir = input_dir.resolve()
self.output_dir = output_dir.resolve()
self.processed_dir = processed_dir.resolve()
self.error_dir = error_dir.resolve()
# Ensure target directories exist
self.output_dir.mkdir(parents=True, exist_ok=True)
self.processed_dir.mkdir(parents=True, exist_ok=True)
self.error_dir.mkdir(parents=True, exist_ok=True)
# Initialize ThreadPoolExecutor
self.executor = ThreadPoolExecutor(max_workers=NUM_WORKERS)
log.info(f"Handler initialized, target directories ensured. ThreadPoolExecutor started with {NUM_WORKERS} workers.")
def on_created(self, event: FileCreatedEvent):
"""Called when a file or directory is created. Submits task to executor."""
if event.is_directory:
return
src_path = Path(event.src_path)
log.debug(f"File creation event detected: {src_path}")
# Check if the file has a supported archive extension
if src_path.suffix.lower() not in SUPPORTED_SUFFIXES:
log.debug(f"Ignoring file with unsupported extension: {src_path.name}")
return
log.info(f"Detected new archive: {src_path.name}. Waiting {PROCESS_DELAY}s before queueing...")
time.sleep(PROCESS_DELAY) # Wait for file write to complete
# Re-check if file still exists (might have been temporary or moved quickly)
if not src_path.exists():
log.warning(f"File disappeared after delay: {src_path.name}")
return
log.info(f"Queueing processing task for: {src_path.name}")
# Submit the processing task to the thread pool
# Pass necessary context like directories
self.executor.submit(
_process_archive_task,
archive_path=src_path,
output_dir=self.output_dir,
processed_dir=self.processed_dir,
error_dir=self.error_dir
)
def shutdown(self):
"""Shuts down the thread pool executor."""
log.info("Shutting down thread pool executor...")
self.executor.shutdown(wait=True)
log.info("Executor shut down.")
# move_file remains largely the same, but called from _process_archive_task now
# We make it static or move it outside the class if _process_archive_task is outside
@staticmethod
def move_file(src: Path, dest_dir: Path, reason: str):
"""Safely moves a file, handling potential name collisions."""
if not src.exists():
log.warning(f"Source file {src} does not exist, cannot move for reason: {reason}.")
return
try:
dest_path = dest_dir / src.name
# Handle potential name collision in destination
counter = 1
while dest_path.exists():
dest_path = dest_dir / f"{src.stem}_{counter}{src.suffix}"
counter += 1
if counter > 100: # Safety break
log.error(f"Could not find unique name for {src.name} in {dest_dir} after 100 attempts. Aborting move.")
return
log.info(f"Moving '{src.name}' to '{dest_dir.name}/' directory (Reason: {reason}). Final path: {dest_path.name}")
shutil.move(str(src), str(dest_path))
except Exception as e:
log.exception(f"Failed to move file {src.name} to {dest_dir}: {e}")
# --- Processing Task Function ---
def _process_archive_task(archive_path: Path, output_dir: Path, processed_dir: Path, error_dir: Path):
"""
Task executed by the ThreadPoolExecutor to process a single archive file.
"""
log.info(f"[Task:{archive_path.name}] Starting processing.")
temp_workspace_path: Optional[Path] = None
config = None
source_rule = None
move_reason = "unknown_error" # Default reason if early exit
try:
# --- a. Load Configuration ---
log.debug(f"[Task:{archive_path.name}] Loading configuration...")
# Assuming load_config() loads the main app config (e.g., app_settings.json)
# and potentially merges preset defaults or paths. Adjust if needed.
config = load_config() # Might need path argument depending on implementation
if not config:
raise ConfigurationError("Failed to load application configuration.")
log.debug(f"[Task:{archive_path.name}] Configuration loaded.")
# --- b. Generate Prediction (SourceRule) ---
log.debug(f"[Task:{archive_path.name}] Generating source rule prediction...")
# This function now handles preset extraction and validation internally
source_rule = generate_source_rule_from_archive(archive_path, config)
log.info(f"[Task:{archive_path.name}] SourceRule generated successfully.")
# --- c. Prepare Workspace ---
log.debug(f"[Task:{archive_path.name}] Preparing processing workspace...")
# This utility should handle extraction and return the temp dir path
temp_workspace_path = prepare_processing_workspace(archive_path)
log.info(f"[Task:{archive_path.name}] Workspace prepared at: {temp_workspace_path}")
# --- d. Run Processing Engine ---
log.debug(f"[Task:{archive_path.name}] Initializing Processing Engine...")
# Pass necessary parts of the config to the engine
engine = ProcessingEngine(config=config, output_base_dir=output_dir)
log.info(f"[Task:{archive_path.name}] Running Processing Engine...")
# The engine uses the source_rule to guide processing on the workspace files
engine.run(workspace_path=temp_workspace_path, source_rule=source_rule)
log.info(f"[Task:{archive_path.name}] Processing Engine finished successfully.")
move_reason = "processed" # Set success reason
# --- e. Handle Results & Move File (Implicit success if no exception) ---
# If engine.run completes without exception, assume success for now.
# More granular results could be returned by engine.run if needed.
# Moving is handled outside the main try block based on move_reason
# --- f. Blender Integration (Placeholder) ---
# TODO: Add call to utils.blender_utils.run_blender_script if needed later
# if config.get('blender', {}).get('run_script_after_processing'):
# log.info(f"[Task:{archive_path.name}] Running Blender script (placeholder)...")
# # blender_utils.run_blender_script(output_dir / source_rule.name, config)
except FileNotFoundError as e:
log.error(f"[Task:{archive_path.name}] Prerequisite file not found: {e}")
move_reason = "file_not_found"
except (ConfigurationError, PredictionError, WorkspaceError, ProcessingError) as e:
log.error(f"[Task:{archive_path.name}] Processing failed: {e}", exc_info=True)
move_reason = f"{type(e).__name__.lower()}" # e.g., "predictionerror"
except Exception as e:
log.exception(f"[Task:{archive_path.name}] An unexpected error occurred during processing: {e}")
move_reason = "unexpected_exception"
finally:
# --- Move Original Archive ---
log.debug(f"[Task:{archive_path.name}] Moving original archive based on outcome: {move_reason}")
dest_dir = processed_dir if move_reason == "processed" else error_dir
try:
# Use the static method from the handler class
ZipHandler.move_file(archive_path, dest_dir, move_reason)
except Exception as move_err:
log.exception(f"[Task:{archive_path.name}] CRITICAL: Failed to move archive file {archive_path} after processing: {move_err}")
# --- g. Cleanup Workspace ---
if temp_workspace_path and temp_workspace_path.exists():
log.debug(f"[Task:{archive_path.name}] Cleaning up workspace: {temp_workspace_path}")
try:
shutil.rmtree(temp_workspace_path)
log.info(f"[Task:{archive_path.name}] Workspace cleaned up successfully.")
except OSError as e:
log.error(f"[Task:{archive_path.name}] Error removing temporary workspace {temp_workspace_path}: {e}", exc_info=True)
elif temp_workspace_path:
log.warning(f"[Task:{archive_path.name}] Temporary workspace path recorded but not found for cleanup: {temp_workspace_path}")
log.info(f"[Task:{archive_path.name}] Processing task finished with status: {move_reason}")
# --- Main Monitor Loop ---
if __name__ == "__main__":
# Ensure input directory exists
if not INPUT_DIR.is_dir():
log.error(f"Input directory does not exist or is not a directory: {INPUT_DIR}")
log.error("Please create the directory or mount a volume correctly.")
sys.exit(1)
event_handler = ZipHandler(INPUT_DIR, OUTPUT_DIR, PROCESSED_DIR, ERROR_DIR)
observer = Observer()
observer.schedule(event_handler, str(INPUT_DIR), recursive=False) # Don't watch subdirectories
log.info("Starting file system monitor...")
observer.start()
log.info("Monitor started. Press Ctrl+C to stop.")
try:
while True:
# Keep the main thread alive, observer runs in background thread
time.sleep(1)
except KeyboardInterrupt:
log.info("Keyboard interrupt received, stopping monitor and executor...")
observer.stop()
event_handler.shutdown() # Gracefully shutdown the executor
except Exception as e:
log.exception(f"An unexpected error occurred in the main loop: {e}")
observer.stop()
event_handler.shutdown() # Ensure shutdown on other exceptions too
observer.join()
log.info("Monitor stopped.")