# processing_engine.py
import os
import math
import shutil
import tempfile
import logging
from pathlib import Path
from typing import List, Dict, Tuple, Optional, Set

# Initialize the logger before the import guards below so they can use it.
# Uses the logger configured in main.py; falls back to a basic config if
# logging hasn't been set up elsewhere (e.g., when run standalone or during testing).
log = logging.getLogger(__name__)
if not log.hasHandlers():
    logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')

# Attempt to import image processing libraries.
try:
    import cv2
    import numpy as np
except ImportError as e:
    log.error(f"Failed to import cv2 or numpy in processing_engine.py: {e}", exc_info=True)
    print("ERROR: Missing required image processing libraries. Please install opencv-python and numpy:")
    print("pip install opencv-python numpy")
    # Allow the import to fail but log the error; execution will likely fail later.
    cv2 = None
    np = None

try:
    from configuration import Configuration, ConfigurationError
    from rule_structure import SourceRule, AssetRule, FileRule
    from utils.path_utils import generate_path_from_pattern, sanitize_filename
    from processing.utils import image_processing_utils as ipu
except ImportError as e:
    log.error(f"Failed to import Configuration or rule_structure classes in processing_engine.py: {e}", exc_info=True)
    print("ERROR: Cannot import Configuration or rule_structure classes.")
    print("Ensure configuration.py and rule_structure.py are in the same directory or Python path.")
    # Allow the import to fail but log the error; execution will likely fail later.
    Configuration = None
    SourceRule = None
    AssetRule = None
    FileRule = None

from processing.pipeline.orchestrator import PipelineOrchestrator
# AssetProcessingContext is used by the orchestrator, not directly here.
from processing.pipeline.stages.supplier_determination import SupplierDeterminationStage
from processing.pipeline.stages.asset_skip_logic import AssetSkipLogicStage
from processing.pipeline.stages.metadata_initialization import MetadataInitializationStage
from processing.pipeline.stages.file_rule_filter import FileRuleFilterStage
from processing.pipeline.stages.gloss_to_rough_conversion import GlossToRoughConversionStage
from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage
from processing.pipeline.stages.normal_map_green_channel import NormalMapGreenChannelStage
from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage
from processing.pipeline.stages.map_merging import MapMergingStage
from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage
from processing.pipeline.stages.output_organization import OutputOrganizationStage


# --- Custom Exception ---
class ProcessingEngineError(Exception):
    """Custom exception for errors during processing engine operations."""
    pass


# Helper functions moved to processing.utils.image_processing_utils.
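
# --- Illustrative sketch (hypothetical; not used by the pipeline at runtime) ---
# The stage classes imported above are assumed to share a uniform interface so that
# PipelineOrchestrator can execute them in sequence over a per-asset context. The
# Protocol below is only a reader-facing sketch of that assumed contract; the
# authoritative definitions live in processing.pipeline. "PipelineStageSketch" and
# its "process" signature are illustrative names, not real APIs from this codebase.
from typing import Any, Protocol


class PipelineStageSketch(Protocol):
    """Hypothetical stage contract: receive a per-asset context, return it (possibly mutated)."""

    def process(self, context: Any) -> Any:
        ...
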

# --- Processing Engine Class ---
class ProcessingEngine:
    """
    Handles the core processing pipeline for assets based on explicit rules
    provided in a SourceRule object and static configuration.

    It does not perform classification, prediction, or rule fallback internally.
    """

    def __init__(self, config_obj: Configuration):
        """
        Initializes the processing engine with static configuration.

        Args:
            config_obj: The loaded Configuration object containing static settings.
        """
        if cv2 is None or np is None or Configuration is None or SourceRule is None:
            raise ProcessingEngineError(
                "Essential libraries (OpenCV, NumPy) or classes (Configuration, SourceRule) are not available."
            )
        if not isinstance(config_obj, Configuration):
            raise ProcessingEngineError("config_obj must be a valid Configuration object.")

        self.config_obj: Configuration = config_obj
        self.temp_dir: Optional[Path] = None  # Temporary working directory for a process run
        self.loaded_data_cache: dict = {}  # Cache for loaded/resized data within a single process() call

        # --- Pipeline Orchestrator Setup ---
        self.stages = [
            SupplierDeterminationStage(),
            AssetSkipLogicStage(),
            MetadataInitializationStage(),
            FileRuleFilterStage(),
            GlossToRoughConversionStage(),
            AlphaExtractionToMaskStage(),
            NormalMapGreenChannelStage(),
            IndividualMapProcessingStage(),
            MapMergingStage(),
            MetadataFinalizationAndSaveStage(),
            OutputOrganizationStage(),
        ]
        try:
            self.pipeline_orchestrator = PipelineOrchestrator(config_obj=self.config_obj, stages=self.stages)
            log.info("PipelineOrchestrator initialized successfully in ProcessingEngine.")
        except Exception as e:
            log.error(f"Failed to initialize PipelineOrchestrator in ProcessingEngine: {e}", exc_info=True)
            self.pipeline_orchestrator = None  # Ensure it's None if init fails

        log.debug("ProcessingEngine initialized.")

    def process(
        self,
        source_rule: SourceRule,
        workspace_path: Path,
        output_base_path: Path,
        overwrite: bool = False,
        incrementing_value: Optional[str] = None,
        sha5_value: Optional[str] = None,
    ) -> Dict[str, List[str]]:
        """
        Executes the processing pipeline for all assets defined in the SourceRule.

        Args:
            source_rule: The SourceRule object containing explicit instructions for all assets and files.
            workspace_path: The path to the directory containing the source files (e.g., extracted archive).
            output_base_path: The base directory where processed output will be saved.
            overwrite: If True, forces reprocessing even if output exists for an asset.
            incrementing_value: Optional incrementing value for path tokens.
            sha5_value: Optional SHA5 hash value for path tokens.

        Returns:
            Dict[str, List[str]]: A dictionary summarizing the status of each asset:
            {"processed": [asset_name1, ...], "skipped": [asset_name2, ...], "failed": [asset_name3, ...]}
        """
        log.debug(f"ProcessingEngine.process called with rule for input: {source_rule.input_path}")
        log.debug(f"Rule details: {source_rule}")

        if not isinstance(source_rule, SourceRule):
            raise ProcessingEngineError("process() requires a valid SourceRule object.")
        if not isinstance(workspace_path, Path) or not workspace_path.is_dir():
            raise ProcessingEngineError(f"Invalid workspace path provided: {workspace_path}")
        if not isinstance(output_base_path, Path):
            raise ProcessingEngineError(f"Invalid output base path provided: {output_base_path}")

        log.info(f"ProcessingEngine starting process for {len(source_rule.assets)} asset(s) defined in SourceRule.")
        overall_status = {"processed": [], "skipped": [], "failed": []}
        self.loaded_data_cache = {}  # Reset cache for this run

        # Store incoming optional values for use in path generation.
        self.current_incrementing_value = incrementing_value
        self.current_sha5_value = sha5_value
        log.debug(f"Received incrementing_value: {self.current_incrementing_value}, sha5_value: {self.current_sha5_value}")

        # Use a temporary directory for intermediate files (like saved maps).
        try:
            self.temp_dir = Path(tempfile.mkdtemp(prefix=self.config_obj.temp_dir_prefix))
            log.debug(f"Created temporary workspace for engine: {self.temp_dir}")

            # __init__ always sets pipeline_orchestrator (possibly to None), so a
            # simple None check is sufficient here.
            if self.pipeline_orchestrator is not None:
                log.info("Processing source rule using PipelineOrchestrator.")
                overall_status = self.pipeline_orchestrator.process_source_rule(
                    source_rule=source_rule,
                    workspace_path=workspace_path,  # Path to the source files (e.g., extracted archive)
                    output_base_path=output_base_path,
                    overwrite=overwrite,
                    incrementing_value=self.current_incrementing_value,
                    sha5_value=self.current_sha5_value,
                )
            else:
                log.error(
                    f"PipelineOrchestrator not available for SourceRule '{source_rule.input_path}'. "
                    f"Marking all {len(source_rule.assets)} assets as failed."
                )
                for asset_rule in source_rule.assets:
                    overall_status["failed"].append(asset_rule.asset_name)

            log.info(f"ProcessingEngine finished. Summary: {overall_status}")
            return overall_status
        except Exception as e:
            log.exception(f"Processing engine failed unexpectedly: {e}")
            # Ensure all assets not processed/skipped are marked as failed.
            processed_or_skipped = set(overall_status["processed"] + overall_status["skipped"])
            for asset_rule in source_rule.assets:
                if asset_rule.asset_name not in processed_or_skipped:
                    overall_status["failed"].append(asset_rule.asset_name)
            return overall_status  # Return partial status if possible
        finally:
            self._cleanup_workspace()

    def _cleanup_workspace(self):
        """Removes the temporary workspace directory if it exists."""
        if self.temp_dir and self.temp_dir.exists():
            try:
                log.debug(f"Cleaning up engine temporary workspace: {self.temp_dir}")
                # Ignore errors during cleanup (e.g., permission errors on copied .git files).
                shutil.rmtree(self.temp_dir, ignore_errors=True)
                log.debug("Engine temporary workspace cleaned up successfully.")
            except Exception as e:
                log.error(f"Failed to remove engine temporary workspace {self.temp_dir}: {e}", exc_info=True)
            finally:
                # Reset after logging so error messages can still report the path.
                self.temp_dir = None
        self.loaded_data_cache = {}  # Clear cache after cleanup
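

# --- Usage sketch (illustrative only) ---
# A minimal sketch of how this engine is expected to be driven, assuming a
# Configuration instance and a populated SourceRule are built elsewhere.
# "load_configuration" and "build_source_rule" are hypothetical helper names
# used for illustration; they are not functions defined in this codebase.
#
#   config = load_configuration("config.yaml")        # hypothetical loader
#   rule = build_source_rule(Path("input/archive"))   # hypothetical rule builder
#   engine = ProcessingEngine(config)
#   status = engine.process(
#       source_rule=rule,
#       workspace_path=Path("workspace/extracted"),
#       output_base_path=Path("output"),
#       overwrite=False,
#   )
#   print(status)  # e.g. {"processed": [...], "skipped": [...], "failed": [...]}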