# processing_engine.py
|
|
|
|
import os
|
|
import math
|
|
import shutil
|
|
import tempfile
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import List, Dict, Tuple, Optional, Set
|
|
|
|
# Attempt to import image processing libraries
try:
    import cv2
    import numpy as np
except ImportError as e:
    # BUGFIX: the module-level `log` is not created until later in this file,
    # so referencing it here raised NameError and masked the real ImportError.
    # Obtain a logger locally from the already-imported logging module instead.
    logging.getLogger(__name__).error(
        f"Failed to import cv2 or numpy in processing_engine.py: {e}", exc_info=True
    )
    print("ERROR: Missing required image processing libraries. Please install opencv-python and numpy:")
    print("pip install opencv-python numpy")
    # Allow import to fail but log error; execution will likely fail later
    cv2 = None
    np = None
|
|
|
|
|
|
|
|
try:
    from configuration import Configuration, ConfigurationError
    from rule_structure import SourceRule, AssetRule, FileRule
    from utils.path_utils import generate_path_from_pattern, sanitize_filename
    from processing.utils import image_processing_utils as ipu  # Corrected import
except ImportError as e:
    # Temporarily print to console as log might not be initialized yet
    print(f"ERROR during initial imports in processing_engine.py: {e}")
    print("ERROR: Cannot import Configuration or rule_structure classes.")
    print("Ensure configuration.py and rule_structure.py are in the same directory or Python path.")
    # Allow import to fail but log error; execution will likely fail later.
    # Define *every* name imported above so later code hits a clean None check
    # rather than a NameError (previously only the first four were defined).
    Configuration = None
    ConfigurationError = None
    SourceRule = None
    AssetRule = None
    FileRule = None
    generate_path_from_pattern = None
    sanitize_filename = None
    ipu = None
|
|
|
|
|
|
# Initialize the module-level logger early so later code can use `log`.
log = logging.getLogger(__name__)
# Fall back to a basic root configuration only if no handlers were installed
# elsewhere (e.g., when this module is imported during standalone testing).
if not log.hasHandlers():
    logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
|
|
|
# Use logger defined in main.py (or configure one here if run standalone)
|
|
|
|
from processing.pipeline.orchestrator import PipelineOrchestrator
|
|
# from processing.pipeline.asset_context import AssetProcessingContext # AssetProcessingContext is used by the orchestrator
|
|
from processing.pipeline.stages.supplier_determination import SupplierDeterminationStage
|
|
from processing.pipeline.stages.asset_skip_logic import AssetSkipLogicStage
|
|
from processing.pipeline.stages.metadata_initialization import MetadataInitializationStage
|
|
from processing.pipeline.stages.file_rule_filter import FileRuleFilterStage
|
|
from processing.pipeline.stages.gloss_to_rough_conversion import GlossToRoughConversionStage
|
|
from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage
|
|
from processing.pipeline.stages.normal_map_green_channel import NormalMapGreenChannelStage
|
|
from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage
|
|
from processing.pipeline.stages.map_merging import MapMergingStage
|
|
from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage
|
|
from processing.pipeline.stages.output_organization import OutputOrganizationStage
|
|
|
|
# --- Custom Exception ---
|
|
class ProcessingEngineError(Exception):
    """Raised when a processing engine operation fails."""
|
|
|
|
# Helper functions moved to processing.utils.image_processing_utils
|
|
|
|
# --- Processing Engine Class ---
|
|
class ProcessingEngine:
    """
    Handles the core processing pipeline for assets based on explicit rules
    provided in a SourceRule object and static configuration.

    It does not perform classification, prediction, or rule fallback
    internally; all per-asset work is delegated to a PipelineOrchestrator
    built from a fixed sequence of stages.
    """

    def __init__(self, config_obj: Configuration):
        """
        Initializes the processing engine with static configuration.

        Args:
            config_obj: The loaded Configuration object containing static settings.

        Raises:
            ProcessingEngineError: If required libraries/classes failed to
                import at module load time, or config_obj is not a
                Configuration instance.
        """
        # Fail fast if the guarded imports at the top of this module fell back to None.
        if cv2 is None or np is None or Configuration is None or SourceRule is None:
            raise ProcessingEngineError("Essential libraries (OpenCV, NumPy) or classes (Configuration, SourceRule) are not available.")

        if not isinstance(config_obj, Configuration):
            raise ProcessingEngineError("config_obj must be a valid Configuration object.")

        self.config_obj: Configuration = config_obj
        # Path to the temporary working directory for a process() run.
        self.temp_dir: Optional[Path] = None
        # Cache for loaded/resized data within a single process() call.
        self.loaded_data_cache: dict = {}

        # --- Pipeline Orchestrator Setup ---
        # Stage order matters: determination/skip/metadata stages run first,
        # then the per-map conversion stages, then merging, finalization and
        # output organization.
        self.stages = [
            SupplierDeterminationStage(),
            AssetSkipLogicStage(),
            MetadataInitializationStage(),
            FileRuleFilterStage(),
            GlossToRoughConversionStage(),
            AlphaExtractionToMaskStage(),
            NormalMapGreenChannelStage(),
            IndividualMapProcessingStage(),
            MapMergingStage(),
            MetadataFinalizationAndSaveStage(),
            OutputOrganizationStage(),
        ]
        try:
            self.pipeline_orchestrator = PipelineOrchestrator(config_obj=self.config_obj, stages=self.stages)
            log.info("PipelineOrchestrator initialized successfully in ProcessingEngine.")
        except Exception as e:
            log.error(f"Failed to initialize PipelineOrchestrator in ProcessingEngine: {e}", exc_info=True)
            # process() checks for None and marks every asset as failed.
            self.pipeline_orchestrator = None

        log.debug("ProcessingEngine initialized.")

    def process(
        self,
        source_rule: SourceRule,
        workspace_path: Path,
        output_base_path: Path,
        overwrite: bool = False,
        incrementing_value: Optional[str] = None,
        sha5_value: Optional[str] = None
    ) -> Dict[str, List[str]]:
        """
        Executes the processing pipeline for all assets defined in the SourceRule.

        Args:
            source_rule: The SourceRule object containing explicit instructions for all assets and files.
            workspace_path: The path to the directory containing the source files (e.g., extracted archive).
            output_base_path: The base directory where processed output will be saved.
            overwrite: If True, forces reprocessing even if output exists for an asset.
            incrementing_value: Optional incrementing value for path tokens.
            sha5_value: Optional SHA5 hash value for path tokens.

        Returns:
            Dict[str, List[str]]: A dictionary summarizing the status of each asset:
                {"processed": [asset_name1, ...],
                 "skipped": [asset_name2, ...],
                 "failed": [asset_name3, ...]}

        Raises:
            ProcessingEngineError: If any of the arguments fail validation.
        """
        log.info(f"VERIFY: ProcessingEngine.process called with rule for input: {source_rule.input_path}") # DEBUG Verify
        log.debug(f"  VERIFY Rule Details: {source_rule}") # DEBUG Verify (Optional detailed log)
        if not isinstance(source_rule, SourceRule):
            raise ProcessingEngineError("process() requires a valid SourceRule object.")
        if not isinstance(workspace_path, Path) or not workspace_path.is_dir():
            raise ProcessingEngineError(f"Invalid workspace path provided: {workspace_path}")
        if not isinstance(output_base_path, Path):
            raise ProcessingEngineError(f"Invalid output base path provided: {output_base_path}")

        log.info(f"ProcessingEngine starting process for {len(source_rule.assets)} asset(s) defined in SourceRule.")
        overall_status = {"processed": [], "skipped": [], "failed": []}
        self.loaded_data_cache = {}  # Reset cache for this run
        # Store incoming optional values for use in path generation.
        self.current_incrementing_value = incrementing_value
        self.current_sha5_value = sha5_value
        log.debug(f"Received incrementing_value: {self.current_incrementing_value}, sha5_value: {self.current_sha5_value}")

        # Use a temporary directory for intermediate files (like saved maps).
        try:
            self.temp_dir = Path(tempfile.mkdtemp(prefix=self.config_obj.temp_dir_prefix))
            log.debug(f"Created temporary workspace for engine: {self.temp_dir}")
            # `pipeline_orchestrator` is always assigned in __init__ (possibly
            # to None), so a plain truthiness check suffices here.
            if self.pipeline_orchestrator:
                log.info("Processing source rule using PipelineOrchestrator.")
                overall_status = self.pipeline_orchestrator.process_source_rule(
                    source_rule=source_rule,
                    workspace_path=workspace_path,  # Path to the source files (e.g. extracted archive)
                    output_base_path=output_base_path,
                    overwrite=overwrite,
                    incrementing_value=self.current_incrementing_value,
                    sha5_value=self.current_sha5_value
                )
            else:
                log.error(f"PipelineOrchestrator not available for SourceRule '{source_rule.input_path}'. Marking all {len(source_rule.assets)} assets as failed.")
                for asset_rule in source_rule.assets:
                    overall_status["failed"].append(asset_rule.asset_name)

            log.info(f"ProcessingEngine finished. Summary: {overall_status}")
            return overall_status

        except Exception as e:
            log.exception(f"Processing engine failed unexpectedly: {e}")
            # Mark every asset with no recorded outcome as failed.
            # BUGFIX: include the "failed" list in the accounted-for set so an
            # asset the orchestrator already marked failed is not appended a
            # second time (previously only processed/skipped were checked).
            accounted_for = set(
                overall_status["processed"]
                + overall_status["skipped"]
                + overall_status["failed"]
            )
            for asset_rule in source_rule.assets:
                if asset_rule.asset_name not in accounted_for:
                    overall_status["failed"].append(asset_rule.asset_name)
            return overall_status  # Return partial status if possible
        finally:
            self._cleanup_workspace()

    def _cleanup_workspace(self):
        """Removes the temporary workspace directory (if any) and clears the data cache."""
        if self.temp_dir and self.temp_dir.exists():
            try:
                log.debug(f"Cleaning up engine temporary workspace: {self.temp_dir}")
                # Ignore errors during cleanup (e.g., permission errors on copied .git files)
                shutil.rmtree(self.temp_dir, ignore_errors=True)
                self.temp_dir = None
                log.debug("Engine temporary workspace cleaned up successfully.")
            except Exception as e:
                log.error(f"Failed to remove engine temporary workspace {self.temp_dir}: {e}", exc_info=True)
        self.loaded_data_cache = {}  # Clear cache after cleanup
|
|