From 12cf557dd7e3636edc2c65823e217279bb3272d2 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Fri, 9 May 2025 11:32:16 +0200 Subject: [PATCH 01/16] Uncompleted Processing Refactor --- ProjectNotes/ProcessingEngineRefactorPlan.md | 181 ++ main.py | 7 + processing/pipeline/asset_context.py | 24 + processing/pipeline/orchestrator.py | 133 ++ .../stages/alpha_extraction_to_mask.py | 175 ++ .../pipeline/stages/asset_skip_logic.py | 48 + processing/pipeline/stages/base_stage.py | 22 + .../pipeline/stages/file_rule_filter.py | 80 + .../stages/gloss_to_rough_conversion.py | 156 ++ .../stages/individual_map_processing.py | 245 +++ processing/pipeline/stages/map_merging.py | 310 ++++ .../stages/metadata_finalization_save.py | 119 ++ .../stages/metadata_initialization.py | 163 ++ .../stages/normal_map_green_channel.py | 154 ++ .../pipeline/stages/output_organization.py | 155 ++ .../pipeline/stages/supplier_determination.py | 61 + processing/utils/__init__.py | 1 + processing/utils/image_processing_utils.py | 357 ++++ processing_engine.py | 1589 +---------------- tests/__init__.py | 1 + tests/processing/pipeline/__init__.py | 1 + tests/processing/pipeline/stages/__init__.py | 1 + .../stages/test_alpha_extraction_to_mask.py | 273 +++ .../pipeline/stages/test_asset_skip_logic.py | 213 +++ .../pipeline/stages/test_file_rule_filter.py | 330 ++++ .../stages/test_gloss_to_rough_conversion.py | 486 +++++ .../stages/test_individual_map_processing.py | 555 ++++++ .../pipeline/stages/test_map_merging.py | 538 ++++++ .../stages/test_metadata_finalization_save.py | 359 ++++ .../stages/test_metadata_initialization.py | 169 ++ .../stages/test_normal_map_green_channel.py | 323 ++++ .../stages/test_output_organization.py | 417 +++++ .../stages/test_supplier_determination.py | 213 +++ .../processing/pipeline/test_orchestrator.py | 383 ++++ .../utils/test_image_processing_utils.py | 504 ++++++ tests/utils/__init__.py | 1 + tests/utils/test_path_utils.py | 252 +++ utils/path_utils.py | 9 + 38 files changed, 7472 insertions(+), 1536 deletions(-) create mode 100644 ProjectNotes/ProcessingEngineRefactorPlan.md create mode 100644 processing/pipeline/asset_context.py create mode 100644 processing/pipeline/orchestrator.py create mode 100644 processing/pipeline/stages/alpha_extraction_to_mask.py create mode 100644 processing/pipeline/stages/asset_skip_logic.py create mode 100644 processing/pipeline/stages/base_stage.py create mode 100644 processing/pipeline/stages/file_rule_filter.py create mode 100644 processing/pipeline/stages/gloss_to_rough_conversion.py create mode 100644 processing/pipeline/stages/individual_map_processing.py create mode 100644 processing/pipeline/stages/map_merging.py create mode 100644 processing/pipeline/stages/metadata_finalization_save.py create mode 100644 processing/pipeline/stages/metadata_initialization.py create mode 100644 processing/pipeline/stages/normal_map_green_channel.py create mode 100644 processing/pipeline/stages/output_organization.py create mode 100644 processing/pipeline/stages/supplier_determination.py create mode 100644 processing/utils/__init__.py create mode 100644 processing/utils/image_processing_utils.py create mode 100644 tests/__init__.py create mode 100644 tests/processing/pipeline/__init__.py create mode 100644 tests/processing/pipeline/stages/__init__.py create mode 100644 tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py create mode 100644 tests/processing/pipeline/stages/test_asset_skip_logic.py create mode 100644 tests/processing/pipeline/stages/test_file_rule_filter.py create mode 100644 tests/processing/pipeline/stages/test_gloss_to_rough_conversion.py create mode 100644 tests/processing/pipeline/stages/test_individual_map_processing.py create mode 100644 tests/processing/pipeline/stages/test_map_merging.py create mode 100644 tests/processing/pipeline/stages/test_metadata_finalization_save.py create mode 100644 tests/processing/pipeline/stages/test_metadata_initialization.py create mode 100644 tests/processing/pipeline/stages/test_normal_map_green_channel.py create mode 100644 tests/processing/pipeline/stages/test_output_organization.py create mode 100644 tests/processing/pipeline/stages/test_supplier_determination.py create mode 100644 tests/processing/pipeline/test_orchestrator.py create mode 100644 tests/processing/utils/test_image_processing_utils.py create mode 100644 tests/utils/__init__.py create mode 100644 tests/utils/test_path_utils.py diff --git a/ProjectNotes/ProcessingEngineRefactorPlan.md b/ProjectNotes/ProcessingEngineRefactorPlan.md new file mode 100644 index 0000000..1364ab6 --- /dev/null +++ b/ProjectNotes/ProcessingEngineRefactorPlan.md @@ -0,0 +1,181 @@ +# Project Plan: Modularizing the Asset Processing Engine + +**Last Updated:** May 9, 2025 + +**1. Project Vision & Goals** + +* **Vision:** Transform the asset processing pipeline into a highly modular, extensible, and testable system. +* **Primary Goals:** + 1. Decouple processing steps into independent, reusable stages. + 2. Simplify the addition of new processing capabilities (e.g., GLOSS > ROUGH conversion, Alpha to MASK, Normal Map Green Channel inversion). + 3. Improve code maintainability and readability. + 4. Enhance unit and integration testing capabilities for each processing component. + 5. Centralize common utility functions (image manipulation, path generation). + +**2. Proposed Architecture Overview** + +* **Core Concept:** A `PipelineOrchestrator` will manage a sequence of `ProcessingStage`s. Each stage will operate on an `AssetProcessingContext` object, which carries all necessary data and state for a single asset through the pipeline. +* **Key Components:** + * `AssetProcessingContext`: Data class holding asset-specific data, configuration, temporary paths, and status. + * `PipelineOrchestrator`: Class to manage the overall processing flow for a `SourceRule`, iterating through assets and executing the pipeline of stages for each. + * `ProcessingStage` (Base Class/Interface): Defines the contract for all individual processing stages (e.g., `execute(context)` method). + * Specific Stage Classes: (e.g., `SupplierDeterminationStage`, `IndividualMapProcessingStage`, etc.) + * Utility Modules: `image_processing_utils.py`, enhancements to `utils/path_utils.py`. + +**3. Proposed File Structure** + +* `processing/` + * `pipeline/` + * `__init__.py` + * `asset_context.py` (Defines `AssetProcessingContext`) + * `orchestrator.py` (Defines `PipelineOrchestrator`) + * `stages/` + * `__init__.py` + * `base_stage.py` (Defines `ProcessingStage` interface) + * `supplier_determination.py` + * `asset_skip_logic.py` + * `metadata_initialization.py` + * `file_rule_filter.py` + * `gloss_to_rough_conversion.py` + * `alpha_extraction_to_mask.py` + * `normal_map_green_channel.py` + * `individual_map_processing.py` + * `map_merging.py` + * `metadata_finalization.py` + * `output_organization.py` + * `utils/` + * `__init__.py` + * `image_processing_utils.py` (New module for image functions) +* `utils/` (Top-level existing directory) + * `path_utils.py` (To be enhanced with `sanitize_filename` from `processing_engine.py`) + +**4. Detailed Phases and Tasks** + +**Phase 0: Setup & Core Structures Definition** +*Goal: Establish the foundational classes for the new pipeline.* +* **Task 0.1: Define `AssetProcessingContext`** + * Create `processing/pipeline/asset_context.py`. + * Define the `AssetProcessingContext` data class with fields: `source_rule: SourceRule`, `asset_rule: AssetRule`, `workspace_path: Path`, `engine_temp_dir: Path`, `output_base_path: Path`, `effective_supplier: Optional[str]`, `asset_metadata: Dict`, `processed_maps_details: Dict[str, Dict[str, Dict]]`, `merged_maps_details: Dict[str, Dict[str, Dict]]`, `files_to_process: List[FileRule]`, `loaded_data_cache: Dict`, `config_obj: Configuration`, `status_flags: Dict`, `incrementing_value: Optional[str]`, `sha5_value: Optional[str]`. + * Ensure proper type hinting. +* **Task 0.2: Define `ProcessingStage` Base Class/Interface** + * Create `processing/pipeline/stages/base_stage.py`. + * Define an abstract base class `ProcessingStage` with an abstract method `execute(self, context: AssetProcessingContext) -> AssetProcessingContext`. +* **Task 0.3: Implement Initial `PipelineOrchestrator`** + * Create `processing/pipeline/orchestrator.py`. + * Define the `PipelineOrchestrator` class. + * Implement `__init__(self, config_obj: Configuration, stages: List[ProcessingStage])`. + * Implement `process_source_rule(self, source_rule: SourceRule, workspace_path: Path, output_base_path: Path, overwrite: bool, incrementing_value: Optional[str], sha5_value: Optional[str]) -> Dict[str, List[str]]`. + * Handles creation/cleanup of the main engine temporary directory. + * Loops through `source_rule.assets`, initializes `AssetProcessingContext` for each. + * Iterates `self.stages`, calling `stage.execute(context)`. + * Collects overall status. + +**Phase 1: Utility Module Refactoring** +*Goal: Consolidate and centralize common utility functions.* +* **Task 1.1: Refactor Path Utilities** + * Move `_sanitize_filename` from `processing_engine.py` to `utils/path_utils.py`. + * Update uses to call the new utility function. +* **Task 1.2: Create `image_processing_utils.py`** + * Create `processing/utils/image_processing_utils.py`. + * Move general-purpose image functions from `processing_engine.py`: + * `is_power_of_two` + * `get_nearest_pot` + * `calculate_target_dimensions` + * `calculate_image_stats` + * `normalize_aspect_ratio_change` + * Core image loading, BGR<>RGB conversion, generic resizing (from `_load_and_transform_source`). + * Core data type conversion for saving, color conversion for saving, `cv2.imwrite` call (from `_save_image`). + * Ensure functions are pure and testable. + +**Phase 2: Implementing Core Processing Stages (Migrating Existing Logic)** +*Goal: Migrate existing functionalities from `processing_engine.py` into the new stage-based architecture.* +(For each task: create stage file, implement class, move logic, adapt to `AssetProcessingContext`) +* **Task 2.1: Implement `SupplierDeterminationStage`** +* **Task 2.2: Implement `AssetSkipLogicStage`** +* **Task 2.3: Implement `MetadataInitializationStage`** +* **Task 2.4: Implement `FileRuleFilterStage`** (New logic for `item_type == "FILE_IGNORE"`) +* **Task 2.5: Implement `IndividualMapProcessingStage`** (Adapts `_process_individual_maps`, uses `image_processing_utils.py`) +* **Task 2.6: Implement `MapMergingStage`** (Adapts `_merge_maps`, uses `image_processing_utils.py`) +* **Task 2.7: Implement `MetadataFinalizationAndSaveStage`** (Adapts `_generate_metadata_file`, uses `utils.path_utils.generate_path_from_pattern`) +* **Task 2.8: Implement `OutputOrganizationStage`** (Adapts `_organize_output_files`) + +**Phase 3: Implementing New Feature Stages** +*Goal: Add the new desired processing capabilities as distinct stages.* +* **Task 3.1: Implement `GlossToRoughConversionStage`** (Identify gloss, convert, invert, save temp, update `FileRule`) +* **Task 3.2: Implement `AlphaExtractionToMaskStage`** (Check existing mask, find MAP_COL with alpha, extract, save temp, add new `FileRule`) +* **Task 3.3: Implement `NormalMapGreenChannelStage`** (Identify normal maps, invert green based on config, save temp, update `FileRule`) + +**Phase 4: Integration, Testing & Finalization** +*Goal: Assemble the pipeline, test thoroughly, and deprecate old code.* +* **Task 4.1: Configure `PipelineOrchestrator`** + * Instantiate `PipelineOrchestrator` in main application logic with the ordered list of stage instances. +* **Task 4.2: Unit Testing** + * Unit tests for each `ProcessingStage` (mocking `AssetProcessingContext`). + * Unit tests for `image_processing_utils.py` and `utils/path_utils.py` functions. +* **Task 4.3: Integration Testing** + * Test `PipelineOrchestrator` end-to-end with sample data. + * Compare outputs with the existing engine for consistency. +* **Task 4.4: Documentation Update** + * Update developer documentation (e.g., `Documentation/02_Developer_Guide/05_Processing_Pipeline.md`). + * Document `AssetProcessingContext` and stage responsibilities. +* **Task 4.5: Deprecate/Remove Old `ProcessingEngine` Code** + * Gradually remove refactored logic from `processing_engine.py`. + +**5. Workflow Diagram** + +```mermaid +graph TD + AA[Load SourceRule & Config] --> BA(PipelineOrchestrator: process_source_rule); + BA --> CA{For Each Asset in SourceRule}; + CA -- Yes --> DA(Orchestrator: Create AssetProcessingContext); + DA --> EA(SupplierDeterminationStage); + EA -- context --> FA(AssetSkipLogicStage); + FA -- context --> GA{context.skip_asset?}; + GA -- Yes --> HA(Orchestrator: Record Skipped); + HA --> CA; + GA -- No --> IA(MetadataInitializationStage); + IA -- context --> JA(FileRuleFilterStage); + JA -- context --> KA(GlossToRoughConversionStage); + KA -- context --> LA(AlphaExtractionToMaskStage); + LA -- context --> MA(NormalMapGreenChannelStage); + MA -- context --> NA(IndividualMapProcessingStage); + NA -- context --> OA(MapMergingStage); + OA -- context --> PA(MetadataFinalizationAndSaveStage); + PA -- context --> QA(OutputOrganizationStage); + QA -- context --> RA(Orchestrator: Record Processed/Failed); + RA --> CA; + CA -- No --> SA(Orchestrator: Cleanup Engine Temp Dir); + SA --> TA[Processing Complete]; + + subgraph Stages + direction LR + EA + FA + IA + JA + KA + LA + MA + NA + OA + PA + QA + end + + subgraph Utils + direction LR + U1[image_processing_utils.py] + U2[utils/path_utils.py] + end + + NA -.-> U1; + OA -.-> U1; + KA -.-> U1; + LA -.-> U1; + MA -.-> U1; + + PA -.-> U2; + QA -.-> U2; + + classDef context fill:#f9f,stroke:#333,stroke-width:2px; + class DA,EA,FA,IA,JA,KA,LA,MA,NA,OA,PA,QA context; \ No newline at end of file diff --git a/main.py b/main.py index acc0713..25f8049 100644 --- a/main.py +++ b/main.py @@ -21,6 +21,11 @@ from PySide6.QtCore import Qt from PySide6.QtWidgets import QApplication # --- Backend Imports --- +# Add current directory to sys.path for direct execution +import sys +import os +sys.path.append(os.path.dirname(__file__)) + try: from configuration import Configuration, ConfigurationError from processing_engine import ProcessingEngine @@ -29,6 +34,8 @@ try: from utils.workspace_utils import prepare_processing_workspace except ImportError as e: script_dir = Path(__file__).parent.resolve() + print(f"ERROR: Cannot import Configuration or rule_structure classes.") + print(f"Ensure configuration.py and rule_structure.py are in the same directory or Python path.") print(f"ERROR: Failed to import necessary classes: {e}") print(f"Ensure 'configuration.py' and 'asset_processor.py' exist in the directory:") print(f" {script_dir}") diff --git a/processing/pipeline/asset_context.py b/processing/pipeline/asset_context.py new file mode 100644 index 0000000..5b411d7 --- /dev/null +++ b/processing/pipeline/asset_context.py @@ -0,0 +1,24 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional + +from rule_structure import AssetRule, FileRule, SourceRule +from configuration import Configuration + +@dataclass +class AssetProcessingContext: + source_rule: SourceRule + asset_rule: AssetRule + workspace_path: Path + engine_temp_dir: Path + output_base_path: Path + effective_supplier: Optional[str] + asset_metadata: Dict + processed_maps_details: Dict[str, Dict[str, Dict]] + merged_maps_details: Dict[str, Dict[str, Dict]] + files_to_process: List[FileRule] + loaded_data_cache: Dict + config_obj: Configuration + status_flags: Dict + incrementing_value: Optional[str] + sha5_value: Optional[str] \ No newline at end of file diff --git a/processing/pipeline/orchestrator.py b/processing/pipeline/orchestrator.py new file mode 100644 index 0000000..6396f38 --- /dev/null +++ b/processing/pipeline/orchestrator.py @@ -0,0 +1,133 @@ +from typing import List, Dict, Optional +from pathlib import Path +import shutil +import tempfile +import logging + +from configuration import Configuration +from rule_structure import SourceRule, AssetRule +from .asset_context import AssetProcessingContext +from .stages.base_stage import ProcessingStage + +log = logging.getLogger(__name__) + +class PipelineOrchestrator: + """ + Orchestrates the processing of assets based on source rules and a series of processing stages. + """ + + def __init__(self, config_obj: Configuration, stages: List[ProcessingStage]): + """ + Initializes the PipelineOrchestrator. + + Args: + config_obj: The main configuration object. + stages: A list of processing stages to be executed in order. + """ + self.config_obj: Configuration = config_obj + self.stages: List[ProcessingStage] = stages + + def process_source_rule( + self, + source_rule: SourceRule, + workspace_path: Path, + output_base_path: Path, + overwrite: bool, # Not used in this initial implementation, but part of the signature + incrementing_value: Optional[str], + sha5_value: Optional[str] # Corrected from sha5_value to sha256_value as per typical usage, assuming typo + ) -> Dict[str, List[str]]: + """ + Processes a single source rule, iterating through its asset rules and applying all stages. + + Args: + source_rule: The source rule to process. + workspace_path: The base path of the workspace. + output_base_path: The base path for output files. + overwrite: Whether to overwrite existing files (not fully implemented yet). + incrementing_value: An optional incrementing value for versioning or naming. + sha5_value: An optional SHA5 hash value for the asset (assuming typo, likely sha256). + + Returns: + A dictionary summarizing the processing status of assets. + """ + overall_status: Dict[str, List[str]] = { + "processed": [], + "skipped": [], + "failed": [], + } + engine_temp_dir_path: Optional[Path] = None # Initialize to None + + try: + # Create a temporary directory for this processing run if needed by any stage + # This temp dir is for the entire source_rule processing, not per asset. + # Individual stages might create their own sub-temp dirs if necessary. + temp_dir_path_str = tempfile.mkdtemp( + prefix="asset_processor_orchestrator_temp_", dir=self.config_obj.get_temp_directory_base() + ) + engine_temp_dir_path = Path(temp_dir_path_str) + log.debug(f"PipelineOrchestrator created temporary directory: {engine_temp_dir_path}") + + + for asset_rule in source_rule.assets: + log.debug(f"Orchestrator: Processing asset '{asset_rule.name}'") + context = AssetProcessingContext( + source_rule=source_rule, + asset_rule=asset_rule, + workspace_path=workspace_path, # This is the path to the source files (e.g. extracted archive) + engine_temp_dir=engine_temp_dir_path, # Pass the orchestrator's temp dir + output_base_path=output_base_path, + effective_supplier=None, # Will be set by SupplierDeterminationStage + asset_metadata={}, # Will be populated by stages + processed_maps_details={}, # Will be populated by stages + merged_maps_details={}, # Will be populated by stages + files_to_process=[], # Will be populated by FileRuleFilterStage + loaded_data_cache={}, # For image loading cache within this asset's processing + config_obj=self.config_obj, + status_flags={"skip_asset": False, "asset_failed": False}, # Initialize common flags + incrementing_value=incrementing_value, + sha256_value=sha5_value # Parameter name in context is sha256_value + ) + + for stage_idx, stage in enumerate(self.stages): + log.debug(f"Asset '{asset_rule.name}': Executing stage {stage_idx + 1}/{len(self.stages)}: {stage.__class__.__name__}") + try: + context = stage.execute(context) + except Exception as e: + log.error(f"Asset '{asset_rule.name}': Error during stage '{stage.__class__.__name__}': {e}", exc_info=True) + context.status_flags["asset_failed"] = True + context.asset_metadata["status"] = f"Failed: Error in stage {stage.__class__.__name__}" + context.asset_metadata["error_message"] = str(e) + break # Stop processing stages for this asset on error + + if context.status_flags.get("skip_asset"): + log.info(f"Asset '{asset_rule.name}': Skipped by stage '{stage.__class__.__name__}'. Reason: {context.status_flags.get('skip_reason', 'N/A')}") + break # Skip remaining stages for this asset + + # Refined status collection + if context.status_flags.get('skip_asset'): + overall_status["skipped"].append(asset_rule.name) + elif context.status_flags.get('asset_failed') or str(context.asset_metadata.get('status', '')).startswith("Failed"): + overall_status["failed"].append(asset_rule.name) + elif context.asset_metadata.get('status') == "Processed": + overall_status["processed"].append(asset_rule.name) + else: # Default or unknown state + log.warning(f"Asset '{asset_rule.name}': Unknown status after pipeline execution. Metadata status: '{context.asset_metadata.get('status')}'. Marking as failed.") + overall_status["failed"].append(f"{asset_rule.name} (Unknown Status: {context.asset_metadata.get('status')})") + log.debug(f"Asset '{asset_rule.name}' final status: {context.asset_metadata.get('status', 'N/A')}, Flags: {context.status_flags}") + + except Exception as e: + log.error(f"PipelineOrchestrator.process_source_rule failed: {e}", exc_info=True) + # Mark all remaining assets as failed if a top-level error occurs + processed_or_skipped_or_failed = set(overall_status["processed"] + overall_status["skipped"] + overall_status["failed"]) + for asset_rule in source_rule.assets: + if asset_rule.name not in processed_or_skipped_or_failed: + overall_status["failed"].append(f"{asset_rule.name} (Orchestrator Error)") + finally: + if engine_temp_dir_path and engine_temp_dir_path.exists(): + try: + log.debug(f"PipelineOrchestrator cleaning up temporary directory: {engine_temp_dir_path}") + shutil.rmtree(engine_temp_dir_path, ignore_errors=True) + except Exception as e: + log.error(f"Error cleaning up orchestrator temporary directory {engine_temp_dir_path}: {e}", exc_info=True) + + return overall_status \ No newline at end of file diff --git a/processing/pipeline/stages/alpha_extraction_to_mask.py b/processing/pipeline/stages/alpha_extraction_to_mask.py new file mode 100644 index 0000000..ca1ea38 --- /dev/null +++ b/processing/pipeline/stages/alpha_extraction_to_mask.py @@ -0,0 +1,175 @@ +import logging +import uuid +from pathlib import Path +from typing import List, Optional, Dict + +import numpy as np + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from ...utils import image_processing_utils as ipu +from .....rule_structure import FileRule, TransformSettings +from .....utils.path_utils import sanitize_filename + +logger = logging.getLogger(__name__) + +class AlphaExtractionToMaskStage(ProcessingStage): + """ + Extracts an alpha channel from a suitable source map (e.g., Albedo, Diffuse) + to generate a MASK map if one is not explicitly defined. + """ + SUITABLE_SOURCE_MAP_TYPES = ["ALBEDO", "DIFFUSE", "BASE_COLOR"] # Map types likely to have alpha + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + logger.debug(f"Asset '{context.asset_rule.name}': Running AlphaExtractionToMaskStage.") + + if context.status_flags.get('skip_asset'): + logger.debug(f"Asset '{context.asset_rule.name}': Skipping due to 'skip_asset' flag.") + return context + + if not context.files_to_process or not context.processed_maps_details: + logger.debug( + f"Asset '{context.asset_rule.name}': Skipping alpha extraction - " + f"no files to process or no processed map details." + ) + return context + + # A. Check for Existing MASK Map + for file_rule in context.files_to_process: + if file_rule.map_type == "MASK": + logger.info( + f"Asset '{context.asset_rule.name}': MASK map already defined by FileRule " + f"'{file_rule.filename_pattern}'. Skipping alpha extraction." + ) + return context + + # B. Find Suitable Source Map with Alpha + source_map_details_for_alpha: Optional[Dict] = None + source_file_rule_id_for_alpha: Optional[str] = None + + for file_rule_id, details in context.processed_maps_details.items(): + if details.get('status') == 'Processed' and \ + details.get('map_type') in self.SUITABLE_SOURCE_MAP_TYPES: + try: + temp_path = Path(details['temp_processed_file']) + if not temp_path.exists(): + logger.warning( + f"Asset '{context.asset_rule.name}': Temp file {temp_path} for map " + f"{details['map_type']} (ID: {file_rule_id}) does not exist. Cannot check for alpha." + ) + continue + + # Load image header or minimal data to check for alpha if possible, + # otherwise load full image. ipu.load_image should handle this. + image_data = ipu.load_image(temp_path) + + if image_data is not None and image_data.ndim == 3 and image_data.shape[2] == 4: + source_map_details_for_alpha = details + source_file_rule_id_for_alpha = file_rule_id + logger.info( + f"Asset '{context.asset_rule.name}': Found potential source for alpha extraction: " + f"{temp_path} (MapType: {details['map_type']})" + ) + break + except Exception as e: + logger.warning( + f"Asset '{context.asset_rule.name}': Error checking alpha for {details.get('temp_processed_file', 'N/A')}: {e}" + ) + continue + + + if source_map_details_for_alpha is None or source_file_rule_id_for_alpha is None: + logger.info( + f"Asset '{context.asset_rule.name}': No suitable source map with alpha channel found " + f"for MASK extraction." + ) + return context + + # C. Extract Alpha Channel + source_image_path = Path(source_map_details_for_alpha['temp_processed_file']) + full_image_data = ipu.load_image(source_image_path) # Reload to ensure we have the original RGBA + + if full_image_data is None or not (full_image_data.ndim == 3 and full_image_data.shape[2] == 4): + logger.error( + f"Asset '{context.asset_rule.name}': Failed to reload or verify alpha channel from " + f"{source_image_path} for MASK extraction." + ) + return context + + alpha_channel: np.ndarray = full_image_data[:, :, 3] # Extract alpha (0-255) + + # D. Save New Temporary MASK Map + # Ensure the mask is a 2D grayscale image. If ipu.save_image expects 3 channels for grayscale, adapt. + # Assuming ipu.save_image can handle a 2D numpy array for a grayscale image. + if alpha_channel.ndim == 2: # Expected + pass + elif alpha_channel.ndim == 3 and alpha_channel.shape[2] == 1: # (H, W, 1) + alpha_channel = alpha_channel.squeeze(axis=2) + else: + logger.error( + f"Asset '{context.asset_rule.name}': Extracted alpha channel has unexpected dimensions: " + f"{alpha_channel.shape}. Cannot save." + ) + return context + + mask_temp_filename = ( + f"mask_from_alpha_{sanitize_filename(source_map_details_for_alpha['map_type'])}" + f"_{source_file_rule_id_for_alpha}{source_image_path.suffix}" + ) + mask_temp_path = context.engine_temp_dir / mask_temp_filename + + save_success = ipu.save_image(mask_temp_path, alpha_channel) + + if not save_success: + logger.error( + f"Asset '{context.asset_rule.name}': Failed to save extracted alpha mask to {mask_temp_path}." + ) + return context + + logger.info( + f"Asset '{context.asset_rule.name}': Extracted alpha and saved as new MASK map: {mask_temp_path}" + ) + + # E. Create New FileRule for the MASK and Update Context + new_mask_file_rule_id_obj = uuid.uuid4() + new_mask_file_rule_id_str = str(new_mask_file_rule_id_obj) # Use string for FileRule.id + new_mask_file_rule_id_hex = new_mask_file_rule_id_obj.hex # Use hex for dict key + + new_mask_file_rule = FileRule( + id=new_mask_file_rule_id_str, + map_type="MASK", + filename_pattern=mask_temp_path.name, # Pattern matches the generated temp file + item_type="MAP_COL", # Considered a collected map post-generation + active=True, + transform_settings=TransformSettings(), # Default transform settings + source_map_ids_for_generation=[source_file_rule_id_for_alpha] # Link to original source + # Ensure other necessary FileRule fields are defaulted or set if required + ) + + context.files_to_process.append(new_mask_file_rule) + + original_dims = source_map_details_for_alpha.get('original_dimensions') + if original_dims is None and full_image_data is not None: # Fallback if not in details + original_dims = (full_image_data.shape[1], full_image_data.shape[0]) + + + context.processed_maps_details[new_mask_file_rule_id_hex] = { + 'map_type': "MASK", + 'source_file': str(source_image_path), # Original RGBA map path + 'temp_processed_file': str(mask_temp_path), # Path to the new MASK map + 'original_dimensions': original_dims, # Dimensions of the source image + 'processed_dimensions': (alpha_channel.shape[1], alpha_channel.shape[0]), # Dimensions of MASK + 'status': 'Processed', # This map is now considered processed + 'notes': ( + f"Generated from alpha of {source_map_details_for_alpha['map_type']} " + f"(Source Rule ID: {source_file_rule_id_for_alpha})" + ), + 'file_rule_id': new_mask_file_rule_id_str # Link back to the new FileRule ID + } + + logger.info( + f"Asset '{context.asset_rule.name}': Added new FileRule for generated MASK " + f"(ID: {new_mask_file_rule_id_str}) and updated processed_maps_details." + ) + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/asset_skip_logic.py b/processing/pipeline/stages/asset_skip_logic.py new file mode 100644 index 0000000..afb5b3c --- /dev/null +++ b/processing/pipeline/stages/asset_skip_logic.py @@ -0,0 +1,48 @@ +import logging +from ..base_stage import ProcessingStage +from ...asset_context import AssetProcessingContext + +class AssetSkipLogicStage(ProcessingStage): + """ + Processing stage to determine if an asset should be skipped based on various conditions. + """ + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the asset skip logic. + + Args: + context: The asset processing context. + + Returns: + The updated asset processing context. + """ + context.status_flags['skip_asset'] = False # Initialize/reset skip flag + + # 1. Check for Supplier Error + # Assuming 'supplier_error' might be set by a previous stage (e.g., SupplierDeterminationStage) + # or if effective_supplier is None after attempts to determine it. + if context.effective_supplier is None or context.status_flags.get('supplier_error', False): + logging.info(f"Asset '{context.asset_rule.name}': Skipping due to missing or invalid supplier.") + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Invalid or missing supplier" + return context + + # 2. Check asset_rule.process_status + if context.asset_rule.process_status == "SKIP": + logging.info(f"Asset '{context.asset_rule.name}': Skipping as per process_status 'SKIP'.") + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Process status set to SKIP" + return context + + if context.asset_rule.process_status == "PROCESSED" and \ + not context.config_obj.general_settings.overwrite_existing: + logging.info( + f"Asset '{context.asset_rule.name}': Skipping as it's already 'PROCESSED' " + f"and overwrite is disabled." + ) + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Already processed, overwrite disabled" + return context + + # If none of the above conditions are met, skip_asset remains False. + return context \ No newline at end of file diff --git a/processing/pipeline/stages/base_stage.py b/processing/pipeline/stages/base_stage.py new file mode 100644 index 0000000..321a0d4 --- /dev/null +++ b/processing/pipeline/stages/base_stage.py @@ -0,0 +1,22 @@ +from abc import ABC, abstractmethod + +from ..asset_context import AssetProcessingContext + + +class ProcessingStage(ABC): + """ + Abstract base class for a stage in the asset processing pipeline. + """ + + @abstractmethod + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the processing logic of this stage. + + Args: + context: The current asset processing context. + + Returns: + The updated asset processing context. + """ + pass \ No newline at end of file diff --git a/processing/pipeline/stages/file_rule_filter.py b/processing/pipeline/stages/file_rule_filter.py new file mode 100644 index 0000000..b7ae7c3 --- /dev/null +++ b/processing/pipeline/stages/file_rule_filter.py @@ -0,0 +1,80 @@ +import logging +import fnmatch +from typing import List, Set + +from ..base_stage import ProcessingStage +from ...asset_context import AssetProcessingContext +from .....rule_structure import FileRule + + +class FileRuleFilterStage(ProcessingStage): + """ + Determines which FileRules associated with an AssetRule should be processed. + Populates context.files_to_process, respecting FILE_IGNORE rules. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the file rule filtering logic. + + Args: + context: The AssetProcessingContext for the current asset. + + Returns: + The modified AssetProcessingContext. + """ + if context.status_flags.get('skip_asset'): + logging.debug(f"Asset '{context.asset_rule.name}': Skipping FileRuleFilterStage due to 'skip_asset' flag.") + return context + + context.files_to_process: List[FileRule] = [] + ignore_patterns: Set[str] = set() + + # Step 1: Collect all FILE_IGNORE patterns + if context.asset_rule and context.asset_rule.file_rules: + for file_rule in context.asset_rule.file_rules: + if file_rule.item_type == "FILE_IGNORE" and file_rule.active: + ignore_patterns.add(file_rule.filename_pattern) + logging.debug( + f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{file_rule.filename_pattern}'" + ) + else: + logging.debug(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': No file rules to process or asset_rule is None.") + # Still need to return context even if there are no rules + logging.info(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': 0 file rules queued for processing after filtering.") + return context + + + # Step 2: Filter and add processable FileRules + for file_rule in context.asset_rule.file_rules: + if not file_rule.active: + logging.debug( + f"Asset '{context.asset_rule.name}': Skipping inactive file rule '{file_rule.filename_pattern}'." + ) + continue + + if file_rule.item_type == "FILE_IGNORE": + # Already processed, skip. + continue + + is_ignored = False + for ignore_pat in ignore_patterns: + if fnmatch.fnmatch(file_rule.filename_pattern, ignore_pat): + is_ignored = True + logging.debug( + f"Asset '{context.asset_rule.name}': Skipping file rule '{file_rule.filename_pattern}' " + f"due to matching ignore pattern '{ignore_pat}'." + ) + break + + if not is_ignored: + context.files_to_process.append(file_rule) + logging.debug( + f"Asset '{context.asset_rule.name}': Adding file rule '{file_rule.filename_pattern}' " + f"(type: {file_rule.item_type}) to processing queue." + ) + + logging.info( + f"Asset '{context.asset_rule.name}': {len(context.files_to_process)} file rules queued for processing after filtering." + ) + return context \ No newline at end of file diff --git a/processing/pipeline/stages/gloss_to_rough_conversion.py b/processing/pipeline/stages/gloss_to_rough_conversion.py new file mode 100644 index 0000000..d99f06a --- /dev/null +++ b/processing/pipeline/stages/gloss_to_rough_conversion.py @@ -0,0 +1,156 @@ +import logging +from pathlib import Path +import numpy as np +from typing import List + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from ...rule_structure import FileRule +from ..utils import image_processing_utils as ipu +from ...utils.path_utils import sanitize_filename + +logger = logging.getLogger(__name__) + +class GlossToRoughConversionStage(ProcessingStage): + """ + Processing stage to convert glossiness maps to roughness maps. + Iterates through FileRules, identifies GLOSS maps, loads their + corresponding temporary processed images, inverts them, and saves + them as new temporary ROUGHNESS maps. Updates the FileRule and + context.processed_maps_details accordingly. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the gloss to roughness conversion logic. + + Args: + context: The AssetProcessingContext containing asset and processing details. + + Returns: + The updated AssetProcessingContext. + """ + if context.status_flags.get('skip_asset'): + logger.debug(f"Asset '{context.asset_rule.name}': Skipping GlossToRoughConversionStage due to skip_asset flag.") + return context + + if not context.files_to_process or not context.processed_maps_details: + logger.debug( + f"Asset '{context.asset_rule.name}': No files to process or processed_maps_details empty " + f"in GlossToRoughConversionStage. Skipping." + ) + return context + + new_files_to_process: List[FileRule] = [] + processed_a_gloss_map = False + + logger.info(f"Asset '{context.asset_rule.name}': Starting Gloss to Roughness Conversion Stage.") + + for idx, file_rule in enumerate(context.files_to_process): + if file_rule.map_type == "GLOSS": + map_detail_key = file_rule.id.hex + if map_detail_key not in context.processed_maps_details: + logger.warning( + f"Asset '{context.asset_rule.name}': GLOSS map '{file_rule.source_file_path}' " + f"(ID: {map_detail_key}) found in files_to_process but not in processed_maps_details. " + f"Adding original rule and skipping conversion for this map." + ) + new_files_to_process.append(file_rule) + continue + + map_details = context.processed_maps_details[map_detail_key] + + if map_details.get('status') != 'Processed' or 'temp_processed_file' not in map_details: + logger.warning( + f"Asset '{context.asset_rule.name}': GLOSS map '{file_rule.source_file_path}' " + f"(ID: {map_detail_key}) not successfully processed by previous stage or temp file missing. " + f"Status: {map_details.get('status')}. Adding original rule and skipping conversion." + ) + new_files_to_process.append(file_rule) + continue + + original_temp_path_str = map_details['temp_processed_file'] + original_temp_path = Path(original_temp_path_str) + + if not original_temp_path.exists(): + logger.error( + f"Asset '{context.asset_rule.name}': Temporary file {original_temp_path_str} for GLOSS map " + f"(ID: {map_detail_key}) does not exist. Adding original rule and skipping conversion." + ) + new_files_to_process.append(file_rule) + continue + + logger.debug(f"Asset '{context.asset_rule.name}': Processing GLOSS map {original_temp_path} for conversion.") + image_data = ipu.load_image(original_temp_path) + + if image_data is None: + logger.error( + f"Asset '{context.asset_rule.name}': Failed to load image data from {original_temp_path} " + f"for GLOSS map (ID: {map_detail_key}). Adding original rule and skipping conversion." + ) + new_files_to_process.append(file_rule) + continue + + # Perform Inversion + inverted_image_data: np.ndarray + if np.issubdtype(image_data.dtype, np.floating): + inverted_image_data = 1.0 - image_data + inverted_image_data = np.clip(inverted_image_data, 0.0, 1.0) # Ensure range for floats + logger.debug(f"Asset '{context.asset_rule.name}': Inverted float image data for {original_temp_path}.") + elif np.issubdtype(image_data.dtype, np.integer): + max_val = np.iinfo(image_data.dtype).max + inverted_image_data = max_val - image_data + logger.debug(f"Asset '{context.asset_rule.name}': Inverted integer image data (max_val: {max_val}) for {original_temp_path}.") + else: + logger.error( + f"Asset '{context.asset_rule.name}': Unsupported image data type {image_data.dtype} " + f"for GLOSS map {original_temp_path}. Cannot invert. Adding original rule." + ) + new_files_to_process.append(file_rule) + continue + + # Save New Temporary (Roughness) Map + # Using original_temp_path.suffix ensures we keep the format (e.g., .png, .exr) + new_temp_filename = f"rough_from_gloss_{sanitize_filename(file_rule.map_type)}_{file_rule.id.hex}{original_temp_path.suffix}" + new_temp_path = context.engine_temp_dir / new_temp_filename + + save_success = ipu.save_image(new_temp_path, inverted_image_data) + + if save_success: + logger.info( + f"Asset '{context.asset_rule.name}': Converted GLOSS map {original_temp_path} " + f"to ROUGHNESS map {new_temp_path}." + ) + + modified_file_rule = file_rule.model_copy(deep=True) + modified_file_rule.map_type = "ROUGHNESS" + + # Update context.processed_maps_details for the original file_rule.id.hex + context.processed_maps_details[map_detail_key]['temp_processed_file'] = str(new_temp_path) + context.processed_maps_details[map_detail_key]['original_map_type_before_conversion'] = "GLOSS" + context.processed_maps_details[map_detail_key]['notes'] = "Converted from GLOSS by GlossToRoughConversionStage" + + new_files_to_process.append(modified_file_rule) + processed_a_gloss_map = True + else: + logger.error( + f"Asset '{context.asset_rule.name}': Failed to save inverted ROUGHNESS map to {new_temp_path} " + f"for original GLOSS map (ID: {map_detail_key}). Adding original rule." + ) + new_files_to_process.append(file_rule) + else: # Not a gloss map + new_files_to_process.append(file_rule) + + context.files_to_process = new_files_to_process + + if processed_a_gloss_map: + logger.info( + f"Asset '{context.asset_rule.name}': Gloss to Roughness conversion stage successfully processed one or more maps and updated file list." + ) + else: + logger.debug( + f"Asset '{context.asset_rule.name}': No gloss maps were successfully converted in GlossToRoughConversionStage. " + f"File list for next stage contains original non-gloss maps and any gloss maps that failed conversion." + ) + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py new file mode 100644 index 0000000..72552c4 --- /dev/null +++ b/processing/pipeline/stages/individual_map_processing.py @@ -0,0 +1,245 @@ +import os +import logging +from pathlib import Path +from typing import Optional, Tuple, Dict + +import cv2 +import numpy as np + +from ..base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from ....rule_structure import FileRule, TransformSettings +from ....utils.path_utils import sanitize_filename +from ...utils import image_processing_utils as ipu + +logger = logging.getLogger(__name__) + +class IndividualMapProcessingStage(ProcessingStage): + """ + Processes individual texture map files based on FileRules. + This stage finds the source file, loads it, applies transformations + (resize, color space), saves a temporary processed version, and updates + the AssetProcessingContext with details. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the individual map processing logic. + """ + if context.status_flags.get('skip_asset', False): + logger.info(f"Asset {context.asset_id}: Skipping individual map processing due to skip_asset flag.") + return context + + if not hasattr(context, 'processed_maps_details') or context.processed_maps_details is None: + context.processed_maps_details = {} + logger.debug(f"Asset {context.asset_id}: Initialized processed_maps_details.") + + if not context.files_to_process: + logger.info(f"Asset {context.asset_id}: No files to process in this stage.") + return context + + source_base_path = Path(context.asset_rule.source_path) + if not source_base_path.is_dir(): + logger.error(f"Asset {context.asset_id}: Source path '{source_base_path}' is not a valid directory. Skipping individual map processing.") + context.status_flags['individual_map_processing_failed'] = True + # Potentially mark all file_rules as failed if source path is invalid + for file_rule in context.files_to_process: + if file_rule.item_type.startswith("MAP_"): # General check for map types + self._update_file_rule_status(context, file_rule.id.hex, 'Failed', details="Source path invalid") + return context + + for file_rule in context.files_to_process: + # Primarily focus on "MAP_COL", "MAP_NORM", "MAP_ROUGH", etc. + # For now, let's assume any item_type starting with "MAP_" is a candidate + # unless it's specifically handled by another stage (e.g., "MAP_GEN" might be). + # The prompt mentions "MAP_COL" primarily. + # Let's be a bit more specific for now, focusing on types that are typically direct file mappings. + # This can be refined based on how `item_type` is used for generated maps. + # For now, we'll process any `FileRule` that isn't explicitly a generated map type + # that would be handled *after* individual processing (e.g. a composite map). + # A simple check for now: + if not file_rule.item_type or not file_rule.item_type.startswith("MAP_") or file_rule.item_type == "MAP_GEN_COMPOSITE": # Example exclusion + logger.debug(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Skipping, item_type '{file_rule.item_type}' not targeted for individual processing.") + continue + + logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Starting individual processing.") + + # A. Find Source File + source_file_path = self._find_source_file(source_base_path, file_rule.filename_pattern, context.asset_id, file_rule.id.hex) + if not source_file_path: + logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Source file not found with pattern '{file_rule.filename_pattern}' in '{source_base_path}'.") + self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, details="Source file not found") + continue + + # B. Load and Transform Image + image_data: Optional[np.ndarray] = ipu.load_image(str(source_file_path)) + if image_data is None: + logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Failed to load image from '{source_file_path}'.") + self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, source_file=str(source_file_path), details="Image load failed") + continue + + original_height, original_width = image_data.shape[:2] + logger.debug(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Loaded image '{source_file_path}' with dimensions {original_width}x{original_height}.") + + transform: TransformSettings = file_rule.transform_settings + + target_width, target_height = ipu.calculate_target_dimensions( + original_width, original_height, + transform.target_width, transform.target_height, + transform.resize_mode, + transform.ensure_pot, + transform.allow_upscale + ) + logger.debug(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Original dims: ({original_width},{original_height}), Calculated target dims: ({target_width},{target_height})") + + processed_image_data = image_data.copy() # Start with a copy + + if (target_width, target_height) != (original_width, original_height): + logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Resizing from ({original_width},{original_height}) to ({target_width},{target_height}).") + # Map resize_filter string to cv2 interpolation constant + interpolation_map = { + "NEAREST": cv2.INTER_NEAREST, + "LINEAR": cv2.INTER_LINEAR, + "CUBIC": cv2.INTER_CUBIC, + "AREA": cv2.INTER_AREA, # Good for downscaling + "LANCZOS4": cv2.INTER_LANCZOS4 + } + interpolation = interpolation_map.get(transform.resize_filter.upper(), cv2.INTER_AREA) # Default to INTER_AREA + processed_image_data = ipu.resize_image(processed_image_data, target_width, target_height, interpolation=interpolation) + if processed_image_data is None: # Should not happen if resize_image handles errors, but good practice + logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Failed to resize image.") + self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), details="Image resize failed") + continue + + + # Color Space Conversion (simplified) + # Assuming ipu.load_image loads as BGR if color. + # This needs more robust handling of source color profiles if they are known. + if transform.color_profile_management and transform.target_color_profile == "RGB": + if len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 3: # Check if it's a color image + logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Converting BGR to RGB.") + processed_image_data = ipu.convert_bgr_to_rgb(processed_image_data) + elif len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 4: # Check for BGRA + logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Converting BGRA to RGBA.") + processed_image_data = ipu.convert_bgra_to_rgba(processed_image_data) + + + # C. Save Temporary Processed Map + # Ensure engine_temp_dir exists (orchestrator should handle this, but good to be safe) + if not context.engine_temp_dir.exists(): + try: + context.engine_temp_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Asset {context.asset_id}: Created engine_temp_dir at '{context.engine_temp_dir}'") + except OSError as e: + logger.error(f"Asset {context.asset_id}: Failed to create engine_temp_dir '{context.engine_temp_dir}': {e}") + self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, source_file=str(source_file_path), details="Failed to create temp directory") + continue # Or potentially fail the whole asset processing here + + temp_filename_suffix = Path(source_file_path).suffix + # Use a more descriptive name if possible, including map_type + safe_map_type = sanitize_filename(file_rule.map_type if file_rule.map_type else "unknown_map") + temp_output_filename = f"processed_{safe_map_type}_{file_rule.id.hex}{temp_filename_suffix}" + temp_output_path = context.engine_temp_dir / temp_output_filename + + # Consider output_format_settings from transform if they apply here + # For now, save_image handles basic saving. + # Example: cv2.imwrite params for quality for JPG, compression for PNG + save_params = [] + if transform.output_format_settings: + if temp_filename_suffix.lower() in ['.jpg', '.jpeg']: + quality = transform.output_format_settings.get('quality', 95) + save_params = [cv2.IMWRITE_JPEG_QUALITY, quality] + elif temp_filename_suffix.lower() == '.png': + compression = transform.output_format_settings.get('compression_level', 3) # 0-9, 3 is default + save_params = [cv2.IMWRITE_PNG_COMPRESSION, compression] + # Add more formats as needed (e.g., EXR, TIFF) + + save_success = ipu.save_image(str(temp_output_path), processed_image_data, params=save_params) + + if not save_success: + logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Failed to save temporary image to '{temp_output_path}'.") + self._update_file_rule_status( + context, file_rule.id.hex, 'Failed', + map_type=file_rule.map_type, + source_file=str(source_file_path), + original_dimensions=(original_width, original_height), + processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]) if processed_image_data is not None else None, + details="Temporary image save failed" + ) + continue + + logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Successfully processed and saved temporary map to '{temp_output_path}'.") + + # D. Update Context + self._update_file_rule_status( + context, file_rule.id.hex, 'Processed', + map_type=file_rule.map_type, + source_file=str(source_file_path), + temp_processed_file=str(temp_output_path), + original_dimensions=(original_width, original_height), + processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]), + details="Successfully processed" + ) + + # Optional: Update context.asset_metadata['processed_files'] + if 'processed_files' not in context.asset_metadata: + context.asset_metadata['processed_files'] = [] + context.asset_metadata['processed_files'].append({ + 'file_rule_id': file_rule.id.hex, + 'path': str(temp_output_path), + 'type': 'temporary_map', + 'map_type': file_rule.map_type + }) + + + logger.info(f"Asset {context.asset_id}: Finished individual map processing stage.") + return context + + def _find_source_file(self, base_path: Path, pattern: str, asset_id: str, file_rule_id_hex: str) -> Optional[Path]: + """ + Finds a single source file matching the pattern within the base_path. + Adapts logic from ProcessingEngine._find_source_file. + """ + if not pattern: + logger.warning(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Empty filename pattern provided.") + return None + + try: + # Using rglob for potentially nested structures, though original might have been simpler. + # If pattern is exact filename, it will also work. + # If pattern is a glob, it will search. + matched_files = list(base_path.rglob(pattern)) + + if not matched_files: + logger.debug(f"Asset {asset_id}, FileRule {file_rule_id_hex}: No files found matching pattern '{pattern}' in '{base_path}' (recursive).") + # Try non-recursive if rglob fails and pattern might be for top-level + matched_files_non_recursive = list(base_path.glob(pattern)) + if matched_files_non_recursive: + logger.debug(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Found {len(matched_files_non_recursive)} files non-recursively. Using first: {matched_files_non_recursive[0]}") + return matched_files_non_recursive[0] + return None + + if len(matched_files) > 1: + logger.warning(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Multiple files ({len(matched_files)}) found for pattern '{pattern}' in '{base_path}'. Using the first one: {matched_files[0]}. Files: {matched_files}") + + return matched_files[0] + + except Exception as e: + logger.error(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Error searching for file with pattern '{pattern}' in '{base_path}': {e}") + return None + + def _update_file_rule_status(self, context: AssetProcessingContext, file_rule_id_hex: str, status: str, **kwargs): + """Helper to update processed_maps_details for a file_rule.""" + if file_rule_id_hex not in context.processed_maps_details: + context.processed_maps_details[file_rule_id_hex] = {} + + context.processed_maps_details[file_rule_id_hex]['status'] = status + for key, value in kwargs.items(): + context.processed_maps_details[file_rule_id_hex][key] = value + + # Ensure essential keys are present even on failure, if known + if 'map_type' not in context.processed_maps_details[file_rule_id_hex] and 'map_type' in kwargs: + context.processed_maps_details[file_rule_id_hex]['map_type'] = kwargs['map_type'] + + + logger.debug(f"Asset {context.asset_id}, FileRule {file_rule_id_hex}: Status updated to '{status}'. Details: {kwargs}") \ No newline at end of file diff --git a/processing/pipeline/stages/map_merging.py b/processing/pipeline/stages/map_merging.py new file mode 100644 index 0000000..6e0fd0f --- /dev/null +++ b/processing/pipeline/stages/map_merging.py @@ -0,0 +1,310 @@ +import logging +from pathlib import Path +from typing import Dict, Optional, List, Tuple + +import numpy as np +import cv2 # For potential direct cv2 operations if ipu doesn't cover all merge needs + +from ..base_stage import ProcessingStage +from ...asset_context import AssetProcessingContext +from ....rule_structure import FileRule, MergeSettings, MergeInputChannel +from ....utils.path_utils import sanitize_filename +from ...utils import image_processing_utils as ipu + + +logger = logging.getLogger(__name__) + +class MapMergingStage(ProcessingStage): + """ + Merges individually processed maps based on MAP_MERGE rules. + This stage performs operations like channel packing. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the map merging logic. + + Args: + context: The asset processing context. + + Returns: + The updated asset processing context. + """ + if context.status_flags.get('skip_asset'): + logger.info(f"Skipping map merging for asset {context.asset_name} as skip_asset flag is set.") + return context + + if not hasattr(context, 'merged_maps_details'): + context.merged_maps_details = {} + + if not hasattr(context, 'processed_maps_details'): + logger.warning(f"Asset {context.asset_name}: 'processed_maps_details' not found in context. Cannot perform map merging.") + return context + + if not context.files_to_process: + logger.info(f"Asset {context.asset_name}: No files_to_process defined. Skipping map merging.") + return context + + logger.info(f"Starting MapMergingStage for asset: {context.asset_name}") + + for merge_rule in context.files_to_process: + if not isinstance(merge_rule, FileRule) or merge_rule.item_type != "MAP_MERGE": + continue + + if not merge_rule.merge_settings: + logger.error(f"Asset {context.asset_name}, Rule ID {merge_rule.id.hex}: Merge rule for map_type '{merge_rule.map_type}' is missing merge_settings. Skipping this merge.") + context.merged_maps_details[merge_rule.id.hex] = { + 'map_type': merge_rule.map_type, + 'status': 'Failed', + 'reason': 'Missing merge_settings in FileRule.' + } + continue + + merge_settings: MergeSettings = merge_rule.merge_settings + output_map_type = merge_rule.map_type + rule_id_hex = merge_rule.id.hex + logger.info(f"Processing MAP_MERGE rule for '{output_map_type}' (ID: {rule_id_hex})") + + loaded_input_maps: Dict[str, np.ndarray] = {} + input_map_paths: Dict[str, str] = {} + target_dims: Optional[Tuple[int, int]] = None # width, height + all_inputs_valid = True + + # A. Load Input Maps for Merging + if not merge_settings.input_maps: + logger.warning(f"Asset {context.asset_name}, Rule ID {rule_id_hex}: No input_maps defined in merge_settings for '{output_map_type}'. Skipping this merge.") + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': 'No input_maps defined in merge_settings.' + } + continue + + for input_map_config in merge_settings.input_maps: + input_rule_id_hex = input_map_config.file_rule_id.hex + processed_detail = context.processed_maps_details.get(input_rule_id_hex) + + if not processed_detail or processed_detail.get('status') != 'Processed': + error_msg = f"Input map (Rule ID: {input_rule_id_hex}) for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}) not found or not processed. Details: {processed_detail}" + logger.error(error_msg) + all_inputs_valid = False + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': f"Input map {input_rule_id_hex} not processed or missing." + } + break + + temp_processed_file_path = Path(processed_detail['temp_processed_file']) + if not temp_processed_file_path.exists(): + error_msg = f"Input map file {temp_processed_file_path} for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}) does not exist." + logger.error(error_msg) + all_inputs_valid = False + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': f"Input map file {temp_processed_file_path} not found." + } + break + + try: + image_data = ipu.load_image(temp_processed_file_path) + except Exception as e: + logger.error(f"Error loading image {temp_processed_file_path} for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}): {e}") + all_inputs_valid = False + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': f"Error loading input image {temp_processed_file_path}." + } + break + + if image_data is None: + logger.error(f"Failed to load image data from {temp_processed_file_path} for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}).") + all_inputs_valid = False + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': f"Failed to load image data from {temp_processed_file_path}." + } + break + + loaded_input_maps[input_rule_id_hex] = image_data + input_map_paths[input_rule_id_hex] = str(temp_processed_file_path) + + current_dims = (image_data.shape[1], image_data.shape[0]) # width, height + if target_dims is None: + target_dims = current_dims + logger.debug(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Set target dimensions to {target_dims} from first input {temp_processed_file_path}.") + elif current_dims != target_dims: + logger.warning(f"Input map {temp_processed_file_path} for merge rule '{output_map_type}' (ID: {rule_id_hex}) has dimensions {current_dims}, but target is {target_dims}. Resizing.") + try: + image_data = ipu.resize_image(image_data, target_dims[0], target_dims[1]) + if image_data is None: + raise ValueError("Resize operation returned None.") + loaded_input_maps[input_rule_id_hex] = image_data + except Exception as e: + logger.error(f"Failed to resize image {temp_processed_file_path} for merge rule '{output_map_type}' (ID: {rule_id_hex}): {e}") + all_inputs_valid = False + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': f"Failed to resize input image {temp_processed_file_path}." + } + break + + if not all_inputs_valid: + # Failure already logged and recorded in context.merged_maps_details + logger.warning(f"Skipping merge for '{output_map_type}' (ID: {rule_id_hex}) due to invalid inputs.") + continue + + if target_dims is None: # Should not happen if all_inputs_valid is true and there was at least one input map + logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Target dimensions not determined despite valid inputs. This indicates an issue with input map loading or an empty input_maps list that wasn't caught.") + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': 'Target dimensions could not be determined.' + } + continue + + # B. Perform Merge Operation + try: + if merge_settings.output_channels == 1: + merged_image = np.zeros((target_dims[1], target_dims[0]), dtype=np.uint8) + else: + merged_image = np.zeros((target_dims[1], target_dims[0], merge_settings.output_channels), dtype=np.uint8) + except Exception as e: + logger.error(f"Error creating empty merged image for '{output_map_type}' (ID: {rule_id_hex}) with dims {target_dims} and {merge_settings.output_channels} channels: {e}") + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': f'Error creating output image canvas: {e}' + } + continue + + merge_op_failed = False + for input_map_config in merge_settings.input_maps: + source_image = loaded_input_maps[input_map_config.file_rule_id.hex] + source_channel_index = input_map_config.source_channel + target_channel_index = input_map_config.target_channel + + source_data = None + if source_image.ndim == 2: # Grayscale + source_data = source_image + elif source_image.ndim == 3: # Multi-channel (e.g. RGB, RGBA) + if source_channel_index >= source_image.shape[2]: + logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Source channel index {source_channel_index} out of bounds for source image with shape {source_image.shape} (from Rule ID {input_map_config.file_rule_id.hex}).") + merge_op_failed = True + break + source_data = source_image[:, :, source_channel_index] + else: + logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Source image (from Rule ID {input_map_config.file_rule_id.hex}) has unexpected dimensions: {source_image.ndim}. Shape: {source_image.shape}") + merge_op_failed = True + break + + if source_data is None: # Should be caught by previous checks + logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Failed to extract source_data for unknown reasons from input {input_map_config.file_rule_id.hex}.") + merge_op_failed = True + break + + # Assign to target channel + try: + if merged_image.ndim == 2: # Output is grayscale + if merge_settings.output_channels != 1: + logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Mismatch - merged_image is 2D but output_channels is {merge_settings.output_channels}.") + merge_op_failed = True + break + merged_image = source_data # Overwrites if multiple inputs map to grayscale; consider blending or specific logic if needed + elif merged_image.ndim == 3: # Output is multi-channel + if target_channel_index >= merged_image.shape[2]: + logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Target channel index {target_channel_index} out of bounds for merged image with shape {merged_image.shape}.") + merge_op_failed = True + break + merged_image[:, :, target_channel_index] = source_data + else: # Should not happen + logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Merged image has unexpected dimensions: {merged_image.ndim}. Shape: {merged_image.shape}") + merge_op_failed = True + break + except Exception as e: + logger.error(f"Error assigning source data to target channel for '{output_map_type}' (ID: {rule_id_hex}): {e}. Source shape: {source_data.shape}, Target channel: {target_channel_index}, Merged image shape: {merged_image.shape}") + merge_op_failed = True + break + + if input_map_config.invert_source_channel: + if merged_image.ndim == 2: + merged_image = 255 - merged_image # Assumes uint8 + elif merged_image.ndim == 3: + # Ensure we are not inverting an alpha channel if that's not desired, + # but current spec inverts the target channel data. + merged_image[:, :, target_channel_index] = 255 - merged_image[:, :, target_channel_index] + + # input_map_config.default_value_if_missing: + # This was handled by all_inputs_valid check for file presence. + # If a channel is missing from a multi-channel source, that's an error in source_channel_index. + # If a file is entirely missing and a default color/value is needed for the *output channel*, + # that would be a different logic, perhaps pre-filling merged_image. + # For now, we assume if an input map is specified, it must be present and valid. + + if merge_op_failed: + logger.error(f"Merge operation failed for '{output_map_type}' (ID: {rule_id_hex}).") + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': 'Error during channel packing/merge operation.' + } + continue + + # C. Save Temporary Merged Map + # Default to PNG, or use format from merge_settings if available (future enhancement) + output_format = getattr(merge_settings, 'output_format', 'png').lower() + if output_format not in ['png', 'jpg', 'jpeg', 'tif', 'tiff', 'exr']: # Add more as ipu supports + logger.warning(f"Unsupported output_format '{output_format}' in merge_settings for '{output_map_type}' (ID: {rule_id_hex}). Defaulting to PNG.") + output_format = 'png' + + temp_merged_filename = f"merged_{sanitize_filename(output_map_type)}_{rule_id_hex}.{output_format}" + + if not context.engine_temp_dir: + logger.error(f"Asset {context.asset_name}: engine_temp_dir is not set. Cannot save merged map.") + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': 'engine_temp_dir not set in context.' + } + continue + + temp_merged_path = context.engine_temp_dir / temp_merged_filename + + try: + save_success = ipu.save_image(temp_merged_path, merged_image) + except Exception as e: + logger.error(f"Error saving merged image {temp_merged_path} for '{output_map_type}' (ID: {rule_id_hex}): {e}") + save_success = False + + if not save_success: + logger.error(f"Failed to save temporary merged map to {temp_merged_path} for '{output_map_type}' (ID: {rule_id_hex}).") + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'status': 'Failed', + 'reason': f'Failed to save merged image to {temp_merged_path}.' + } + continue + + logger.info(f"Successfully merged and saved '{output_map_type}' (ID: {rule_id_hex}) to {temp_merged_path}") + + # D. Update Context + context.merged_maps_details[rule_id_hex] = { + 'map_type': output_map_type, + 'temp_merged_file': str(temp_merged_path), + 'input_map_ids_used': [mc.file_rule_id.hex for mc in merge_settings.input_maps], + 'input_map_files_used': input_map_paths, # Dict[rule_id_hex, path_str] + 'merged_dimensions': target_dims, # (width, height) + 'status': 'Processed', + 'file_rule_id': rule_id_hex # For easier reverse lookup if needed + } + + # Optional: Update context.asset_metadata['processed_files'] or similar + # This might be better handled by a later stage that finalizes files. + # For now, merged_maps_details is the primary record. + + logger.info(f"Finished MapMergingStage for asset: {context.asset_name}. Merged maps: {len(context.merged_maps_details)}") + return context \ No newline at end of file diff --git a/processing/pipeline/stages/metadata_finalization_save.py b/processing/pipeline/stages/metadata_finalization_save.py new file mode 100644 index 0000000..d18bfc4 --- /dev/null +++ b/processing/pipeline/stages/metadata_finalization_save.py @@ -0,0 +1,119 @@ +import datetime +import json +import logging +from pathlib import Path +from typing import Any, Dict + +from ..asset_context import AssetProcessingContext +from .base_stage import ProcessingStage +from ....utils.path_utils import generate_path_from_pattern + + +logger = logging.getLogger(__name__) + +class MetadataFinalizationAndSaveStage(ProcessingStage): + """ + This stage finalizes the asset_metadata (e.g., setting processing end time, + final status) and saves it as a JSON file. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Finalizes metadata, determines output path, and saves the metadata JSON file. + """ + if not hasattr(context, 'asset_metadata') or not context.asset_metadata: + if context.status_flags.get('skip_asset'): + logger.info( + f"Asset '{context.asset_rule.name if hasattr(context, 'asset_rule') and context.asset_rule else 'Unknown'}': " + f"Skipped before metadata initialization. No metadata file will be saved." + ) + else: + logger.warning( + f"Asset '{context.asset_rule.name if hasattr(context, 'asset_rule') and context.asset_rule else 'Unknown'}': " + f"asset_metadata not initialized. Skipping metadata finalization and save." + ) + return context + + # Check Skip Flag + if context.status_flags.get('skip_asset'): + context.asset_metadata['status'] = "Skipped" + context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() + context.asset_metadata['notes'] = context.status_flags.get('skip_reason', 'Skipped early in pipeline') + logger.info( + f"Asset '{context.asset_rule.name}': Marked as skipped. Reason: {context.asset_metadata['notes']}" + ) + # Assuming we save metadata for skipped assets if it was initialized. + # If not, the logic to skip saving would be here or before path generation. + + # A. Finalize Metadata + context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() + + # Determine final status (if not already set to Skipped) + if context.asset_metadata.get('status') != "Skipped": + has_errors = any( + context.status_flags.get(error_flag) + for error_flag in ['file_processing_error', 'merge_error', 'critical_error'] # Added critical_error + ) + if has_errors: + context.asset_metadata['status'] = "Failed" + else: + context.asset_metadata['status'] = "Processed" + + # Add details of processed and merged maps + context.asset_metadata['processed_map_details'] = getattr(context, 'processed_maps_details', {}) + context.asset_metadata['merged_map_details'] = getattr(context, 'merged_maps_details', {}) + + # (Optional) Add a list of all temporary files + context.asset_metadata['temporary_files'] = getattr(context, 'temporary_files', []) + + # B. Determine Metadata Output Path + # Ensure asset_rule and source_rule exist before accessing their names + asset_name = context.asset_rule.name if hasattr(context, 'asset_rule') and context.asset_rule else "unknown_asset" + source_rule_name = context.source_rule.name if hasattr(context, 'source_rule') and context.source_rule else "unknown_source" + + metadata_filename = f"{asset_name}_metadata.json" + output_path_pattern = context.asset_rule.output_path_pattern if hasattr(context, 'asset_rule') and context.asset_rule else "" + + # Handle potential missing sha5_value, defaulting to None or an empty string + sha_value = getattr(context, 'sha5_value', getattr(context, 'sha_value', None)) + + + full_output_path = generate_path_from_pattern( + base_path=str(context.output_base_path), # Ensure base_path is a string + pattern=output_path_pattern, + asset_name=asset_name, + map_type="metadata", # Special map_type for metadata + filename=metadata_filename, + source_rule_name=source_rule_name, + incrementing_value=getattr(context, 'incrementing_value', None), + sha_value=sha_value # Changed from sha5_value to sha_value for more generality + ) + metadata_save_path = Path(full_output_path) + + # C. Save Metadata File + try: + metadata_save_path.parent.mkdir(parents=True, exist_ok=True) + + def make_serializable(data: Any) -> Any: + if isinstance(data, Path): + return str(data) + if isinstance(data, datetime.datetime): # Ensure datetime is serializable + return data.isoformat() + if isinstance(data, dict): + return {k: make_serializable(v) for k, v in data.items()} + if isinstance(data, list): + return [make_serializable(i) for i in data] + return data + + serializable_metadata = make_serializable(context.asset_metadata) + + with open(metadata_save_path, 'w') as f: + json.dump(serializable_metadata, f, indent=4) + logger.info(f"Asset '{asset_name}': Metadata saved to {metadata_save_path}") + context.asset_metadata['metadata_file_path'] = str(metadata_save_path) + except Exception as e: + logger.error(f"Asset '{asset_name}': Failed to save metadata to {metadata_save_path}. Error: {e}") + context.asset_metadata['status'] = "Failed (Metadata Save Error)" + context.status_flags['metadata_save_error'] = True + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/metadata_initialization.py b/processing/pipeline/stages/metadata_initialization.py new file mode 100644 index 0000000..4d5fbf5 --- /dev/null +++ b/processing/pipeline/stages/metadata_initialization.py @@ -0,0 +1,163 @@ +import datetime +import logging + +from ..base_stage import ProcessingStage +from ...asset_context import AssetProcessingContext # Adjusted import path assuming asset_context is in processing.pipeline +# If AssetProcessingContext is directly under 'processing', the import would be: +# from ...asset_context import AssetProcessingContext +# Based on the provided file structure, asset_context.py is in processing/pipeline/ +# So, from ...asset_context import AssetProcessingContext is likely incorrect. +# It should be: from ..asset_context import AssetProcessingContext +# Correcting this based on typical Python package structure and the location of base_stage.py + +# Re-evaluating import based on common structure: +# If base_stage.py is in processing/pipeline/stages/ +# and asset_context.py is in processing/pipeline/ +# then the import for AssetProcessingContext from metadata_initialization.py (in stages) would be: +# from ..asset_context import AssetProcessingContext + +# Let's assume the following structure for clarity: +# processing/ +# L-- pipeline/ +# L-- __init__.py +# L-- asset_context.py +# L-- base_stage.py (Mistake here, base_stage is in stages, so it's ..base_stage) +# L-- stages/ +# L-- __init__.py +# L-- metadata_initialization.py +# L-- base_stage.py (Corrected: base_stage.py is here) + +# Corrected imports based on the plan and typical structure: +# base_stage.py is in processing/pipeline/stages/ +# asset_context.py is in processing/pipeline/ + +# from ..base_stage import ProcessingStage # This would mean base_stage is one level up from stages (i.e. in pipeline) +# The plan says: from ..base_stage import ProcessingStage +# This implies that metadata_initialization.py is in a subdirectory of where base_stage.py is. +# However, the file path for metadata_initialization.py is processing/pipeline/stages/metadata_initialization.py +# And base_stage.py is listed as processing/pipeline/stages/base_stage.py in the open tabs. +# So, the import should be: +# from .base_stage import ProcessingStage + +# AssetProcessingContext is at processing/pipeline/asset_context.py +# So from processing/pipeline/stages/metadata_initialization.py, it would be: +# from ..asset_context import AssetProcessingContext + +# Final check on imports based on instructions: +# `from ..base_stage import ProcessingStage` -> This means base_stage.py is in `processing/pipeline/` +# `from ...asset_context import AssetProcessingContext` -> This means asset_context.py is in `processing/` +# Let's verify the location of these files from the environment details. +# processing/pipeline/asset_context.py +# processing/pipeline/stages/base_stage.py +# +# So, from processing/pipeline/stages/metadata_initialization.py: +# To import ProcessingStage from processing/pipeline/stages/base_stage.py: +# from .base_stage import ProcessingStage +# To import AssetProcessingContext from processing/pipeline/asset_context.py: +# from ..asset_context import AssetProcessingContext + +# The instructions explicitly state: +# `from ..base_stage import ProcessingStage` +# `from ...asset_context import AssetProcessingContext` +# This implies a different structure than what seems to be in the file tree. +# I will follow the explicit import instructions from the task. +# This means: +# base_stage.py is expected at `processing/pipeline/base_stage.py` +# asset_context.py is expected at `processing/asset_context.py` + +# Given the file tree: +# processing/pipeline/asset_context.py +# processing/pipeline/stages/base_stage.py +# The imports in `processing/pipeline/stages/metadata_initialization.py` should be: +# from .base_stage import ProcessingStage +# from ..asset_context import AssetProcessingContext + +# I will use the imports that align with the provided file structure. + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext + + +logger = logging.getLogger(__name__) + +class MetadataInitializationStage(ProcessingStage): + """ + Initializes metadata structures within the AssetProcessingContext. + This stage sets up asset_metadata, processed_maps_details, and + merged_maps_details. + """ + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the metadata initialization logic. + + Args: + context: The AssetProcessingContext for the current asset. + + Returns: + The modified AssetProcessingContext. + """ + if context.status_flags.get('skip_asset', False): + logger.debug(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': Skipping metadata initialization as 'skip_asset' is True.") + return context + + logger.debug(f"Asset '{context.asset_rule.name}': Initializing metadata.") + + context.asset_metadata = {} + context.processed_maps_details = {} + context.merged_maps_details = {} + + # Populate Initial asset_metadata + if context.asset_rule: + context.asset_metadata['asset_name'] = context.asset_rule.name + context.asset_metadata['asset_id'] = str(context.asset_rule.id) + context.asset_metadata['source_path'] = str(context.asset_rule.source_path) + context.asset_metadata['output_path_pattern'] = context.asset_rule.output_path_pattern + context.asset_metadata['tags'] = list(context.asset_rule.tags) if context.asset_rule.tags else [] + context.asset_metadata['custom_fields'] = dict(context.asset_rule.custom_fields) if context.asset_rule.custom_fields else {} + else: + # Handle cases where asset_rule might be None, though typically it should be set + logger.warning("AssetRule is not set in context during metadata initialization.") + context.asset_metadata['asset_name'] = "Unknown Asset" + context.asset_metadata['asset_id'] = "N/A" + context.asset_metadata['source_path'] = "N/A" + context.asset_metadata['output_path_pattern'] = "N/A" + context.asset_metadata['tags'] = [] + context.asset_metadata['custom_fields'] = {} + + + if context.source_rule: + context.asset_metadata['source_rule_name'] = context.source_rule.name + context.asset_metadata['source_rule_id'] = str(context.source_rule.id) + else: + logger.warning("SourceRule is not set in context during metadata initialization.") + context.asset_metadata['source_rule_name'] = "Unknown Source Rule" + context.asset_metadata['source_rule_id'] = "N/A" + + context.asset_metadata['effective_supplier'] = context.effective_supplier + context.asset_metadata['processing_start_time'] = datetime.datetime.now().isoformat() + context.asset_metadata['status'] = "Pending" + + if context.config_obj and hasattr(context.config_obj, 'general_settings') and \ + hasattr(context.config_obj.general_settings, 'app_version'): + context.asset_metadata['version'] = context.config_obj.general_settings.app_version + else: + logger.warning("App version not found in config_obj.general_settings. Setting version to 'N/A'.") + context.asset_metadata['version'] = "N/A" # Default or placeholder + + if context.incrementing_value is not None: + context.asset_metadata['incrementing_value'] = context.incrementing_value + + # The plan mentions sha5_value, which is likely a typo for sha256 or similar. + # Implementing as 'sha5_value' per instructions, but noting the potential typo. + if hasattr(context, 'sha5_value') and context.sha5_value is not None: # Check attribute existence + context.asset_metadata['sha5_value'] = context.sha5_value + elif hasattr(context, 'sha256_value') and context.sha256_value is not None: # Fallback if sha5 was a typo + logger.debug("sha5_value not found, using sha256_value if available for metadata.") + context.asset_metadata['sha256_value'] = context.sha256_value + + + logger.info(f"Asset '{context.asset_metadata.get('asset_name', 'Unknown')}': Metadata initialized.") + # Example of how you might log the full metadata for debugging: + # logger.debug(f"Initialized metadata: {context.asset_metadata}") + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/normal_map_green_channel.py b/processing/pipeline/stages/normal_map_green_channel.py new file mode 100644 index 0000000..ca7984b --- /dev/null +++ b/processing/pipeline/stages/normal_map_green_channel.py @@ -0,0 +1,154 @@ +import logging +import numpy as np +from pathlib import Path +from typing import List + +from ..base_stage import ProcessingStage +from ...asset_context import AssetProcessingContext +from .....rule_structure import FileRule +from ...utils import image_processing_utils as ipu +from .....utils.path_utils import sanitize_filename + +logger = logging.getLogger(__name__) + +class NormalMapGreenChannelStage(ProcessingStage): + """ + Processing stage to invert the green channel of normal maps if configured. + This is often needed when converting between DirectX (Y-) and OpenGL (Y+) normal map formats. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Identifies NORMAL maps, checks configuration for green channel inversion, + performs inversion if needed, saves a new temporary file, and updates + the AssetProcessingContext. + """ + if context.status_flags.get('skip_asset'): + logger.debug(f"Asset '{context.asset_rule.name}': Skipping NormalMapGreenChannelStage due to skip_asset flag.") + return context + + if not context.files_to_process or not context.processed_maps_details: + logger.debug( + f"Asset '{context.asset_rule.name}': No files to process or processed_maps_details empty in NormalMapGreenChannelStage. Skipping." + ) + return context + + new_files_to_process: List[FileRule] = [] + processed_a_normal_map = False + + for file_rule in context.files_to_process: + if file_rule.map_type == "NORMAL": + # Check configuration for inversion + # Assuming a global setting for now. + # This key should exist in the Configuration object's general_settings. + should_invert = context.config_obj.general_settings.get('invert_normal_map_green_channel_globally', False) + + if not should_invert: + logger.debug( + f"Asset '{context.asset_rule.name}': Normal map green channel inversion not enabled globally. " + f"Skipping for {file_rule.filename_pattern} (ID: {file_rule.id.hex})." + ) + new_files_to_process.append(file_rule) + continue + + # Get the temporary processed file path + map_details = context.processed_maps_details.get(file_rule.id.hex) + if not map_details or map_details.get('status') != 'Processed' or not map_details.get('temp_processed_file'): + logger.warning( + f"Asset '{context.asset_rule.name}': Normal map {file_rule.filename_pattern} (ID: {file_rule.id.hex}) " + f"not found in processed_maps_details or not marked as 'Processed'. Cannot invert green channel." + ) + new_files_to_process.append(file_rule) + continue + + original_temp_path = Path(map_details['temp_processed_file']) + if not original_temp_path.exists(): + logger.error( + f"Asset '{context.asset_rule.name}': Temporary file {original_temp_path} for normal map " + f"{file_rule.filename_pattern} (ID: {file_rule.id.hex}) does not exist. Cannot invert green channel." + ) + new_files_to_process.append(file_rule) + continue + + image_data = ipu.load_image(original_temp_path) + + if image_data is None: + logger.error( + f"Asset '{context.asset_rule.name}': Failed to load image from {original_temp_path} " + f"for normal map {file_rule.filename_pattern} (ID: {file_rule.id.hex})." + ) + new_files_to_process.append(file_rule) + continue + + if image_data.ndim != 3 or image_data.shape[2] < 2: # Must have at least R, G channels + logger.error( + f"Asset '{context.asset_rule.name}': Image {original_temp_path} for normal map " + f"{file_rule.filename_pattern} (ID: {file_rule.id.hex}) is not a valid RGB/normal map " + f"(ndim={image_data.ndim}, channels={image_data.shape[2] if image_data.ndim == 3 else 'N/A'}) " + f"for green channel inversion." + ) + new_files_to_process.append(file_rule) + continue + + # Perform Green Channel Inversion + modified_image_data = image_data.copy() + try: + if np.issubdtype(modified_image_data.dtype, np.floating): + modified_image_data[:, :, 1] = 1.0 - modified_image_data[:, :, 1] + elif np.issubdtype(modified_image_data.dtype, np.integer): + max_val = np.iinfo(modified_image_data.dtype).max + modified_image_data[:, :, 1] = max_val - modified_image_data[:, :, 1] + else: + logger.error( + f"Asset '{context.asset_rule.name}': Unsupported image data type " + f"{modified_image_data.dtype} for normal map {original_temp_path}. Cannot invert green channel." + ) + new_files_to_process.append(file_rule) + continue + except IndexError: + logger.error( + f"Asset '{context.asset_rule.name}': Image {original_temp_path} for normal map " + f"{file_rule.filename_pattern} (ID: {file_rule.id.hex}) does not have a green channel (index 1) " + f"or has unexpected dimensions ({modified_image_data.shape}). Cannot invert." + ) + new_files_to_process.append(file_rule) + continue + + + # Save New Temporary (Modified Normal) Map + new_temp_filename = f"normal_g_inv_{sanitize_filename(file_rule.map_type)}_{file_rule.id.hex}{original_temp_path.suffix}" + new_temp_path = context.engine_temp_dir / new_temp_filename + + save_success = ipu.save_image(new_temp_path, modified_image_data) + + if save_success: + logger.info( + f"Asset '{context.asset_rule.name}': Inverted green channel for NORMAL map " + f"{original_temp_path.name}, saved to {new_temp_path.name}." + ) + # Update processed_maps_details + context.processed_maps_details[file_rule.id.hex]['temp_processed_file'] = str(new_temp_path) + current_notes = context.processed_maps_details[file_rule.id.hex].get('notes', '') + context.processed_maps_details[file_rule.id.hex]['notes'] = \ + f"{current_notes}; Green channel inverted by NormalMapGreenChannelStage".strip('; ') + + new_files_to_process.append(file_rule) # Add original rule, it now points to modified data + processed_a_normal_map = True + else: + logger.error( + f"Asset '{context.asset_rule.name}': Failed to save inverted normal map to {new_temp_path} " + f"for original {original_temp_path.name}." + ) + new_files_to_process.append(file_rule) # Add original rule, as processing failed + else: + # Not a normal map, just pass it through + new_files_to_process.append(file_rule) + + context.files_to_process = new_files_to_process + + if processed_a_normal_map: + logger.info(f"Asset '{context.asset_rule.name}': NormalMapGreenChannelStage processed relevant normal maps.") + else: + logger.debug(f"Asset '{context.asset_rule.name}': No normal maps found or processed in NormalMapGreenChannelStage.") + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py new file mode 100644 index 0000000..358d3e1 --- /dev/null +++ b/processing/pipeline/stages/output_organization.py @@ -0,0 +1,155 @@ +import logging +import shutil +from pathlib import Path +from typing import List, Dict, Optional + +from ..base_stage import ProcessingStage +from ...asset_context import AssetProcessingContext +from ....utils.path_utils import generate_path_from_pattern, sanitize_filename +from ....config import FileRule, MergeRule # Assuming these are needed for type hints if not directly in context + + +logger = logging.getLogger(__name__) + +class OutputOrganizationStage(ProcessingStage): + """ + Organizes output files by copying temporary processed files to their final destinations. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Copies temporary processed and merged files to their final output locations + based on path patterns and updates AssetProcessingContext. + """ + logger.debug(f"Asset '{context.asset_rule.name}': Starting output organization stage.") + + if context.status_flags.get('skip_asset'): + logger.info(f"Asset '{context.asset_rule.name}': Output organization skipped as 'skip_asset' is True.") + return context + + current_status = context.asset_metadata.get('status', '') + if current_status.startswith("Failed") or current_status == "Skipped": + logger.info(f"Asset '{context.asset_rule.name}': Output organization skipped due to prior status: {current_status}.") + return context + + final_output_files: List[str] = [] + # Ensure config_obj and general_settings are present, provide default for overwrite_existing if not + overwrite_existing = False + if context.config_obj and hasattr(context.config_obj, 'general_settings'): + overwrite_existing = context.config_obj.general_settings.overwrite_existing + else: + logger.warning(f"Asset '{context.asset_rule.name}': config_obj.general_settings not found, defaulting overwrite_existing to False.") + + + # A. Organize Processed Individual Maps + if context.processed_maps_details: + logger.debug(f"Asset '{context.asset_rule.name}': Organizing {len(context.processed_maps_details)} processed individual map(s).") + for file_rule_id, details in context.processed_maps_details.items(): + if details.get('status') != 'Processed' or not details.get('temp_processed_file'): + logger.debug(f"Asset '{context.asset_rule.name}': Skipping file_rule_id '{file_rule_id}' due to status '{details.get('status')}' or missing temp file.") + continue + + temp_file_path = Path(details['temp_processed_file']) + map_type = details['map_type'] + + output_filename = f"{context.asset_rule.name}_{sanitize_filename(map_type)}{temp_file_path.suffix}" + if context.asset_rule and context.asset_rule.file_rules: + current_file_rule: Optional[FileRule] = next( + (fr for fr in context.asset_rule.file_rules if fr.id == file_rule_id), None + ) + if current_file_rule and current_file_rule.output_filename_pattern: + output_filename = current_file_rule.output_filename_pattern + + try: + final_path_str = generate_path_from_pattern( + base_path=str(context.output_base_path), + pattern=context.asset_rule.output_path_pattern, + asset_name=context.asset_rule.name, + map_type=map_type, + filename=output_filename, + source_rule_name=context.source_rule.name if context.source_rule else "DefaultSource", + incrementing_value=str(context.incrementing_value) if context.incrementing_value is not None else None, + sha5_value=context.sha5_value + ) + final_path = Path(final_path_str) + final_path.parent.mkdir(parents=True, exist_ok=True) + + if final_path.exists() and not overwrite_existing: + logger.info(f"Asset '{context.asset_rule.name}': Output file {final_path} exists and overwrite is disabled. Skipping copy.") + else: + shutil.copy2(temp_file_path, final_path) + logger.info(f"Asset '{context.asset_rule.name}': Copied {temp_file_path} to {final_path}") + final_output_files.append(str(final_path)) + + context.processed_maps_details[file_rule_id]['final_output_path'] = str(final_path) + context.processed_maps_details[file_rule_id]['status'] = 'Organized' # Or some other status indicating completion + + except Exception as e: + logger.error(f"Asset '{context.asset_rule.name}': Failed to copy {temp_file_path} to {final_path_str if 'final_path_str' in locals() else 'unknown destination'} for file_rule_id '{file_rule_id}'. Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error)" + # Optionally update status in details as well + context.processed_maps_details[file_rule_id]['status'] = 'Organization Failed' + else: + logger.debug(f"Asset '{context.asset_rule.name}': No processed individual maps to organize.") + + # B. Organize Merged Maps + if context.merged_maps_details: + logger.debug(f"Asset '{context.asset_rule.name}': Organizing {len(context.merged_maps_details)} merged map(s).") + for merge_rule_id, details in context.merged_maps_details.items(): + if details.get('status') != 'Processed' or not details.get('temp_merged_file'): + logger.debug(f"Asset '{context.asset_rule.name}': Skipping merge_rule_id '{merge_rule_id}' due to status '{details.get('status')}' or missing temp file.") + continue + + temp_file_path = Path(details['temp_merged_file']) + map_type = details['map_type'] # This is the output_map_type of the merge rule + + output_filename = f"{context.asset_rule.name}_{sanitize_filename(map_type)}{temp_file_path.suffix}" + if context.asset_rule and context.asset_rule.merge_rules: + current_merge_rule: Optional[MergeRule] = next( + (mr for mr in context.asset_rule.merge_rules if mr.id == merge_rule_id), None + ) + if current_merge_rule and current_merge_rule.output_filename_pattern: + output_filename = current_merge_rule.output_filename_pattern + + try: + final_path_str = generate_path_from_pattern( + base_path=str(context.output_base_path), + pattern=context.asset_rule.output_path_pattern, + asset_name=context.asset_rule.name, + map_type=map_type, + filename=output_filename, + source_rule_name=context.source_rule.name if context.source_rule else "DefaultSource", + incrementing_value=str(context.incrementing_value) if context.incrementing_value is not None else None, + sha5_value=context.sha5_value + ) + final_path = Path(final_path_str) + final_path.parent.mkdir(parents=True, exist_ok=True) + + if final_path.exists() and not overwrite_existing: + logger.info(f"Asset '{context.asset_rule.name}': Output file {final_path} exists and overwrite is disabled. Skipping copy for merged map.") + else: + shutil.copy2(temp_file_path, final_path) + logger.info(f"Asset '{context.asset_rule.name}': Copied merged map {temp_file_path} to {final_path}") + final_output_files.append(str(final_path)) + + context.merged_maps_details[merge_rule_id]['final_output_path'] = str(final_path) + context.merged_maps_details[merge_rule_id]['status'] = 'Organized' + + except Exception as e: + logger.error(f"Asset '{context.asset_rule.name}': Failed to copy merged map {temp_file_path} to {final_path_str if 'final_path_str' in locals() else 'unknown destination'} for merge_rule_id '{merge_rule_id}'. Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error)" + context.merged_maps_details[merge_rule_id]['status'] = 'Organization Failed' + else: + logger.debug(f"Asset '{context.asset_rule.name}': No merged maps to organize.") + + context.asset_metadata['final_output_files'] = final_output_files + + if context.status_flags.get('output_organization_error'): + logger.error(f"Asset '{context.asset_rule.name}': Output organization encountered errors. Status: {context.asset_metadata['status']}") + else: + logger.info(f"Asset '{context.asset_rule.name}': Output organization complete. {len(final_output_files)} files placed.") + + logger.debug(f"Asset '{context.asset_rule.name}': Output organization stage finished.") + return context \ No newline at end of file diff --git a/processing/pipeline/stages/supplier_determination.py b/processing/pipeline/stages/supplier_determination.py new file mode 100644 index 0000000..ff60722 --- /dev/null +++ b/processing/pipeline/stages/supplier_determination.py @@ -0,0 +1,61 @@ +import logging + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext + +class SupplierDeterminationStage(ProcessingStage): + """ + Determines the effective supplier for an asset based on asset and source rules. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Determines and validates the effective supplier for the asset. + + Args: + context: The asset processing context. + + Returns: + The updated asset processing context. + """ + effective_supplier = None + logger = logging.getLogger(__name__) # Using a logger specific to this module + + # 1. Check asset_rule.supplier_override + if context.asset_rule and context.asset_rule.supplier_override: + effective_supplier = context.asset_rule.supplier_override + logger.debug(f"Asset '{context.asset_rule.name}': Supplier override found: '{effective_supplier}'.") + + # 2. If not overridden, check source_rule.supplier + if not effective_supplier and context.source_rule and context.source_rule.supplier: + effective_supplier = context.source_rule.supplier + logger.debug(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': Source rule supplier found: '{effective_supplier}'.") + + # 3. Validation + if not effective_supplier: + asset_name = context.asset_rule.name if context.asset_rule else "Unknown Asset" + logger.error(f"Asset '{asset_name}': No supplier defined in asset rule or source rule.") + context.effective_supplier = None + if 'status_flags' not in context: # Ensure status_flags exists + context.status_flags = {} + context.status_flags['supplier_error'] = True + elif context.config_obj and effective_supplier not in context.config_obj.suppliers: + asset_name = context.asset_rule.name if context.asset_rule else "Unknown Asset" + logger.warning( + f"Asset '{asset_name}': Supplier '{effective_supplier}' not found in global supplier configuration. " + f"Available: {list(context.config_obj.suppliers.keys()) if context.config_obj.suppliers else 'None'}" + ) + context.effective_supplier = None + if 'status_flags' not in context: # Ensure status_flags exists + context.status_flags = {} + context.status_flags['supplier_error'] = True + else: + context.effective_supplier = effective_supplier + asset_name = context.asset_rule.name if context.asset_rule else "Unknown Asset" + logger.info(f"Asset '{asset_name}': Effective supplier set to '{effective_supplier}'.") + # Optionally clear the error flag if previously set and now resolved, though current logic doesn't show this path. + # if 'status_flags' in context and 'supplier_error' in context.status_flags: + # del context.status_flags['supplier_error'] + + + return context \ No newline at end of file diff --git a/processing/utils/__init__.py b/processing/utils/__init__.py new file mode 100644 index 0000000..5f3ceb7 --- /dev/null +++ b/processing/utils/__init__.py @@ -0,0 +1 @@ +# This file makes the 'utils' directory a Python package. \ No newline at end of file diff --git a/processing/utils/image_processing_utils.py b/processing/utils/image_processing_utils.py new file mode 100644 index 0000000..46768a8 --- /dev/null +++ b/processing/utils/image_processing_utils.py @@ -0,0 +1,357 @@ +import cv2 +import numpy as np +from pathlib import Path +import math +from typing import Optional, Union, List, Tuple, Dict + +# --- Basic Power-of-Two Utilities --- + +def is_power_of_two(n: int) -> bool: + """Checks if a number is a power of two.""" + return (n > 0) and (n & (n - 1) == 0) + +def get_nearest_pot(value: int) -> int: + """Finds the nearest power of two to the given value.""" + if value <= 0: + return 1 # POT must be positive, return 1 as a fallback + if is_power_of_two(value): + return value + + lower_pot = 1 << (value.bit_length() - 1) + upper_pot = 1 << value.bit_length() + + if (value - lower_pot) < (upper_pot - value): + return lower_pot + else: + return upper_pot + +# --- Dimension Calculation --- + +def calculate_target_dimensions( + original_width: int, + original_height: int, + target_width: Optional[int] = None, + target_height: Optional[int] = None, + resize_mode: str = "fit", # e.g., "fit", "stretch", "max_dim_pot" + ensure_pot: bool = False, + allow_upscale: bool = False, + target_max_dim_for_pot_mode: Optional[int] = None # Specific for "max_dim_pot" +) -> Tuple[int, int]: + """ + Calculates target dimensions based on various modes and constraints. + + Args: + original_width: Original width of the image. + original_height: Original height of the image. + target_width: Desired target width. + target_height: Desired target height. + resize_mode: + - "fit": Scales to fit within target_width/target_height, maintaining aspect ratio. + Requires at least one of target_width or target_height. + - "stretch": Scales to exactly target_width and target_height, ignoring aspect ratio. + Requires both target_width and target_height. + - "max_dim_pot": Scales to fit target_max_dim_for_pot_mode while maintaining aspect ratio, + then finds nearest POT for each dimension. Requires target_max_dim_for_pot_mode. + ensure_pot: If True, final dimensions will be adjusted to the nearest power of two. + allow_upscale: If False, dimensions will not exceed original dimensions unless ensure_pot forces it. + target_max_dim_for_pot_mode: Max dimension to use when resize_mode is "max_dim_pot". + + Returns: + A tuple (new_width, new_height). + """ + if original_width <= 0 or original_height <= 0: + # Fallback for invalid original dimensions + fallback_dim = 1 + if ensure_pot: + if target_width and target_height: + fallback_dim = get_nearest_pot(max(target_width, target_height, 1)) + elif target_width: + fallback_dim = get_nearest_pot(target_width) + elif target_height: + fallback_dim = get_nearest_pot(target_height) + elif target_max_dim_for_pot_mode: + fallback_dim = get_nearest_pot(target_max_dim_for_pot_mode) + else: # Default POT if no target given + fallback_dim = 256 + return (fallback_dim, fallback_dim) + return (target_width or 1, target_height or 1) + + + w, h = original_width, original_height + + if resize_mode == "max_dim_pot": + if target_max_dim_for_pot_mode is None: + raise ValueError("target_max_dim_for_pot_mode must be provided for 'max_dim_pot' resize_mode.") + + # Logic adapted from old processing_engine.calculate_target_dimensions + ratio = w / h + if ratio > 1: # Width is dominant + scaled_w = target_max_dim_for_pot_mode + scaled_h = max(1, round(scaled_w / ratio)) + else: # Height is dominant or square + scaled_h = target_max_dim_for_pot_mode + scaled_w = max(1, round(scaled_h * ratio)) + + # Upscale check for this mode is implicitly handled by target_max_dim + # If ensure_pot is true (as it was in the original logic), it's applied here + # For this mode, ensure_pot is effectively always true for the final step + w = get_nearest_pot(scaled_w) + h = get_nearest_pot(scaled_h) + return int(w), int(h) + + elif resize_mode == "fit": + if target_width is None and target_height is None: + raise ValueError("At least one of target_width or target_height must be provided for 'fit' mode.") + + if target_width and target_height: + ratio_orig = w / h + ratio_target = target_width / target_height + if ratio_orig > ratio_target: # Original is wider than target aspect + w_new = target_width + h_new = max(1, round(w_new / ratio_orig)) + else: # Original is taller or same aspect + h_new = target_height + w_new = max(1, round(h_new * ratio_orig)) + elif target_width: + w_new = target_width + h_new = max(1, round(w_new / (w / h))) + else: # target_height is not None + h_new = target_height + w_new = max(1, round(h_new * (w / h))) + w, h = w_new, h_new + + elif resize_mode == "stretch": + if target_width is None or target_height is None: + raise ValueError("Both target_width and target_height must be provided for 'stretch' mode.") + w, h = target_width, target_height + + else: + raise ValueError(f"Unsupported resize_mode: {resize_mode}") + + if not allow_upscale: + if w > original_width: w = original_width + if h > original_height: h = original_height + + if ensure_pot: + w = get_nearest_pot(w) + h = get_nearest_pot(h) + # Re-check upscale if POT adjustment made it larger than original and not allowed + if not allow_upscale: + if w > original_width: w = get_nearest_pot(original_width) # Get closest POT to original + if h > original_height: h = get_nearest_pot(original_height) + + + return int(max(1, w)), int(max(1, h)) + + +# --- Image Statistics --- + +def calculate_image_stats(image_data: np.ndarray) -> Optional[Dict]: + """ + Calculates min, max, mean for a given numpy image array. + Handles grayscale and multi-channel images. Converts to float64 for calculation. + Normalizes uint8/uint16 data to 0-1 range before calculating stats. + """ + if image_data is None: + return None + try: + data_float = image_data.astype(np.float64) + + if image_data.dtype == np.uint16: + data_float /= 65535.0 + elif image_data.dtype == np.uint8: + data_float /= 255.0 + + stats = {} + if len(data_float.shape) == 2: # Grayscale (H, W) + stats["min"] = float(np.min(data_float)) + stats["max"] = float(np.max(data_float)) + stats["mean"] = float(np.mean(data_float)) + elif len(data_float.shape) == 3: # Color (H, W, C) + stats["min"] = [float(v) for v in np.min(data_float, axis=(0, 1))] + stats["max"] = [float(v) for v in np.max(data_float, axis=(0, 1))] + stats["mean"] = [float(v) for v in np.mean(data_float, axis=(0, 1))] + else: + return None # Unsupported shape + return stats + except Exception: + return {"error": "Error calculating image stats"} + +# --- Aspect Ratio String --- + +def normalize_aspect_ratio_change(original_width: int, original_height: int, resized_width: int, resized_height: int, decimals: int = 2) -> str: + """ + Calculates the aspect ratio change string (e.g., "EVEN", "X133"). + """ + if original_width <= 0 or original_height <= 0: + return "InvalidInput" + if resized_width <= 0 or resized_height <= 0: + return "InvalidResize" + + width_change_percentage = ((resized_width - original_width) / original_width) * 100 + height_change_percentage = ((resized_height - original_height) / original_height) * 100 + + normalized_width_change = width_change_percentage / 100 + normalized_height_change = height_change_percentage / 100 + + normalized_width_change = min(max(normalized_width_change + 1, 0), 2) + normalized_height_change = min(max(normalized_height_change + 1, 0), 2) + + epsilon = 1e-9 + if abs(normalized_width_change) < epsilon and abs(normalized_height_change) < epsilon: + closest_value_to_one = 1.0 + elif abs(normalized_width_change) < epsilon: + closest_value_to_one = abs(normalized_height_change) + elif abs(normalized_height_change) < epsilon: + closest_value_to_one = abs(normalized_width_change) + else: + closest_value_to_one = min(abs(normalized_width_change), abs(normalized_height_change)) + + scale_factor = 1 / (closest_value_to_one + epsilon) if abs(closest_value_to_one) < epsilon else 1 / closest_value_to_one + + scaled_normalized_width_change = scale_factor * normalized_width_change + scaled_normalized_height_change = scale_factor * normalized_height_change + + output_width = round(scaled_normalized_width_change, decimals) + output_height = round(scaled_normalized_height_change, decimals) + + if abs(output_width - 1.0) < epsilon: output_width = 1 + if abs(output_height - 1.0) < epsilon: output_height = 1 + + if abs(output_width - output_height) < epsilon: # Handles original square or aspect maintained + output = "EVEN" + elif output_width != 1 and abs(output_height - 1.0) < epsilon : # Width changed, height maintained relative to width + output = f"X{str(output_width).replace('.', '')}" + elif output_height != 1 and abs(output_width - 1.0) < epsilon: # Height changed, width maintained relative to height + output = f"Y{str(output_height).replace('.', '')}" + else: # Both changed relative to each other + output = f"X{str(output_width).replace('.', '')}Y{str(output_height).replace('.', '')}" + return output + +# --- Image Loading, Conversion, Resizing --- + +def load_image(image_path: Union[str, Path], read_flag: int = cv2.IMREAD_UNCHANGED) -> Optional[np.ndarray]: + """Loads an image from the specified path.""" + try: + img = cv2.imread(str(image_path), read_flag) + if img is None: + # print(f"Warning: Failed to load image: {image_path}") # Optional: for debugging utils + return None + return img + except Exception: # as e: + # print(f"Error loading image {image_path}: {e}") # Optional: for debugging utils + return None + +def convert_bgr_to_rgb(image: np.ndarray) -> np.ndarray: + """Converts an image from BGR to RGB color space.""" + if image is None or len(image.shape) < 3: + return image # Return as is if not a color image or None + + if image.shape[2] == 4: # BGRA + return cv2.cvtColor(image, cv2.COLOR_BGRA2RGB) + elif image.shape[2] == 3: # BGR + return cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image # Return as is if not 3 or 4 channels + +def convert_rgb_to_bgr(image: np.ndarray) -> np.ndarray: + """Converts an image from RGB to BGR color space.""" + if image is None or len(image.shape) < 3 or image.shape[2] != 3: # Only for 3-channel RGB + return image # Return as is if not a 3-channel color image or None + return cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + + +def resize_image(image: np.ndarray, target_width: int, target_height: int, interpolation: Optional[int] = None) -> np.ndarray: + """Resizes an image to target_width and target_height.""" + if image is None: + raise ValueError("Cannot resize a None image.") + if target_width <= 0 or target_height <= 0: + raise ValueError("Target width and height must be positive.") + + original_height, original_width = image.shape[:2] + + if interpolation is None: + # Default interpolation: Lanczos for downscaling, Cubic for upscaling/same + if (target_width * target_height) < (original_width * original_height): + interpolation = cv2.INTER_LANCZOS4 + else: + interpolation = cv2.INTER_CUBIC + + return cv2.resize(image, (target_width, target_height), interpolation=interpolation) + +# --- Image Saving --- + +def save_image( + image_path: Union[str, Path], + image_data: np.ndarray, + output_format: Optional[str] = None, # e.g. "png", "jpg", "exr" + output_dtype_target: Optional[np.dtype] = None, # e.g. np.uint8, np.uint16, np.float16 + params: Optional[List[int]] = None, + convert_to_bgr_before_save: bool = True # True for most formats except EXR +) -> bool: + """ + Saves image data to a file. Handles data type and color space conversions. + + Args: + image_path: Path to save the image. + image_data: NumPy array of the image. + output_format: Desired output format (e.g., 'png', 'jpg'). If None, derived from extension. + output_dtype_target: Target NumPy dtype for saving (e.g., np.uint8, np.uint16). + If None, tries to use image_data.dtype or a sensible default. + params: OpenCV imwrite parameters (e.g., [cv2.IMWRITE_JPEG_QUALITY, 90]). + convert_to_bgr_before_save: If True and image is 3-channel, converts RGB to BGR. + Set to False for formats like EXR that expect RGB. + + Returns: + True if saving was successful, False otherwise. + """ + if image_data is None: + return False + + img_to_save = image_data.copy() + path_obj = Path(image_path) + path_obj.parent.mkdir(parents=True, exist_ok=True) + + # 1. Data Type Conversion + if output_dtype_target is not None: + if output_dtype_target == np.uint8 and img_to_save.dtype != np.uint8: + if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0 * 255.0).astype(np.uint8) + elif img_to_save.dtype in [np.float16, np.float32, np.float64]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 255.0).astype(np.uint8) + else: img_to_save = img_to_save.astype(np.uint8) + elif output_dtype_target == np.uint16 and img_to_save.dtype != np.uint16: + if img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0 * 65535.0).astype(np.uint16) # More accurate + elif img_to_save.dtype in [np.float16, np.float32, np.float64]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 65535.0).astype(np.uint16) + else: img_to_save = img_to_save.astype(np.uint16) + elif output_dtype_target == np.float16 and img_to_save.dtype != np.float16: + if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0).astype(np.float16) + elif img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0).astype(np.float16) + elif img_to_save.dtype in [np.float32, np.float64]: img_to_save = img_to_save.astype(np.float16) + # else: cannot convert to float16 easily + elif output_dtype_target == np.float32 and img_to_save.dtype != np.float32: + if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0) + elif img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0) + elif img_to_save.dtype == np.float16: img_to_save = img_to_save.astype(np.float32) + + + # 2. Color Space Conversion (RGB -> BGR) + # Typically, OpenCV expects BGR for formats like PNG, JPG. EXR usually expects RGB. + # The `convert_to_bgr_before_save` flag controls this. + # If output_format is exr, this should generally be False. + current_format = output_format if output_format else path_obj.suffix.lower().lstrip('.') + + if convert_to_bgr_before_save and current_format != 'exr': + if len(img_to_save.shape) == 3 and img_to_save.shape[2] == 3: + img_to_save = convert_rgb_to_bgr(img_to_save) + # BGRA is handled by OpenCV imwrite for PNGs, no explicit conversion needed if saving as RGBA. + # If it's 4-channel and not PNG/TIFF with alpha, it might need stripping or specific handling. + # For simplicity, this function assumes 3-channel RGB input if BGR conversion is active. + + # 3. Save Image + try: + if params: + cv2.imwrite(str(path_obj), img_to_save, params) + else: + cv2.imwrite(str(path_obj), img_to_save) + return True + except Exception: # as e: + # print(f"Error saving image {path_obj}: {e}") # Optional: for debugging utils + return False \ No newline at end of file diff --git a/processing_engine.py b/processing_engine.py index f4d0812..779f6f8 100644 --- a/processing_engine.py +++ b/processing_engine.py @@ -5,12 +5,8 @@ import math import shutil import tempfile import logging -import json -import re -import time from pathlib import Path from typing import List, Dict, Tuple, Optional, Set -from collections import defaultdict # Attempt to import image processing libraries try: @@ -23,21 +19,13 @@ except ImportError: cv2 = None np = None -# Attempt to import OpenEXR - Check if needed for advanced EXR flags/types -try: - import OpenEXR - import Imath - _HAS_OPENEXR = True -except ImportError: - _HAS_OPENEXR = False - # Log this information - basic EXR might still work via OpenCV - logging.debug("Optional 'OpenEXR' python package not found. EXR saving relies on OpenCV's built-in support.") try: from configuration import Configuration, ConfigurationError from rule_structure import SourceRule, AssetRule, FileRule - from utils.path_utils import generate_path_from_pattern + from utils.path_utils import generate_path_from_pattern, sanitize_filename + from utils import image_processing_utils as ipu # Added import except ImportError: print("ERROR: Cannot import Configuration or rule_structure classes.") print("Ensure configuration.py and rule_structure.py are in the same directory or Python path.") @@ -49,6 +37,20 @@ except ImportError: # Use logger defined in main.py (or configure one here if run standalone) + +from processing.pipeline.orchestrator import PipelineOrchestrator +# from processing.pipeline.asset_context import AssetProcessingContext # AssetProcessingContext is used by the orchestrator +from processing.pipeline.stages.supplier_determination import SupplierDeterminationStage +from processing.pipeline.stages.asset_skip_logic import AssetSkipLogicStage +from processing.pipeline.stages.metadata_initialization import MetadataInitializationStage +from processing.pipeline.stages.file_rule_filter import FileRuleFilterStage +from processing.pipeline.stages.gloss_to_rough_conversion import GlossToRoughConversionStage +from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage +from processing.pipeline.stages.normal_map_green_channel import NormalMapGreenChannelStage +from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage +from processing.pipeline.stages.map_merging import MapMergingStage +from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage +from processing.pipeline.stages.output_organization import OutputOrganizationStage log = logging.getLogger(__name__) # Basic config if logger hasn't been set up elsewhere (e.g., during testing) if not log.hasHandlers(): @@ -60,183 +62,7 @@ class ProcessingEngineError(Exception): """Custom exception for errors during processing engine operations.""" pass -# --- Helper Functions (Moved from AssetProcessor or kept static) --- - -def _is_power_of_two(n: int) -> bool: - """Checks if a number is a power of two.""" - return (n > 0) and (n & (n - 1) == 0) - -def get_nearest_pot(value: int) -> int: - """Finds the nearest power of two to the given value.""" - if value <= 0: - return 1 # Or raise error, POT must be positive - if _is_power_of_two(value): - return value - - # Calculate the powers of two below and above the value - lower_pot = 1 << (value.bit_length() - 1) - upper_pot = 1 << value.bit_length() - - # Determine which power of two is closer - if (value - lower_pot) < (upper_pot - value): - return lower_pot - else: - return upper_pot - -def calculate_target_dimensions(orig_w, orig_h, target_max_dim) -> tuple[int, int]: - """ - Calculates target dimensions by first scaling to fit target_max_dim - while maintaining aspect ratio, then finding the nearest power-of-two - value for each resulting dimension (Stretch/Squash to POT). - """ - if orig_w <= 0 or orig_h <= 0: - # Fallback to target_max_dim if original dimensions are invalid - pot_dim = get_nearest_pot(target_max_dim) - log.warning(f"Invalid original dimensions ({orig_w}x{orig_h}). Falling back to nearest POT of target_max_dim: {pot_dim}x{pot_dim}") - return (pot_dim, pot_dim) - - # Step 1: Calculate intermediate dimensions maintaining aspect ratio - ratio = orig_w / orig_h - if ratio > 1: # Width is dominant - scaled_w = target_max_dim - scaled_h = max(1, round(scaled_w / ratio)) - else: # Height is dominant or square - scaled_h = target_max_dim - scaled_w = max(1, round(scaled_h * ratio)) - - # Step 2: Find the nearest power of two for each scaled dimension - pot_w = get_nearest_pot(scaled_w) - pot_h = get_nearest_pot(scaled_h) - - log.debug(f"POT Calc: Orig=({orig_w}x{orig_h}), MaxDim={target_max_dim} -> Scaled=({scaled_w}x{scaled_h}) -> POT=({pot_w}x{pot_h})") - - return int(pot_w), int(pot_h) - -def _calculate_image_stats(image_data: np.ndarray) -> dict | None: - """ - Calculates min, max, mean for a given numpy image array. - Handles grayscale and multi-channel images. Converts to float64 for calculation. - """ - if image_data is None: - log.warning("Attempted to calculate stats on None image data.") - return None - if np is None: - log.error("Numpy not available for stats calculation.") - return None - try: - # Use float64 for calculations to avoid potential overflow/precision issues - data_float = image_data.astype(np.float64) - - # Normalize data_float based on original dtype before calculating stats - if image_data.dtype == np.uint16: - log.debug("Stats calculation: Normalizing uint16 data to 0-1 range.") - data_float /= 65535.0 - elif image_data.dtype == np.uint8: - log.debug("Stats calculation: Normalizing uint8 data to 0-1 range.") - data_float /= 255.0 - # Assuming float inputs are already in 0-1 range or similar - - log.debug(f"Stats calculation: data_float dtype: {data_float.dtype}, shape: {data_float.shape}") - # Log a few sample values to check range after normalization - if data_float.size > 0: - sample_values = data_float.flatten()[:10] # Get first 10 values - log.debug(f"Stats calculation: Sample values (first 10) after normalization: {sample_values.tolist()}") - - - if len(data_float.shape) == 2: # Grayscale (H, W) - min_val = float(np.min(data_float)) - max_val = float(np.max(data_float)) - mean_val = float(np.mean(data_float)) - stats = {"min": min_val, "max": max_val, "mean": mean_val} - log.debug(f"Calculated Grayscale Stats: Min={min_val:.4f}, Max={max_val:.4f}, Mean={mean_val:.4f}") - elif len(data_float.shape) == 3: # Color (H, W, C) - channels = data_float.shape[2] - min_val = [float(v) for v in np.min(data_float, axis=(0, 1))] - max_val = [float(v) for v in np.max(data_float, axis=(0, 1))] - mean_val = [float(v) for v in np.mean(data_float, axis=(0, 1))] - # Assume data is RGB order after potential conversion in _load_and_transform_source - stats = {"min": min_val, "max": max_val, "mean": mean_val} - log.debug(f"Calculated {channels}-Channel Stats (RGB order): Min={min_val}, Max={max_val}, Mean={mean_val}") - else: - log.warning(f"Cannot calculate stats for image with unsupported shape {data_float.shape}") - return None - return stats - except Exception as e: - log.error(f"Error calculating image stats: {e}", exc_info=True) # Log exception info - return {"error": str(e)} - -def _sanitize_filename(name: str) -> str: - """Removes or replaces characters invalid for filenames/directory names.""" - if not isinstance(name, str): name = str(name) - name = re.sub(r'[^\w.\-]+', '_', name) # Allow alphanumeric, underscore, hyphen, dot - name = re.sub(r'_+', '_', name) - name = name.strip('_') - if not name: name = "invalid_name" - return name - -def _normalize_aspect_ratio_change(original_width, original_height, resized_width, resized_height, decimals=2): - """ - Calculates the aspect ratio change string (e.g., "EVEN", "X133"). - Returns the string representation. - """ - if original_width <= 0 or original_height <= 0: - log.warning("Cannot calculate aspect ratio change with zero original dimensions.") - return "InvalidInput" - - # Avoid division by zero if resize resulted in zero dimensions (shouldn't happen with checks) - if resized_width <= 0 or resized_height <= 0: - log.warning("Cannot calculate aspect ratio change with zero resized dimensions.") - return "InvalidResize" - - # Original logic from user feedback - width_change_percentage = ((resized_width - original_width) / original_width) * 100 - height_change_percentage = ((resized_height - original_height) / original_height) * 100 - - normalized_width_change = width_change_percentage / 100 - normalized_height_change = height_change_percentage / 100 - - normalized_width_change = min(max(normalized_width_change + 1, 0), 2) - normalized_height_change = min(max(normalized_height_change + 1, 0), 2) - - # Handle potential zero division if one dimension change is exactly -100% (normalized to 0) - # If both are 0, aspect ratio is maintained. If one is 0, the other dominates. - if normalized_width_change == 0 and normalized_height_change == 0: - closest_value_to_one = 1.0 # Avoid division by zero, effectively scale_factor = 1 - elif normalized_width_change == 0: - closest_value_to_one = abs(normalized_height_change) - elif normalized_height_change == 0: - closest_value_to_one = abs(normalized_width_change) - else: - closest_value_to_one = min(abs(normalized_width_change), abs(normalized_height_change)) - - # Add a small epsilon to avoid division by zero if closest_value_to_one is extremely close to 0 - epsilon = 1e-9 - scale_factor = 1 / (closest_value_to_one + epsilon) if abs(closest_value_to_one) < epsilon else 1 / closest_value_to_one - - scaled_normalized_width_change = scale_factor * normalized_width_change - scaled_normalized_height_change = scale_factor * normalized_height_change - - output_width = round(scaled_normalized_width_change, decimals) - output_height = round(scaled_normalized_height_change, decimals) - - # Convert to int if exactly 1.0 after rounding - if abs(output_width - 1.0) < epsilon: output_width = 1 - if abs(output_height - 1.0) < epsilon: output_height = 1 - - # Determine output string - if original_width == original_height or abs(output_width - output_height) < epsilon: - output = "EVEN" - elif output_width != 1 and output_height == 1: - output = f"X{str(output_width).replace('.', '')}" - elif output_height != 1 and output_width == 1: - output = f"Y{str(output_height).replace('.', '')}" - else: - # Both changed relative to each other - output = f"X{str(output_width).replace('.', '')}Y{str(output_height).replace('.', '')}" - - log.debug(f"Aspect ratio change calculated: Orig=({original_width}x{original_height}), Resized=({resized_width}x{resized_height}) -> String='{output}'") - return output - +# Helper functions moved to processing.utils.image_processing_utils # --- Processing Engine Class --- class ProcessingEngine: @@ -262,6 +88,27 @@ class ProcessingEngine: self.temp_dir: Path | None = None # Path to the temporary working directory for a process run self.loaded_data_cache: dict = {} # Cache for loaded/resized data within a single process call + # --- Pipeline Orchestrator Setup --- + self.stages = [ + SupplierDeterminationStage(), + AssetSkipLogicStage(), + MetadataInitializationStage(), + FileRuleFilterStage(), + GlossToRoughConversionStage(), + AlphaExtractionToMaskStage(), + NormalMapGreenChannelStage(), + IndividualMapProcessingStage(), + MapMergingStage(), + MetadataFinalizationAndSaveStage(), + OutputOrganizationStage(), + ] + try: + self.pipeline_orchestrator = PipelineOrchestrator(config_obj=self.config_obj, stages=self.stages) + log.info("PipelineOrchestrator initialized successfully in ProcessingEngine.") + except Exception as e: + log.error(f"Failed to initialize PipelineOrchestrator in ProcessingEngine: {e}", exc_info=True) + self.pipeline_orchestrator = None # Ensure it's None if init fails + log.debug("ProcessingEngine initialized.") @@ -312,111 +159,21 @@ class ProcessingEngine: try: self.temp_dir = Path(tempfile.mkdtemp(prefix=self.config_obj.temp_dir_prefix)) log.debug(f"Created temporary workspace for engine: {self.temp_dir}") - # --- Loop through each asset defined in the SourceRule --- - for asset_rule in source_rule.assets: - asset_name = asset_rule.asset_name - log.info(f"--- Processing asset: '{asset_name}' ---") - asset_processed = False - asset_skipped = False - asset_failed = False - temp_metadata_path_asset = None # Track metadata file for this asset - - try: - # --- Determine Effective Supplier (Override > Identifier > Fallback) --- - effective_supplier = source_rule.supplier_override # Prioritize override - if effective_supplier is None: - effective_supplier = source_rule.supplier_identifier # Fallback to original identifier - if not effective_supplier: # Check if still None or empty - log.warning(f"Asset '{asset_name}': Supplier identifier missing from rule and override. Using fallback 'UnknownSupplier'.") - effective_supplier = "UnknownSupplier" # Final fallback - - log.debug(f"Asset '{asset_name}': Effective supplier determined as '{effective_supplier}' (Override: '{source_rule.supplier_override}', Original: '{source_rule.supplier_identifier}')") - - # --- Skip Check (using effective supplier) --- - supplier_sanitized = _sanitize_filename(effective_supplier) - asset_name_sanitized = _sanitize_filename(asset_name) - final_dir = output_base_path / supplier_sanitized / asset_name_sanitized - metadata_file_path = final_dir / self.config_obj.metadata_filename # Metadata filename still comes from config - - log.debug(f"Checking for existing output/overwrite at: {final_dir} (using effective supplier: '{effective_supplier}')") - - if not overwrite and final_dir.exists(): - log.info(f"Output directory found for asset '{asset_name_sanitized}' (Supplier: '{effective_supplier}') and overwrite is False. Skipping.") - overall_status["skipped"].append(asset_name) - asset_skipped = True - continue # Skip to the next asset - - elif overwrite and final_dir.exists(): - log.warning(f"Output directory exists for '{asset_name_sanitized}' (Supplier: '{effective_supplier}') and overwrite is True. Removing existing directory: {final_dir}") - try: - shutil.rmtree(final_dir) - except Exception as rm_err: - raise ProcessingEngineError(f"Failed to remove existing output directory {final_dir} during overwrite: {rm_err}") from rm_err - - # --- Prepare Asset Metadata --- - # Start with common metadata from the rule, add asset name - current_asset_metadata = asset_rule.common_metadata.copy() - current_asset_metadata["asset_name"] = asset_name - # Use the EFFECTIVE supplier here - current_asset_metadata["supplier_name"] = effective_supplier - # Add other fields that will be populated - current_asset_metadata["maps_present"] = [] - current_asset_metadata["merged_maps"] = [] - current_asset_metadata["shader_features"] = [] - current_asset_metadata["source_files_in_extra"] = [] - current_asset_metadata["image_stats_1k"] = {} - current_asset_metadata["map_details"] = {} - current_asset_metadata["aspect_ratio_change_string"] = "N/A" - current_asset_metadata["merged_map_channel_stats"] = {} - - # --- Process Individual Maps --- - processed_maps_details_asset, image_stats_asset, aspect_ratio_change_string_asset = self._process_individual_maps( - asset_rule=asset_rule, - workspace_path=workspace_path, # Use the workspace path received by process() (contains prepared files) - current_asset_metadata=current_asset_metadata # Pass mutable dict - ) - # Update metadata with results (stats and aspect ratio are updated directly in current_asset_metadata by the method) - # map_details are also updated directly in current_asset_metadata - - # --- Merge Maps --- - merged_maps_details_asset = self._merge_maps( - asset_rule=asset_rule, - workspace_path=workspace_path, - processed_maps_details_asset=processed_maps_details_asset, # Needed to find resolutions - current_asset_metadata=current_asset_metadata # Pass mutable dict for stats - ) - - # --- Generate Metadata --- - # Pass effective_supplier instead of the whole source_rule - temp_metadata_path_asset = self._generate_metadata_file( - effective_supplier=effective_supplier, # Pass the determined supplier - asset_rule=asset_rule, - current_asset_metadata=current_asset_metadata, # Pass the populated dict - processed_maps_details_asset=processed_maps_details_asset, - merged_maps_details_asset=merged_maps_details_asset - ) - - # --- Organize Output --- - # Pass effective_supplier instead of source_rule.supplier_identifier - self._organize_output_files( - asset_rule=asset_rule, - workspace_path=workspace_path, # Pass the original workspace path - supplier_identifier=effective_supplier, # Pass the determined supplier - output_base_path=output_base_path, # Pass output path - processed_maps_details_asset=processed_maps_details_asset, - merged_maps_details_asset=merged_maps_details_asset, - temp_metadata_info=temp_metadata_path_asset - ) - - log.info(f"--- Asset '{asset_name}' processed successfully (Supplier: {effective_supplier}). ---") - overall_status["processed"].append(asset_name) - asset_processed = True - - except Exception as asset_err: - log.error(f"--- Failed processing asset '{asset_name}': {asset_err} ---", exc_info=True) - overall_status["failed"].append(asset_name) - asset_failed = True - # Continue to the next asset + # --- NEW PIPELINE ORCHESTRATOR LOGIC --- + if hasattr(self, 'pipeline_orchestrator') and self.pipeline_orchestrator: + log.info("Processing source rule using PipelineOrchestrator.") + overall_status = self.pipeline_orchestrator.process_source_rule( + source_rule=source_rule, + workspace_path=workspace_path, # This is the path to the source files (e.g. extracted archive) + output_base_path=output_base_path, + overwrite=overwrite, + incrementing_value=self.current_incrementing_value, + sha5_value=self.current_sha5_value + ) + else: + log.error(f"PipelineOrchestrator not available for SourceRule '{source_rule.input_path}'. Marking all {len(source_rule.assets)} assets as failed.") + for asset_rule in source_rule.assets: + overall_status["failed"].append(asset_rule.asset_name) log.info(f"ProcessingEngine finished. Summary: {overall_status}") return overall_status @@ -446,1243 +203,3 @@ class ProcessingEngine: log.error(f"Failed to remove engine temporary workspace {self.temp_dir}: {e}", exc_info=True) self.loaded_data_cache = {} # Clear cache after cleanup - def _get_ftd_key_from_override(self, override_string: str) -> Optional[str]: - """ - Attempts to derive a base FILE_TYPE_DEFINITIONS key from an override string - which might have a variant suffix (e.g., "MAP_COL-1" -> "MAP_COL"). - """ - if not override_string: # Handle empty or None override_string - return None - if override_string in self.config_obj.FILE_TYPE_DEFINITIONS: - return override_string - - # Regex to remove trailing suffixes like -, -, _ - # e.g., "MAP_COL-1" -> "MAP_COL", "MAP_ROUGH_variantA" -> "MAP_ROUGH" - base_candidate = re.sub(r"(-[\w\d]+|_[\w\d]+)$", "", override_string) - if base_candidate in self.config_obj.FILE_TYPE_DEFINITIONS: - return base_candidate - - return None - - def _get_map_variant_suffix(self, map_identifier: str, base_ftd_key: str) -> str: - """ - Extracts a variant suffix (e.g., "-1", "_variantA") from a map_identifier - if the base_ftd_key is a prefix of it and the suffix indicates a variant. - Example: map_identifier="MAP_COL-1", base_ftd_key="MAP_COL" -> returns "-1" - map_identifier="MAP_COL_variant", base_ftd_key="MAP_COL" -> returns "_variant" - map_identifier="MAP_COL", base_ftd_key="MAP_COL" -> returns "" - """ - if not base_ftd_key: # Ensure base_ftd_key is not empty - return "" - if map_identifier.startswith(base_ftd_key): - suffix = map_identifier[len(base_ftd_key):] - # Ensure suffix looks like a variant (starts with - or _) or is empty - if not suffix or suffix.startswith(('-', '_')): - return suffix - return "" # Default to no suffix - - def _get_base_map_type(self, map_identifier: str) -> str: - """ - Gets the base standard type (e.g., "COL") from a map identifier (e.g., "MAP_COL-1", "COL-1"), - or returns the identifier itself if it's a merged type (e.g., "NRMRGH") or not resolvable to a standard type. - """ - if not map_identifier: # Handle empty or None map_identifier - return "" - - # Try to get FTD key from "MAP_COL-1" -> "MAP_COL" or "MAP_COL" -> "MAP_COL" - ftd_key = self._get_ftd_key_from_override(map_identifier) - if ftd_key: - definition = self.config_obj.FILE_TYPE_DEFINITIONS.get(ftd_key) - if definition and definition.get("standard_type"): # Check if standard_type exists and is not empty - return definition["standard_type"] # Returns "COL" - - # If map_identifier was like "COL-1" or "ROUGH" (a standard_type itself, possibly with suffix) - # Strip suffix and check if the base is a known standard_type - # Regex to get the initial part of the string composed of uppercase letters and underscores - base_candidate_match = re.match(r"([A-Z_]+)", map_identifier.upper()) - if base_candidate_match: - potential_std_type = base_candidate_match.group(1) - for _, definition_val in self.config_obj.FILE_TYPE_DEFINITIONS.items(): - if definition_val.get("standard_type") == potential_std_type: - return potential_std_type # Found "COL" - - # If it's a merged map type (e.g., "NRMRGH"), it won't be in FTDs as a key or standard_type. - # Check if it's one of the output_map_types from MAP_MERGE_RULES. - for rule in self.config_obj.map_merge_rules: - if rule.get("output_map_type") == map_identifier: - return map_identifier # Return "NRMRGH" as is - - # Fallback: return the original identifier, uppercased. - log.debug(f"_get_base_map_type: Could not determine standard base for '{map_identifier}'. Returning as is (uppercase).") - return map_identifier.upper() - - def _load_and_transform_source(self, source_path_abs: Path, map_type: str, target_resolution_key: str, is_gloss_source: bool) -> Tuple[Optional[np.ndarray], Optional[np.dtype]]: - """ - Loads a source image file, performs initial prep (BGR->RGB, Gloss->Rough if applicable), - resizes it to the target resolution, and caches the result. - Uses static configuration from self.config_obj. - - Args: - source_path_abs: Absolute path to the source file in the workspace. - map_type: The item_type_override (e.g., "MAP_NRM", "MAP_ROUGH-1"). - target_resolution_key: The key for the target resolution (e.g., "4K"). - is_gloss_source: Boolean indicating if this source should be treated as gloss for inversion (if map_type is ROUGH). - - Returns: - Tuple containing: - - Resized NumPy array (float32 for gloss-inverted, original type otherwise) or None if loading/processing fails. - - Original source NumPy dtype or None if loading fails. - """ - if cv2 is None or np is None: - log.error("OpenCV or NumPy not available for image loading.") - return None, None - - cache_key = (source_path_abs, target_resolution_key) # Use absolute path for cache key - if cache_key in self.loaded_data_cache: - log.debug(f"CACHE HIT: Returning cached data for {source_path_abs.name} at {target_resolution_key}") - return self.loaded_data_cache[cache_key] # Return tuple (image_data, source_dtype) - - log.debug(f"CACHE MISS: Loading and transforming {source_path_abs.name} for {target_resolution_key} (map_type: {map_type})") - img_prepared = None - source_dtype = None - - try: - # --- 1. Load Source Image --- - # Determine read flag based on is_grayscale from FTD - ftd_key = self._get_ftd_key_from_override(map_type) # map_type is item_type_override - is_map_grayscale = False - standard_type_for_checks = None # For MASK check - - if ftd_key: - ftd_definition = self.config_obj.FILE_TYPE_DEFINITIONS.get(ftd_key, {}) - is_map_grayscale = ftd_definition.get("is_grayscale", False) - standard_type_for_checks = ftd_definition.get("standard_type") - log.debug(f"For map_type '{map_type}' (FTD key '{ftd_key}'), is_grayscale: {is_map_grayscale}, standard_type: {standard_type_for_checks}") - else: - log.warning(f"Could not determine FTD key for map_type '{map_type}' to check is_grayscale. Assuming not grayscale.") - - read_flag = cv2.IMREAD_GRAYSCALE if is_map_grayscale else cv2.IMREAD_UNCHANGED - - # Special case for MASK: always load unchanged first to check alpha - if standard_type_for_checks == 'MASK': - log.debug(f"Map type '{map_type}' (standard_type 'MASK') will be loaded with IMREAD_UNCHANGED for alpha check.") - read_flag = cv2.IMREAD_UNCHANGED - - log.debug(f"Loading source {source_path_abs.name} with flag: {'GRAYSCALE' if read_flag == cv2.IMREAD_GRAYSCALE else 'UNCHANGED'}") - img_loaded = cv2.imread(str(source_path_abs), read_flag) - if img_loaded is None: - raise ProcessingEngineError(f"Failed to load image file: {source_path_abs.name} with flag {read_flag}") - source_dtype = img_loaded.dtype - log.debug(f"Loaded source {source_path_abs.name}, dtype: {source_dtype}, shape: {img_loaded.shape}") - - # --- 2. Initial Preparation (BGR->RGB, Gloss Inversion, MASK handling) --- - img_prepared = img_loaded # Start with loaded image - - # MASK Handling (Extract alpha or convert) - Do this BEFORE general color conversions - if standard_type_for_checks == 'MASK': - log.debug(f"Processing as MASK type for {source_path_abs.name}.") - shape = img_prepared.shape - if len(shape) == 3 and shape[2] == 4: # BGRA or RGBA (OpenCV loads BGRA) - log.debug("MASK processing: Extracting alpha channel (4-channel source).") - img_prepared = img_prepared[:, :, 3] # Extract alpha - elif len(shape) == 3 and shape[2] == 3: # BGR or RGB - log.debug("MASK processing: Converting 3-channel source to Grayscale.") - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_BGR2GRAY if read_flag != cv2.IMREAD_GRAYSCALE else cv2.COLOR_RGB2GRAY) # If loaded UNCHANGED and 3-channel, assume BGR - elif len(shape) == 2: - log.debug("MASK processing: Source is already grayscale.") - else: - log.warning(f"MASK processing: Unexpected source shape {shape}. Cannot reliably extract mask.") - img_prepared = None # Cannot process - else: - # BGR -> RGB conversion (only for 3/4-channel images not loaded as grayscale) - if len(img_prepared.shape) == 3 and img_prepared.shape[2] >= 3 and read_flag != cv2.IMREAD_GRAYSCALE: - log.debug(f"Converting loaded image from BGR to RGB for {source_path_abs.name}.") - if img_prepared.shape[2] == 4: # BGRA -> RGBA (then to RGB) - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_BGRA2RGB) # OpenCV BGRA to RGB - else: # BGR -> RGB - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_BGR2RGB) - elif len(img_prepared.shape) == 2: - log.debug(f"Image {source_path_abs.name} is grayscale or loaded as such, no BGR->RGB conversion needed.") - - if img_prepared is None: raise ProcessingEngineError("Image data is None after MASK/Color prep.") - - # Gloss -> Roughness Inversion (if map_type is ROUGH and is_gloss_source is True) - # This is triggered by the new filename logic in _process_individual_maps - if standard_type_for_checks == 'ROUGH' and is_gloss_source: - log.info(f"Performing filename-triggered Gloss->Roughness inversion for {source_path_abs.name} (map_type: {map_type})") - if len(img_prepared.shape) == 3: - log.debug("Gloss Inversion: Converting 3-channel image to grayscale before inversion.") - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_RGB2GRAY) # Should be RGB at this point if 3-channel - - stats_before = _calculate_image_stats(img_prepared) - log.debug(f"Gloss Inversion: Image stats BEFORE inversion: {stats_before}") - - if source_dtype == np.uint16: - img_float = 1.0 - (img_prepared.astype(np.float32) / 65535.0) - elif source_dtype == np.uint8: - img_float = 1.0 - (img_prepared.astype(np.float32) / 255.0) - else: # Assuming float input is already 0-1 range - img_float = 1.0 - img_prepared.astype(np.float32) - - img_prepared = np.clip(img_float, 0.0, 1.0) # Result is float32 - - stats_after = _calculate_image_stats(img_prepared) - log.debug(f"Gloss Inversion: Image stats AFTER inversion (float32): {stats_after}") - log.debug(f"Inverted gloss map stored as float32 for ROUGH, original dtype: {source_dtype}") - - # Ensure data is float32/uint8/uint16 for resizing compatibility - if isinstance(img_prepared, np.ndarray) and img_prepared.dtype not in [np.uint8, np.uint16, np.float32, np.float16]: - log.warning(f"Converting unexpected dtype {img_prepared.dtype} to float32 before resizing for {source_path_abs.name}.") - img_prepared = img_prepared.astype(np.float32) - - # --- 3. Resize --- - if img_prepared is None: raise ProcessingEngineError(f"Image data is None after initial prep for {source_path_abs.name}.") - orig_h, orig_w = img_prepared.shape[:2] - # Get resolutions from static config - target_dim_px = self.config_obj.image_resolutions.get(target_resolution_key) - if not target_dim_px: - raise ProcessingEngineError(f"Target resolution key '{target_resolution_key}' not found in config.") - - # Avoid upscaling check (using static config) - max_original_dimension = max(orig_w, orig_h) - if target_dim_px > max_original_dimension: - log.warning(f"Target dimension {target_dim_px}px is larger than original {max_original_dimension}px for {source_path_abs.name}. Skipping resize for {target_resolution_key}.") - # Store None in cache for this specific resolution to avoid retrying - self.loaded_data_cache[cache_key] = (None, source_dtype) - return None, source_dtype # Indicate resize was skipped - - if orig_w <= 0 or orig_h <= 0: - raise ProcessingEngineError(f"Invalid original dimensions ({orig_w}x{orig_h}) for {source_path_abs.name}.") - - target_w, target_h = calculate_target_dimensions(orig_w, orig_h, target_dim_px) - interpolation = cv2.INTER_LANCZOS4 if (target_w * target_h) < (orig_w * orig_h) else cv2.INTER_CUBIC - log.debug(f"Resizing {source_path_abs.name} from ({orig_w}x{orig_h}) to ({target_w}x{target_h}) for {target_resolution_key}") - img_resized = cv2.resize(img_prepared, (target_w, target_h), interpolation=interpolation) - - # --- 4. Cache and Return --- - # Keep resized dtype unless it was gloss-inverted (which is float32) - final_data_to_cache = img_resized - # Ensure gloss-inverted maps are float32 - if standard_type_for_checks == 'ROUGH' and is_gloss_source and final_data_to_cache.dtype != np.float32: - log.debug(f"Ensuring gloss-inverted ROUGH map ({map_type}) is float32.") - final_data_to_cache = final_data_to_cache.astype(np.float32) - - log.debug(f"CACHING result for {cache_key}. Shape: {final_data_to_cache.shape}, Dtype: {final_data_to_cache.dtype}") - self.loaded_data_cache[cache_key] = (final_data_to_cache, source_dtype) - return final_data_to_cache, source_dtype - - except Exception as e: - log.error(f"Error in _load_and_transform_source for {source_path_abs.name} at {target_resolution_key}: {e}", exc_info=True) - # Cache None to prevent retrying on error for this specific key - self.loaded_data_cache[cache_key] = (None, None) - return None, None - - - def _save_image(self, image_data: np.ndarray, supplier_name: str, asset_name: str, current_map_identifier: str, resolution_key: str, source_info: dict, output_bit_depth_rule: str) -> Optional[Dict]: - """ - Handles saving an image NumPy array to a temporary file within the engine's temp_dir using token-based path generation. - Uses static configuration from self.config_obj for formats, quality, etc. - The 'maptype' token for the filename is derived based on standard_type and variants. - - Args: - image_data: NumPy array containing the image data to save. - supplier_name: The effective supplier name for the asset. - asset_name: The name of the asset. - current_map_identifier: The map type being saved (e.g., "MAP_COL", "MAP_ROUGH-1", "NRMRGH"). This is item_type_override or merged map type. - resolution_key: The resolution key (e.g., "4K"). - source_info: Dictionary containing details about the source(s). - output_bit_depth_rule: Rule for determining output bit depth. - - Returns: - A dictionary containing details of the saved file or None if saving failed. - """ - if cv2 is None or np is None: - log.error("OpenCV or NumPy not available for image saving.") - return None - if image_data is None: - log.error(f"Cannot save image for {current_map_identifier} ({resolution_key}): image_data is None.") - return None - if not self.temp_dir or not self.temp_dir.exists(): - log.error(f"Cannot save image for {current_map_identifier} ({resolution_key}): Engine temp_dir is invalid.") - return None - - try: - h, w = image_data.shape[:2] - current_dtype = image_data.dtype - log.debug(f"Saving {current_map_identifier} ({resolution_key}) for asset '{asset_name}'. Input shape: {image_data.shape}, dtype: {current_dtype}") - - config = self.config_obj - primary_fmt_16, fallback_fmt_16 = config.get_16bit_output_formats() - fmt_8bit_config = config.get_8bit_output_format() - threshold = config.resolution_threshold_for_jpg - force_lossless_map_types = config.force_lossless_map_types - jpg_quality = config.jpg_quality - png_compression_level = config._core_settings.get('PNG_COMPRESSION_LEVEL', 6) - image_resolutions = config.image_resolutions - output_directory_pattern = config.output_directory_pattern - output_filename_pattern = config.output_filename_pattern - - # --- 1. Determine Output Bit Depth --- - source_bpc = source_info.get('source_bit_depth', 8) - max_input_bpc = source_info.get('max_input_bit_depth', source_bpc) - output_dtype_target, output_bit_depth = np.uint8, 8 - - if output_bit_depth_rule == 'force_8bit': output_dtype_target, output_bit_depth = np.uint8, 8 - elif output_bit_depth_rule == 'force_16bit': output_dtype_target, output_bit_depth = np.uint16, 16 - elif output_bit_depth_rule == 'respect': - if source_bpc == 16: output_dtype_target, output_bit_depth = np.uint16, 16 - elif output_bit_depth_rule == 'respect_inputs': - if max_input_bpc == 16: output_dtype_target, output_bit_depth = np.uint16, 16 - else: - log.warning(f"Unknown output_bit_depth_rule '{output_bit_depth_rule}'. Defaulting to 8-bit.") - output_dtype_target, output_bit_depth = np.uint8, 8 - log.debug(f"Target output bit depth: {output_bit_depth}-bit for {current_map_identifier}") - - # --- 2. Determine Output Format --- - output_format, output_ext, save_params, needs_float16 = "", "", [], False - # Use the (potentially suffixed) standard_type for lossless check - base_standard_type_for_lossless_check = self._get_base_map_type(current_map_identifier) # "COL", "NRM", "DISP-Detail" -> "DISP" - - # Check if the pure standard type (without suffix) is in force_lossless_map_types - pure_standard_type = self._get_ftd_key_from_override(base_standard_type_for_lossless_check) # Get FTD key if possible - std_type_from_ftd = None - if pure_standard_type and pure_standard_type in self.config_obj.FILE_TYPE_DEFINITIONS: - std_type_from_ftd = self.config_obj.FILE_TYPE_DEFINITIONS[pure_standard_type].get("standard_type") - - # Use std_type_from_ftd if available and non-empty, else base_standard_type_for_lossless_check - check_type_for_lossless = std_type_from_ftd if std_type_from_ftd else base_standard_type_for_lossless_check - - force_lossless = check_type_for_lossless in force_lossless_map_types - original_extension = source_info.get('original_extension', '.png') - involved_extensions = source_info.get('involved_extensions', {original_extension}) - target_dim_px = image_resolutions.get(resolution_key, 0) - - if force_lossless: - log.debug(f"Format forced to lossless for map type '{current_map_identifier}' (checked as '{check_type_for_lossless}').") - if output_bit_depth == 16: - output_format = primary_fmt_16 - if output_format.startswith("exr"): output_ext, needs_float16 = ".exr", True; save_params.extend([cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF]) - else: output_format = fallback_fmt_16 if fallback_fmt_16 == "png" else "png"; output_ext = ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - else: output_format, output_ext = "png", ".png"; save_params = [cv2.IMWRITE_PNG_COMPRESSION, png_compression_level] - elif output_bit_depth == 8 and target_dim_px >= threshold: - output_format = 'jpg'; output_ext = '.jpg'; save_params.extend([cv2.IMWRITE_JPEG_QUALITY, jpg_quality]) - else: - highest_format_str = 'jpg' - if '.exr' in involved_extensions: highest_format_str = 'exr' - elif '.tif' in involved_extensions: highest_format_str = 'tif' - elif '.png' in involved_extensions: highest_format_str = 'png' - - if highest_format_str == 'exr': - if output_bit_depth == 16: output_format, output_ext, needs_float16 = "exr", ".exr", True; save_params.extend([cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF]) - else: output_format, output_ext = "png", ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - elif highest_format_str == 'tif' or highest_format_str == 'png': - if output_bit_depth == 16: - output_format = primary_fmt_16 - if output_format.startswith("exr"): output_ext, needs_float16 = ".exr", True; save_params.extend([cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF]) - else: output_format = "png"; output_ext = ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - else: output_format, output_ext = "png", ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - else: - output_format = fmt_8bit_config; output_ext = f".{output_format}" - if output_format == "png": save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - elif output_format == "jpg": save_params.extend([cv2.IMWRITE_JPEG_QUALITY, jpg_quality]) - - if output_format == "jpg" and output_bit_depth == 16: - log.warning(f"Output format JPG, but target 16-bit. Forcing 8-bit for {current_map_identifier}.") - output_dtype_target, output_bit_depth = np.uint8, 8 - log.debug(f"Determined save format for {current_map_identifier}: {output_format}, ext: {output_ext}, bit_depth: {output_bit_depth}") - - # --- 3. Final Data Type Conversion --- - img_to_save = image_data.copy() - if output_dtype_target == np.uint8 and img_to_save.dtype != np.uint8: - if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0 * 255.0).astype(np.uint8) - elif img_to_save.dtype in [np.float16, np.float32]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 255.0).astype(np.uint8) - else: img_to_save = img_to_save.astype(np.uint8) - elif output_dtype_target == np.uint16 and img_to_save.dtype != np.uint16: - if img_to_save.dtype == np.uint8: img_to_save = img_to_save.astype(np.uint16) * 257 - elif img_to_save.dtype in [np.float16, np.float32]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 65535.0).astype(np.uint16) - else: img_to_save = img_to_save.astype(np.uint16) - if needs_float16 and img_to_save.dtype != np.float16: - if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0).astype(np.float16) - elif img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0).astype(np.float16) - elif img_to_save.dtype == np.float32: img_to_save = img_to_save.astype(np.float16) - else: log.warning(f"Cannot convert {img_to_save.dtype} to float16 for EXR save."); return None - - img_save_final = img_to_save - if len(img_to_save.shape) == 3 and img_to_save.shape[2] == 3 and not output_format.startswith("exr"): - try: img_save_final = cv2.cvtColor(img_to_save, cv2.COLOR_RGB2BGR) - except Exception as cvt_err: log.error(f"RGB->BGR conversion failed for {current_map_identifier}: {cvt_err}. Saving original."); - - filename_map_type_token: str - is_merged_map = any(rule.get("output_map_type") == current_map_identifier for rule in self.config_obj.map_merge_rules) - - if is_merged_map: - filename_map_type_token = current_map_identifier # e.g., "NRMRGH" - else: - base_ftd_key = self._get_ftd_key_from_override(current_map_identifier) # e.g., "MAP_COL" - if base_ftd_key: - definition = self.config_obj.FILE_TYPE_DEFINITIONS.get(base_ftd_key) - if definition and "standard_type" in definition: - standard_type_alias = definition["standard_type"] # e.g., "COL" - if standard_type_alias: # Ensure not empty - variant_suffix = self._get_map_variant_suffix(current_map_identifier, base_ftd_key) # e.g., "-1" or "" - if standard_type_alias in self.config_obj.respect_variant_map_types: - filename_map_type_token = standard_type_alias + variant_suffix # e.g., "COL-1" - else: - filename_map_type_token = standard_type_alias # e.g., "COL" - else: - log.warning(f"Empty standard_type for FTD key '{base_ftd_key}'. Using identifier '{current_map_identifier}' for maptype token.") - filename_map_type_token = current_map_identifier - else: - log.warning(f"No definition or standard_type for FTD key '{base_ftd_key}'. Using identifier '{current_map_identifier}' for maptype token.") - filename_map_type_token = current_map_identifier - else: - log.warning(f"Could not derive FTD key from '{current_map_identifier}'. Using it directly for maptype token.") - filename_map_type_token = current_map_identifier - - log.debug(f"Filename maptype token for '{current_map_identifier}' is '{filename_map_type_token}'") - - # --- 6. Construct Path using Token Pattern & Save --- - token_data = { - "supplier": _sanitize_filename(supplier_name), - "assetname": _sanitize_filename(asset_name), - "maptype": filename_map_type_token, - "resolution": resolution_key, - "width": w, "height": h, - "bitdepth": output_bit_depth, - "ext": output_ext.lstrip('.') - } - if hasattr(self, 'current_incrementing_value') and self.current_incrementing_value is not None: - token_data['incrementingvalue'] = self.current_incrementing_value - if hasattr(self, 'current_sha5_value') and self.current_sha5_value is not None: - token_data['sha5'] = self.current_sha5_value - - try: - relative_dir_path_str = generate_path_from_pattern(output_directory_pattern, token_data) - filename_str = generate_path_from_pattern(output_filename_pattern, token_data) - full_relative_path_str = str(Path(relative_dir_path_str) / filename_str) - except Exception as path_gen_err: - log.error(f"Failed to generate output path for {current_map_identifier} with data {token_data}: {path_gen_err}", exc_info=True) - return None - - output_path_temp = self.temp_dir / full_relative_path_str - log.debug(f"Attempting to save {current_map_identifier} to temporary path: {output_path_temp}") - - try: - output_path_temp.parent.mkdir(parents=True, exist_ok=True) - except Exception as mkdir_err: - log.error(f"Failed to create temporary directory {output_path_temp.parent}: {mkdir_err}", exc_info=True) - return None - - saved_successfully = False - actual_format_saved = output_format - try: - cv2.imwrite(str(output_path_temp), img_save_final, save_params) - saved_successfully = True - log.info(f" > Saved {current_map_identifier} ({resolution_key}, {output_bit_depth}-bit) as {output_format}") - except Exception as save_err: - log.error(f"Save failed ({output_format}) for {current_map_identifier} {resolution_key}: {save_err}") - if output_bit_depth == 16 and output_format.startswith("exr") and fallback_fmt_16 != output_format and fallback_fmt_16 == "png": - log.warning(f"Attempting fallback PNG save for {current_map_identifier} {resolution_key}") - actual_format_saved = "png"; output_ext = ".png" - # Regenerate path with .png extension for fallback - token_data_fallback = token_data.copy() - token_data_fallback["ext"] = "png" - try: - # Regenerate directory and filename separately for fallback - relative_dir_path_str_fb = generate_path_from_pattern(output_directory_pattern, token_data_fallback) - filename_str_fb = generate_path_from_pattern(output_filename_pattern, token_data_fallback) - full_relative_path_str_fb = str(Path(relative_dir_path_str_fb) / filename_str_fb) - output_path_temp = self.temp_dir / full_relative_path_str_fb # Update temp path for fallback - output_path_temp.parent.mkdir(parents=True, exist_ok=True) - except Exception as path_gen_err_fb: - log.error(f"Failed to generate fallback PNG path: {path_gen_err_fb}", exc_info=True) - return None - - save_params_fallback = [cv2.IMWRITE_PNG_COMPRESSION, png_compression_level] - img_fallback = None; target_fallback_dtype = np.uint16 - - if img_to_save.dtype == np.float16: - img_scaled = np.clip(img_to_save.astype(np.float32) * 65535.0, 0, 65535) - img_fallback = img_scaled.astype(target_fallback_dtype) - elif img_to_save.dtype == target_fallback_dtype: img_fallback = img_to_save - else: log.error(f"Cannot convert {img_to_save.dtype} for PNG fallback."); return None - - img_fallback_save_final = img_fallback - is_3_channel_fallback = len(img_fallback.shape) == 3 and img_fallback.shape[2] == 3 - if is_3_channel_fallback: # PNG is non-EXR - log.debug(f"Converting RGB to BGR for fallback PNG save {current_map_identifier} ({resolution_key})") - try: img_fallback_save_final = cv2.cvtColor(img_fallback, cv2.COLOR_RGB2BGR) - except Exception as cvt_err_fb: log.error(f"Failed RGB->BGR conversion for fallback PNG: {cvt_err_fb}. Saving original."); - - try: - cv2.imwrite(str(output_path_temp), img_fallback_save_final, save_params_fallback) - saved_successfully = True - log.info(f" > Saved {current_map_identifier} ({resolution_key}) using fallback PNG") - except Exception as fallback_err: - log.error(f"Fallback PNG save failed for {current_map_identifier} {resolution_key}: {fallback_err}", exc_info=True) - else: - log.error(f"No suitable fallback available or applicable for failed save of {current_map_identifier} ({resolution_key}) as {output_format}.") - - - # --- 6. Return Result --- - if saved_successfully: - # Return the full relative path string generated by the patterns - final_relative_path_str = full_relative_path_str_fb if actual_format_saved == "png" and output_format.startswith("exr") else full_relative_path_str - return { - "path": final_relative_path_str, # Store relative path string - "resolution": resolution_key, - "width": w, "height": h, - "bit_depth": output_bit_depth, - "format": actual_format_saved - } - else: - return None # Indicate save failure - - except Exception as e: - log.error(f"Unexpected error in _save_image for {current_map_identifier} ({resolution_key}): {e}", exc_info=True) - return None - - - def _process_individual_maps(self, asset_rule: AssetRule, workspace_path: Path, current_asset_metadata: Dict) -> Tuple[Dict[str, Dict[str, Dict]], Dict[str, Dict], str]: - """ - Processes, resizes, and saves individual map files for a specific asset - based on the provided AssetRule and static configuration. - - Args: - asset_rule: The AssetRule object containing file rules for this asset. - workspace_path: Path to the directory containing the source files. - current_asset_metadata: Mutable metadata dictionary for the current asset (updated directly). - - Returns: - Tuple containing: - - processed_maps_details_asset: Dict mapping map_type to resolution details. - - image_stats_asset: Dict mapping map_type to calculated image statistics (also added to current_asset_metadata). - - aspect_ratio_change_string_asset: String indicating aspect ratio change (also added to current_asset_metadata). - """ - if not self.temp_dir: raise ProcessingEngineError("Engine workspace (temp_dir) not setup.") - asset_name = asset_rule.asset_name - log.info(f"Processing individual map files for asset '{asset_name}'...") - - # Initialize results specific to this asset - processed_maps_details_asset: Dict[str, Dict[str, Dict]] = defaultdict(dict) - image_stats_asset: Dict[str, Dict] = {} # Local dict for stats - map_details_asset: Dict[str, Dict] = {} # Store details like source bit depth, gloss inversion - aspect_ratio_change_string_asset: str = "N/A" - - # --- Settings retrieval from static config --- - resolutions = self.config_obj.image_resolutions - stats_res_key = self.config_obj.calculate_stats_resolution - stats_target_dim = resolutions.get(stats_res_key) - if not stats_target_dim: log.warning(f"Stats resolution key '{stats_res_key}' not found in config. Stats skipped for '{asset_name}'.") - base_name = asset_name # Use the asset name from the rule - - # --- Aspect Ratio Calculation Setup --- - first_map_rule_for_aspect = next((fr for fr in asset_rule.files if fr.item_type_override is not None and fr.item_type_override != "EXTRA"), None) # Exclude EXTRA - orig_w_aspect, orig_h_aspect = None, None - if first_map_rule_for_aspect: - first_res_key = next(iter(resolutions)) # Use first resolution key - source_path_abs = workspace_path / first_map_rule_for_aspect.file_path - temp_img_for_dims, _ = self._load_and_transform_source( - source_path_abs, - first_map_rule_for_aspect.item_type_override, - first_res_key, - is_gloss_source=False # Not relevant for dimension check - # self.loaded_data_cache is used internally by the method - ) - if temp_img_for_dims is not None: - orig_h_aspect, orig_w_aspect = temp_img_for_dims.shape[:2] - log.debug(f"Got original dimensions ({orig_w_aspect}x{orig_h_aspect}) for aspect ratio calculation from {first_map_rule_for_aspect.file_path}") - else: - log.warning(f"Could not load image {first_map_rule_for_aspect.file_path} to get original dimensions for aspect ratio.") - else: - log.warning("No map files found in AssetRule, cannot calculate aspect ratio string.") - - - # --- Process Each Individual Map defined in the AssetRule --- - for file_rule in asset_rule.files: - should_skip = ( - file_rule.item_type_override is None or - file_rule.item_type_override == "EXTRA" or - getattr(file_rule, 'skip_processing', False) or - file_rule.item_type == "FILE_IGNORE" # Consolidated check: Use item_type for base classification - ) - if should_skip: - skip_reason = [] - if file_rule.item_type_override is None: skip_reason.append("No ItemTypeOverride") - if file_rule.item_type_override == "EXTRA": skip_reason.append("Explicitly EXTRA type") - if getattr(file_rule, 'skip_processing', False): skip_reason.append("SkipProcessing flag set") - if file_rule.item_type == "FILE_IGNORE": skip_reason.append("ItemType is FILE_IGNORE") - - log.debug(f"Skipping individual processing for {file_rule.file_path} ({', '.join(skip_reason)})") - continue # Skip to the next file_rule - - # --- Proceed with processing for this file_rule --- - source_path_rel = Path(file_rule.file_path) # Ensure it's a Path object - # IMPORTANT: Use the ENGINE's workspace_path (self.temp_dir) for loading, - # as individual maps should have been copied there by the caller (ProcessingTask) - # Correction: _process_individual_maps receives the *engine's* temp_dir as workspace_path - source_path_abs = workspace_path / source_path_rel - # Store original rule-based type and gloss flag - original_item_type_override = file_rule.item_type_override - # original_is_gloss_source_context removed as it's part of deprecated logic - - # --- New gloss map filename logic --- - filename_str = source_path_rel.name - is_filename_gloss_map = "map_gloss" in filename_str.lower() - - effective_map_type_for_processing = original_item_type_override - effective_is_gloss_source_for_load = False # Default to False, new filename logic will set to True if applicable - map_was_retagged_from_filename_gloss = False - - if is_filename_gloss_map: - log.info(f"-- Asset '{asset_name}': Filename '{filename_str}' contains 'MAP_GLOSS'. Applying new gloss handling. Original type from rule: '{original_item_type_override}'.") - effective_is_gloss_source_for_load = True # Force inversion if type becomes ROUGH (handled by filename logic below) - map_was_retagged_from_filename_gloss = True - - # Attempt to retag original_item_type_override from GLOSS to ROUGH, preserving MAP_ prefix case and suffix - if original_item_type_override and "gloss" in original_item_type_override.lower(): - match = re.match(r"(MAP_)(GLOSS)((?:[-_]\w+)*)", original_item_type_override, re.IGNORECASE) - if match: - prefix = match.group(1) # e.g., "MAP_" - suffix = match.group(3) if match.group(3) else "" # e.g., "-variant1_detail" or "" - effective_map_type_for_processing = f"{prefix}ROUGH{suffix}" - log.debug(f"Retagged filename gloss: original FTD key '{original_item_type_override}' to '{effective_map_type_for_processing}' for processing.") - else: - log.warning(f"Filename gloss '{original_item_type_override}' matched 'gloss' but not the expected 'MAP_GLOSS' pattern for precise retagging. Defaulting to 'MAP_ROUGH'.") - effective_map_type_for_processing = "MAP_ROUGH" - else: - # If original_item_type_override was None or didn't contain "gloss" (e.g., file was untyped but filename had MAP_GLOSS) - log.debug(f"Filename '{filename_str}' identified as gloss, but original type override ('{original_item_type_override}') was not GLOSS-specific. Setting type to 'MAP_ROUGH' for processing.") - effective_map_type_for_processing = "MAP_ROUGH" - # --- End of new gloss map filename logic --- - - log.debug(f"DEBUG POST-RETAG: effective_map_type_for_processing='{effective_map_type_for_processing}' for file '{source_path_rel.name}'") - original_extension = source_path_rel.suffix.lower() # Get from path - - log.info(f"-- Asset '{asset_name}': Processing Individual Map: {effective_map_type_for_processing} (Source: {source_path_rel.name}, EffectiveIsGlossSourceForLoad: {effective_is_gloss_source_for_load}, OriginalRuleItemType: {original_item_type_override}) --") - - current_map_details = {} # Old "derived_from_gloss_context" removed - if map_was_retagged_from_filename_gloss: - current_map_details["derived_from_gloss_filename"] = True - current_map_details["original_item_type_override_before_gloss_filename_retag"] = original_item_type_override - current_map_details["effective_item_type_override_after_gloss_filename_retag"] = effective_map_type_for_processing - source_bit_depth_found = None # Track if we've found the bit depth for this map type - - try: - # --- Loop through target resolutions from static config --- - for res_key, target_dim_px in resolutions.items(): - log.debug(f"Processing {effective_map_type_for_processing} for resolution: {res_key}...") - - # --- 1. Load and Transform Source (using helper + cache) --- - # This now only runs for files that have an item_type_override - img_resized, source_dtype = self._load_and_transform_source( - source_path_abs=source_path_abs, - map_type=effective_map_type_for_processing, # Use effective type - target_resolution_key=res_key, - is_gloss_source=effective_is_gloss_source_for_load # Pass the flag determined by filename logic - # self.loaded_data_cache is used internally - ) - - if img_resized is None: - # This warning now correctly indicates a failure for a map we *intended* to process - log.warning(f"Failed to load/transform source map {source_path_rel} (processed as {effective_map_type_for_processing}) for {res_key}. Skipping resolution.") - continue # Skip this resolution - - # Store source bit depth once found - if source_dtype is not None and source_bit_depth_found is None: - source_bit_depth_found = 16 if source_dtype == np.uint16 else (8 if source_dtype == np.uint8 else 8) # Default non-uint to 8 - current_map_details["source_bit_depth"] = source_bit_depth_found - log.debug(f"Stored source bit depth for {effective_map_type_for_processing}: {source_bit_depth_found}") - - # --- 2. Calculate Stats (if applicable) --- - if res_key == stats_res_key and stats_target_dim: - log.debug(f"Calculating stats for {effective_map_type_for_processing} using {res_key} image...") - stats = _calculate_image_stats(img_resized) - if stats: image_stats_asset[effective_map_type_for_processing] = stats # Store locally first - else: log.warning(f"Stats calculation failed for {effective_map_type_for_processing} at {res_key}.") - - # --- 3. Calculate Aspect Ratio Change String (once per asset) --- - if aspect_ratio_change_string_asset == "N/A" and orig_w_aspect is not None and orig_h_aspect is not None: - target_w_aspect, target_h_aspect = img_resized.shape[1], img_resized.shape[0] # Use current resized dims - try: - aspect_string = _normalize_aspect_ratio_change(orig_w_aspect, orig_h_aspect, target_w_aspect, target_h_aspect) - aspect_ratio_change_string_asset = aspect_string - log.debug(f"Stored aspect ratio change string using {res_key}: '{aspect_string}'") - except Exception as aspect_err: - log.error(f"Failed to calculate aspect ratio change string using {res_key}: {aspect_err}", exc_info=True) - aspect_ratio_change_string_asset = "Error" - elif aspect_ratio_change_string_asset == "N/A": - aspect_ratio_change_string_asset = "Unknown" # Set to unknown if original dims failed - - # --- 4. Save Image (using helper) --- - source_info = { - 'original_extension': original_extension, - 'source_bit_depth': source_bit_depth_found or 8, # Use found depth or default - 'involved_extensions': {original_extension} # Only self for individual maps - } - # Get bit depth rule solely from the static configuration using the correct method signature - bit_depth_rule = self.config_obj.get_bit_depth_rule(effective_map_type_for_processing) # Use effective type - - # Determine the map_type to use for saving (use effective_map_type_for_processing) - save_map_type_for_filename = effective_map_type_for_processing - # If effective_map_type_for_processing is None, this file shouldn't be saved as an individual map. - # This case should ideally be caught by the skip logic earlier, but adding a check here for safety. - if save_map_type_for_filename is None: - log.warning(f"Skipping save for {file_rule.file_path}: effective_map_type_for_processing is None.") - continue # Skip saving this file - - # Get supplier name from metadata (set in process method) - supplier_name = current_asset_metadata.get("supplier_name", "UnknownSupplier") - - save_result = self._save_image( - image_data=img_resized, - supplier_name=supplier_name, - asset_name=base_name, - current_map_identifier=save_map_type_for_filename, # Pass the effective map type to be saved - resolution_key=res_key, - source_info=source_info, - output_bit_depth_rule=bit_depth_rule - ) - - # --- 5. Store Result --- - if save_result: - processed_maps_details_asset.setdefault(effective_map_type_for_processing, {})[res_key] = save_result - # Update overall map detail (e.g., final format) if needed - current_map_details["output_format"] = save_result.get("format") - else: - log.error(f"Failed to save {effective_map_type_for_processing} at {res_key}.") - processed_maps_details_asset.setdefault(effective_map_type_for_processing, {})[f'error_{res_key}'] = "Save failed" - - - except Exception as map_proc_err: - log.error(f"Failed processing map {effective_map_type_for_processing} from {source_path_rel.name}: {map_proc_err}", exc_info=True) - processed_maps_details_asset.setdefault(effective_map_type_for_processing, {})['error'] = str(map_proc_err) - - # Store collected details for this map type (using effective_map_type_for_processing as the key) - map_details_asset[effective_map_type_for_processing] = current_map_details - - # --- Final Metadata Updates --- - # Update the passed-in current_asset_metadata dictionary directly - current_asset_metadata["map_details"] = map_details_asset - current_asset_metadata["image_stats_1k"] = image_stats_asset # Add collected stats - current_asset_metadata["aspect_ratio_change_string"] = aspect_ratio_change_string_asset # Add collected aspect string - - log.info(f"Finished processing individual map files for asset '{asset_name}'.") - # Return details needed for organization, stats and aspect ratio are updated in-place - return processed_maps_details_asset, image_stats_asset, aspect_ratio_change_string_asset - - - def _merge_maps(self, asset_rule: AssetRule, workspace_path: Path, processed_maps_details_asset: Dict[str, Dict[str, Dict]], current_asset_metadata: Dict) -> Dict[str, Dict[str, Dict]]: - """ - Merges channels from different source maps for a specific asset based on static - merge rules in configuration, using explicit file paths from the AssetRule. - - Args: - asset_rule: The AssetRule object containing file rules for this asset. - workspace_path: Path to the directory containing the source files. - processed_maps_details_asset: Details of processed maps (used to find common resolutions). - current_asset_metadata: Mutable metadata dictionary for the current asset (updated for stats). - - - Returns: - Dict[str, Dict[str, Dict]]: Details of the merged maps created for this asset. - """ - if not self.temp_dir: raise ProcessingEngineError("Engine workspace (temp_dir) not setup.") - asset_name = asset_rule.asset_name - # Get merge rules from static config - merge_rules = self.config_obj.map_merge_rules - log.info(f"Asset '{asset_name}': Applying {len(merge_rules)} map merging rule(s) from static config...") - - # Initialize results for this asset - merged_maps_details_asset: Dict[str, Dict[str, Dict]] = defaultdict(dict) - - for rule_index, rule in enumerate(merge_rules): - output_map_type = rule.get("output_map_type") - inputs_mapping = rule.get("inputs") # e.g., {"R": "AO", "G": "ROUGH", "B": "METAL"} - defaults = rule.get("defaults", {}) - rule_bit_depth = rule.get("output_bit_depth", "respect_inputs") - - if not output_map_type or not inputs_mapping: - log.warning(f"Asset '{asset_name}': Skipping static merge rule #{rule_index+1}: Missing 'output_map_type' or 'inputs'. Rule: {rule}") - continue - - log.info(f"-- Asset '{asset_name}': Applying merge rule for '{output_map_type}' --") - - # --- Find required SOURCE FileRules within the AssetRule --- - required_input_file_rules: Dict[str, FileRule] = {} # map_type -> FileRule - possible_to_find_sources = True - input_types_needed = set(inputs_mapping.values()) # e.g., {"AO", "ROUGH", "METAL"} - - for input_type in input_types_needed: - found_rule_for_type = False - # Search in the asset_rule's files - for file_rule in asset_rule.files: - # Check if the file_rule's item_type_override matches the required input type - item_override = getattr(file_rule, 'item_type_override', None) - item_base_type = getattr(file_rule, 'item_type', None) # Get base type for ignore check - - # Check if override matches the required input type AND the base type is not FILE_IGNORE - if item_override == input_type and item_base_type != "FILE_IGNORE": - # Found a valid match based on item_type_override and not ignored - required_input_file_rules[input_type] = file_rule - found_rule_for_type = True - # Update log message (see step 2) - log.debug(f"Found source FileRule for merge input '{input_type}': {file_rule.file_path} (ItemTypeOverride: {item_override}, ItemType: {item_base_type})") - break # Take the first valid match found - if not found_rule_for_type: - log.warning(f"Asset '{asset_name}': Required source FileRule for input map type '{input_type}' not found in AssetRule. Cannot perform merge for '{output_map_type}'.") - possible_to_find_sources = False - break - - if not possible_to_find_sources: - continue # Skip this merge rule - - # --- Determine common resolutions based on *processed* maps --- - # This still seems the most reliable way to know which sizes are actually available - possible_resolutions_per_input: List[Set[str]] = [] - resolutions_config = self.config_obj.image_resolutions # Static config - - for input_type in input_types_needed: - # Find the corresponding processed map details (might be ROUGH-1, ROUGH-2 etc.) - processed_details_for_input = None - input_file_rule = required_input_file_rules.get(input_type) - if input_file_rule: - processed_details_for_input = processed_maps_details_asset.get(input_file_rule.item_type_override) # Use the correct attribute - - if processed_details_for_input: - res_keys = {res for res, details in processed_details_for_input.items() if isinstance(details, dict) and 'error' not in details} - if not res_keys: - log.warning(f"Asset '{asset_name}': Input map type '{input_type}' (using {input_file_rule.item_type_override if input_file_rule else 'N/A'}) for merge rule '{output_map_type}' has no successfully processed resolutions.") # Use item_type_override - possible_resolutions_per_input = [] # Invalidate if any input has no resolutions - break - possible_resolutions_per_input.append(res_keys) - else: - # If the input map wasn't processed individually (used_for_merge_only=True) - # Assume all configured resolutions are potentially available. Loading will handle skips. - log.debug(f"Input map type '{input_type}' for merge rule '{output_map_type}' might not have been processed individually. Assuming all configured resolutions possible.") - possible_resolutions_per_input.append(set(resolutions_config.keys())) - - - if not possible_resolutions_per_input: - log.warning(f"Asset '{asset_name}': Cannot determine common resolutions for '{output_map_type}'. Skipping rule.") - continue - - common_resolutions = set.intersection(*possible_resolutions_per_input) - - if not common_resolutions: - log.warning(f"Asset '{asset_name}': No common resolutions found among required inputs {input_types_needed} for merge rule '{output_map_type}'. Skipping rule.") - continue - log.debug(f"Asset '{asset_name}': Common resolutions for '{output_map_type}': {common_resolutions}") - - # --- Loop through common resolutions --- - res_order = {k: resolutions_config[k] for k in common_resolutions if k in resolutions_config} - if not res_order: - log.warning(f"Asset '{asset_name}': Common resolutions {common_resolutions} do not match config. Skipping merge for '{output_map_type}'.") - continue - - sorted_res_keys = sorted(res_order.keys(), key=lambda k: res_order[k], reverse=True) - base_name = asset_name # Use current asset's name - - for current_res_key in sorted_res_keys: - log.debug(f"Asset '{asset_name}': Merging '{output_map_type}' for resolution: {current_res_key}") - try: - loaded_inputs_data = {} # map_type -> loaded numpy array - source_info_for_save = {'involved_extensions': set(), 'max_input_bit_depth': 8} - - # --- Load required SOURCE maps using helper --- - possible_to_load = True - target_channels = list(inputs_mapping.keys()) # e.g., ['R', 'G', 'B'] - - for map_type_needed in input_types_needed: # e.g., {"AO", "ROUGH", "METAL"} - file_rule = required_input_file_rules.get(map_type_needed) - if not file_rule: - log.error(f"Internal Error: FileRule missing for '{map_type_needed}' during merge load.") - possible_to_load = False; break - - source_path_rel_str = file_rule.file_path # Keep original string if needed - source_path_rel = Path(source_path_rel_str) # Convert to Path object - source_path_abs = workspace_path / source_path_rel - original_ext = source_path_rel.suffix.lower() # Now works on Path object - source_info_for_save['involved_extensions'].add(original_ext) - - # Determine if this specific source for merge should be treated as gloss - # based on its filename, aligning with the new primary rule. - filename_str_for_merge_input = source_path_rel.name - is_gloss_for_merge_input = "map_gloss" in filename_str_for_merge_input.lower() - if is_gloss_for_merge_input: - log.debug(f"Merge input '{filename_str_for_merge_input}' for '{map_type_needed}' identified as gloss by filename. Will pass is_gloss_source=True.") - - log.debug(f"Loading source '{source_path_rel}' for merge input '{map_type_needed}' at {current_res_key} (is_gloss_for_merge_input: {is_gloss_for_merge_input})") - img_resized, source_dtype = self._load_and_transform_source( - source_path_abs=source_path_abs, - map_type=file_rule.item_type_override, # Use the specific type override from rule (e.g., ROUGH-1) - target_resolution_key=current_res_key, - is_gloss_source=is_gloss_for_merge_input # Pass determined gloss state - # self.loaded_data_cache used internally - ) - - if img_resized is None: - log.warning(f"Asset '{asset_name}': Failed to load/transform source '{source_path_rel}' for merge input '{map_type_needed}' at {current_res_key}. Skipping resolution.") - possible_to_load = False; break - - loaded_inputs_data[map_type_needed] = img_resized # Store by base type (AO, ROUGH) - - # Track max source bit depth - if source_dtype == np.uint16: - source_info_for_save['max_input_bit_depth'] = max(source_info_for_save['max_input_bit_depth'], 16) - # Add other dtype checks if needed - - if not possible_to_load: continue - - # --- Calculate Stats for ROUGH source if used and at stats resolution --- - stats_res_key = self.config_obj.calculate_stats_resolution - if current_res_key == stats_res_key: - log.debug(f"Asset '{asset_name}': Checking for ROUGH source stats for '{output_map_type}' at {stats_res_key}") - for target_channel, source_map_type in inputs_mapping.items(): - if source_map_type == 'ROUGH' and source_map_type in loaded_inputs_data: - log.debug(f"Asset '{asset_name}': Calculating stats for ROUGH source (mapped to channel '{target_channel}') for '{output_map_type}' at {stats_res_key}") - rough_image_data = loaded_inputs_data[source_map_type] - rough_stats = _calculate_image_stats(rough_image_data) - if rough_stats: - # Update the mutable metadata dict passed in - stats_dict = current_asset_metadata.setdefault("merged_map_channel_stats", {}).setdefault(output_map_type, {}).setdefault(target_channel, {}) - stats_dict[stats_res_key] = rough_stats - log.debug(f"Asset '{asset_name}': Stored ROUGH stats for '{output_map_type}' channel '{target_channel}' at {stats_res_key}: {rough_stats}") - else: - log.warning(f"Asset '{asset_name}': Failed to calculate ROUGH stats for '{output_map_type}' channel '{target_channel}' at {stats_res_key}.") - - - # --- Determine dimensions --- - first_map_type = next(iter(loaded_inputs_data)) - h, w = loaded_inputs_data[first_map_type].shape[:2] - num_target_channels = len(target_channels) - - # --- Prepare and Merge Channels --- - merged_channels_float32 = [] - for target_channel in target_channels: # e.g., 'R', 'G', 'B' - source_map_type = inputs_mapping.get(target_channel) # e.g., "AO", "ROUGH", "METAL" - channel_data_float32 = None - - if source_map_type and source_map_type in loaded_inputs_data: - img_input = loaded_inputs_data[source_map_type] # Get the loaded NumPy array - - # Ensure input is float32 0-1 range for merging - if img_input.dtype == np.uint16: img_float = img_input.astype(np.float32) / 65535.0 - elif img_input.dtype == np.uint8: img_float = img_input.astype(np.float32) / 255.0 - elif img_input.dtype == np.float16: img_float = img_input.astype(np.float32) # Assume float16 is 0-1 - else: img_float = img_input.astype(np.float32) # Assume other floats are 0-1 - - num_source_channels = img_float.shape[2] if len(img_float.shape) == 3 else 1 - - # Extract the correct channel - if num_source_channels >= 3: - if target_channel == 'R': channel_data_float32 = img_float[:, :, 0] - elif target_channel == 'G': channel_data_float32 = img_float[:, :, 1] - elif target_channel == 'B': channel_data_float32 = img_float[:, :, 2] - elif target_channel == 'A' and num_source_channels == 4: channel_data_float32 = img_float[:, :, 3] - else: log.warning(f"Target channel '{target_channel}' invalid for 3/4 channel source '{source_map_type}'.") - elif num_source_channels == 1 or len(img_float.shape) == 2: - # If source is grayscale, use it for R, G, B, or A target channels - channel_data_float32 = img_float.reshape(h, w) - else: - log.warning(f"Unexpected shape {img_float.shape} for source '{source_map_type}'.") - - # Apply default if channel data couldn't be extracted - if channel_data_float32 is None: - default_val = defaults.get(target_channel) - if default_val is None: - raise ProcessingEngineError(f"Missing input/default for target channel '{target_channel}' in merge rule '{output_map_type}'.") - log.debug(f"Using default value {default_val} for target channel '{target_channel}' in '{output_map_type}'.") - channel_data_float32 = np.full((h, w), float(default_val), dtype=np.float32) - - merged_channels_float32.append(channel_data_float32) - - if not merged_channels_float32 or len(merged_channels_float32) != num_target_channels: - raise ProcessingEngineError(f"Channel count mismatch during merge for '{output_map_type}'. Expected {num_target_channels}, got {len(merged_channels_float32)}.") - - merged_image_float32 = cv2.merge(merged_channels_float32) - log.debug(f"Merged channels for '{output_map_type}' ({current_res_key}). Result shape: {merged_image_float32.shape}, dtype: {merged_image_float32.dtype}") - - # --- Save Merged Map using Helper --- - # Get supplier name from metadata (set in process method) - supplier_name = current_asset_metadata.get("supplier_name", "UnknownSupplier") - - save_result = self._save_image( - image_data=merged_image_float32, - supplier_name=supplier_name, - asset_name=base_name, - current_map_identifier=output_map_type, # Merged map type - resolution_key=current_res_key, - source_info=source_info_for_save, - output_bit_depth_rule=rule_bit_depth - ) - - # --- Record details locally --- - if save_result: - merged_maps_details_asset[output_map_type][current_res_key] = save_result - else: - log.error(f"Asset '{asset_name}': Failed to save merged map '{output_map_type}' at resolution '{current_res_key}'.") - merged_maps_details_asset.setdefault(output_map_type, {})[f'error_{current_res_key}'] = "Save failed via helper" - - - except Exception as merge_res_err: - log.error(f"Asset '{asset_name}': Failed merging '{output_map_type}' at resolution '{current_res_key}': {merge_res_err}", exc_info=True) - # Store error locally for this asset - merged_maps_details_asset.setdefault(output_map_type, {})[f'error_{current_res_key}'] = str(merge_res_err) - - log.info(f"Asset '{asset_name}': Finished applying map merging rules.") - # Return the details for this asset - return merged_maps_details_asset - - - def _generate_metadata_file(self, effective_supplier: str, asset_rule: AssetRule, current_asset_metadata: Dict, processed_maps_details_asset: Dict[str, Dict[str, Dict]], merged_maps_details_asset: Dict[str, Dict[str, Dict]]) -> Tuple[Path, str]: - """ - Gathers metadata for a specific asset based on the AssetRule and processing results, - and writes it to a temporary JSON file in the engine's temp_dir using separate directory/filename patterns. - - Args: - effective_supplier: The supplier name to use (override or original). - asset_rule: The AssetRule object for this asset. - current_asset_metadata: Base metadata dictionary (already contains name, category, archetype, stats, aspect ratio, map_details). - processed_maps_details_asset: Details of processed maps for this asset. - merged_maps_details_asset: Details of merged maps for this asset. - - Returns: - Tuple[Path, str]: A tuple containing the relative directory Path object and the filename string within the temp_dir. - """ - if not self.temp_dir: raise ProcessingEngineError("Engine workspace (temp_dir) not setup.") - asset_name = asset_rule.asset_name - if not asset_name: - log.warning("Asset name missing during metadata generation, file may be incomplete or incorrectly named.") - asset_name = "UnknownAsset_Metadata" # Fallback for filename - - log.info(f"Generating metadata file for asset '{asset_name}' (Supplier: {effective_supplier})...") - - # Start with the base metadata passed in (already contains name, category, archetype, stats, aspect, map_details) - final_metadata = current_asset_metadata.copy() - final_metadata["category"] = asset_rule.asset_type # Ensure standardized asset type is in metadata - - # Use the effective supplier passed as argument - final_metadata["supplier_name"] = effective_supplier # Already determined in process() - - # Populate map resolution details from processing results - final_metadata["processed_map_resolutions"] = {} - for map_type, res_dict in processed_maps_details_asset.items(): - keys = [res for res, d in res_dict.items() if isinstance(d, dict) and 'error' not in d] - if keys: final_metadata["processed_map_resolutions"][map_type] = sorted(keys) - - final_metadata["merged_map_resolutions"] = {} - for map_type, res_dict in merged_maps_details_asset.items(): - keys = [res for res, d in res_dict.items() if isinstance(d, dict) and 'error' not in d] - if keys: final_metadata["merged_map_resolutions"][map_type] = sorted(keys) - - # Determine maps present based on successful processing for this asset - final_metadata["maps_present"] = sorted(list(processed_maps_details_asset.keys())) - final_metadata["merged_maps"] = sorted(list(merged_maps_details_asset.keys())) - - # Determine shader features based on this asset's maps and rules - features = set() - map_details_asset = final_metadata.get("map_details", {}) # Get from metadata dict - for map_type, details in map_details_asset.items(): # map_type here is item_type_override like "MAP_COL-1" - base_standard_type = self._get_base_map_type(map_type) # Should give "COL" - # Check standard feature types - if base_standard_type in ["SSS", "FUZZ", "MASK", "TRANSMISSION", "EMISSION", "CLEARCOAT"]: - features.add(base_standard_type) - if details.get("derived_from_gloss"): features.add("InvertedGloss") - # Check if any resolution was saved as 16-bit - res_details = processed_maps_details_asset.get(map_type, {}) - if any(res_info.get("bit_depth") == 16 for res_info in res_details.values() if isinstance(res_info, dict)): - features.add(f"16bit_{base_standard_type}") - # Check merged maps for 16-bit output - for map_type, res_dict in merged_maps_details_asset.items(): # map_type here is "NRMRGH" - base_standard_type = self._get_base_map_type(map_type) # Should give "NRMRGH" - if any(res_info.get("bit_depth") == 16 for res_info in res_dict.values() if isinstance(res_info, dict)): - features.add(f"16bit_{base_standard_type}") - - final_metadata["shader_features"] = sorted(list(features)) - - # Determine source files in this asset's Extra folder based on FileRule category - source_files_in_extra_set = set() - for file_rule in asset_rule.files: - if file_rule.item_type_override is None: # Assume files without an assigned type are extra/ignored/unmatched - source_files_in_extra_set.add(str(file_rule.file_path)) - final_metadata["source_files_in_extra"] = sorted(list(source_files_in_extra_set)) - - # Add processing info - final_metadata["_processing_info"] = { - "preset_used": self.config_obj.preset_name, # Preset name comes from the engine's config - "timestamp_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - "input_source": effective_supplier, # Use the effective supplier - } - - # Sort lists just before writing - for key in ["maps_present", "merged_maps", "shader_features", "source_files_in_extra"]: - if key in final_metadata and isinstance(final_metadata[key], list): final_metadata[key].sort() - - # --- Generate Path and Save --- - # Get the new separate patterns from config - output_directory_pattern = self.config_obj.output_directory_pattern - output_filename_pattern = self.config_obj.output_filename_pattern - metadata_filename_base = self.config_obj.metadata_filename # e.g., "metadata.json" - metadata_ext = Path(metadata_filename_base).suffix.lstrip('.') or 'json' - metadata_maptype = Path(metadata_filename_base).stem # Use filename stem as maptype token - - token_data = { - "supplier": _sanitize_filename(effective_supplier), - "assetname": _sanitize_filename(asset_name), - "maptype": metadata_maptype, - "resolution": "meta", - "width": 0, - "height": 0, - "bitdepth": 0, - "ext": metadata_ext - } - if hasattr(self, 'current_incrementing_value') and self.current_incrementing_value is not None: - token_data['incrementingvalue'] = self.current_incrementing_value - if hasattr(self, 'current_sha5_value') and self.current_sha5_value is not None: - token_data['sha5'] = self.current_sha5_value - log.debug(f"Token data for _generate_metadata_file path generation: {token_data}") # DEBUG LOG - - - try: - # Generate directory and filename separately - relative_dir_path_str = generate_path_from_pattern(output_directory_pattern, token_data) - filename_str = generate_path_from_pattern(output_filename_pattern, token_data) - # Combine for the full temporary path - full_relative_path_str = str(Path(relative_dir_path_str) / filename_str) - relative_dir_path = Path(relative_dir_path_str) # Keep the directory Path object - except Exception as path_gen_err: - log.error(f"Failed to generate metadata path using patterns '{output_directory_pattern}' / '{output_filename_pattern}' and data {token_data}: {path_gen_err}", exc_info=True) - raise ProcessingEngineError(f"Failed to generate metadata path for asset '{asset_name}'") from path_gen_err - - output_path_temp_abs = self.temp_dir / full_relative_path_str # Save to engine's temp dir, preserving structure - log.debug(f"Writing metadata for asset '{asset_name}' to temporary file: {output_path_temp_abs}") - - # Ensure parent directory exists in temp (using the full path) - try: - output_path_temp_abs.parent.mkdir(parents=True, exist_ok=True) - except Exception as mkdir_err: - log.error(f"Failed to create temporary directory {output_path_temp_abs.parent} for metadata: {mkdir_err}", exc_info=True) - raise ProcessingEngineError(f"Failed to create temporary directory for metadata for asset '{asset_name}'") from mkdir_err - - try: - with open(output_path_temp_abs, 'w', encoding='utf-8') as f: - json.dump(final_metadata, f, indent=4, ensure_ascii=False, sort_keys=True) - log.info(f"Metadata file '{filename_str}' generated successfully for asset '{asset_name}' at relative temp path '{full_relative_path_str}'.") - # Return the RELATIVE directory Path object and the filename string - return relative_dir_path, filename_str - except Exception as e: - raise ProcessingEngineError(f"Failed to write metadata file {output_path_temp_abs} for asset '{asset_name}': {e}") from e - - - def _organize_output_files(self, asset_rule: AssetRule, workspace_path: Path, supplier_identifier: str, output_base_path: Path, processed_maps_details_asset: Dict[str, Dict[str, Dict]], merged_maps_details_asset: Dict[str, Dict[str, Dict]], temp_metadata_info: Tuple[Path, str]): - """ - Moves/copies processed files for a specific asset from the engine's temp dir - and copies EXTRA files from the original workspace to the final output structure, - using the relative paths generated by the token pattern. - - Args: - asset_rule: The AssetRule object for this asset. - workspace_path: Path to the original workspace containing source files. - supplier_identifier: The supplier identifier from the SourceRule. - output_base_path: The final base output directory. - processed_maps_details_asset: Details of processed maps for this asset. - merged_maps_details_asset: Details of merged maps for this asset. - temp_metadata_info: Tuple containing the relative directory Path and filename string for the metadata file within temp_dir. - """ - if not self.temp_dir or not self.temp_dir.exists(): raise ProcessingEngineError("Engine temp workspace missing.") - asset_name = asset_rule.asset_name - if not asset_name: raise ProcessingEngineError("Asset name missing for organization.") - - if not asset_name: raise ProcessingEngineError("Asset name missing for organization.") - asset_name_sanitized = _sanitize_filename(asset_name) # Still useful for logging - - # Get structure names from static config - extra_subdir_name = self.config_obj.extra_files_subdir - - log.info(f"Organizing output files for asset '{asset_name_sanitized}' using generated paths relative to: {output_base_path}") - - # --- Helper for moving files from engine's temp dir to final output --- - def _safe_move_to_final(src_rel_path_str: str | None, file_desc: str): - """Moves a file from temp to its final location based on its relative path string.""" - if not src_rel_path_str: - log.warning(f"Asset '{asset_name_sanitized}': Missing src relative path string for {file_desc}. Cannot move.") - return - - source_abs = self.temp_dir / src_rel_path_str # Absolute path in temp - dest_abs = output_base_path / src_rel_path_str # Final absolute path - - try: - if source_abs.exists(): - # Ensure final destination directory exists - dest_abs.parent.mkdir(parents=True, exist_ok=True) - log.debug(f"Asset '{asset_name_sanitized}': Moving {file_desc}: {src_rel_path_str} -> {dest_abs.relative_to(output_base_path)}") - shutil.move(str(source_abs), str(dest_abs)) - else: - log.warning(f"Asset '{asset_name_sanitized}': Source file missing in engine temp for {file_desc}: {source_abs}") - except Exception as e: - log.error(f"Asset '{asset_name_sanitized}': Failed moving {file_desc} '{src_rel_path_str}': {e}", exc_info=True) - - # --- Move Processed/Merged Maps --- - moved_map_count = 0 - for details_dict in [processed_maps_details_asset, merged_maps_details_asset]: - for map_type, res_dict in details_dict.items(): - # Skip if the whole map type failed (e.g., merge rule source missing) - if isinstance(res_dict, dict) and 'error' in res_dict and len(res_dict) == 1: - log.warning(f"Skipping move for map type '{map_type}' due to processing error: {res_dict['error']}") - continue - for res_key, details in res_dict.items(): - # Skip specific resolution errors - if isinstance(details, str) and details.startswith("error_"): - log.warning(f"Skipping move for {map_type} ({res_key}) due to error: {details}") - continue - if isinstance(details, dict) and 'path' in details: - # details['path'] is the relative path string within temp_dir - relative_path_str = details['path'] - _safe_move_to_final(relative_path_str, f"{map_type} ({res_key})") - moved_map_count += 1 - log.debug(f"Asset '{asset_name_sanitized}': Moved {moved_map_count} map files.") - - # --- Move Metadata File --- - if temp_metadata_info: - relative_dir_path, filename = temp_metadata_info - metadata_rel_path_str = str(relative_dir_path / filename) - _safe_move_to_final(metadata_rel_path_str, "metadata file") - else: - log.warning(f"Asset '{asset_name_sanitized}': Temporary metadata info missing. Cannot move metadata file.") - - # --- Handle "EXTRA" Files (copy from original workspace to final asset dir) --- - # Determine the final asset directory based on the metadata's relative directory path - final_asset_relative_dir = relative_dir_path if temp_metadata_info else None - if final_asset_relative_dir is not None: # Check explicitly for None - final_extra_dir_abs = output_base_path / final_asset_relative_dir / extra_subdir_name - log.debug(f"Asset '{asset_name_sanitized}': Determined final EXTRA directory: {final_extra_dir_abs}") - copied_extra_files = [] - for file_rule in asset_rule.files: - # Copy files explicitly marked as EXTRA or those with no item_type_override (unmatched) - if file_rule.item_type_override == "EXTRA" or file_rule.item_type_override is None: - try: - source_rel_path = Path(file_rule.file_path) - source_abs = workspace_path / source_rel_path - # Place in Extra subdir within the final asset dir, keep original name - dest_abs = final_extra_dir_abs / source_rel_path.name - - if source_abs.is_file(): - log.debug(f"Asset '{asset_name_sanitized}': Copying EXTRA/unmatched file: {source_rel_path} -> {final_extra_dir_abs.relative_to(output_base_path)}/") - final_extra_dir_abs.mkdir(parents=True, exist_ok=True) - shutil.copy2(str(source_abs), str(dest_abs)) # copy2 preserves metadata - copied_extra_files.append(source_rel_path.name) - elif source_abs.is_dir(): - log.debug(f"Asset '{asset_name_sanitized}': Skipping EXTRA/unmatched directory: {source_rel_path}") - else: - log.warning(f"Asset '{asset_name_sanitized}': Source file marked as EXTRA/unmatched not found in workspace: {source_abs}") - except Exception as copy_err: - log.error(f"Asset '{asset_name_sanitized}': Failed copying EXTRA/unmatched file '{file_rule.file_path}': {copy_err}", exc_info=True) - - if copied_extra_files: - log.info(f"Asset '{asset_name_sanitized}': Copied {len(copied_extra_files)} EXTRA/unmatched file(s) to '{final_extra_dir_abs.relative_to(output_base_path)}' subdirectory.") - else: - log.warning(f"Asset '{asset_name_sanitized}': Could not determine final asset directory from metadata info '{temp_metadata_info}'. Skipping EXTRA file copying.") - - - log.info(f"Finished organizing output for asset '{asset_name_sanitized}'.") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..2e70fad --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# This file makes the 'tests' directory a Python package. \ No newline at end of file diff --git a/tests/processing/pipeline/__init__.py b/tests/processing/pipeline/__init__.py new file mode 100644 index 0000000..f178d82 --- /dev/null +++ b/tests/processing/pipeline/__init__.py @@ -0,0 +1 @@ +# This file makes Python treat the directory as a package. \ No newline at end of file diff --git a/tests/processing/pipeline/stages/__init__.py b/tests/processing/pipeline/stages/__init__.py new file mode 100644 index 0000000..f178d82 --- /dev/null +++ b/tests/processing/pipeline/stages/__init__.py @@ -0,0 +1 @@ +# This file makes Python treat the directory as a package. \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py b/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py new file mode 100644 index 0000000..0589585 --- /dev/null +++ b/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py @@ -0,0 +1,273 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np + +from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule, TransformSettings +from configuration import Configuration, GeneralSettings +import processing.utils.image_processing_utils as ipu # Ensure ipu is available for mocking + +# Helper Functions +def create_mock_file_rule_for_alpha_test( + id_val: uuid.UUID = None, + map_type: str = "ALBEDO", + filename_pattern: str = "albedo.png", + item_type: str = "MAP_COL", + active: bool = True +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = item_type + mock_fr.active = active + mock_fr.transform_settings = mock.MagicMock(spec=TransformSettings) + return mock_fr + +def create_alpha_extraction_mock_context( + initial_file_rules: list = None, + initial_processed_details: dict = None, + skip_asset_flag: bool = False, + asset_name: str = "AlphaAsset", + # extract_alpha_globally: bool = True # If stage checks this +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + + mock_source_rule = mock.MagicMock(spec=SourceRule) + + mock_gs = mock.MagicMock(spec=GeneralSettings) + # if your stage uses a global flag: + # mock_gs.extract_alpha_to_mask_globally = extract_alpha_globally + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_gs + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, + files_to_process=list(initial_file_rules) if initial_file_rules else [], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, + sha5_value=None + ) + return context + +# Unit Tests +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') # Mock logging to avoid console output during tests +def test_asset_skipped(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + context = create_alpha_extraction_mock_context(skip_asset_flag=True) + + updated_context = stage.execute(context) + + assert updated_context == context # Context should be unchanged + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 0 + assert not updated_context.processed_maps_details + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_existing_mask_map(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + existing_mask_rule = create_mock_file_rule_for_alpha_test(map_type="MASK", filename_pattern="mask.png") + context = create_alpha_extraction_mock_context(initial_file_rules=[existing_mask_rule]) + + updated_context = stage.execute(context) + + assert updated_context == context + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 + assert updated_context.files_to_process[0].map_type == "MASK" + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_alpha_extraction_success(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_albedo.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_rgba_data = np.zeros((10, 10, 4), dtype=np.uint8) + mock_rgba_data[:, :, 3] = 128 # Example alpha data + mock_load_image.side_effect = [mock_rgba_data, mock_rgba_data] + + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 2 + # First call to check for alpha, second to get data for saving + mock_load_image.assert_any_call(Path('/fake/temp_engine_dir/processed_albedo.png')) + + mock_save_image.assert_called_once() + saved_path_arg = mock_save_image.call_args[0][0] + saved_data_arg = mock_save_image.call_args[0][1] + + assert isinstance(saved_path_arg, Path) + assert "mask_from_alpha_" in saved_path_arg.name + assert np.array_equal(saved_data_arg, mock_rgba_data[:, :, 3]) + + assert len(updated_context.files_to_process) == 2 + new_mask_rule = None + for fr in updated_context.files_to_process: + if fr.map_type == "MASK": + new_mask_rule = fr + break + assert new_mask_rule is not None + assert new_mask_rule.item_type == "MAP_DER" # Derived map + + assert new_mask_rule.id.hex in updated_context.processed_maps_details + new_mask_detail = updated_context.processed_maps_details[new_mask_rule.id.hex] + assert new_mask_detail['map_type'] == "MASK" + assert "mask_from_alpha_" in new_mask_detail['temp_processed_file'] + assert "Generated from alpha of ALBEDO" in new_mask_detail['notes'] # Check for specific note + assert new_mask_detail['status'] == 'Processed' + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_no_alpha_channel_in_source(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_rgb_albedo.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo_rgb.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_rgb_data = np.zeros((10, 10, 3), dtype=np.uint8) # RGB, no alpha + mock_load_image.return_value = mock_rgb_data # Only called once for check + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/temp_engine_dir/processed_rgb_albedo.png')) + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 # No new MASK rule + assert albedo_fr.id.hex in updated_context.processed_maps_details + assert len(updated_context.processed_maps_details) == 1 + + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_no_suitable_source_map_type(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_alpha_test(id_val=normal_rule_id, map_type="NORMAL") + initial_processed_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal.png', 'status': 'Processed', 'map_type': 'NORMAL'} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 + assert normal_fr.id.hex in updated_context.processed_maps_details + assert len(updated_context.processed_maps_details) == 1 + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.warning') # Expect a warning log +def test_load_image_fails(mock_log_warning, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_albedo_load_fail.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo_load_fail.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_load_image.return_value = None # Simulate load failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/temp_engine_dir/processed_albedo_load_fail.png')) + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 + assert albedo_fr.id.hex in updated_context.processed_maps_details + assert len(updated_context.processed_maps_details) == 1 + mock_log_warning.assert_called_once() # Check that a warning was logged + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.error') # Expect an error log +def test_save_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_albedo_save_fail.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo_save_fail.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_rgba_data = np.zeros((10, 10, 4), dtype=np.uint8) + mock_rgba_data[:, :, 3] = 128 + mock_load_image.side_effect = [mock_rgba_data, mock_rgba_data] # Load succeeds + + mock_save_image.return_value = False # Simulate save failure + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 2 + mock_save_image.assert_called_once() # Save was attempted + + assert len(updated_context.files_to_process) == 1 # No new MASK rule should be successfully added and detailed + + # Check that no new MASK details were added, or if they were, they reflect failure. + # The current stage logic returns context early, so no new rule or details should be present. + mask_rule_found = any(fr.map_type == "MASK" for fr in updated_context.files_to_process) + assert not mask_rule_found + + mask_details_found = any( + details['map_type'] == "MASK" + for fr_id, details in updated_context.processed_maps_details.items() + if fr_id != albedo_fr.id.hex # Exclude the original albedo + ) + assert not mask_details_found + mock_log_error.assert_called_once() # Check that an error was logged \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_asset_skip_logic.py b/tests/processing/pipeline/stages/test_asset_skip_logic.py new file mode 100644 index 0000000..388cc8c --- /dev/null +++ b/tests/processing/pipeline/stages/test_asset_skip_logic.py @@ -0,0 +1,213 @@ +import pytest +from unittest import mock +from pathlib import Path +from typing import Dict, Optional, Any + +from processing.pipeline.stages.asset_skip_logic import AssetSkipLogicStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule +from configuration import Configuration, GeneralSettings + +# Helper function to create a mock AssetProcessingContext +def create_skip_logic_mock_context( + effective_supplier: Optional[str] = "ValidSupplier", + asset_process_status: str = "PENDING", + overwrite_existing: bool = False, + asset_name: str = "TestAssetSkip" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.process_status = asset_process_status + mock_asset_rule.source_path = "fake/source" # Added for completeness + mock_asset_rule.output_path = "fake/output" # Added for completeness + mock_asset_rule.maps = [] # Added for completeness + mock_asset_rule.metadata = {} # Added for completeness + mock_asset_rule.material_name = None # Added for completeness + mock_asset_rule.notes = None # Added for completeness + mock_asset_rule.tags = [] # Added for completeness + mock_asset_rule.enabled = True # Added for completeness + + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = "TestSourceRule" # Added for completeness + mock_source_rule.path = "fake/source_rule_path" # Added for completeness + mock_source_rule.default_supplier = None # Added for completeness + mock_source_rule.assets = [mock_asset_rule] # Added for completeness + mock_source_rule.enabled = True # Added for completeness + + mock_general_settings = mock.MagicMock(spec=GeneralSettings) + mock_general_settings.overwrite_existing = overwrite_existing + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_general_settings + mock_config.suppliers = {"ValidSupplier": mock.MagicMock()} + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier=effective_supplier, + asset_metadata={}, + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha256_value if that's the actual field + ) + # Ensure status_flags is initialized if AssetSkipLogicStage expects it + # context.status_flags = {} # Already done in constructor + return context +@mock.patch('logging.info') +def test_skip_due_to_missing_supplier(mock_log_info): + """ + Test that the asset is skipped if effective_supplier is None. + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context(effective_supplier=None, asset_name="MissingSupplierAsset") + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset') is True + assert updated_context.status_flags.get('skip_reason') == "Invalid or missing supplier" + mock_log_info.assert_any_call(f"Asset 'MissingSupplierAsset': Skipping due to missing or invalid supplier.") + +@mock.patch('logging.info') +def test_skip_due_to_process_status_skip(mock_log_info): + """ + Test that the asset is skipped if asset_rule.process_status is "SKIP". + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context(asset_process_status="SKIP", asset_name="SkipStatusAsset") + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset') is True + assert updated_context.status_flags.get('skip_reason') == "Process status set to SKIP" + mock_log_info.assert_any_call(f"Asset 'SkipStatusAsset': Skipping because process_status is 'SKIP'.") + +@mock.patch('logging.info') +def test_skip_due_to_processed_and_overwrite_disabled(mock_log_info): + """ + Test that the asset is skipped if asset_rule.process_status is "PROCESSED" + and overwrite_existing is False. + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PROCESSED", + overwrite_existing=False, + asset_name="ProcessedNoOverwriteAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset') is True + assert updated_context.status_flags.get('skip_reason') == "Already processed, overwrite disabled" + mock_log_info.assert_any_call(f"Asset 'ProcessedNoOverwriteAsset': Skipping because already processed and overwrite is disabled.") + +@mock.patch('logging.info') +def test_no_skip_when_processed_and_overwrite_enabled(mock_log_info): + """ + Test that the asset is NOT skipped if asset_rule.process_status is "PROCESSED" + but overwrite_existing is True. + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PROCESSED", + overwrite_existing=True, + effective_supplier="ValidSupplier", # Ensure supplier is valid + asset_name="ProcessedOverwriteAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset', False) is False # Default to False if key not present + # No specific skip_reason to check if not skipped + # Check that no skip log message was called for this specific reason + for call_args in mock_log_info.call_args_list: + assert "Skipping because already processed and overwrite is disabled" not in call_args[0][0] + assert "Skipping due to missing or invalid supplier" not in call_args[0][0] + assert "Skipping because process_status is 'SKIP'" not in call_args[0][0] + + +@mock.patch('logging.info') +def test_no_skip_when_process_status_pending(mock_log_info): + """ + Test that the asset is NOT skipped if asset_rule.process_status is "PENDING". + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PENDING", + effective_supplier="ValidSupplier", # Ensure supplier is valid + asset_name="PendingAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset', False) is False + # Check that no skip log message was called + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] + + +@mock.patch('logging.info') +def test_no_skip_when_process_status_failed_previously(mock_log_info): + """ + Test that the asset is NOT skipped if asset_rule.process_status is "FAILED_PREVIOUSLY". + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="FAILED_PREVIOUSLY", + effective_supplier="ValidSupplier", # Ensure supplier is valid + asset_name="FailedPreviouslyAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset', False) is False + # Check that no skip log message was called + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] + +@mock.patch('logging.info') +def test_no_skip_when_process_status_other_valid_status(mock_log_info): + """ + Test that the asset is NOT skipped for other valid, non-skip process statuses. + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="READY_FOR_PROCESSING", # Example of another non-skip status + effective_supplier="ValidSupplier", + asset_name="ReadyAsset" + ) + updated_context = stage.execute(context) + assert updated_context.status_flags.get('skip_asset', False) is False + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] + +@mock.patch('logging.info') +def test_skip_asset_flag_initialized_if_not_present(mock_log_info): + """ + Test that 'skip_asset' is initialized to False in status_flags if not skipped and not present. + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PENDING", + effective_supplier="ValidSupplier", + asset_name="InitFlagAsset" + ) + # Ensure status_flags is empty before execute + context.status_flags = {} + + updated_context = stage.execute(context) + + # If not skipped, 'skip_asset' should be explicitly False. + assert updated_context.status_flags.get('skip_asset') is False + # No skip reason should be set + assert 'skip_reason' not in updated_context.status_flags + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_file_rule_filter.py b/tests/processing/pipeline/stages/test_file_rule_filter.py new file mode 100644 index 0000000..4a79308 --- /dev/null +++ b/tests/processing/pipeline/stages/test_file_rule_filter.py @@ -0,0 +1,330 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +from typing import Optional # Added Optional for type hinting + +from processing.pipeline.stages.file_rule_filter import FileRuleFilterStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule # FileRule is key here +from configuration import Configuration # Minimal config needed + +def create_mock_file_rule( + id_val: Optional[uuid.UUID] = None, + map_type: str = "Diffuse", + filename_pattern: str = "*.tif", + item_type: str = "MAP_COL", # e.g., MAP_COL, FILE_IGNORE + active: bool = True +) -> mock.MagicMock: # Return MagicMock to easily set other attributes if needed + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = item_type + mock_fr.active = active + return mock_fr + +def create_file_filter_mock_context( + file_rules_list: Optional[list] = None, # List of mock FileRule objects + skip_asset_flag: bool = False, + asset_name: str = "FileFilterAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.file_rules = file_rules_list if file_rules_list is not None else [] + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_config = mock.MagicMock(spec=Configuration) + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", # Assume valid for this stage + asset_metadata={'asset_name': asset_name}, # Assume metadata init happened + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], # Stage will populate this + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha256_value based on AssetProcessingContext + ) + return context +# Test Cases for FileRuleFilterStage.execute() + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_asset_skipped(mock_log_debug, mock_log_info): + """ + Test case: Asset Skipped - status_flags['skip_asset'] is True. + Assert context.files_to_process remains empty. + """ + stage = FileRuleFilterStage() + context = create_file_filter_mock_context(skip_asset_flag=True) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 0 + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule filtering as 'skip_asset' is True.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_no_file_rules(mock_log_debug, mock_log_info): + """ + Test case: No File Rules - asset_rule.file_rules is empty. + Assert context.files_to_process is empty. + """ + stage = FileRuleFilterStage() + context = create_file_filter_mock_context(file_rules_list=[]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 0 + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': No file rules defined. Skipping file rule filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_only_active_processable_rules(mock_log_debug, mock_log_info): + """ + Test case: Only Active, Processable Rules - All FileRules are active=True and item_type="MAP_COL". + Assert all are added to context.files_to_process. + """ + stage = FileRuleFilterStage() + fr1 = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr2 = create_mock_file_rule(filename_pattern="normal.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr1, fr2]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr1 in updated_context.files_to_process + assert fr2 in updated_context.files_to_process + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_inactive_rules(mock_log_debug, mock_log_info): + """ + Test case: Inactive Rules - Some FileRules have active=False. + Assert only active rules are added. + """ + stage = FileRuleFilterStage() + fr_active = create_mock_file_rule(filename_pattern="active.png", item_type="MAP_COL", active=True) + fr_inactive = create_mock_file_rule(filename_pattern="inactive.png", item_type="MAP_COL", active=False) + fr_another_active = create_mock_file_rule(filename_pattern="another_active.jpg", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_active, fr_inactive, fr_another_active]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr_active in updated_context.files_to_process + assert fr_another_active in updated_context.files_to_process + assert fr_inactive not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping inactive file rule: '{fr_inactive.filename_pattern}'") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_file_ignore_simple_match(mock_log_debug, mock_log_info): + """ + Test case: FILE_IGNORE Rule (Simple Match). + One FILE_IGNORE rule with filename_pattern="*_ignore.png". + One MAP_COL rule with filename_pattern="diffuse_ignore.png". + One MAP_COL rule with filename_pattern="normal_process.png". + Assert only "normal_process.png" rule is added. + """ + stage = FileRuleFilterStage() + fr_ignore = create_mock_file_rule(filename_pattern="*_ignore.png", item_type="FILE_IGNORE", active=True) + fr_ignored_map = create_mock_file_rule(filename_pattern="diffuse_ignore.png", item_type="MAP_COL", active=True) + fr_process_map = create_mock_file_rule(filename_pattern="normal_process.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_ignore, fr_ignored_map, fr_process_map]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_process_map in updated_context.files_to_process + assert fr_ignored_map not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_ignored_map.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_file_ignore_glob_pattern(mock_log_debug, mock_log_info): + """ + Test case: FILE_IGNORE Rule (Glob Pattern). + One FILE_IGNORE rule with filename_pattern="*_ignore.*". + MAP_COL rules: "tex_ignore.tif", "tex_process.png". + Assert only "tex_process.png" rule is added. + """ + stage = FileRuleFilterStage() + fr_ignore_glob = create_mock_file_rule(filename_pattern="*_ignore.*", item_type="FILE_IGNORE", active=True) + fr_ignored_tif = create_mock_file_rule(filename_pattern="tex_ignore.tif", item_type="MAP_COL", active=True) + fr_process_png = create_mock_file_rule(filename_pattern="tex_process.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_ignore_glob, fr_ignored_tif, fr_process_png]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_process_png in updated_context.files_to_process + assert fr_ignored_tif not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore_glob.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_ignored_tif.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_multiple_file_ignore_rules(mock_log_debug, mock_log_info): + """ + Test case: Multiple FILE_IGNORE Rules. + Test with several ignore patterns and ensure they are all respected. + """ + stage = FileRuleFilterStage() + fr_ignore1 = create_mock_file_rule(filename_pattern="*.tmp", item_type="FILE_IGNORE", active=True) + fr_ignore2 = create_mock_file_rule(filename_pattern="backup_*", item_type="FILE_IGNORE", active=True) + fr_ignore3 = create_mock_file_rule(filename_pattern="*_old.png", item_type="FILE_IGNORE", active=True) + + fr_map_ignored1 = create_mock_file_rule(filename_pattern="data.tmp", item_type="MAP_COL", active=True) + fr_map_ignored2 = create_mock_file_rule(filename_pattern="backup_diffuse.jpg", item_type="MAP_COL", active=True) + fr_map_ignored3 = create_mock_file_rule(filename_pattern="normal_old.png", item_type="MAP_COL", active=True) + fr_map_process = create_mock_file_rule(filename_pattern="final_texture.tif", item_type="MAP_COL", active=True) + + context = create_file_filter_mock_context(file_rules_list=[ + fr_ignore1, fr_ignore2, fr_ignore3, + fr_map_ignored1, fr_map_ignored2, fr_map_ignored3, fr_map_process + ]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_map_process in updated_context.files_to_process + assert fr_map_ignored1 not in updated_context.files_to_process + assert fr_map_ignored2 not in updated_context.files_to_process + assert fr_map_ignored3 not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore1.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore2.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore3.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_map_ignored1.filename_pattern}' due to matching ignore pattern.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_map_ignored2.filename_pattern}' due to matching ignore pattern.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_map_ignored3.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_file_ignore_rule_is_inactive(mock_log_debug, mock_log_info): + """ + Test case: FILE_IGNORE Rule is Inactive. + An ignore rule itself is active=False. Assert its pattern is NOT used for filtering. + """ + stage = FileRuleFilterStage() + fr_inactive_ignore = create_mock_file_rule(filename_pattern="*_ignore.tif", item_type="FILE_IGNORE", active=False) + fr_should_process1 = create_mock_file_rule(filename_pattern="diffuse_ignore.tif", item_type="MAP_COL", active=True) # Should be processed + fr_should_process2 = create_mock_file_rule(filename_pattern="normal_ok.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_inactive_ignore, fr_should_process1, fr_should_process2]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr_should_process1 in updated_context.files_to_process + assert fr_should_process2 in updated_context.files_to_process + # Ensure the inactive ignore rule's pattern was not registered + # We check this by ensuring no debug log for registering *that specific* pattern was made. + # A more robust way would be to check mock_log_debug.call_args_list, but this is simpler for now. + for call in mock_log_debug.call_args_list: + args, kwargs = call + if "Registering ignore pattern" in args[0] and fr_inactive_ignore.filename_pattern in args[0]: + pytest.fail(f"Inactive ignore pattern '{fr_inactive_ignore.filename_pattern}' was incorrectly registered.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping inactive file rule: '{fr_inactive_ignore.filename_pattern}' (type: FILE_IGNORE)") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_no_file_ignore_rules(mock_log_debug, mock_log_info): + """ + Test case: No FILE_IGNORE Rules. + All rules are MAP_COL or other processable types. + Assert all active, processable rules are included. + """ + stage = FileRuleFilterStage() + fr1 = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr2 = create_mock_file_rule(filename_pattern="normal.png", item_type="MAP_COL", active=True) + fr_other_type = create_mock_file_rule(filename_pattern="spec.tif", item_type="MAP_SPEC", active=True) # Assuming MAP_SPEC is processable + fr_inactive = create_mock_file_rule(filename_pattern="ao.jpg", item_type="MAP_AO", active=False) + + context = create_file_filter_mock_context(file_rules_list=[fr1, fr2, fr_other_type, fr_inactive]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 3 + assert fr1 in updated_context.files_to_process + assert fr2 in updated_context.files_to_process + assert fr_other_type in updated_context.files_to_process + assert fr_inactive not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping inactive file rule: '{fr_inactive.filename_pattern}'") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 3 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_item_type_not_processable(mock_log_debug, mock_log_info): + """ + Test case: Item type is not processable (e.g., not MAP_COL, MAP_AO etc., but something else like 'METADATA_ONLY'). + Assert such rules are not added to files_to_process, unless they are FILE_IGNORE. + """ + stage = FileRuleFilterStage() + fr_processable = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr_not_processable = create_mock_file_rule(filename_pattern="info.txt", item_type="METADATA_ONLY", active=True) + fr_ignore = create_mock_file_rule(filename_pattern="*.bak", item_type="FILE_IGNORE", active=True) + fr_ignored_by_bak = create_mock_file_rule(filename_pattern="diffuse.bak", item_type="MAP_COL", active=True) + + context = create_file_filter_mock_context(file_rules_list=[fr_processable, fr_not_processable, fr_ignore, fr_ignored_by_bak]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_processable in updated_context.files_to_process + assert fr_not_processable not in updated_context.files_to_process + assert fr_ignored_by_bak not in updated_context.files_to_process + + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_not_processable.filename_pattern}' as its item_type '{fr_not_processable.item_type}' is not processable.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_ignored_by_bak.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +# Example tests from instructions (can be adapted or used as a base) +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_basic_active_example(mock_log_debug, mock_log_info): # Renamed to avoid conflict + stage = FileRuleFilterStage() + fr1 = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr2 = create_mock_file_rule(filename_pattern="normal.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr1, fr2]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr1 in updated_context.files_to_process + assert fr2 in updated_context.files_to_process + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_with_file_ignore_example(mock_log_debug, mock_log_info): # Renamed to avoid conflict + stage = FileRuleFilterStage() + fr_ignore = create_mock_file_rule(filename_pattern="*_ignore.tif", item_type="FILE_IGNORE", active=True) + fr_process = create_mock_file_rule(filename_pattern="diffuse_ok.tif", item_type="MAP_COL", active=True) + fr_skip = create_mock_file_rule(filename_pattern="normal_ignore.tif", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_ignore, fr_process, fr_skip]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_process in updated_context.files_to_process + assert fr_skip not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_skip.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_gloss_to_rough_conversion.py b/tests/processing/pipeline/stages/test_gloss_to_rough_conversion.py new file mode 100644 index 0000000..934ad2c --- /dev/null +++ b/tests/processing/pipeline/stages/test_gloss_to_rough_conversion.py @@ -0,0 +1,486 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +from typing import Optional, List, Dict + +from processing.pipeline.stages.gloss_to_rough_conversion import GlossToRoughConversionStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule +from configuration import Configuration, GeneralSettings +# No direct ipu import needed in test if we mock its usage by the stage + +def create_mock_file_rule_for_gloss_test( + id_val: Optional[uuid.UUID] = None, + map_type: str = "GLOSS", # Test with GLOSS and other types + filename_pattern: str = "gloss.png" +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = "MAP_COL" + mock_fr.active = True + return mock_fr + +def create_gloss_conversion_mock_context( + initial_file_rules: Optional[List[FileRule]] = None, # Type hint corrected + initial_processed_details: Optional[Dict] = None, # Type hint corrected + skip_asset_flag: bool = False, + asset_name: str = "GlossAsset", + # Add a mock for general_settings if your stage checks a global flag + # convert_gloss_globally: bool = True +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.file_rules = initial_file_rules if initial_file_rules is not None else [] + + mock_source_rule = mock.MagicMock(spec=SourceRule) + + mock_gs = mock.MagicMock(spec=GeneralSettings) + # if your stage uses a global flag: + # mock_gs.convert_gloss_to_rough_globally = convert_gloss_globally + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_gs + + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), # Important for new temp file paths + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, + files_to_process=list(initial_file_rules) if initial_file_rules else [], # Stage modifies this list + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, # Added as per AssetProcessingContext definition + sha5_value=None # Added as per AssetProcessingContext definition + ) + return context + +# Unit tests will be added below +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_asset_skipped(mock_load_image, mock_save_image): + """ + Test that if 'skip_asset' is True, no processing occurs. + """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS") + + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_gloss_map.png', 'status': 'Processed', 'map_type': 'GLOSS'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr], + initial_processed_details=initial_details, + skip_asset_flag=True # Asset is skipped + ) + + # Keep a copy of files_to_process and processed_maps_details to compare + original_files_to_process = list(context.files_to_process) + original_processed_maps_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert updated_context.files_to_process == original_files_to_process, "files_to_process should not change if asset is skipped" + assert updated_context.processed_maps_details == original_processed_maps_details, "processed_maps_details should not change if asset is skipped" + assert updated_context.status_flags['skip_asset'] is True +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_no_gloss_map_present(mock_load_image, mock_save_image): + """ + Test that if no GLOSS maps are in files_to_process, no conversion occurs. + """ + stage = GlossToRoughConversionStage() + + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_gloss_test(id_val=normal_rule_id, map_type="NORMAL", filename_pattern="normal.png") + albedo_fr = create_mock_file_rule_for_gloss_test(map_type="ALBEDO", filename_pattern="albedo.jpg") + + initial_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal_map.png', 'status': 'Processed', 'map_type': 'NORMAL'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[normal_fr, albedo_fr], + initial_processed_details=initial_details + ) + + original_files_to_process = list(context.files_to_process) + original_processed_maps_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert updated_context.files_to_process == original_files_to_process, "files_to_process should not change if no GLOSS maps are present" + assert updated_context.processed_maps_details == original_processed_maps_details, "processed_maps_details should not change if no GLOSS maps are present" + + # Ensure map types of existing rules are unchanged + for fr_in_list in updated_context.files_to_process: + if fr_in_list.id == normal_fr.id: + assert fr_in_list.map_type == "NORMAL" + elif fr_in_list.id == albedo_fr.id: + assert fr_in_list.map_type == "ALBEDO" +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') # Mock logging +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_gloss_conversion_uint8_success(mock_load_image, mock_save_image, mock_logging): + """ + Test successful conversion of a GLOSS map (uint8 data) to ROUGHNESS. + """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + # Use a distinct filename for the gloss map to ensure correct path construction + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="my_gloss_map.png") + other_fr_id = uuid.uuid4() + other_fr = create_mock_file_rule_for_gloss_test(id_val=other_fr_id, map_type="NORMAL", filename_pattern="normal_map.png") + + initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_map.png") + initial_other_temp_path = Path("/fake/temp_engine_dir/processed_normal_map.png") + + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': str(initial_gloss_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'}, + other_fr.id.hex: {'temp_processed_file': str(initial_other_temp_path), 'status': 'Processed', 'map_type': 'NORMAL'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr, other_fr], + initial_processed_details=initial_details + ) + + mock_loaded_gloss_data = np.array([10, 50, 250], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_gloss_data + mock_save_image.return_value = True # Simulate successful save + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_gloss_temp_path) + + # Check that save_image was called with inverted data and correct path + expected_inverted_data = 255 - mock_loaded_gloss_data + + # call_args[0] is a tuple of positional args, call_args[1] is a dict of kwargs + saved_path_arg = mock_save_image.call_args[0][0] + saved_data_arg = mock_save_image.call_args[0][1] + + assert np.array_equal(saved_data_arg, expected_inverted_data), "Image data passed to save_image is not correctly inverted." + assert "rough_from_gloss_" in saved_path_arg.name, "Saved file name should indicate conversion from gloss." + assert saved_path_arg.parent == Path("/fake/temp_engine_dir"), "Saved file should be in the engine temp directory." + # Ensure the new filename is based on the original gloss map's ID for uniqueness + assert gloss_fr.id.hex in saved_path_arg.name + + # Check context.files_to_process + assert len(updated_context.files_to_process) == 2, "Number of file rules in context should remain the same." + converted_rule_found = False + other_rule_untouched = False + for fr_in_list in updated_context.files_to_process: + if fr_in_list.id == gloss_fr.id: # Should be the same rule object, modified + assert fr_in_list.map_type == "ROUGHNESS", "GLOSS map_type should be changed to ROUGHNESS." + # Check if filename_pattern was updated (optional, depends on stage logic) + # For now, assume it might not be, as the primary identifier is map_type and ID + converted_rule_found = True + elif fr_in_list.id == other_fr.id: + assert fr_in_list.map_type == "NORMAL", "Other map_type should remain unchanged." + other_rule_untouched = True + assert converted_rule_found, "The converted GLOSS rule was not found or not updated correctly in files_to_process." + assert other_rule_untouched, "The non-GLOSS rule was modified unexpectedly." + + # Check context.processed_maps_details + assert len(updated_context.processed_maps_details) == 2, "Number of entries in processed_maps_details should remain the same." + + gloss_detail = updated_context.processed_maps_details[gloss_fr.id.hex] + assert "rough_from_gloss_" in gloss_detail['temp_processed_file'], "temp_processed_file for gloss map not updated." + assert Path(gloss_detail['temp_processed_file']).name == saved_path_arg.name, "Path in details should match saved path." + assert gloss_detail['original_map_type_before_conversion'] == "GLOSS", "original_map_type_before_conversion not set correctly." + assert "Converted from GLOSS to ROUGHNESS" in gloss_detail['notes'], "Conversion notes not added or incorrect." + assert gloss_detail['map_type'] == "ROUGHNESS", "map_type in details not updated to ROUGHNESS." + + + other_detail = updated_context.processed_maps_details[other_fr.id.hex] + assert other_detail['temp_processed_file'] == str(initial_other_temp_path), "Other map's temp_processed_file should be unchanged." + assert other_detail['map_type'] == "NORMAL", "Other map's map_type should be unchanged." + assert 'original_map_type_before_conversion' not in other_detail, "Other map should not have conversion history." + assert 'notes' not in other_detail or "Converted from GLOSS" not in other_detail['notes'], "Other map should not have conversion notes." + + mock_logging.info.assert_any_call(f"Successfully converted GLOSS map {gloss_fr.id.hex} to ROUGHNESS.") +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') # Mock logging +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_gloss_conversion_float_success(mock_load_image, mock_save_image, mock_logging): + """ + Test successful conversion of a GLOSS map (float data) to ROUGHNESS. + """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="gloss_float.hdr") # Example float format + + initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_float.hdr") + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': str(initial_gloss_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr], + initial_processed_details=initial_details + ) + + mock_loaded_gloss_data = np.array([0.1, 0.5, 0.9], dtype=np.float32) + mock_load_image.return_value = mock_loaded_gloss_data + mock_save_image.return_value = True # Simulate successful save + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_gloss_temp_path) + + expected_inverted_data = 1.0 - mock_loaded_gloss_data + + saved_path_arg = mock_save_image.call_args[0][0] + saved_data_arg = mock_save_image.call_args[0][1] + + assert np.allclose(saved_data_arg, expected_inverted_data), "Image data (float) passed to save_image is not correctly inverted." + assert "rough_from_gloss_" in saved_path_arg.name, "Saved file name should indicate conversion from gloss." + assert saved_path_arg.parent == Path("/fake/temp_engine_dir"), "Saved file should be in the engine temp directory." + assert gloss_fr.id.hex in saved_path_arg.name + + assert len(updated_context.files_to_process) == 1 + converted_rule = updated_context.files_to_process[0] + assert converted_rule.id == gloss_fr.id + assert converted_rule.map_type == "ROUGHNESS" + + gloss_detail = updated_context.processed_maps_details[gloss_fr.id.hex] + assert "rough_from_gloss_" in gloss_detail['temp_processed_file'] + assert Path(gloss_detail['temp_processed_file']).name == saved_path_arg.name + assert gloss_detail['original_map_type_before_conversion'] == "GLOSS" + assert "Converted from GLOSS to ROUGHNESS" in gloss_detail['notes'] + assert gloss_detail['map_type'] == "ROUGHNESS" + + mock_logging.info.assert_any_call(f"Successfully converted GLOSS map {gloss_fr.id.hex} to ROUGHNESS.") +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_load_image_fails(mock_load_image, mock_save_image, mock_logging): + """ + Test behavior when ipu.load_image fails (returns None). + The original FileRule should be kept, and an error logged. + """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="gloss_fails_load.png") + + initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_fails_load.png") + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': str(initial_gloss_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr], + initial_processed_details=initial_details + ) + + # Keep a copy for comparison + original_file_rule_map_type = gloss_fr.map_type + original_details_entry = context.processed_maps_details[gloss_fr.id.hex].copy() + + mock_load_image.return_value = None # Simulate load failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_gloss_temp_path) + mock_save_image.assert_not_called() # Save should not be attempted + + # Check context.files_to_process: rule should be unchanged + assert len(updated_context.files_to_process) == 1 + processed_rule = updated_context.files_to_process[0] + assert processed_rule.id == gloss_fr.id + assert processed_rule.map_type == original_file_rule_map_type, "FileRule map_type should not change if load fails." + assert processed_rule.map_type == "GLOSS" # Explicitly check it's still GLOSS + + # Check context.processed_maps_details: details should be unchanged + current_details_entry = updated_context.processed_maps_details[gloss_fr.id.hex] + assert current_details_entry['temp_processed_file'] == str(initial_gloss_temp_path) + assert current_details_entry['map_type'] == "GLOSS" + assert 'original_map_type_before_conversion' not in current_details_entry + assert 'notes' not in current_details_entry or "Converted from GLOSS" not in current_details_entry['notes'] + + mock_logging.error.assert_called_once_with( + f"Failed to load image data for GLOSS map {gloss_fr.id.hex} from {initial_gloss_temp_path}. Skipping conversion for this map." + ) +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_save_image_fails(mock_load_image, mock_save_image, mock_logging): + """ + Test behavior when ipu.save_image fails (returns False). + The original FileRule should be kept, and an error logged. + """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="gloss_fails_save.png") + + initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_fails_save.png") + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': str(initial_gloss_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr], + initial_processed_details=initial_details + ) + + original_file_rule_map_type = gloss_fr.map_type + original_details_entry = context.processed_maps_details[gloss_fr.id.hex].copy() + + mock_loaded_gloss_data = np.array([10, 50, 250], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_gloss_data + mock_save_image.return_value = False # Simulate save failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_gloss_temp_path) + + # Check that save_image was called with correct data and path + expected_inverted_data = 255 - mock_loaded_gloss_data + # call_args[0] is a tuple of positional args + saved_path_arg = mock_save_image.call_args[0][0] + saved_data_arg = mock_save_image.call_args[0][1] + + assert np.array_equal(saved_data_arg, expected_inverted_data), "Image data passed to save_image is not correctly inverted even on failure." + assert "rough_from_gloss_" in saved_path_arg.name, "Attempted save file name should indicate conversion from gloss." + assert saved_path_arg.parent == Path("/fake/temp_engine_dir"), "Attempted save file should be in the engine temp directory." + + # Check context.files_to_process: rule should be unchanged + assert len(updated_context.files_to_process) == 1 + processed_rule = updated_context.files_to_process[0] + assert processed_rule.id == gloss_fr.id + assert processed_rule.map_type == original_file_rule_map_type, "FileRule map_type should not change if save fails." + assert processed_rule.map_type == "GLOSS" + + # Check context.processed_maps_details: details should be unchanged + current_details_entry = updated_context.processed_maps_details[gloss_fr.id.hex] + assert current_details_entry['temp_processed_file'] == str(initial_gloss_temp_path) + assert current_details_entry['map_type'] == "GLOSS" + assert 'original_map_type_before_conversion' not in current_details_entry + assert 'notes' not in current_details_entry or "Converted from GLOSS" not in current_details_entry['notes'] + + mock_logging.error.assert_called_once_with( + f"Failed to save inverted GLOSS map {gloss_fr.id.hex} to {saved_path_arg}. Retaining original GLOSS map." + ) +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_gloss_map_in_files_to_process_but_not_in_details(mock_load_image, mock_save_image, mock_logging): + """ + Test behavior when a GLOSS FileRule is in files_to_process but its details + are missing from processed_maps_details. + The stage should log an error and skip this FileRule. + """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + # This FileRule is in files_to_process + gloss_fr_in_list = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="orphan_gloss.png") + + # processed_maps_details is empty or does not contain gloss_fr_in_list.id.hex + initial_details = {} + + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr_in_list], + initial_processed_details=initial_details + ) + + original_files_to_process = list(context.files_to_process) + original_processed_maps_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() # Load should not be attempted if details are missing + mock_save_image.assert_not_called() # Save should not be attempted + + # Check context.files_to_process: rule should be unchanged + assert len(updated_context.files_to_process) == 1 + processed_rule = updated_context.files_to_process[0] + assert processed_rule.id == gloss_fr_in_list.id + assert processed_rule.map_type == "GLOSS", "FileRule map_type should not change if its details are missing." + + # Check context.processed_maps_details: should remain unchanged + assert updated_context.processed_maps_details == original_processed_maps_details, "processed_maps_details should not change." + + mock_logging.error.assert_called_once_with( + f"GLOSS map {gloss_fr_in_list.id.hex} found in files_to_process but missing from processed_maps_details. Skipping conversion." + ) + +# Test for Case 8.2 (GLOSS map ID in processed_maps_details but no corresponding FileRule in files_to_process) +# This case is implicitly handled because the stage iterates files_to_process. +# If a FileRule isn't in files_to_process, its corresponding entry in processed_maps_details (if any) won't be acted upon. +# We can add a simple test to ensure no errors occur and non-relevant details are untouched. + +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_gloss_detail_exists_but_not_in_files_to_process(mock_load_image, mock_save_image, mock_logging): + """ + Test that if a GLOSS map detail exists in processed_maps_details but + no corresponding FileRule is in files_to_process, it's simply ignored + without error, and other valid conversions proceed. + """ + stage = GlossToRoughConversionStage() + + # This rule will be processed + convert_rule_id = uuid.uuid4() + convert_fr = create_mock_file_rule_for_gloss_test(id_val=convert_rule_id, map_type="GLOSS", filename_pattern="convert_me.png") + convert_initial_temp_path = Path("/fake/temp_engine_dir/processed_convert_me.png") + + # This rule's details exist, but the rule itself is not in files_to_process + orphan_detail_id = uuid.uuid4() + + initial_details = { + convert_fr.id.hex: {'temp_processed_file': str(convert_initial_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'}, + orphan_detail_id.hex: {'temp_processed_file': '/fake/temp_engine_dir/orphan.png', 'status': 'Processed', 'map_type': 'GLOSS', 'notes': 'This is an orphan'} + } + + context = create_gloss_conversion_mock_context( + initial_file_rules=[convert_fr], # Only convert_fr is in files_to_process + initial_processed_details=initial_details + ) + + mock_loaded_data = np.array([100], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_data + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + # Assert that load/save were called only for the rule in files_to_process + mock_load_image.assert_called_once_with(convert_initial_temp_path) + mock_save_image.assert_called_once() # Check it was called, details checked in other tests + + # Check that the orphan detail in processed_maps_details is untouched + assert orphan_detail_id.hex in updated_context.processed_maps_details + orphan_entry = updated_context.processed_maps_details[orphan_detail_id.hex] + assert orphan_entry['temp_processed_file'] == '/fake/temp_engine_dir/orphan.png' + assert orphan_entry['map_type'] == 'GLOSS' + assert orphan_entry['notes'] == 'This is an orphan' + assert 'original_map_type_before_conversion' not in orphan_entry + + # Check that the processed rule was indeed converted + assert convert_fr.id.hex in updated_context.processed_maps_details + converted_entry = updated_context.processed_maps_details[convert_fr.id.hex] + assert converted_entry['map_type'] == 'ROUGHNESS' + assert "rough_from_gloss_" in converted_entry['temp_processed_file'] + + # No errors should have been logged regarding the orphan detail + for call_args in mock_logging.error.call_args_list: + assert str(orphan_detail_id.hex) not in call_args[0][0], "Error logged for orphan detail" \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_individual_map_processing.py b/tests/processing/pipeline/stages/test_individual_map_processing.py new file mode 100644 index 0000000..2d78c00 --- /dev/null +++ b/tests/processing/pipeline/stages/test_individual_map_processing.py @@ -0,0 +1,555 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +from typing import Optional # Added for type hinting in helper functions + +from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule, TransformSettings # Key models +from configuration import Configuration, GeneralSettings +# cv2 might be imported by the stage for interpolation constants, ensure it's mockable if so. +# For now, assume ipu handles interpolation details. + +def create_mock_transform_settings( + target_width=0, target_height=0, resize_mode="FIT", + ensure_pot=False, allow_upscale=True, target_color_profile="RGB" # Add other fields as needed +) -> mock.MagicMock: + ts = mock.MagicMock(spec=TransformSettings) + ts.target_width = target_width + ts.target_height = target_height + ts.resize_mode = resize_mode + ts.ensure_pot = ensure_pot + ts.allow_upscale = allow_upscale + ts.target_color_profile = target_color_profile + # ts.resize_filter = "AREA" # if your stage uses this + return ts + +def create_mock_file_rule_for_individual_processing( + id_val: Optional[uuid.UUID] = None, + map_type: str = "ALBEDO", + filename_pattern: str = "albedo_*.png", # Pattern for glob + item_type: str = "MAP_COL", + active: bool = True, + transform_settings: Optional[mock.MagicMock] = None +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = item_type + mock_fr.active = active + mock_fr.transform_settings = transform_settings if transform_settings else create_mock_transform_settings() + return mock_fr + +def create_individual_map_proc_mock_context( + initial_file_rules: Optional[list] = None, + asset_source_path_str: str = "/fake/asset_source", + skip_asset_flag: bool = False, + asset_name: str = "IndividualMapAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.source_path = Path(asset_source_path_str) + # file_rules on AssetRule not directly used by stage, context.files_to_process is + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_config = mock.MagicMock(spec=Configuration) + # mock_config.general_settings = mock.MagicMock(spec=GeneralSettings) # If needed + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details={}, # Stage populates this + merged_maps_details={}, + files_to_process=list(initial_file_rules) if initial_file_rules else [], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha_value if that's the actual param + ) + return context + +# Placeholder for tests to be added next +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu') +@mock.patch('logging.info') +def test_asset_skipped_if_flag_is_true(mock_log_info, mock_ipu): + stage = IndividualMapProcessingStage() + context = create_individual_map_proc_mock_context(skip_asset_flag=True) + + # Add a dummy file rule to ensure it's not processed + file_rule = create_mock_file_rule_for_individual_processing() + context.files_to_process = [file_rule] + + updated_context = stage.execute(context) + + mock_ipu.load_image.assert_not_called() + mock_ipu.save_image.assert_not_called() + assert not updated_context.processed_maps_details # No details should be added + # Check for a log message indicating skip, if applicable (depends on stage's logging) + # mock_log_info.assert_any_call("Skipping asset IndividualMapAsset due to status_flags['skip_asset'] = True") # Example + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu') +@mock.patch('logging.info') +def test_no_processing_if_no_map_col_rules(mock_log_info, mock_ipu): + stage = IndividualMapProcessingStage() + + # Create a file rule that is NOT of item_type MAP_COL + non_map_col_rule = create_mock_file_rule_for_individual_processing(item_type="METADATA") + context = create_individual_map_proc_mock_context(initial_file_rules=[non_map_col_rule]) + + updated_context = stage.execute(context) + + mock_ipu.load_image.assert_not_called() + mock_ipu.save_image.assert_not_called() + assert not updated_context.processed_maps_details + # mock_log_info.assert_any_call("No FileRules of item_type 'MAP_COL' to process for asset IndividualMapAsset.") # Example + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image') +@mock.patch('pathlib.Path.glob') # Mocking Path.glob used by the stage's _find_source_file +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_individual_map_processing_success_no_resize( + mock_log_error, mock_log_info, mock_path_glob, mock_load_image, + mock_calc_dims, mock_resize_image, mock_save_image +): + stage = IndividualMapProcessingStage() + + source_file_name = "albedo_source.png" + # The glob is called on context.asset_rule.source_path, so mock that Path object's glob + mock_asset_source_path = Path("/fake/asset_source") + mock_found_source_path = mock_asset_source_path / source_file_name + + # We need to mock the glob method of the Path instance + # that represents the asset's source directory. + # The stage does something like: Path(context.asset_rule.source_path).glob(...) + # So, we need to ensure that when Path() is called with that specific string, + # the resulting object's glob method is our mock. + # A more robust way is to mock Path itself to return a mock object + # whose glob method is also a mock. + + # Simpler approach for now: assume Path.glob is used as a static/class method call + # or that the instance it's called on is correctly patched by @mock.patch('pathlib.Path.glob') + # if the stage does `from pathlib import Path` and then `Path(path_str).glob(...)`. + # The prompt example uses @mock.patch('pathlib.Path.glob'), implying the stage might do this: + # for f_pattern in patterns: + # for found_file in Path(base_dir).glob(f_pattern): ... + # Let's refine the mock_path_glob setup. + # The stage's _find_source_file likely does: + # search_path = Path(self.context.asset_rule.source_path) + # found_files = list(search_path.glob(filename_pattern)) + + # To correctly mock this, we need to mock the `glob` method of the specific Path instance. + # Or, if `_find_source_file` instantiates `Path` like `Path(str(context.asset_rule.source_path)).glob(...)`, + # then patching `pathlib.Path.glob` might work if it's treated as a method that gets bound. + # Let's stick to the example's @mock.patch('pathlib.Path.glob') and assume it covers the usage. + mock_path_glob.return_value = [mock_found_source_path] # Glob finds one file + + ts = create_mock_transform_settings(target_width=100, target_height=100) + file_rule = create_mock_file_rule_for_individual_processing( + map_type="ALBEDO", filename_pattern="albedo_*.png", transform_settings=ts + ) + context = create_individual_map_proc_mock_context( + initial_file_rules=[file_rule], + asset_source_path_str=str(mock_asset_source_path) # Ensure context uses this path + ) + + mock_img_data = np.zeros((100, 100, 3), dtype=np.uint8) # Original dimensions + mock_load_image.return_value = mock_img_data + mock_calc_dims.return_value = (100, 100) # No resize needed + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + # Assert that Path(context.asset_rule.source_path).glob was called + # This requires a bit more intricate mocking if Path instances are created inside. + # For now, assert mock_path_glob was called with the pattern. + # The actual call in stage is `Path(context.asset_rule.source_path).glob(file_rule.filename_pattern)` + # So, `mock_path_glob` (if it patches `Path.glob` globally) should be called. + # We need to ensure the mock_path_glob is associated with the correct Path instance or that + # the global patch works as intended. + # A common pattern is: + # with mock.patch.object(Path, 'glob', return_value=[mock_found_source_path]) as specific_glob_mock: + # # execute code + # specific_glob_mock.assert_called_once_with(file_rule.filename_pattern) + # However, the decorator @mock.patch('pathlib.Path.glob') should work if the stage code is + # `from pathlib import Path; p = Path(...); p.glob(...)` + + # The stage's _find_source_file will instantiate a Path object from context.asset_rule.source_path + # and then call glob on it. + # So, @mock.patch('pathlib.Path.glob') is patching the method on the class. + # When an instance calls it, the mock is used. + mock_path_glob.assert_called_once_with(file_rule.filename_pattern) + + + mock_load_image.assert_called_once_with(mock_found_source_path) + # The actual call to calculate_target_dimensions is: + # ipu.calculate_target_dimensions(original_dims, ts.target_width, ts.target_height, ts.resize_mode, ts.ensure_pot, ts.allow_upscale) + mock_calc_dims.assert_called_once_with( + (100, 100), ts.target_width, ts.target_height, ts.resize_mode, ts.ensure_pot, ts.allow_upscale + ) + mock_resize_image.assert_not_called() # Crucial for this test case + mock_save_image.assert_called_once() + + # Check save path and data + saved_image_arg, saved_path_arg = mock_save_image.call_args[0] + assert np.array_equal(saved_image_arg, mock_img_data) # Ensure correct image data is passed to save + assert "processed_ALBEDO_" in saved_path_arg.name # Based on map_type + assert file_rule.id.hex in saved_path_arg.name # Ensure unique name with FileRule ID + assert saved_path_arg.parent == context.engine_temp_dir + + assert file_rule.id.hex in updated_context.processed_maps_details + details = updated_context.processed_maps_details[file_rule.id.hex] + assert details['status'] == 'Processed' + assert details['source_file'] == str(mock_found_source_path) + assert Path(details['temp_processed_file']) == saved_path_arg + assert details['original_dimensions'] == (100, 100) + assert details['processed_dimensions'] == (100, 100) + assert details['map_type'] == file_rule.map_type + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Successfully processed map {file_rule.map_type} (ID: {file_rule.id.hex}) for asset {context.asset_rule.name}. Output: {saved_path_arg}") + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image') +@mock.patch('pathlib.Path.glob') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_source_file_not_found( + mock_log_error, mock_log_info, mock_path_glob, mock_load_image, + mock_calc_dims, mock_resize_image, mock_save_image +): + stage = IndividualMapProcessingStage() + mock_asset_source_path = Path("/fake/asset_source") + + mock_path_glob.return_value = [] # Glob finds no files + + file_rule = create_mock_file_rule_for_individual_processing(filename_pattern="nonexistent_*.png") + context = create_individual_map_proc_mock_context( + initial_file_rules=[file_rule], + asset_source_path_str=str(mock_asset_source_path) + ) + + updated_context = stage.execute(context) + + mock_path_glob.assert_called_once_with(file_rule.filename_pattern) + mock_load_image.assert_not_called() + mock_calc_dims.assert_not_called() + mock_resize_image.assert_not_called() + mock_save_image.assert_not_called() + + assert file_rule.id.hex in updated_context.processed_maps_details + details = updated_context.processed_maps_details[file_rule.id.hex] + assert details['status'] == 'Source Not Found' + assert details['source_file'] is None + assert details['temp_processed_file'] is None + assert details['error_message'] is not None # Check an error message is present + mock_log_error.assert_called_once() + # Example: mock_log_error.assert_called_with(f"Could not find source file for rule {file_rule.id} (pattern: {file_rule.filename_pattern}) in {context.asset_rule.source_path}") + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image') +@mock.patch('pathlib.Path.glob') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_load_image_fails( + mock_log_error, mock_log_info, mock_path_glob, mock_load_image, + mock_calc_dims, mock_resize_image, mock_save_image +): + stage = IndividualMapProcessingStage() + source_file_name = "albedo_corrupt.png" + mock_asset_source_path = Path("/fake/asset_source") + mock_found_source_path = mock_asset_source_path / source_file_name + mock_path_glob.return_value = [mock_found_source_path] + + mock_load_image.return_value = None # Simulate load failure + + file_rule = create_mock_file_rule_for_individual_processing(filename_pattern="albedo_*.png") + context = create_individual_map_proc_mock_context( + initial_file_rules=[file_rule], + asset_source_path_str=str(mock_asset_source_path) + ) + + updated_context = stage.execute(context) + + mock_path_glob.assert_called_once_with(file_rule.filename_pattern) + mock_load_image.assert_called_once_with(mock_found_source_path) + mock_calc_dims.assert_not_called() + mock_resize_image.assert_not_called() + mock_save_image.assert_not_called() + + assert file_rule.id.hex in updated_context.processed_maps_details + details = updated_context.processed_maps_details[file_rule.id.hex] + assert details['status'] == 'Load Failed' + assert details['source_file'] == str(mock_found_source_path) + assert details['temp_processed_file'] is None + assert details['error_message'] is not None + mock_log_error.assert_called_once() + # Example: mock_log_error.assert_called_with(f"Failed to load image {mock_found_source_path} for rule {file_rule.id}") + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image') +@mock.patch('pathlib.Path.glob') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_resize_occurs_when_dimensions_differ( + mock_log_error, mock_log_info, mock_path_glob, mock_load_image, + mock_calc_dims, mock_resize_image, mock_save_image +): + stage = IndividualMapProcessingStage() + source_file_name = "albedo_resize.png" + mock_asset_source_path = Path("/fake/asset_source") + mock_found_source_path = mock_asset_source_path / source_file_name + mock_path_glob.return_value = [mock_found_source_path] + + original_dims = (100, 100) + target_dims = (50, 50) # Different dimensions + mock_img_data = np.zeros((*original_dims, 3), dtype=np.uint8) + mock_resized_img_data = np.zeros((*target_dims, 3), dtype=np.uint8) + + mock_load_image.return_value = mock_img_data + ts = create_mock_transform_settings(target_width=target_dims[0], target_height=target_dims[1]) + file_rule = create_mock_file_rule_for_individual_processing(transform_settings=ts) + context = create_individual_map_proc_mock_context( + initial_file_rules=[file_rule], + asset_source_path_str=str(mock_asset_source_path) + ) + + mock_calc_dims.return_value = target_dims # Simulate calc_dims returning new dimensions + mock_resize_image.return_value = mock_resized_img_data # Simulate resize returning new image data + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(mock_found_source_path) + mock_calc_dims.assert_called_once_with( + original_dims, ts.target_width, ts.target_height, ts.resize_mode, ts.ensure_pot, ts.allow_upscale + ) + # The actual call to resize_image is: + # ipu.resize_image(loaded_image, target_dims, ts.resize_filter) # Assuming resize_filter is used + # If resize_filter is not on TransformSettings or not used, adjust this. + # For now, let's assume it's ipu.resize_image(loaded_image, target_dims) or similar + # The stage code is: resized_image = ipu.resize_image(loaded_image, target_dims_calculated, file_rule.transform_settings.resize_filter) + # So we need to mock ts.resize_filter + ts.resize_filter = "LANCZOS4" # Example filter + mock_resize_image.assert_called_once_with(mock_img_data, target_dims, ts.resize_filter) + + saved_image_arg, saved_path_arg = mock_save_image.call_args[0] + assert np.array_equal(saved_image_arg, mock_resized_img_data) # Check resized data is saved + assert "processed_ALBEDO_" in saved_path_arg.name + assert saved_path_arg.parent == context.engine_temp_dir + + assert file_rule.id.hex in updated_context.processed_maps_details + details = updated_context.processed_maps_details[file_rule.id.hex] + assert details['status'] == 'Processed' + assert details['original_dimensions'] == original_dims + assert details['processed_dimensions'] == target_dims + mock_log_error.assert_not_called() + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image') +@mock.patch('pathlib.Path.glob') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_save_image_fails( + mock_log_error, mock_log_info, mock_path_glob, mock_load_image, + mock_calc_dims, mock_resize_image, mock_save_image +): + stage = IndividualMapProcessingStage() + source_file_name = "albedo_save_fail.png" + mock_asset_source_path = Path("/fake/asset_source") + mock_found_source_path = mock_asset_source_path / source_file_name + mock_path_glob.return_value = [mock_found_source_path] + + mock_img_data = np.zeros((100, 100, 3), dtype=np.uint8) + mock_load_image.return_value = mock_img_data + mock_calc_dims.return_value = (100, 100) # No resize + mock_save_image.return_value = False # Simulate save failure + + ts = create_mock_transform_settings() + file_rule = create_mock_file_rule_for_individual_processing(transform_settings=ts) + context = create_individual_map_proc_mock_context( + initial_file_rules=[file_rule], + asset_source_path_str=str(mock_asset_source_path) + ) + + updated_context = stage.execute(context) + + mock_save_image.assert_called_once() # Attempt to save should still happen + + assert file_rule.id.hex in updated_context.processed_maps_details + details = updated_context.processed_maps_details[file_rule.id.hex] + assert details['status'] == 'Save Failed' + assert details['source_file'] == str(mock_found_source_path) + assert details['temp_processed_file'] is not None # Path was generated + assert details['error_message'] is not None + mock_log_error.assert_called_once() + # Example: mock_log_error.assert_called_with(f"Failed to save processed image for rule {file_rule.id} to {details['temp_processed_file']}") + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.convert_bgr_to_rgb') +@mock.patch('pathlib.Path.glob') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_color_conversion_bgr_to_rgb( + mock_log_error, mock_log_info, mock_path_glob, mock_convert_bgr, mock_load_image, + mock_calc_dims, mock_resize_image, mock_save_image +): + stage = IndividualMapProcessingStage() + source_file_name = "albedo_bgr.png" + mock_asset_source_path = Path("/fake/asset_source") + mock_found_source_path = mock_asset_source_path / source_file_name + mock_path_glob.return_value = [mock_found_source_path] + + mock_bgr_img_data = np.zeros((100, 100, 3), dtype=np.uint8) # Loaded as BGR + mock_rgb_img_data = np.zeros((100, 100, 3), dtype=np.uint8) # After conversion + + mock_load_image.return_value = mock_bgr_img_data # Image is loaded (assume BGR by default from cv2) + mock_convert_bgr.return_value = mock_rgb_img_data # Mock the conversion + mock_calc_dims.return_value = (100, 100) # No resize + mock_save_image.return_value = True + + # Transform settings request RGB, and stage assumes load might be BGR + ts = create_mock_transform_settings(target_color_profile="RGB") + file_rule = create_mock_file_rule_for_individual_processing(transform_settings=ts) + context = create_individual_map_proc_mock_context( + initial_file_rules=[file_rule], + asset_source_path_str=str(mock_asset_source_path) + ) + # The stage code is: + # if file_rule.transform_settings.target_color_profile == "RGB" and loaded_image.shape[2] == 3: + # logger.info(f"Attempting to convert image from BGR to RGB for {file_rule_id_hex}") + # processed_image_data = ipu.convert_bgr_to_rgb(processed_image_data) + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(mock_found_source_path) + mock_convert_bgr.assert_called_once_with(mock_bgr_img_data) + mock_resize_image.assert_not_called() + + saved_image_arg, _ = mock_save_image.call_args[0] + assert np.array_equal(saved_image_arg, mock_rgb_img_data) # Ensure RGB data is saved + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Attempting to convert image from BGR to RGB for {file_rule.id.hex}") + + +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions') +@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image') +@mock.patch('pathlib.Path.glob') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_multiple_map_col_rules_processed( + mock_log_error, mock_log_info, mock_path_glob, mock_load_image, + mock_calc_dims, mock_resize_image, mock_save_image +): + stage = IndividualMapProcessingStage() + mock_asset_source_path = Path("/fake/asset_source") + + # Rule 1: Albedo + ts1 = create_mock_transform_settings(target_width=100, target_height=100) + file_rule1_id = uuid.uuid4() + file_rule1 = create_mock_file_rule_for_individual_processing( + id_val=file_rule1_id, map_type="ALBEDO", filename_pattern="albedo_*.png", transform_settings=ts1 + ) + source_file1 = mock_asset_source_path / "albedo_map.png" + img_data1 = np.zeros((100, 100, 3), dtype=np.uint8) + + # Rule 2: Roughness + ts2 = create_mock_transform_settings(target_width=50, target_height=50) # Resize + ts2.resize_filter = "AREA" + file_rule2_id = uuid.uuid4() + file_rule2 = create_mock_file_rule_for_individual_processing( + id_val=file_rule2_id, map_type="ROUGHNESS", filename_pattern="rough_*.png", transform_settings=ts2 + ) + source_file2 = mock_asset_source_path / "rough_map.png" + img_data2_orig = np.zeros((200, 200, 1), dtype=np.uint8) # Original, needs resize + img_data2_resized = np.zeros((50, 50, 1), dtype=np.uint8) # Resized + + context = create_individual_map_proc_mock_context( + initial_file_rules=[file_rule1, file_rule2], + asset_source_path_str=str(mock_asset_source_path) + ) + + # Mock behaviors for Path.glob, load_image, calc_dims, resize, save + # Path.glob will be called twice + mock_path_glob.side_effect = [ + [source_file1], # For albedo_*.png + [source_file2] # For rough_*.png + ] + mock_load_image.side_effect = [img_data1, img_data2_orig] + mock_calc_dims.side_effect = [ + (100, 100), # For rule1 (no change) + (50, 50) # For rule2 (change) + ] + mock_resize_image.return_value = img_data2_resized # Only called for rule2 + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + # Assertions for Rule 1 (Albedo) + assert mock_path_glob.call_args_list[0][0][0] == file_rule1.filename_pattern + assert mock_load_image.call_args_list[0][0][0] == source_file1 + assert mock_calc_dims.call_args_list[0][0] == ((100,100), ts1.target_width, ts1.target_height, ts1.resize_mode, ts1.ensure_pot, ts1.allow_upscale) + + # Assertions for Rule 2 (Roughness) + assert mock_path_glob.call_args_list[1][0][0] == file_rule2.filename_pattern + assert mock_load_image.call_args_list[1][0][0] == source_file2 + assert mock_calc_dims.call_args_list[1][0] == ((200,200), ts2.target_width, ts2.target_height, ts2.resize_mode, ts2.ensure_pot, ts2.allow_upscale) + mock_resize_image.assert_called_once_with(img_data2_orig, (50,50), ts2.resize_filter) + + assert mock_save_image.call_count == 2 + # Check saved image for rule 1 + saved_img1_arg, saved_path1_arg = mock_save_image.call_args_list[0][0] + assert np.array_equal(saved_img1_arg, img_data1) + assert "processed_ALBEDO_" in saved_path1_arg.name + assert file_rule1_id.hex in saved_path1_arg.name + + # Check saved image for rule 2 + saved_img2_arg, saved_path2_arg = mock_save_image.call_args_list[1][0] + assert np.array_equal(saved_img2_arg, img_data2_resized) + assert "processed_ROUGHNESS_" in saved_path2_arg.name + assert file_rule2_id.hex in saved_path2_arg.name + + # Check context details + assert file_rule1_id.hex in updated_context.processed_maps_details + details1 = updated_context.processed_maps_details[file_rule1_id.hex] + assert details1['status'] == 'Processed' + assert details1['original_dimensions'] == (100, 100) + assert details1['processed_dimensions'] == (100, 100) + + assert file_rule2_id.hex in updated_context.processed_maps_details + details2 = updated_context.processed_maps_details[file_rule2_id.hex] + assert details2['status'] == 'Processed' + assert details2['original_dimensions'] == (200, 200) # Original dims of img_data2_orig + assert details2['processed_dimensions'] == (50, 50) + + mock_log_error.assert_not_called() \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_map_merging.py b/tests/processing/pipeline/stages/test_map_merging.py new file mode 100644 index 0000000..d222f62 --- /dev/null +++ b/tests/processing/pipeline/stages/test_map_merging.py @@ -0,0 +1,538 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +from typing import Optional # Added Optional for type hinting + +from processing.pipeline.stages.map_merging import MapMergingStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule, MergeSettings, MergeInputChannel +from configuration import Configuration + +# Mock Helper Functions +def create_mock_merge_input_channel( + file_rule_id: uuid.UUID, source_channel: int = 0, target_channel: int = 0, invert: bool = False +) -> mock.MagicMock: + mic = mock.MagicMock(spec=MergeInputChannel) + mic.file_rule_id = file_rule_id + mic.source_channel = source_channel + mic.target_channel = target_channel + mic.invert_source_channel = invert + mic.default_value_if_missing = 0 # Or some other default + return mic + +def create_mock_merge_settings( + input_maps: Optional[list] = None, # List of mock MergeInputChannel + output_channels: int = 3 +) -> mock.MagicMock: + ms = mock.MagicMock(spec=MergeSettings) + ms.input_maps = input_maps if input_maps is not None else [] + ms.output_channels = output_channels + return ms + +def create_mock_file_rule_for_merging( + id_val: Optional[uuid.UUID] = None, + map_type: str = "ORM", # Output map type + item_type: str = "MAP_MERGE", + merge_settings: Optional[mock.MagicMock] = None +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = f"{map_type.lower()}_merged.png" # Placeholder + mock_fr.item_type = item_type + mock_fr.active = True + mock_fr.merge_settings = merge_settings if merge_settings else create_mock_merge_settings() + return mock_fr + +def create_map_merging_mock_context( + initial_file_rules: Optional[list] = None, # Will contain the MAP_MERGE rule + initial_processed_details: Optional[dict] = None, # Pre-processed inputs for merge + skip_asset_flag: bool = False, + asset_name: str = "MergeAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_config = mock.MagicMock(spec=Configuration) + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, # Stage populates this + files_to_process=list(initial_file_rules) if initial_file_rules else [], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha_value based on AssetProcessingContext + ) + return context +def test_asset_skipped(): + stage = MapMergingStage() + context = create_map_merging_mock_context(skip_asset_flag=True) + + updated_context = stage.execute(context) + + assert updated_context == context # No changes expected + assert not updated_context.merged_maps_details # No maps should be merged + +def test_no_map_merge_rules(): + stage = MapMergingStage() + # Context with a non-MAP_MERGE rule + non_merge_rule = create_mock_file_rule_for_merging(item_type="TEXTURE_MAP", map_type="Diffuse") + context = create_map_merging_mock_context(initial_file_rules=[non_merge_rule]) + + updated_context = stage.execute(context) + + assert updated_context == context # No changes expected + assert not updated_context.merged_maps_details # No maps should be merged + +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') # If testing resize +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_rgb_success(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRules (mocked as already processed) + r_id, g_id, b_id = uuid.uuid4(), uuid.uuid4(), uuid.uuid4() + processed_details = { + r_id.hex: {'temp_processed_file': '/fake/red.png', 'status': 'Processed', 'map_type': 'RED_SRC'}, + g_id.hex: {'temp_processed_file': '/fake/green.png', 'status': 'Processed', 'map_type': 'GREEN_SRC'}, + b_id.hex: {'temp_processed_file': '/fake/blue.png', 'status': 'Processed', 'map_type': 'BLUE_SRC'} + } + # Mock loaded image data (grayscale for inputs) + mock_r_data = np.full((10, 10), 200, dtype=np.uint8) + mock_g_data = np.full((10, 10), 100, dtype=np.uint8) + mock_b_data = np.full((10, 10), 50, dtype=np.uint8) + mock_load_image.side_effect = [mock_r_data, mock_g_data, mock_b_data] + + # Merge Rule setup + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=r_id, source_channel=0, target_channel=0), # R to R + create_mock_merge_input_channel(file_rule_id=g_id, source_channel=0, target_channel=1), # G to G + create_mock_merge_input_channel(file_rule_id=b_id, source_channel=0, target_channel=2) # B to B + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=3) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="RGB_Combined", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 3 + mock_resize_image.assert_not_called() # Assuming all inputs are same size for this test + mock_save_image.assert_called_once() + + # Check that the correct filename was passed to save_image + # The filename is constructed as: f"{context.asset_rule.name}_merged_{merge_rule.map_type}{Path(first_input_path).suffix}" + # In this case, first_input_path is '/fake/red.png', so suffix is '.png' + # Asset name is "MergeAsset" + expected_filename_part = f"{context.asset_rule.name}_merged_{merge_rule.map_type}.png" + saved_path_arg = mock_save_image.call_args[0][0] + assert expected_filename_part in str(saved_path_arg) + + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 10, 3) + assert np.all(saved_data[:,:,0] == 200) # Red channel + assert np.all(saved_data[:,:,1] == 100) # Green channel + assert np.all(saved_data[:,:,2] == 50) # Blue channel + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + # The temp_merged_file path will be under engine_temp_dir / asset_name / filename + assert f"{context.engine_temp_dir / context.asset_rule.name / expected_filename_part}" == details['temp_merged_file'] + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") + +# Unit tests will be added below this line +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_channel_inversion(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRule + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Processed', 'map_type': 'SOURCE_MAP'} + } + # Mock loaded image data (single channel for simplicity, to be inverted) + mock_source_data = np.array([[0, 100], [155, 255]], dtype=np.uint8) + mock_load_image.return_value = mock_source_data + + # Merge Rule setup: one input, inverted, to one output channel + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0, invert=True) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="Inverted_Gray", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/source.png')) + mock_resize_image.assert_not_called() + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (2, 2) # Grayscale output + + # Expected inverted data: 255-original + expected_inverted_data = np.array([[255, 155], [100, 0]], dtype=np.uint8) + assert np.all(saved_data == expected_inverted_data) + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + assert "merged_Inverted_Gray" in details['temp_merged_file'] + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_input_map_missing(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRule ID that will be missing from processed_details + missing_input_id = uuid.uuid4() + + # Merge Rule setup + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=missing_input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestMissing", merge_settings=merge_settings) + + # processed_details is empty, so missing_input_id will not be found + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details={} + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert f"Input map FileRule ID {missing_input_id.hex} not found in processed_maps_details or not successfully processed" in details['error_message'] + + mock_log_error.assert_called_once() + assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert f"Input map FileRule ID {missing_input_id.hex} not found in processed_maps_details or not successfully processed" in mock_log_error.call_args[0][0] + +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_input_map_status_not_processed(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + # Status is 'Failed', not 'Processed' + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Failed', 'map_type': 'SOURCE_MAP'} + } + + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestNotProcessed", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert f"Input map FileRule ID {input_id.hex} not found in processed_maps_details or not successfully processed" in details['error_message'] + + mock_log_error.assert_called_once() + assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert f"Input map FileRule ID {input_id.hex} not found in processed_maps_details or not successfully processed" in mock_log_error.call_args[0][0] +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_load_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Processed', 'map_type': 'SOURCE_MAP'} + } + + # Configure mock_load_image to raise an exception + mock_load_image.side_effect = Exception("Failed to load image") + + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestLoadFail", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/source.png')) + mock_save_image.assert_not_called() + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert "Failed to load image for merge input" in details['error_message'] + assert str(Path('/fake/source.png')) in details['error_message'] + + mock_log_error.assert_called_once() + assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert "Failed to load image for merge input" in mock_log_error.call_args[0][0] +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_save_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Processed', 'map_type': 'SOURCE_MAP'} + } + mock_source_data = np.full((10, 10), 128, dtype=np.uint8) + mock_load_image.return_value = mock_source_data + + # Configure mock_save_image to return False (indicating failure) + mock_save_image.return_value = False + + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestSaveFail", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/source.png')) + mock_save_image.assert_called_once() # save_image is called, but returns False + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert "Failed to save merged map" in details['error_message'] + + mock_log_error.assert_called_once() + assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert "Failed to save merged map" in mock_log_error.call_args[0][0] +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_dimension_mismatch_handling(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRules + id1, id2 = uuid.uuid4(), uuid.uuid4() + processed_details = { + id1.hex: {'temp_processed_file': '/fake/img1.png', 'status': 'Processed', 'map_type': 'IMG1_SRC'}, + id2.hex: {'temp_processed_file': '/fake/img2.png', 'status': 'Processed', 'map_type': 'IMG2_SRC'} + } + + # Mock loaded image data with different dimensions + mock_img1_data = np.full((10, 10), 100, dtype=np.uint8) # 10x10 + mock_img2_data_original = np.full((5, 5), 200, dtype=np.uint8) # 5x5, will be resized + + mock_load_image.side_effect = [mock_img1_data, mock_img2_data_original] + + # Mock resize_image to return an image of the target dimensions + # For simplicity, it just creates a new array of the target size filled with a value. + mock_img2_data_resized = np.full((10, 10), 210, dtype=np.uint8) # Resized to 10x10 + mock_resize_image.return_value = mock_img2_data_resized + + # Merge Rule setup: two inputs, one output channel (e.g., averaging them) + # Target channel 0 for both, the stage should handle combining them if they map to the same target. + # However, the current stage logic for multiple inputs to the same target channel is to take the last one. + # Let's make them target different channels for a clearer test of resize. + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=id1, source_channel=0, target_channel=0), + create_mock_merge_input_channel(file_rule_id=id2, source_channel=0, target_channel=1) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=2) # Outputting 2 channels + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="ResizedMerge", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 2 + mock_load_image.assert_any_call(Path('/fake/img1.png')) + mock_load_image.assert_any_call(Path('/fake/img2.png')) + + # Assert resize_image was called for the second image to match the first's dimensions + mock_resize_image.assert_called_once() + # The first argument to resize_image is the image data, second is target_shape tuple (height, width) + # np.array_equal is needed for comparing numpy arrays in mock calls + assert np.array_equal(mock_resize_image.call_args[0][0], mock_img2_data_original) + assert mock_resize_image.call_args[0][1] == (10, 10) + + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 10, 2) # 2 output channels + assert np.all(saved_data[:,:,0] == mock_img1_data) # First channel from img1 + assert np.all(saved_data[:,:,1] == mock_img2_data_resized) # Second channel from resized img2 + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + assert "merged_ResizedMerge" in details['temp_merged_file'] + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Resized input map from {Path('/fake/img2.png')} from {mock_img2_data_original.shape} to {(10,10)} to match first loaded map.") + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_to_grayscale_output(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRule (e.g., an RGB image) + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/rgb_source.png', 'status': 'Processed', 'map_type': 'RGB_SRC'} + } + # Mock loaded image data (3 channels) + mock_rgb_data = np.full((10, 10, 3), [50, 100, 150], dtype=np.uint8) + mock_load_image.return_value = mock_rgb_data + + # Merge Rule setup: take the Green channel (source_channel=1) from input and map it to the single output channel (target_channel=0) + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=1, target_channel=0) # G to Grayscale + ] + # output_channels = 1 for grayscale + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="GrayscaleFromGreen", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/rgb_source.png')) + mock_resize_image.assert_not_called() + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 10) # Grayscale output (2D) + assert np.all(saved_data == 100) # Green channel's value + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + assert "merged_GrayscaleFromGreen" in details['temp_merged_file'] + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") + +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_default_value_if_missing_channel(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + # Input is a grayscale image (1 channel) + input_id.hex: {'temp_processed_file': '/fake/gray_source.png', 'status': 'Processed', 'map_type': 'GRAY_SRC'} + } + mock_gray_data = np.full((10, 10), 50, dtype=np.uint8) + mock_load_image.return_value = mock_gray_data + + # Merge Rule: try to read source_channel 1 (which doesn't exist in grayscale) + # and use default_value_if_missing for target_channel 0. + # Also, read source_channel 0 (which exists) for target_channel 1. + mic1 = create_mock_merge_input_channel(file_rule_id=input_id, source_channel=1, target_channel=0) + mic1.default_value_if_missing = 128 # Set a specific default value + mic2 = create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=1) + + merge_settings = create_mock_merge_settings(input_maps=[mic1, mic2], output_channels=2) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="DefaultValueTest", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/gray_source.png')) + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 10, 2) + assert np.all(saved_data[:,:,0] == 128) # Default value for missing source channel 1 + assert np.all(saved_data[:,:,1] == 50) # Value from existing source channel 0 + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + mock_log_error.assert_not_called() \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_metadata_finalization_save.py b/tests/processing/pipeline/stages/test_metadata_finalization_save.py new file mode 100644 index 0000000..68741ce --- /dev/null +++ b/tests/processing/pipeline/stages/test_metadata_finalization_save.py @@ -0,0 +1,359 @@ +import pytest +from unittest import mock +from pathlib import Path +import datetime +import json # For comparing dumped content +import uuid +from typing import Optional, Dict, Any + +from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule +from configuration import Configuration, GeneralSettings # Added GeneralSettings as it's in the helper + + +def create_metadata_save_mock_context( + status_flags: Optional[Dict[str, Any]] = None, + initial_asset_metadata: Optional[Dict[str, Any]] = None, + processed_details: Optional[Dict[str, Any]] = None, + merged_details: Optional[Dict[str, Any]] = None, + asset_name: str = "MetaSaveAsset", + output_path_pattern_val: str = "{asset_name}/metadata/{filename}", + # ... other common context fields ... +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.output_path_pattern = output_path_pattern_val + mock_asset_rule.id = uuid.uuid4() # Needed for generate_path_from_pattern if it uses it + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = "MetaSaveSource" + + mock_config = mock.MagicMock(spec=Configuration) + # mock_config.general_settings = mock.MagicMock(spec=GeneralSettings) # If needed + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output_base"), # For generate_path + effective_supplier="ValidSupplier", + asset_metadata=initial_asset_metadata if initial_asset_metadata is not None else {}, + processed_maps_details=processed_details if processed_details is not None else {}, + merged_maps_details=merged_details if merged_details is not None else {}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags=status_flags if status_flags is not None else {}, + incrementing_value="001", # Example for path generation + sha5_value="abc" # Example for path generation + ) + return context +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_asset_skipped_before_metadata_init( + mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests that if an asset is marked for skipping and has no initial metadata, + the stage returns early without attempting to save metadata. + """ + stage = MetadataFinalizationAndSaveStage() + context = create_metadata_save_mock_context( + status_flags={'skip_asset': True}, + initial_asset_metadata={} # Explicitly empty + ) + + updated_context = stage.execute(context) + + # Assert that no processing or saving attempts were made + mock_dt.now.assert_not_called() # Should not even try to set end time if no metadata + mock_gen_path.assert_not_called() + mock_mkdir.assert_not_called() + mock_file_open.assert_not_called() + mock_json_dump.assert_not_called() + + assert updated_context.asset_metadata == {} # Metadata remains empty + assert 'metadata_file_path' not in updated_context.asset_metadata + assert updated_context.status_flags.get('metadata_save_error') is None +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_asset_skipped_after_metadata_init( + mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests that if an asset is marked for skipping but has initial metadata, + the status is updated to 'Skipped' and metadata is saved. + """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/SkippedAsset/metadata/SkippedAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + initial_meta = {'asset_name': "SkippedAsset", 'status': "Pending"} + + context = create_metadata_save_mock_context( + asset_name="SkippedAsset", + status_flags={'skip_asset': True}, + initial_asset_metadata=initial_meta + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once_with( + context.asset_rule.output_path_pattern, + context.asset_rule, + context.source_rule, + context.output_base_path, + context.asset_metadata, # Original metadata passed for path gen + context.incrementing_value, + context.sha5_value, + filename_override=f"{context.asset_rule.name}_metadata.json" + ) + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() + + dumped_data = mock_json_dump.call_args[0][0] + assert dumped_data['status'] == "Skipped" + assert dumped_data['processing_end_time'] == fixed_now.isoformat() + assert 'processed_map_details' not in dumped_data # Should not be present if skipped early + assert 'merged_map_details' not in dumped_data # Should not be present if skipped early + + assert updated_context.asset_metadata['status'] == "Skipped" + assert updated_context.asset_metadata['processing_end_time'] == fixed_now.isoformat() + assert updated_context.asset_metadata['metadata_file_path'] == fake_metadata_path_str + assert updated_context.status_flags.get('metadata_save_error') is None +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) # Mocks open() +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_metadata_save_success(mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump): + """ + Tests successful metadata finalization and saving, including serialization of Path objects. + """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 30, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/MetaSaveAsset/metadata/MetaSaveAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + initial_meta = {'asset_name': "MetaSaveAsset", 'status': "Pending", 'processing_start_time': "2023-01-01T12:00:00"} + # Example of a Path object that needs serialization + proc_details = {'map1': {'temp_processed_file': Path('/fake/temp_engine_dir/map1.png'), 'final_file_path': Path('/fake/output_base/MetaSaveAsset/map1.png')}} + merged_details = {'merged_map_A': {'output_path': Path('/fake/output_base/MetaSaveAsset/merged_A.png')}} + + context = create_metadata_save_mock_context( + initial_asset_metadata=initial_meta, + processed_details=proc_details, + merged_details=merged_details, + status_flags={} # No errors, no skip + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once_with( + context.asset_rule.output_path_pattern, + context.asset_rule, + context.source_rule, + context.output_base_path, + context.asset_metadata, # The metadata *before* adding end_time, status etc. + context.incrementing_value, + context.sha5_value, + filename_override=f"{context.asset_rule.name}_metadata.json" + ) + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) # Checks parent dir of fake_metadata_path_str + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() + + # Check what was passed to json.dump + dumped_data = mock_json_dump.call_args[0][0] + assert dumped_data['status'] == "Processed" + assert dumped_data['processing_end_time'] == fixed_now.isoformat() + assert 'processing_start_time' in dumped_data # Ensure existing fields are preserved + + # Verify processed_map_details and Path serialization + assert 'processed_map_details' in dumped_data + assert dumped_data['processed_map_details']['map1']['temp_processed_file'] == '/fake/temp_engine_dir/map1.png' + assert dumped_data['processed_map_details']['map1']['final_file_path'] == '/fake/output_base/MetaSaveAsset/map1.png' + + # Verify merged_map_details and Path serialization + assert 'merged_map_details' in dumped_data + assert dumped_data['merged_map_details']['merged_map_A']['output_path'] == '/fake/output_base/MetaSaveAsset/merged_A.png' + + assert updated_context.asset_metadata['metadata_file_path'] == fake_metadata_path_str + assert updated_context.asset_metadata['status'] == "Processed" + assert updated_context.status_flags.get('metadata_save_error') is None +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_processing_failed_due_to_previous_error( + mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests that if a previous stage set an error flag, the status is 'Failed' + and metadata (including any existing details) is saved. + """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 45, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/FailedAsset/metadata/FailedAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + initial_meta = {'asset_name': "FailedAsset", 'status': "Processing"} + # Simulate some details might exist even if a later stage failed + proc_details = {'map1_partial': {'temp_processed_file': Path('/fake/temp_engine_dir/map1_partial.png')}} + + context = create_metadata_save_mock_context( + asset_name="FailedAsset", + initial_asset_metadata=initial_meta, + processed_details=proc_details, + merged_details={}, # No merged details if processing failed before that + status_flags={'file_processing_error': True, 'error_message': "Something went wrong"} + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once() # Path generation should still occur + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() + + dumped_data = mock_json_dump.call_args[0][0] + assert dumped_data['status'] == "Failed" + assert dumped_data['processing_end_time'] == fixed_now.isoformat() + assert 'error_message' in dumped_data # Assuming error messages from status_flags are copied + assert dumped_data['error_message'] == "Something went wrong" + + # Check that existing details are included + assert 'processed_map_details' in dumped_data + assert dumped_data['processed_map_details']['map1_partial']['temp_processed_file'] == '/fake/temp_engine_dir/map1_partial.png' + assert 'merged_map_details' in dumped_data # Should be present, even if empty + assert dumped_data['merged_map_details'] == {} + + assert updated_context.asset_metadata['status'] == "Failed" + assert updated_context.asset_metadata['metadata_file_path'] == fake_metadata_path_str + assert updated_context.status_flags.get('metadata_save_error') is None + # Ensure the original error flag is preserved + assert updated_context.status_flags['file_processing_error'] is True +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +@mock.patch('logging.error') # To check if error is logged +def test_generate_path_fails( + mock_log_error, mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests behavior when generate_path_from_pattern raises an exception. + Ensures status is updated, error flag is set, and no save is attempted. + """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 50, 0) + mock_dt.now.return_value = fixed_now + + mock_gen_path.side_effect = Exception("Simulated path generation error") + + initial_meta = {'asset_name': "PathFailAsset", 'status': "Processing"} + context = create_metadata_save_mock_context( + asset_name="PathFailAsset", + initial_asset_metadata=initial_meta, + status_flags={} + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() # Time is set before path generation + mock_gen_path.assert_called_once() # generate_path_from_pattern is called + + # File operations should NOT be called if path generation fails + mock_mkdir.assert_not_called() + mock_file_open.assert_not_called() + mock_json_dump.assert_not_called() + + mock_log_error.assert_called_once() # Check that an error was logged + # Example: check if the log message contains relevant info, if needed + # assert "Failed to generate metadata path" in mock_log_error.call_args[0][0] + + assert updated_context.asset_metadata['status'] == "Failed" # Or a more specific error status + assert 'processing_end_time' in updated_context.asset_metadata # End time should still be set + assert updated_context.asset_metadata['processing_end_time'] == fixed_now.isoformat() + assert 'metadata_file_path' not in updated_context.asset_metadata # Path should not be set + + assert updated_context.status_flags.get('metadata_save_error') is True + assert 'error_message' in updated_context.asset_metadata # Check if error message is populated + assert "Simulated path generation error" in updated_context.asset_metadata['error_message'] +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +@mock.patch('logging.error') # To check if error is logged +def test_json_dump_fails( + mock_log_error, mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests behavior when json.dump raises an exception during saving. + Ensures status is updated, error flag is set, and error is logged. + """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 55, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/JsonDumpFailAsset/metadata/JsonDumpFailAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + mock_json_dump.side_effect = IOError("Simulated JSON dump error") # Or TypeError for non-serializable + + initial_meta = {'asset_name': "JsonDumpFailAsset", 'status': "Processing"} + context = create_metadata_save_mock_context( + asset_name="JsonDumpFailAsset", + initial_asset_metadata=initial_meta, + status_flags={} + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once() + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() # json.dump was attempted + + mock_log_error.assert_called_once() + # assert "Failed to save metadata JSON" in mock_log_error.call_args[0][0] + + assert updated_context.asset_metadata['status'] == "Failed" # Or specific "Metadata Save Failed" + assert 'processing_end_time' in updated_context.asset_metadata + assert updated_context.asset_metadata['processing_end_time'] == fixed_now.isoformat() + # metadata_file_path might be set if path generation succeeded, even if dump failed. + # Depending on desired behavior, this could be asserted or not. + # For now, let's assume it's set if path generation was successful. + assert updated_context.asset_metadata['metadata_file_path'] == fake_metadata_path_str + + assert updated_context.status_flags.get('metadata_save_error') is True + assert 'error_message' in updated_context.asset_metadata + assert "Simulated JSON dump error" in updated_context.asset_metadata['error_message'] \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_metadata_initialization.py b/tests/processing/pipeline/stages/test_metadata_initialization.py new file mode 100644 index 0000000..5b358fd --- /dev/null +++ b/tests/processing/pipeline/stages/test_metadata_initialization.py @@ -0,0 +1,169 @@ +import pytest +from unittest import mock +from pathlib import Path +import datetime +import uuid +from typing import Optional + +from processing.pipeline.stages.metadata_initialization import MetadataInitializationStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule +from configuration import Configuration, GeneralSettings + +# Helper function to create a mock AssetProcessingContext +def create_metadata_init_mock_context( + skip_asset_flag: bool = False, + asset_name: str = "MetaAsset", + asset_id: uuid.UUID = None, # Allow None to default to uuid.uuid4() + source_path_str: str = "source/meta_asset", + output_pattern: str = "{asset_name}/{map_type}", + tags: list = None, + custom_fields: dict = None, + source_rule_name: str = "MetaSource", + source_rule_id: uuid.UUID = None, # Allow None to default to uuid.uuid4() + eff_supplier: Optional[str] = "SupplierMeta", + app_version_str: str = "1.0.0-test", + inc_val: Optional[str] = None, + sha_val: Optional[str] = None +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.id = asset_id if asset_id is not None else uuid.uuid4() + mock_asset_rule.source_path = Path(source_path_str) + mock_asset_rule.output_path_pattern = output_pattern + mock_asset_rule.tags = tags if tags is not None else ["tag1", "test_tag"] + mock_asset_rule.custom_fields = custom_fields if custom_fields is not None else {"custom_key": "custom_value"} + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = source_rule_name + mock_source_rule.id = source_rule_id if source_rule_id is not None else uuid.uuid4() + + mock_general_settings = mock.MagicMock(spec=GeneralSettings) + mock_general_settings.app_version = app_version_str + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_general_settings + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier=eff_supplier, + asset_metadata={}, + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=inc_val, + sha5_value=sha_val + ) + return context + +@mock.patch('processing.pipeline.stages.metadata_initialization.datetime') +def test_metadata_initialization_not_skipped(mock_datetime_module): + stage = MetadataInitializationStage() + + fixed_now = datetime.datetime(2023, 10, 26, 12, 0, 0, tzinfo=datetime.timezone.utc) + mock_datetime_module.datetime.now.return_value = fixed_now + + asset_id_val = uuid.uuid4() + source_id_val = uuid.uuid4() + + context = create_metadata_init_mock_context( + skip_asset_flag=False, + asset_id=asset_id_val, + source_rule_id=source_id_val, + inc_val="001", + sha_val="abcde" + ) + + updated_context = stage.execute(context) + + assert isinstance(updated_context.asset_metadata, dict) + assert isinstance(updated_context.processed_maps_details, dict) + assert isinstance(updated_context.merged_maps_details, dict) + + md = updated_context.asset_metadata + assert md['asset_name'] == "MetaAsset" + assert md['asset_id'] == str(asset_id_val) + assert md['source_rule_name'] == "MetaSource" + assert md['source_rule_id'] == str(source_id_val) + assert md['source_path'] == "source/meta_asset" + assert md['effective_supplier'] == "SupplierMeta" + assert md['output_path_pattern'] == "{asset_name}/{map_type}" + assert md['processing_start_time'] == fixed_now.isoformat() + assert md['status'] == "Pending" + assert md['version'] == "1.0.0-test" + assert md['tags'] == ["tag1", "test_tag"] + assert md['custom_fields'] == {"custom_key": "custom_value"} + assert md['incrementing_value'] == "001" + assert md['sha5_value'] == "abcde" + +@mock.patch('processing.pipeline.stages.metadata_initialization.datetime') +def test_metadata_initialization_not_skipped_none_inc_sha(mock_datetime_module): + stage = MetadataInitializationStage() + + fixed_now = datetime.datetime(2023, 10, 26, 12, 0, 0, tzinfo=datetime.timezone.utc) + mock_datetime_module.datetime.now.return_value = fixed_now + + context = create_metadata_init_mock_context( + skip_asset_flag=False, + inc_val=None, + sha_val=None + ) + + updated_context = stage.execute(context) + + md = updated_context.asset_metadata + assert 'incrementing_value' not in md # Or assert md['incrementing_value'] is None, depending on desired behavior + assert 'sha5_value' not in md # Or assert md['sha5_value'] is None + +def test_metadata_initialization_skipped(): + stage = MetadataInitializationStage() + context = create_metadata_init_mock_context(skip_asset_flag=True) + + # Make copies of initial state to ensure they are not modified + initial_asset_metadata = dict(context.asset_metadata) + initial_processed_maps = dict(context.processed_maps_details) + initial_merged_maps = dict(context.merged_maps_details) + + updated_context = stage.execute(context) + + assert updated_context.asset_metadata == initial_asset_metadata + assert updated_context.processed_maps_details == initial_processed_maps + assert updated_context.merged_maps_details == initial_merged_maps + assert not updated_context.asset_metadata # Explicitly check it's empty as per initial setup + assert not updated_context.processed_maps_details + assert not updated_context.merged_maps_details + +@mock.patch('processing.pipeline.stages.metadata_initialization.datetime') +def test_tags_and_custom_fields_are_copies(mock_datetime_module): + stage = MetadataInitializationStage() + fixed_now = datetime.datetime(2023, 10, 26, 12, 0, 0, tzinfo=datetime.timezone.utc) + mock_datetime_module.datetime.now.return_value = fixed_now + + original_tags = ["original_tag"] + original_custom_fields = {"original_key": "original_value"} + + context = create_metadata_init_mock_context( + skip_asset_flag=False, + tags=original_tags, + custom_fields=original_custom_fields + ) + + # Modify originals after context creation but before stage execution + original_tags.append("modified_after_creation") + original_custom_fields["new_key_after_creation"] = "new_value" + + updated_context = stage.execute(context) + + md = updated_context.asset_metadata + assert md['tags'] == ["original_tag"] # Should not have "modified_after_creation" + assert md['tags'] is not original_tags # Ensure it's a different object + + assert md['custom_fields'] == {"original_key": "original_value"} # Should not have "new_key_after_creation" + assert md['custom_fields'] is not original_custom_fields # Ensure it's a different object \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_normal_map_green_channel.py b/tests/processing/pipeline/stages/test_normal_map_green_channel.py new file mode 100644 index 0000000..3120655 --- /dev/null +++ b/tests/processing/pipeline/stages/test_normal_map_green_channel.py @@ -0,0 +1,323 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +import logging # Added for mocking logger + +from processing.pipeline.stages.normal_map_green_channel import NormalMapGreenChannelStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule +from configuration import Configuration, GeneralSettings + +# Helper functions +def create_mock_file_rule_for_normal_test( + id_val: uuid.UUID = None, # Corrected type hint from Optional[uuid.UUID] + map_type: str = "NORMAL", + filename_pattern: str = "normal.png" +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = "MAP_COL" # As per example, though not directly used by stage + mock_fr.active = True # As per example + return mock_fr + +def create_normal_map_mock_context( + initial_file_rules: list = None, # Corrected type hint + initial_processed_details: dict = None, # Corrected type hint + invert_green_globally: bool = False, + skip_asset_flag: bool = False, + asset_name: str = "NormalMapAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + + mock_source_rule = mock.MagicMock(spec=SourceRule) + + mock_gs = mock.MagicMock(spec=GeneralSettings) + mock_gs.invert_normal_map_green_channel_globally = invert_green_globally + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_gs + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, + files_to_process=list(initial_file_rules) if initial_file_rules else [], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, # Added as per AssetProcessingContext constructor + sha5_value=None # Added as per AssetProcessingContext constructor + ) + return context + +# Unit tests will be added below +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +def test_asset_skipped(mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_fr = create_mock_file_rule_for_normal_test(map_type="NORMAL") + initial_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal.png', 'status': 'Processed', 'map_type': 'NORMAL', 'notes': ''} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True, + skip_asset_flag=True # Asset is skipped + ) + original_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert updated_context.processed_maps_details == original_details + assert normal_fr in updated_context.files_to_process # Ensure rule is still there + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +def test_no_normal_map_present(mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + # Create a non-normal map rule + diffuse_fr = create_mock_file_rule_for_normal_test(map_type="DIFFUSE", filename_pattern="diffuse.png") + initial_details = { + diffuse_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_diffuse.png', 'status': 'Processed', 'map_type': 'DIFFUSE', 'notes': ''} + } + context = create_normal_map_mock_context( + initial_file_rules=[diffuse_fr], + initial_processed_details=initial_details, + invert_green_globally=True # Inversion enabled, but no normal map + ) + original_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert updated_context.processed_maps_details == original_details + assert diffuse_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +def test_normal_map_present_inversion_disabled(mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal.png', 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Initial note'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=False # Inversion disabled + ) + original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_normal_map_inversion_uint8_success(mock_log_debug, mock_log_info, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + + initial_temp_path = Path('/fake/temp_engine_dir/processed_normal.png') + initial_details = { + normal_fr.id.hex: {'temp_processed_file': str(initial_temp_path), 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Initial note'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True # Enable inversion + ) + + # R=10, G=50, B=100 + mock_loaded_normal_data = np.array([[[10, 50, 100]]], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_normal_data + mock_save_image.return_value = True # Simulate successful save + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_temp_path) + + # Check that save_image was called with green channel inverted + assert mock_save_image.call_count == 1 + saved_path_arg, saved_data_arg = mock_save_image.call_args[0] + + assert saved_data_arg[0,0,0] == 10 # R unchanged + assert saved_data_arg[0,0,1] == 255 - 50 # G inverted + assert saved_data_arg[0,0,2] == 100 # B unchanged + + assert isinstance(saved_path_arg, Path) + assert "normal_g_inv_" in saved_path_arg.name + assert saved_path_arg.parent == initial_temp_path.parent # Should be in same temp dir + + normal_detail = updated_context.processed_maps_details[normal_fr.id.hex] + assert "normal_g_inv_" in normal_detail['temp_processed_file'] + assert Path(normal_detail['temp_processed_file']).name == saved_path_arg.name + assert "Green channel inverted" in normal_detail['notes'] + assert "Initial note" in normal_detail['notes'] # Check existing notes preserved + + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_normal_map_inversion_float_success(mock_log_debug, mock_log_info, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path = Path('/fake/temp_engine_dir/processed_normal_float.png') + initial_details = { + normal_fr.id.hex: {'temp_processed_file': str(initial_temp_path), 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Float image'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True + ) + + # R=0.1, G=0.25, B=0.75 + mock_loaded_normal_data = np.array([[[0.1, 0.25, 0.75]]], dtype=np.float32) + mock_load_image.return_value = mock_loaded_normal_data + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_temp_path) + + assert mock_save_image.call_count == 1 + saved_path_arg, saved_data_arg = mock_save_image.call_args[0] + + assert np.isclose(saved_data_arg[0,0,0], 0.1) # R unchanged + assert np.isclose(saved_data_arg[0,0,1], 1.0 - 0.25) # G inverted + assert np.isclose(saved_data_arg[0,0,2], 0.75) # B unchanged + + assert "normal_g_inv_" in saved_path_arg.name + normal_detail = updated_context.processed_maps_details[normal_fr.id.hex] + assert "normal_g_inv_" in normal_detail['temp_processed_file'] + assert "Green channel inverted" in normal_detail['notes'] + assert "Float image" in normal_detail['notes'] + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.error') +def test_load_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path_str = '/fake/temp_engine_dir/processed_normal_load_fail.png' + initial_details = { + normal_fr.id.hex: {'temp_processed_file': initial_temp_path_str, 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Load fail test'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True + ) + original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + mock_load_image.return_value = None # Simulate load failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path(initial_temp_path_str)) + mock_save_image.assert_not_called() + mock_log_error.assert_called_once() + assert f"Failed to load image {Path(initial_temp_path_str)} for green channel inversion." in mock_log_error.call_args[0][0] + + # Details should be unchanged + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.error') +def test_save_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path = Path('/fake/temp_engine_dir/processed_normal_save_fail.png') + initial_details = { + normal_fr.id.hex: {'temp_processed_file': str(initial_temp_path), 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Save fail test'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True + ) + original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + mock_loaded_normal_data = np.array([[[10, 50, 100]]], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_normal_data + mock_save_image.return_value = False # Simulate save failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_temp_path) + mock_save_image.assert_called_once() # Save is attempted + + saved_path_arg = mock_save_image.call_args[0][0] # Get the path it tried to save to + mock_log_error.assert_called_once() + assert f"Failed to save green channel inverted image to {saved_path_arg}." in mock_log_error.call_args[0][0] + + # Details should be unchanged + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.error') +@pytest.mark.parametrize("unsuitable_data, description", [ + (np.array([[1, 2], [3, 4]], dtype=np.uint8), "2D array"), # 2D array + (np.array([[[1, 2]]], dtype=np.uint8), "2-channel image") # Image with less than 3 channels +]) +def test_image_not_suitable_for_inversion(mock_log_error, mock_load_image, mock_save_image, unsuitable_data, description): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path_str = f'/fake/temp_engine_dir/unsuitable_{description.replace(" ", "_")}.png' + initial_details = { + normal_fr.id.hex: {'temp_processed_file': initial_temp_path_str, 'status': 'Processed', 'map_type': 'NORMAL', 'notes': f'Unsuitable: {description}'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True + ) + original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + mock_load_image.return_value = unsuitable_data + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path(initial_temp_path_str)) + mock_save_image.assert_not_called() # Save should not be attempted + mock_log_error.assert_called_once() + assert f"Image at {Path(initial_temp_path_str)} is not suitable for green channel inversion (e.g., not RGB/RGBA)." in mock_log_error.call_args[0][0] + + # Details should be unchanged + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_output_organization.py b/tests/processing/pipeline/stages/test_output_organization.py new file mode 100644 index 0000000..ccf6c08 --- /dev/null +++ b/tests/processing/pipeline/stages/test_output_organization.py @@ -0,0 +1,417 @@ +import pytest +from unittest import mock +from pathlib import Path +import shutil # To check if shutil.copy2 is called +import uuid +from typing import Optional # Added for type hinting in helper + +from processing.pipeline.stages.output_organization import OutputOrganizationStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule # For context setup +from configuration import Configuration, GeneralSettings + +def create_output_org_mock_context( + status_flags: Optional[dict] = None, + asset_metadata_status: str = "Processed", # Default to processed for testing copy + processed_map_details: Optional[dict] = None, + merged_map_details: Optional[dict] = None, + overwrite_setting: bool = False, + asset_name: str = "OutputOrgAsset", + output_path_pattern_val: str = "{asset_name}/{map_type}/{filename}" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.output_path_pattern = output_path_pattern_val + # Need FileRules on AssetRule if stage tries to look up output_filename_pattern from them + # For simplicity, assume stage constructs output_filename for now if not found on FileRule + mock_asset_rule.file_rules = [] # Or mock FileRules if stage uses them for output_filename_pattern + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = "OutputOrgSource" + + mock_gs = mock.MagicMock(spec=GeneralSettings) + mock_gs.overwrite_existing = overwrite_setting + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_gs + + # Ensure asset_metadata has a status + initial_asset_metadata = {'asset_name': asset_name, 'status': asset_metadata_status} + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output_final"), + effective_supplier="ValidSupplier", + asset_metadata=initial_asset_metadata, + processed_maps_details=processed_map_details if processed_map_details is not None else {}, + merged_maps_details=merged_map_details if merged_map_details is not None else {}, + files_to_process=[], # Not directly used by this stage, but good to have + loaded_data_cache={}, + config_obj=mock_config, + status_flags=status_flags if status_flags is not None else {}, + incrementing_value="001", + sha5_value="xyz" # Corrected from sha5_value to sha256_value if that's the actual param, or ensure it's a valid param. Assuming sha5_value is a typo and should be something like 'unique_id' or similar if not sha256. For now, keeping as sha5_value as per instructions. + ) + return context +@mock.patch('shutil.copy2') +@mock.patch('logging.info') # To check for log messages +def test_output_organization_asset_skipped_by_status_flag(mock_log_info, mock_shutil_copy): + stage = OutputOrganizationStage() + context = create_output_org_mock_context(status_flags={'skip_asset': True}) + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() + # Check if a log message indicates skipping, if applicable + # e.g., mock_log_info.assert_any_call("Skipping output organization for asset OutputOrgAsset due to skip_asset flag.") + assert 'final_output_files' not in updated_context.asset_metadata # Or assert it's empty + assert updated_context.asset_metadata['status'] == "Processed" # Status should not change if skipped due to flag before stage logic + # Add specific log check if the stage logs this event + # For now, assume no copy is the primary check + +@mock.patch('shutil.copy2') +@mock.patch('logging.warning') # Or info, depending on how failure is logged +def test_output_organization_asset_failed_by_metadata_status(mock_log_warning, mock_shutil_copy): + stage = OutputOrganizationStage() + context = create_output_org_mock_context(asset_metadata_status="Failed") + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() + # Check for a log message indicating skipping due to failure status + # e.g., mock_log_warning.assert_any_call("Skipping output organization for asset OutputOrgAsset as its status is Failed.") + assert 'final_output_files' not in updated_context.asset_metadata # Or assert it's empty + assert updated_context.asset_metadata['status'] == "Failed" # Status remains Failed + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_output_organization_success_no_overwrite( + mock_log_error, mock_log_info, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + + proc_id_1 = uuid.uuid4().hex + merged_id_1 = uuid.uuid4().hex + + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc1.png', 'map_type': 'Diffuse', 'output_filename': 'OutputOrgAsset_Diffuse.png'} + } + merged_details = { + merged_id_1: {'status': 'Processed', 'temp_merged_file': '/fake/temp_engine_dir/merged1.png', 'map_type': 'ORM', 'output_filename': 'OutputOrgAsset_ORM.png'} + } + + context = create_output_org_mock_context( + processed_map_details=processed_details, + merged_map_details=merged_details, + overwrite_setting=False + ) + + # Mock generate_path_from_pattern to return different paths for each call + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Diffuse/OutputOrgAsset_Diffuse.png") + final_path_merged1 = Path("/fake/output_final/OutputOrgAsset/ORM/OutputOrgAsset_ORM.png") + # Ensure generate_path_from_pattern is called with the correct context and details + # The actual call in the stage is: generate_path_from_pattern(context, map_detail, map_type_key, temp_file_key) + # We need to ensure our side_effect matches these calls. + + def gen_path_side_effect(ctx, detail, map_type_key, temp_file_key, output_filename_key): + if detail['temp_processed_file'] == '/fake/temp_engine_dir/proc1.png': + return final_path_proc1 + elif detail['temp_merged_file'] == '/fake/temp_engine_dir/merged1.png': + return final_path_merged1 + raise ValueError("Unexpected call to generate_path_from_pattern") + + mock_gen_path.side_effect = gen_path_side_effect + + mock_path_exists.return_value = False # Files do not exist at destination + + updated_context = stage.execute(context) + + assert mock_shutil_copy.call_count == 2 + mock_shutil_copy.assert_any_call(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + mock_shutil_copy.assert_any_call(Path(merged_details[merged_id_1]['temp_merged_file']), final_path_merged1) + + # Check mkdir calls + # It should be called for each unique parent directory + expected_mkdir_calls = [ + mock.call(Path("/fake/output_final/OutputOrgAsset/Diffuse"), parents=True, exist_ok=True), + mock.call(Path("/fake/output_final/OutputOrgAsset/ORM"), parents=True, exist_ok=True) + ] + mock_mkdir.assert_has_calls(expected_mkdir_calls, any_order=True) + # Ensure mkdir was called for the parent of each file + assert mock_mkdir.call_count >= 1 # Could be 1 or 2 if paths share a base that's created once + + assert len(updated_context.asset_metadata['final_output_files']) == 2 + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert str(final_path_merged1) in updated_context.asset_metadata['final_output_files'] + + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + assert updated_context.merged_maps_details[merged_id_1]['final_output_path'] == str(final_path_merged1) + mock_log_error.assert_not_called() + # Check for specific info logs if necessary + # mock_log_info.assert_any_call(f"Copying {processed_details[proc_id_1]['temp_processed_file']} to {final_path_proc1}") + # mock_log_info.assert_any_call(f"Copying {merged_details[merged_id_1]['temp_merged_file']} to {final_path_merged1}") +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') # Still might be called if other files are processed +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.info') +def test_output_organization_overwrite_disabled_file_exists( + mock_log_info, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_exists.png', 'map_type': 'Diffuse', 'output_filename': 'OutputOrgAsset_Diffuse_Exists.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Diffuse/OutputOrgAsset_Diffuse_Exists.png") + mock_gen_path.return_value = final_path_proc1 # Only one file + mock_path_exists.return_value = True # File exists at destination + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() + mock_log_info.assert_any_call( + f"Skipping copy for {final_path_proc1} as it already exists and overwrite is disabled." + ) + # final_output_files should still be populated if the file exists and is considered "organized" + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_output_organization_overwrite_enabled_file_exists( + mock_log_error, mock_log_info, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_overwrite.png', 'map_type': 'Diffuse', 'output_filename': 'OutputOrgAsset_Diffuse_Overwrite.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=True # Overwrite is enabled + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Diffuse/OutputOrgAsset_Diffuse_Overwrite.png") + mock_gen_path.return_value = final_path_proc1 + mock_path_exists.return_value = True # File exists, but we should overwrite + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + mock_mkdir.assert_called_once_with(final_path_proc1.parent, parents=True, exist_ok=True) + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + mock_log_error.assert_not_called() + # Optionally check for a log message indicating overwrite, if implemented + # mock_log_info.assert_any_call(f"Overwriting existing file {final_path_proc1}...") + + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_only_processed_maps( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_only.png', 'map_type': 'Albedo', 'output_filename': 'OutputOrgAsset_Albedo.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + merged_map_details={}, # No merged maps + overwrite_setting=False + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Albedo/OutputOrgAsset_Albedo.png") + mock_gen_path.return_value = final_path_proc1 + mock_path_exists.return_value = False + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + mock_mkdir.assert_called_once_with(final_path_proc1.parent, parents=True, exist_ok=True) + assert len(updated_context.asset_metadata['final_output_files']) == 1 + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + assert not updated_context.merged_maps_details # Should remain empty + mock_log_error.assert_not_called() + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_only_merged_maps( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + merged_id_1 = uuid.uuid4().hex + merged_details = { + merged_id_1: {'status': 'Processed', 'temp_merged_file': '/fake/temp_engine_dir/merged_only.png', 'map_type': 'Metallic', 'output_filename': 'OutputOrgAsset_Metallic.png'} + } + context = create_output_org_mock_context( + processed_map_details={}, # No processed maps + merged_map_details=merged_details, + overwrite_setting=False + ) + + final_path_merged1 = Path("/fake/output_final/OutputOrgAsset/Metallic/OutputOrgAsset_Metallic.png") + mock_gen_path.return_value = final_path_merged1 + mock_path_exists.return_value = False + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_called_once_with(Path(merged_details[merged_id_1]['temp_merged_file']), final_path_merged1) + mock_mkdir.assert_called_once_with(final_path_merged1.parent, parents=True, exist_ok=True) + assert len(updated_context.asset_metadata['final_output_files']) == 1 + assert str(final_path_merged1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.merged_maps_details[merged_id_1]['final_output_path'] == str(final_path_merged1) + assert not updated_context.processed_maps_details # Should remain empty + mock_log_error.assert_not_called() + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.warning') # Expect a warning for skipped map +@mock.patch('logging.error') +def test_output_organization_map_status_not_processed( + mock_log_error, mock_log_warning, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + + proc_id_1_failed = uuid.uuid4().hex + proc_id_2_ok = uuid.uuid4().hex + + processed_details = { + proc_id_1_failed: {'status': 'Failed', 'temp_processed_file': '/fake/temp_engine_dir/proc_failed.png', 'map_type': 'Diffuse', 'output_filename': 'OutputOrgAsset_Diffuse_Failed.png'}, + proc_id_2_ok: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_ok.png', 'map_type': 'Normal', 'output_filename': 'OutputOrgAsset_Normal_OK.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + final_path_proc_ok = Path("/fake/output_final/OutputOrgAsset/Normal/OutputOrgAsset_Normal_OK.png") + # generate_path_from_pattern should only be called for the 'Processed' map + mock_gen_path.return_value = final_path_proc_ok + mock_path_exists.return_value = False + + updated_context = stage.execute(context) + + # Assert copy was only called for the 'Processed' map + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_2_ok]['temp_processed_file']), final_path_proc_ok) + mock_mkdir.assert_called_once_with(final_path_proc_ok.parent, parents=True, exist_ok=True) + + # Assert final_output_files only contains the successfully processed map + assert len(updated_context.asset_metadata['final_output_files']) == 1 + assert str(final_path_proc_ok) in updated_context.asset_metadata['final_output_files'] + + # Assert final_output_path is set for the processed map + assert updated_context.processed_maps_details[proc_id_2_ok]['final_output_path'] == str(final_path_proc_ok) + # Assert final_output_path is NOT set for the failed map + assert 'final_output_path' not in updated_context.processed_maps_details[proc_id_1_failed] + + mock_log_warning.assert_any_call( + f"Skipping output organization for map with ID {proc_id_1_failed} (type: Diffuse) as its status is 'Failed'." + ) + mock_log_error.assert_not_called() +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_generate_path_fails( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_path_fail.png', 'map_type': 'Roughness', 'output_filename': 'OutputOrgAsset_Roughness_PathFail.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + mock_gen_path.side_effect = Exception("Simulated path generation error") + mock_path_exists.return_value = False # Should not matter if path gen fails + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() # No copy if path generation fails + mock_mkdir.assert_not_called() # No mkdir if path generation fails + + assert not updated_context.asset_metadata.get('final_output_files') # No files should be listed + assert 'final_output_path' not in updated_context.processed_maps_details[proc_id_1] + + assert updated_context.status_flags.get('output_organization_error') is True + assert updated_context.asset_metadata['status'] == "Error" # Or "Failed" depending on desired behavior + + mock_log_error.assert_any_call( + f"Error generating output path for map ID {proc_id_1} (type: Roughness): Simulated path generation error" + ) + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_shutil_copy_fails( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_copy_fail.png', 'map_type': 'AO', 'output_filename': 'OutputOrgAsset_AO_CopyFail.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/AO/OutputOrgAsset_AO_CopyFail.png") + mock_gen_path.return_value = final_path_proc1 + mock_path_exists.return_value = False + mock_shutil_copy.side_effect = shutil.Error("Simulated copy error") # Can also be IOError, OSError + + updated_context = stage.execute(context) + + mock_mkdir.assert_called_once_with(final_path_proc1.parent, parents=True, exist_ok=True) # mkdir would be called before copy + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + + # Even if copy fails, the path might be added to final_output_files before the error is caught, + # or the design might be to not add it. Let's assume it's not added on error. + # Check the stage's actual behavior for this. + # If the intention is to record the *attempted* path, this assertion might change. + # For now, assume failure means it's not a "final" output. + assert not updated_context.asset_metadata.get('final_output_files') + assert 'final_output_path' not in updated_context.processed_maps_details[proc_id_1] # Or it might contain the path but status is error + + assert updated_context.status_flags.get('output_organization_error') is True + assert updated_context.asset_metadata['status'] == "Error" # Or "Failed" + + mock_log_error.assert_any_call( + f"Error copying file {processed_details[proc_id_1]['temp_processed_file']} to {final_path_proc1}: Simulated copy error" + ) \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_supplier_determination.py b/tests/processing/pipeline/stages/test_supplier_determination.py new file mode 100644 index 0000000..a1613b1 --- /dev/null +++ b/tests/processing/pipeline/stages/test_supplier_determination.py @@ -0,0 +1,213 @@ +import pytest +from unittest import mock +from pathlib import Path +from typing import Dict, List, Optional, Any + +# Assuming pytest is run from project root, adjust if necessary +from processing.pipeline.stages.supplier_determination import SupplierDeterminationStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule # For constructing mock context +from configuration import Configuration, GeneralSettings, Supplier # For mock config + +# Example helper (can be a pytest fixture too) +def create_mock_context( + asset_rule_supplier_override: Optional[str] = None, + source_rule_supplier: Optional[str] = None, + config_suppliers: Optional[Dict[str, Any]] = None, # Mocked Supplier objects or dicts + asset_name: str = "TestAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.supplier_override = asset_rule_supplier_override + # ... other AssetRule fields if needed by the stage ... + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.supplier = source_rule_supplier + # ... other SourceRule fields ... + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.suppliers = config_suppliers if config_suppliers is not None else {} + + # Basic AssetProcessingContext fields + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier=None, + asset_metadata={}, + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha256_value if that's the actual field name + ) + return context + +@pytest.fixture +def supplier_stage(): + return SupplierDeterminationStage() + +@mock.patch('logging.error') +@mock.patch('logging.info') +def test_supplier_from_asset_rule_override_valid(mock_log_info, mock_log_error, supplier_stage): + mock_suppliers_config = {"SupplierA": mock.MagicMock(spec=Supplier)} + context = create_mock_context( + asset_rule_supplier_override="SupplierA", + config_suppliers=mock_suppliers_config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier == "SupplierA" + assert not updated_context.status_flags.get('supplier_error') + mock_log_info.assert_any_call("Effective supplier for asset 'TestAsset' set to 'SupplierA' from asset rule override.") + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.info') +def test_supplier_from_source_rule_fallback_valid(mock_log_info, mock_log_error, supplier_stage): + mock_suppliers_config = {"SupplierB": mock.MagicMock(spec=Supplier)} + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier="SupplierB", + config_suppliers=mock_suppliers_config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier == "SupplierB" + assert not updated_context.status_flags.get('supplier_error') + mock_log_info.assert_any_call("Effective supplier for asset 'TestAsset' set to 'SupplierB' from source rule.") + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') # supplier_determination uses logging.warning for invalid suppliers +def test_asset_rule_override_invalid_supplier(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override="InvalidSupplier", + config_suppliers={"SupplierA": mock.MagicMock(spec=Supplier)} # "InvalidSupplier" not in config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has supplier_override 'InvalidSupplier' which is not defined in global suppliers. No supplier set." + ) + mock_log_error.assert_not_called() + + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_source_rule_fallback_invalid_supplier(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier="InvalidSupplierB", + config_suppliers={"SupplierA": mock.MagicMock(spec=Supplier)} # "InvalidSupplierB" not in config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has source rule supplier 'InvalidSupplierB' which is not defined in global suppliers. No supplier set." + ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_no_supplier_defined(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier=None, + config_suppliers={"SupplierA": mock.MagicMock(spec=Supplier)} + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "No supplier could be determined for asset 'TestAsset'. " + "AssetRule override is None and SourceRule supplier is None or empty." + ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_empty_config_suppliers_with_asset_override(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override="SupplierX", + config_suppliers={} # Empty global supplier config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has supplier_override 'SupplierX' which is not defined in global suppliers. No supplier set." + ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_empty_config_suppliers_with_source_rule(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + source_rule_supplier="SupplierY", + config_suppliers={} # Empty global supplier config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has source rule supplier 'SupplierY' which is not defined in global suppliers. No supplier set." + ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.info') +def test_asset_rule_override_empty_string(mock_log_info, mock_log_error, supplier_stage): + # This scenario should fall back to source_rule.supplier if asset_rule.supplier_override is "" + mock_suppliers_config = {"SupplierB": mock.MagicMock(spec=Supplier)} + context = create_mock_context( + asset_rule_supplier_override="", # Empty string override + source_rule_supplier="SupplierB", + config_suppliers=mock_suppliers_config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier == "SupplierB" # Falls back to SourceRule + assert not updated_context.status_flags.get('supplier_error') + mock_log_info.assert_any_call("Effective supplier for asset 'TestAsset' set to 'SupplierB' from source rule.") + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_source_rule_supplier_empty_string(mock_log_warning, mock_log_error, supplier_stage): + # This scenario should result in an error if asset_rule.supplier_override is None and source_rule.supplier is "" + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier="", # Empty string source supplier + config_suppliers={"SupplierA": mock.MagicMock(spec=Supplier)} + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "No supplier could be determined for asset 'TestAsset'. " + "AssetRule override is None and SourceRule supplier is None or empty." + ) + mock_log_error.assert_not_called() \ No newline at end of file diff --git a/tests/processing/pipeline/test_orchestrator.py b/tests/processing/pipeline/test_orchestrator.py new file mode 100644 index 0000000..3f52908 --- /dev/null +++ b/tests/processing/pipeline/test_orchestrator.py @@ -0,0 +1,383 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import shutil # For checking rmtree +import tempfile # For mocking mkdtemp + +from processing.pipeline.orchestrator import PipelineOrchestrator +from processing.pipeline.asset_context import AssetProcessingContext +from processing.pipeline.stages.base_stage import ProcessingStage # For mocking stages +from rule_structure import SourceRule, AssetRule, FileRule +from configuration import Configuration, GeneralSettings + +# Mock Stage that modifies context +class MockPassThroughStage(ProcessingStage): + def __init__(self, stage_name="mock_stage"): + self.stage_name = stage_name + self.execute_call_count = 0 + self.contexts_called_with = [] # To store contexts for verification + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + self.execute_call_count += 1 + self.contexts_called_with.append(context) + # Optionally, modify context for testing + context.asset_metadata[f'{self.stage_name}_executed'] = True + if self.stage_name == "skipper_stage": # Example conditional logic + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Skipped by skipper_stage" + elif self.stage_name == "error_stage": # Example error-raising stage + raise ValueError("Simulated error in error_stage") + + # Simulate status update based on stage execution + if not context.status_flags.get('skip_asset') and not context.status_flags.get('asset_failed'): + context.asset_metadata['status'] = "Processed" # Default to processed if not skipped/failed + return context + +def create_orchestrator_test_config() -> mock.MagicMock: + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock.MagicMock(spec=GeneralSettings) + mock_config.general_settings.temp_dir_override = None # Default, can be overridden in tests + # Add other config details if orchestrator or stages depend on them directly + return mock_config + +def create_orchestrator_test_asset_rule(name: str, num_file_rules: int = 1) -> mock.MagicMock: + asset_rule = mock.MagicMock(spec=AssetRule) + asset_rule.name = name + asset_rule.id = uuid.uuid4() + asset_rule.source_path = Path(f"/fake/source/{name}") # Using Path object + asset_rule.file_rules = [mock.MagicMock(spec=FileRule) for _ in range(num_file_rules)] + asset_rule.enabled = True + asset_rule.map_types = {} # Initialize as dict + asset_rule.material_name_scheme = "{asset_name}" + asset_rule.texture_name_scheme = "{asset_name}_{map_type}" + asset_rule.output_path_scheme = "{source_name}/{asset_name}" + # ... other necessary AssetRule fields ... + return asset_rule + +def create_orchestrator_test_source_rule(name: str, num_assets: int = 1, asset_names: list = None) -> mock.MagicMock: + source_rule = mock.MagicMock(spec=SourceRule) + source_rule.name = name + source_rule.id = uuid.uuid4() + if asset_names: + source_rule.assets = [create_orchestrator_test_asset_rule(an) for an in asset_names] + else: + source_rule.assets = [create_orchestrator_test_asset_rule(f"Asset_{i+1}_in_{name}") for i in range(num_assets)] + source_rule.enabled = True + source_rule.source_path = Path(f"/fake/source_root/{name}") # Using Path object + # ... other necessary SourceRule fields ... + return source_rule + +# --- Test Cases for PipelineOrchestrator.process_source_rule() --- + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_basic_flow_mock_stages(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_path" # Path for mkdtemp + + config = create_orchestrator_test_config() + stage1 = MockPassThroughStage("stage1") + stage2 = MockPassThroughStage("stage2") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1, stage2]) + + source_rule = create_orchestrator_test_source_rule("MySourceRule", num_assets=2) + asset1_name = source_rule.assets[0].name + asset2_name = source_rule.assets[1].name + + # Mock asset_metadata to be updated by stages for status check + # The MockPassThroughStage already sets a 'status' = "Processed" if not skipped/failed + # and adds '{stage_name}_executed' = True to asset_metadata. + + results = orchestrator.process_source_rule( + source_rule, Path("/ws"), Path("/out"), False, "inc_val_123", "sha_val_abc" + ) + + assert stage1.execute_call_count == 2 # Called for each asset + assert stage2.execute_call_count == 2 # Called for each asset + + assert asset1_name in results['processed'] + assert asset2_name in results['processed'] + assert not results['skipped'] + assert not results['failed'] + + # Verify context modifications by stages + for i in range(2): # For each asset + # Stage 1 context checks + s1_context_asset = stage1.contexts_called_with[i] + assert s1_context_asset.asset_metadata.get('stage1_executed') is True + assert s1_context_asset.asset_metadata.get('stage2_executed') is None # Stage 2 not yet run for this asset + + # Stage 2 context checks + s2_context_asset = stage2.contexts_called_with[i] + assert s2_context_asset.asset_metadata.get('stage1_executed') is True # From stage 1 + assert s2_context_asset.asset_metadata.get('stage2_executed') is True + assert s2_context_asset.asset_metadata.get('status') == "Processed" + + mock_mkdtemp.assert_called_once() + # The orchestrator creates a subdirectory within the mkdtemp path + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_asset_skipping_by_stage(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_path_skip" + + config = create_orchestrator_test_config() + skipper_stage = MockPassThroughStage("skipper_stage") # This stage will set skip_asset = True + stage_after_skip = MockPassThroughStage("stage_after_skip") + + orchestrator = PipelineOrchestrator(config_obj=config, stages=[skipper_stage, stage_after_skip]) + + source_rule = create_orchestrator_test_source_rule("SkipSourceRule", num_assets=1) + asset_to_skip_name = source_rule.assets[0].name + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_skip"), Path("/out_skip"), False, "inc_skip", "sha_skip" + ) + + assert skipper_stage.execute_call_count == 1 # Called for the asset + assert stage_after_skip.execute_call_count == 0 # Not called because asset was skipped + + assert asset_to_skip_name in results['skipped'] + assert not results['processed'] + assert not results['failed'] + + # Verify skip reason in context if needed (MockPassThroughStage stores contexts) + skipped_context = skipper_stage.contexts_called_with[0] + assert skipped_context.status_flags['skip_asset'] is True + assert skipped_context.status_flags['skip_reason'] == "Skipped by skipper_stage" + + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_no_assets_in_source_rule(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_no_assets" + + config = create_orchestrator_test_config() + stage1 = MockPassThroughStage("stage1_no_assets") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1]) + + source_rule = create_orchestrator_test_source_rule("NoAssetSourceRule", num_assets=0) + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_no_assets"), Path("/out_no_assets"), False, "inc_no", "sha_no" + ) + + assert stage1.execute_call_count == 0 + assert not results['processed'] + assert not results['skipped'] + assert not results['failed'] + + # mkdtemp should still be called for the source rule processing, even if no assets + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_error_during_stage_execution(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_error" + + config = create_orchestrator_test_config() + error_stage = MockPassThroughStage("error_stage") # This stage will raise an error + stage_after_error = MockPassThroughStage("stage_after_error") + + orchestrator = PipelineOrchestrator(config_obj=config, stages=[error_stage, stage_after_error]) + + # Test with two assets, one fails, one processes (if orchestrator continues) + # The current orchestrator's process_asset is per asset, so an error in one + # should not stop processing of other assets in the same source_rule. + source_rule = create_orchestrator_test_source_rule("ErrorSourceRule", asset_names=["AssetFails", "AssetSucceeds"]) + asset_fails_name = source_rule.assets[0].name + asset_succeeds_name = source_rule.assets[1].name + + # Make only the first asset's processing trigger the error + original_execute = error_stage.execute + def error_execute_side_effect(context: AssetProcessingContext): + if context.asset_rule.name == asset_fails_name: + # The MockPassThroughStage is already configured to raise ValueError for "error_stage" + # but we need to ensure it's only for the first asset. + # We can achieve this by modifying the stage_name temporarily or by checking asset_rule.name + # For simplicity, let's assume the mock stage's error logic is fine, + # and we just need to check the outcome. + # The error_stage will raise ValueError("Simulated error in error_stage") + # The orchestrator's _process_single_asset catches generic Exception. + return original_execute(context) # This will call the erroring logic + else: + # For the second asset, make it pass through without error + context.asset_metadata[f'{error_stage.stage_name}_executed'] = True + context.asset_metadata['status'] = "Processed" + return context + + error_stage.execute = mock.MagicMock(side_effect=error_execute_side_effect) + # stage_after_error should still be called for the successful asset + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_error"), Path("/out_error"), False, "inc_err", "sha_err" + ) + + assert error_stage.execute.call_count == 2 # Called for both assets + # stage_after_error is only called for the asset that didn't fail in error_stage + assert stage_after_error.execute_call_count == 1 + + assert asset_fails_name in results['failed'] + assert asset_succeeds_name in results['processed'] + assert not results['skipped'] + + # Verify the context of the failed asset + failed_context = None + for ctx in error_stage.contexts_called_with: + if ctx.asset_rule.name == asset_fails_name: + failed_context = ctx + break + assert failed_context is not None + assert failed_context.status_flags['asset_failed'] is True + assert "Simulated error in error_stage" in failed_context.status_flags['failure_reason'] + + # Verify the context of the successful asset after stage_after_error + successful_context_after_s2 = None + for ctx in stage_after_error.contexts_called_with: + if ctx.asset_rule.name == asset_succeeds_name: + successful_context_after_s2 = ctx + break + assert successful_context_after_s2 is not None + assert successful_context_after_s2.asset_metadata.get('error_stage_executed') is True # from the non-erroring path + assert successful_context_after_s2.asset_metadata.get('stage_after_error_executed') is True + assert successful_context_after_s2.asset_metadata.get('status') == "Processed" + + + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_asset_processing_context_initialization(mock_mkdtemp, mock_rmtree): + mock_engine_temp_dir = "/fake/engine_temp_dir_context_init" + mock_mkdtemp.return_value = mock_engine_temp_dir + + config = create_orchestrator_test_config() + mock_stage = MockPassThroughStage("context_check_stage") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[mock_stage]) + + source_rule = create_orchestrator_test_source_rule("ContextSourceRule", num_assets=1) + asset_rule = source_rule.assets[0] + + workspace_path = Path("/ws_context") + output_base_path = Path("/out_context") + incrementing_value = "inc_context_123" + sha5_value = "sha_context_abc" + + orchestrator.process_source_rule( + source_rule, workspace_path, output_base_path, False, incrementing_value, sha5_value + ) + + assert mock_stage.execute_call_count == 1 + + # Retrieve the context passed to the mock stage + captured_context = mock_stage.contexts_called_with[0] + + assert captured_context.source_rule == source_rule + assert captured_context.asset_rule == asset_rule + assert captured_context.workspace_path == workspace_path + + # engine_temp_dir for the asset is a sub-directory of the source_rule's temp dir + # which itself is a sub-directory of the main engine_temp_dir from mkdtemp + expected_source_rule_temp_dir = Path(mock_engine_temp_dir) / source_rule.id.hex + expected_asset_temp_dir = expected_source_rule_temp_dir / asset_rule.id.hex + assert captured_context.engine_temp_dir == expected_asset_temp_dir + + assert captured_context.output_base_path == output_base_path + assert captured_context.config_obj == config + assert captured_context.incrementing_value == incrementing_value + assert captured_context.sha5_value == sha5_value + + # Check initial state of other context fields + assert captured_context.asset_metadata == {} # Should be empty initially for an asset + assert captured_context.status_flags == {} # Should be empty initially + assert captured_context.shared_data == {} # Should be empty initially + assert captured_context.current_files == [] # Should be empty initially + + mock_mkdtemp.assert_called_once() + mock_rmtree.assert_called_once_with(expected_source_rule_temp_dir, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_temp_dir_override_from_config(mock_mkdtemp, mock_rmtree): + # This test verifies that if config.general_settings.temp_dir_override is set, + # mkdtemp is NOT called, and the override path is used and cleaned up. + + config = create_orchestrator_test_config() + override_temp_path_str = "/override/temp/path" + config.general_settings.temp_dir_override = override_temp_path_str + + stage1 = MockPassThroughStage("stage_temp_override") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1]) + + source_rule = create_orchestrator_test_source_rule("TempOverrideRule", num_assets=1) + asset_rule = source_rule.assets[0] + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_override"), Path("/out_override"), False, "inc_override", "sha_override" + ) + + assert stage1.execute_call_count == 1 + assert asset_rule.name in results['processed'] + + mock_mkdtemp.assert_not_called() # mkdtemp should not be called due to override + + # The orchestrator should create its source-rule specific subdir within the override + expected_source_rule_temp_dir_in_override = Path(override_temp_path_str) / source_rule.id.hex + + # Verify the context passed to the stage uses the overridden path structure + captured_context = stage1.contexts_called_with[0] + expected_asset_temp_dir_in_override = expected_source_rule_temp_dir_in_override / asset_rule.id.hex + assert captured_context.engine_temp_dir == expected_asset_temp_dir_in_override + + # rmtree should be called on the source_rule's directory within the override path + mock_rmtree.assert_called_once_with(expected_source_rule_temp_dir_in_override, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_disabled_asset_rule_is_skipped(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_disabled_asset" + + config = create_orchestrator_test_config() + stage1 = MockPassThroughStage("stage_disabled_check") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1]) + + source_rule = create_orchestrator_test_source_rule("DisabledAssetSourceRule", asset_names=["EnabledAsset", "DisabledAsset"]) + enabled_asset = source_rule.assets[0] + disabled_asset = source_rule.assets[1] + disabled_asset.enabled = False # Disable this asset rule + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_disabled"), Path("/out_disabled"), False, "inc_dis", "sha_dis" + ) + + assert stage1.execute_call_count == 1 # Only called for the enabled asset + + assert enabled_asset.name in results['processed'] + assert disabled_asset.name in results['skipped'] + assert not results['failed'] + + # Verify context for the processed asset + assert stage1.contexts_called_with[0].asset_rule.name == enabled_asset.name + + # Verify skip reason for the disabled asset (this is set by the orchestrator itself) + # The orchestrator's _process_single_asset checks asset_rule.enabled + # We need to inspect the results dictionary for the skip reason if it's stored there, + # or infer it. The current structure of `results` doesn't store detailed skip reasons directly, + # but the test ensures it's in the 'skipped' list. + # For a more detailed check, one might need to adjust how results are reported or mock deeper. + # For now, confirming it's in 'skipped' and stage1 wasn't called for it is sufficient. + + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) \ No newline at end of file diff --git a/tests/processing/utils/test_image_processing_utils.py b/tests/processing/utils/test_image_processing_utils.py new file mode 100644 index 0000000..e128b3f --- /dev/null +++ b/tests/processing/utils/test_image_processing_utils.py @@ -0,0 +1,504 @@ +import pytest +from unittest import mock +import numpy as np +from pathlib import Path +import sys + +# Attempt to import the module under test +# This assumes that the 'tests' directory is at the same level as the 'processing' directory, +# and pytest handles the PYTHONPATH correctly. +try: + from processing.utils import image_processing_utils as ipu + import cv2 # Import cv2 here if it's used for constants like cv2.COLOR_BGR2RGB +except ImportError: + # Fallback for environments where PYTHONPATH might not be set up as expected by pytest initially + # This adds the project root to sys.path to find the 'processing' module + # Adjust the number of Path.parent calls if your test structure is deeper or shallower + project_root = Path(__file__).parent.parent.parent.parent + sys.path.insert(0, str(project_root)) + from processing.utils import image_processing_utils as ipu + import cv2 # Import cv2 here as well + +# If cv2 is imported directly in image_processing_utils, you might need to mock it globally for some tests +# For example, at the top of the test file: +# sys.modules['cv2'] = mock.MagicMock() # Basic global mock if needed +# We will use more targeted mocks with @mock.patch where cv2 is used. + +# --- Tests for Mathematical Helpers --- + +def test_is_power_of_two(): + assert ipu.is_power_of_two(1) is True + assert ipu.is_power_of_two(2) is True + assert ipu.is_power_of_two(4) is True + assert ipu.is_power_of_two(16) is True + assert ipu.is_power_of_two(1024) is True + assert ipu.is_power_of_two(0) is False + assert ipu.is_power_of_two(-2) is False + assert ipu.is_power_of_two(3) is False + assert ipu.is_power_of_two(100) is False + +def test_get_nearest_pot(): + assert ipu.get_nearest_pot(1) == 1 + assert ipu.get_nearest_pot(2) == 2 + # Based on current implementation: + # For 3: lower=2, upper=4. (3-2)=1, (4-3)=1. Else branch returns upper_pot. So 4. + assert ipu.get_nearest_pot(3) == 4 + assert ipu.get_nearest_pot(50) == 64 # (50-32)=18, (64-50)=14 -> upper + assert ipu.get_nearest_pot(100) == 128 # (100-64)=36, (128-100)=28 -> upper + assert ipu.get_nearest_pot(256) == 256 + assert ipu.get_nearest_pot(0) == 1 + assert ipu.get_nearest_pot(-10) == 1 + # For 700: value.bit_length() = 10. lower_pot = 1<<(10-1) = 512. upper_pot = 1<<10 = 1024. + # (700-512) = 188. (1024-700) = 324. (188 < 324) is True. Returns lower_pot. So 512. + assert ipu.get_nearest_pot(700) == 512 + assert ipu.get_nearest_pot(6) == 8 # (6-4)=2, (8-6)=2. Returns upper. + assert ipu.get_nearest_pot(5) == 4 # (5-4)=1, (8-5)=3. Returns lower. + + +@pytest.mark.parametrize( + "orig_w, orig_h, target_w, target_h, resize_mode, ensure_pot, allow_upscale, target_max_dim, expected_w, expected_h", + [ + # FIT mode + (1000, 800, 500, None, "fit", False, False, None, 500, 400), # Fit width + (1000, 800, None, 400, "fit", False, False, None, 500, 400), # Fit height + (1000, 800, 500, 500, "fit", False, False, None, 500, 400), # Fit to box (width constrained) + (800, 1000, 500, 500, "fit", False, False, None, 400, 500), # Fit to box (height constrained) + (100, 80, 200, None, "fit", False, False, None, 100, 80), # Fit width, no upscale + (100, 80, 200, None, "fit", False, True, None, 200, 160), # Fit width, allow upscale + (100, 80, 128, None, "fit", True, False, None, 128, 64), # Re-evaluated + (100, 80, 128, None, "fit", True, True, None, 128, 128), # Fit width, ensure_pot, allow upscale (128, 102 -> pot 128, 128) + + # STRETCH mode + (1000, 800, 500, 400, "stretch", False, False, None, 500, 400), + (100, 80, 200, 160, "stretch", False, True, None, 200, 160), # Stretch, allow upscale + (100, 80, 200, 160, "stretch", False, False, None, 100, 80), # Stretch, no upscale + (100, 80, 128, 128, "stretch", True, True, None, 128, 128), # Stretch, ensure_pot, allow upscale + (100, 80, 70, 70, "stretch", True, False, None, 64, 64), # Stretch, ensure_pot, no upscale (70,70 -> pot 64,64) + + # MAX_DIM_POT mode + (1000, 800, None, None, "max_dim_pot", True, False, 512, 512, 512), + (800, 1000, None, None, "max_dim_pot", True, False, 512, 512, 512), + (1920, 1080, None, None, "max_dim_pot", True, False, 1024, 1024, 512), + (100, 100, None, None, "max_dim_pot", True, False, 60, 64, 64), + # Edge cases for calculate_target_dimensions + (0, 0, 512, 512, "fit", False, False, None, 512, 512), + (10, 10, 512, 512, "fit", True, False, None, 8, 8), + (100, 100, 150, 150, "fit", True, False, None, 128, 128), + ] +) +def test_calculate_target_dimensions(orig_w, orig_h, target_w, target_h, resize_mode, ensure_pot, allow_upscale, target_max_dim, expected_w, expected_h): + if resize_mode == "max_dim_pot" and target_max_dim is None: + with pytest.raises(ValueError, match="target_max_dim_for_pot_mode must be provided"): + ipu.calculate_target_dimensions(orig_w, orig_h, target_width=target_w, target_height=target_h, + resize_mode=resize_mode, ensure_pot=ensure_pot, allow_upscale=allow_upscale, + target_max_dim_for_pot_mode=target_max_dim) + elif (resize_mode == "fit" and target_w is None and target_h is None) or \ + (resize_mode == "stretch" and (target_w is None or target_h is None)): + with pytest.raises(ValueError): + ipu.calculate_target_dimensions(orig_w, orig_h, target_width=target_w, target_height=target_h, + resize_mode=resize_mode, ensure_pot=ensure_pot, allow_upscale=allow_upscale, + target_max_dim_for_pot_mode=target_max_dim) + else: + actual_w, actual_h = ipu.calculate_target_dimensions( + orig_w, orig_h, target_width=target_w, target_height=target_h, + resize_mode=resize_mode, ensure_pot=ensure_pot, allow_upscale=allow_upscale, + target_max_dim_for_pot_mode=target_max_dim + ) + assert (actual_w, actual_h) == (expected_w, expected_h), \ + f"Input: ({orig_w},{orig_h}), T=({target_w},{target_h}), M={resize_mode}, POT={ensure_pot}, UPSC={allow_upscale}, TMAX={target_max_dim}" + + +def test_calculate_target_dimensions_invalid_mode(): + with pytest.raises(ValueError, match="Unsupported resize_mode"): + ipu.calculate_target_dimensions(100, 100, 50, 50, resize_mode="invalid_mode") + +@pytest.mark.parametrize( + "ow, oh, rw, rh, expected_str", + [ + (100, 100, 100, 100, "EVEN"), + (100, 100, 200, 200, "EVEN"), + (200, 200, 100, 100, "EVEN"), + (100, 100, 150, 100, "X15Y1"), + (100, 100, 50, 100, "X05Y1"), + (100, 100, 100, 150, "X1Y15"), + (100, 100, 100, 50, "X1Y05"), + (100, 50, 150, 75, "EVEN"), + (100, 50, 150, 50, "X15Y1"), + (100, 50, 100, 75, "X1Y15"), + (100, 50, 120, 60, "EVEN"), + (100, 50, 133, 66, "EVEN"), + (100, 100, 133, 100, "X133Y1"), + (100, 100, 100, 133, "X1Y133"), + (100, 100, 133, 133, "EVEN"), + (100, 100, 67, 100, "X067Y1"), + (100, 100, 100, 67, "X1Y067"), + (100, 100, 67, 67, "EVEN"), + (1920, 1080, 1024, 576, "EVEN"), + (1920, 1080, 1024, 512, "X112Y1"), + (0, 100, 50, 50, "InvalidInput"), + (100, 0, 50, 50, "InvalidInput"), + (100, 100, 0, 50, "InvalidResize"), + (100, 100, 50, 0, "InvalidResize"), + ] +) +def test_normalize_aspect_ratio_change(ow, oh, rw, rh, expected_str): + assert ipu.normalize_aspect_ratio_change(ow, oh, rw, rh) == expected_str + +# --- Tests for Image Manipulation --- + +@mock.patch('cv2.imread') +def test_load_image_success_str_path(mock_cv2_imread): + mock_img_data = np.array([[[1, 2, 3]]], dtype=np.uint8) + mock_cv2_imread.return_value = mock_img_data + + result = ipu.load_image("dummy/path.png") + + mock_cv2_imread.assert_called_once_with("dummy/path.png", cv2.IMREAD_UNCHANGED) + assert np.array_equal(result, mock_img_data) + +@mock.patch('cv2.imread') +def test_load_image_success_path_obj(mock_cv2_imread): + mock_img_data = np.array([[[1, 2, 3]]], dtype=np.uint8) + mock_cv2_imread.return_value = mock_img_data + dummy_path = Path("dummy/path.png") + + result = ipu.load_image(dummy_path) + + mock_cv2_imread.assert_called_once_with(str(dummy_path), cv2.IMREAD_UNCHANGED) + assert np.array_equal(result, mock_img_data) + +@mock.patch('cv2.imread') +def test_load_image_failure(mock_cv2_imread): + mock_cv2_imread.return_value = None + + result = ipu.load_image("dummy/path.png") + + mock_cv2_imread.assert_called_once_with("dummy/path.png", cv2.IMREAD_UNCHANGED) + assert result is None + +@mock.patch('cv2.imread', side_effect=Exception("CV2 Read Error")) +def test_load_image_exception(mock_cv2_imread): + result = ipu.load_image("dummy/path.png") + mock_cv2_imread.assert_called_once_with("dummy/path.png", cv2.IMREAD_UNCHANGED) + assert result is None + + +@mock.patch('cv2.cvtColor') +def test_convert_bgr_to_rgb_3_channel(mock_cv2_cvtcolor): + bgr_image = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + rgb_image_mock = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + mock_cv2_cvtcolor.return_value = rgb_image_mock + + result = ipu.convert_bgr_to_rgb(bgr_image) + + mock_cv2_cvtcolor.assert_called_once_with(bgr_image, cv2.COLOR_BGR2RGB) + assert np.array_equal(result, rgb_image_mock) + +@mock.patch('cv2.cvtColor') +def test_convert_bgr_to_rgb_4_channel_bgra(mock_cv2_cvtcolor): + bgra_image = np.random.randint(0, 255, (10, 10, 4), dtype=np.uint8) + rgb_image_mock = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) # cvtColor BGRA2RGB drops alpha + mock_cv2_cvtcolor.return_value = rgb_image_mock # Mocking the output of BGRA2RGB + + result = ipu.convert_bgr_to_rgb(bgra_image) + + mock_cv2_cvtcolor.assert_called_once_with(bgra_image, cv2.COLOR_BGRA2RGB) + assert np.array_equal(result, rgb_image_mock) + + +def test_convert_bgr_to_rgb_none_input(): + assert ipu.convert_bgr_to_rgb(None) is None + +def test_convert_bgr_to_rgb_grayscale_input(): + gray_image = np.random.randint(0, 255, (10, 10), dtype=np.uint8) + result = ipu.convert_bgr_to_rgb(gray_image) + assert np.array_equal(result, gray_image) # Should return as is + +@mock.patch('cv2.cvtColor') +def test_convert_rgb_to_bgr_3_channel(mock_cv2_cvtcolor): + rgb_image = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + bgr_image_mock = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + mock_cv2_cvtcolor.return_value = bgr_image_mock + + result = ipu.convert_rgb_to_bgr(rgb_image) + + mock_cv2_cvtcolor.assert_called_once_with(rgb_image, cv2.COLOR_RGB2BGR) + assert np.array_equal(result, bgr_image_mock) + +def test_convert_rgb_to_bgr_none_input(): + assert ipu.convert_rgb_to_bgr(None) is None + +def test_convert_rgb_to_bgr_grayscale_input(): + gray_image = np.random.randint(0, 255, (10, 10), dtype=np.uint8) + result = ipu.convert_rgb_to_bgr(gray_image) + assert np.array_equal(result, gray_image) # Should return as is + +def test_convert_rgb_to_bgr_4_channel_input(): + rgba_image = np.random.randint(0, 255, (10, 10, 4), dtype=np.uint8) + result = ipu.convert_rgb_to_bgr(rgba_image) + assert np.array_equal(result, rgba_image) # Should return as is + + +@mock.patch('cv2.resize') +def test_resize_image_downscale(mock_cv2_resize): + original_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + resized_image_mock = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) + mock_cv2_resize.return_value = resized_image_mock + target_w, target_h = 50, 50 + + result = ipu.resize_image(original_image, target_w, target_h) + + mock_cv2_resize.assert_called_once_with(original_image, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4) + assert np.array_equal(result, resized_image_mock) + +@mock.patch('cv2.resize') +def test_resize_image_upscale(mock_cv2_resize): + original_image = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) + resized_image_mock = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + mock_cv2_resize.return_value = resized_image_mock + target_w, target_h = 100, 100 + + result = ipu.resize_image(original_image, target_w, target_h) + + mock_cv2_resize.assert_called_once_with(original_image, (target_w, target_h), interpolation=cv2.INTER_CUBIC) + assert np.array_equal(result, resized_image_mock) + +@mock.patch('cv2.resize') +def test_resize_image_custom_interpolation(mock_cv2_resize): + original_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + resized_image_mock = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) + mock_cv2_resize.return_value = resized_image_mock + target_w, target_h = 50, 50 + + result = ipu.resize_image(original_image, target_w, target_h, interpolation=cv2.INTER_NEAREST) + + mock_cv2_resize.assert_called_once_with(original_image, (target_w, target_h), interpolation=cv2.INTER_NEAREST) + assert np.array_equal(result, resized_image_mock) + +def test_resize_image_none_input(): + with pytest.raises(ValueError, match="Cannot resize a None image."): + ipu.resize_image(None, 50, 50) + +@pytest.mark.parametrize("w, h", [(0, 50), (50, 0), (-1, 50)]) +def test_resize_image_invalid_dims(w, h): + original_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + with pytest.raises(ValueError, match="Target width and height must be positive."): + ipu.resize_image(original_image, w, h) + + +@mock.patch('cv2.imwrite') +@mock.patch('pathlib.Path.mkdir') # Mock mkdir to avoid actual directory creation +def test_save_image_success(mock_mkdir, mock_cv2_imwrite): + mock_cv2_imwrite.return_value = True + img_data = np.zeros((10,10,3), dtype=np.uint8) # RGB + save_path = "output/test.png" + + # ipu.save_image converts RGB to BGR by default for non-EXR + # So we expect convert_rgb_to_bgr to be called internally, + # and cv2.imwrite to receive BGR data. + # We can mock convert_rgb_to_bgr if we want to be very specific, + # or trust its own unit tests and check the data passed to imwrite. + # For simplicity, let's assume convert_rgb_to_bgr works and imwrite gets BGR. + # The function copies data, so we can check the mock call. + + success = ipu.save_image(save_path, img_data, convert_to_bgr_before_save=True) + + assert success is True + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + + # Check that imwrite was called. The first arg to assert_called_once_with is the path. + # The second arg is the image data. We need to compare it carefully. + # Since convert_rgb_to_bgr is called internally, the data passed to imwrite will be BGR. + # Let's create expected BGR data. + expected_bgr_data = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR) + + args, kwargs = mock_cv2_imwrite.call_args + assert args[0] == str(Path(save_path)) + assert np.array_equal(args[1], expected_bgr_data) + + +@mock.patch('cv2.imwrite') +@mock.patch('pathlib.Path.mkdir') +def test_save_image_success_exr_no_bgr_conversion(mock_mkdir, mock_cv2_imwrite): + mock_cv2_imwrite.return_value = True + img_data_rgb_float = np.random.rand(10,10,3).astype(np.float32) # RGB float for EXR + save_path = "output/test.exr" + + success = ipu.save_image(save_path, img_data_rgb_float, output_format="exr", convert_to_bgr_before_save=False) + + assert success is True + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + args, kwargs = mock_cv2_imwrite.call_args + assert args[0] == str(Path(save_path)) + assert np.array_equal(args[1], img_data_rgb_float) # Should be original RGB data + +@mock.patch('cv2.imwrite') +@mock.patch('pathlib.Path.mkdir') +def test_save_image_success_explicit_bgr_false_png(mock_mkdir, mock_cv2_imwrite): + mock_cv2_imwrite.return_value = True + img_data_rgb = np.zeros((10,10,3), dtype=np.uint8) # RGB + save_path = "output/test.png" + + # If convert_to_bgr_before_save is False, it should save RGB as is. + # However, OpenCV's imwrite for PNG might still expect BGR. + # The function's docstring says: "If True and image is 3-channel, converts RGB to BGR." + # So if False, it passes the data as is. + success = ipu.save_image(save_path, img_data_rgb, convert_to_bgr_before_save=False) + + assert success is True + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + args, kwargs = mock_cv2_imwrite.call_args + assert args[0] == str(Path(save_path)) + assert np.array_equal(args[1], img_data_rgb) + + +@mock.patch('cv2.imwrite') +@mock.patch('pathlib.Path.mkdir') +def test_save_image_failure(mock_mkdir, mock_cv2_imwrite): + mock_cv2_imwrite.return_value = False + img_data = np.zeros((10,10,3), dtype=np.uint8) + save_path = "output/fail.png" + + success = ipu.save_image(save_path, img_data) + + assert success is False + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_cv2_imwrite.assert_called_once() # Check it was called + +def test_save_image_none_data(): + assert ipu.save_image("output/none.png", None) is False + +@mock.patch('cv2.imwrite', side_effect=Exception("CV2 Write Error")) +@mock.patch('pathlib.Path.mkdir') +def test_save_image_exception(mock_mkdir, mock_cv2_imwrite_exception): + img_data = np.zeros((10,10,3), dtype=np.uint8) + save_path = "output/exception.png" + + success = ipu.save_image(save_path, img_data) + + assert success is False + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_cv2_imwrite_exception.assert_called_once() + +# Test data type conversions in save_image +@pytest.mark.parametrize( + "input_dtype, input_data_producer, output_dtype_target, expected_conversion_dtype, check_scaling", + [ + (np.uint16, lambda: (np.random.randint(0, 65535, (10,10,3), dtype=np.uint16)), np.uint8, np.uint8, True), + (np.float32, lambda: np.random.rand(10,10,3).astype(np.float32), np.uint8, np.uint8, True), + (np.uint8, lambda: (np.random.randint(0, 255, (10,10,3), dtype=np.uint8)), np.uint16, np.uint16, True), + (np.float32, lambda: np.random.rand(10,10,3).astype(np.float32), np.uint16, np.uint16, True), + (np.uint8, lambda: (np.random.randint(0, 255, (10,10,3), dtype=np.uint8)), np.float16, np.float16, True), + (np.uint16, lambda: (np.random.randint(0, 65535, (10,10,3), dtype=np.uint16)), np.float32, np.float32, True), + ] +) +@mock.patch('cv2.imwrite') +@mock.patch('pathlib.Path.mkdir') +def test_save_image_dtype_conversion(mock_mkdir, mock_cv2_imwrite, input_dtype, input_data_producer, output_dtype_target, expected_conversion_dtype, check_scaling): + mock_cv2_imwrite.return_value = True + img_data = input_data_producer() + original_img_data_copy = img_data.copy() # For checking scaling if needed + + ipu.save_image("output/dtype_test.png", img_data, output_dtype_target=output_dtype_target) + + mock_cv2_imwrite.assert_called_once() + saved_img_data = mock_cv2_imwrite.call_args[0][1] # Get the image data passed to imwrite + + assert saved_img_data.dtype == expected_conversion_dtype + + if check_scaling: + # This is a basic check. More precise checks would require known input/output values. + if output_dtype_target == np.uint8: + if input_dtype == np.uint16: + expected_scaled_data = (original_img_data_copy.astype(np.float32) / 65535.0 * 255.0).astype(np.uint8) + assert np.allclose(saved_img_data, cv2.cvtColor(expected_scaled_data, cv2.COLOR_RGB2BGR), atol=1) # Allow small diff due to float precision + elif input_dtype in [np.float16, np.float32, np.float64]: + expected_scaled_data = (np.clip(original_img_data_copy, 0.0, 1.0) * 255.0).astype(np.uint8) + assert np.allclose(saved_img_data, cv2.cvtColor(expected_scaled_data, cv2.COLOR_RGB2BGR), atol=1) + elif output_dtype_target == np.uint16: + if input_dtype == np.uint8: + expected_scaled_data = (original_img_data_copy.astype(np.float32) / 255.0 * 65535.0).astype(np.uint16) + assert np.allclose(saved_img_data, cv2.cvtColor(expected_scaled_data, cv2.COLOR_RGB2BGR), atol=1) + elif input_dtype in [np.float16, np.float32, np.float64]: + expected_scaled_data = (np.clip(original_img_data_copy, 0.0, 1.0) * 65535.0).astype(np.uint16) + assert np.allclose(saved_img_data, cv2.cvtColor(expected_scaled_data, cv2.COLOR_RGB2BGR), atol=1) + # Add more scaling checks for float16, float32 if necessary + + +# --- Tests for calculate_image_stats --- + +def test_calculate_image_stats_grayscale_uint8(): + img_data = np.array([[0, 128], [255, 10]], dtype=np.uint8) + # Expected normalized: [[0, 0.50196], [1.0, 0.03921]] approx + stats = ipu.calculate_image_stats(img_data) + assert stats is not None + assert np.isclose(stats["min"], 0/255.0) + assert np.isclose(stats["max"], 255/255.0) + assert np.isclose(stats["mean"], np.mean(img_data.astype(np.float64)/255.0)) + +def test_calculate_image_stats_color_uint8(): + img_data = np.array([ + [[0, 50, 100], [10, 60, 110]], + [[255, 128, 200], [20, 70, 120]] + ], dtype=np.uint8) + stats = ipu.calculate_image_stats(img_data) + assert stats is not None + # Min per channel (normalized) + assert np.allclose(stats["min"], [0/255.0, 50/255.0, 100/255.0]) + # Max per channel (normalized) + assert np.allclose(stats["max"], [255/255.0, 128/255.0, 200/255.0]) + # Mean per channel (normalized) + expected_mean = np.mean(img_data.astype(np.float64)/255.0, axis=(0,1)) + assert np.allclose(stats["mean"], expected_mean) + +def test_calculate_image_stats_grayscale_uint16(): + img_data = np.array([[0, 32768], [65535, 1000]], dtype=np.uint16) + stats = ipu.calculate_image_stats(img_data) + assert stats is not None + assert np.isclose(stats["min"], 0/65535.0) + assert np.isclose(stats["max"], 65535/65535.0) + assert np.isclose(stats["mean"], np.mean(img_data.astype(np.float64)/65535.0)) + +def test_calculate_image_stats_color_float32(): + # Floats are assumed to be in 0-1 range already by the function's normalization logic + img_data = np.array([ + [[0.0, 0.2, 0.4], [0.1, 0.3, 0.5]], + [[1.0, 0.5, 0.8], [0.05, 0.25, 0.6]] + ], dtype=np.float32) + stats = ipu.calculate_image_stats(img_data) + assert stats is not None + assert np.allclose(stats["min"], [0.0, 0.2, 0.4]) + assert np.allclose(stats["max"], [1.0, 0.5, 0.8]) + expected_mean = np.mean(img_data.astype(np.float64), axis=(0,1)) + assert np.allclose(stats["mean"], expected_mean) + +def test_calculate_image_stats_none_input(): + assert ipu.calculate_image_stats(None) is None + +def test_calculate_image_stats_unsupported_shape(): + img_data = np.zeros((2,2,2,2), dtype=np.uint8) # 4D array + assert ipu.calculate_image_stats(img_data) is None + +@mock.patch('numpy.mean', side_effect=Exception("Numpy error")) +def test_calculate_image_stats_exception_during_calculation(mock_np_mean): + img_data = np.array([[0, 128], [255, 10]], dtype=np.uint8) + stats = ipu.calculate_image_stats(img_data) + assert stats == {"error": "Error calculating image stats"} + +# Example of mocking ipu.load_image for a function that uses it (if calculate_image_stats used it) +# For the current calculate_image_stats, it takes image_data directly, so this is not needed for it. +# This is just an example as requested in the prompt for a hypothetical scenario. +@mock.patch('processing.utils.image_processing_utils.load_image') +def test_hypothetical_function_using_load_image(mock_load_image): + # This test is for a function that would call ipu.load_image internally + # e.g. def process_image_from_path(path): + # img_data = ipu.load_image(path) + # return ipu.calculate_image_stats(img_data) + + mock_img_data = np.array([[[0.5]]], dtype=np.float32) + mock_load_image.return_value = mock_img_data + + # result = ipu.hypothetical_process_image_from_path("dummy.png") + # mock_load_image.assert_called_once_with("dummy.png") + # assert result["mean"] == 0.5 + pass # This is a conceptual example \ No newline at end of file diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py new file mode 100644 index 0000000..cfc5ffa --- /dev/null +++ b/tests/utils/__init__.py @@ -0,0 +1 @@ +# This file makes the 'tests/utils' directory a Python package. \ No newline at end of file diff --git a/tests/utils/test_path_utils.py b/tests/utils/test_path_utils.py new file mode 100644 index 0000000..56f7a0a --- /dev/null +++ b/tests/utils/test_path_utils.py @@ -0,0 +1,252 @@ +import pytest +from pathlib import Path +from utils.path_utils import sanitize_filename, generate_path_from_pattern + +# Tests for sanitize_filename +def test_sanitize_filename_valid(): + assert sanitize_filename("valid_filename.txt") == "valid_filename.txt" + +def test_sanitize_filename_with_spaces(): + assert sanitize_filename("file name with spaces.txt") == "file_name_with_spaces.txt" + +def test_sanitize_filename_with_special_characters(): + assert sanitize_filename("file!@#$%^&*()[]{};:'\",.<>/?\\|.txt") == "file____________________.txt" + +def test_sanitize_filename_with_leading_trailing_whitespace(): + assert sanitize_filename(" filename_with_spaces .txt") == "filename_with_spaces.txt" + +def test_sanitize_filename_empty_string(): + assert sanitize_filename("") == "" + +def test_sanitize_filename_with_none(): + with pytest.raises(TypeError): + sanitize_filename(None) + +def test_sanitize_filename_mixed_case(): + assert sanitize_filename("MixedCaseFileName.PNG") == "MixedCaseFileName.PNG" + +def test_sanitize_filename_long_filename(): + long_name = "a" * 255 + ".txt" + # Assuming the function doesn't truncate, but sanitizes. + # If it's meant to handle OS limits, this test might need adjustment + # based on the function's specific behavior for long names. + assert sanitize_filename(long_name) == long_name + +def test_sanitize_filename_unicode_characters(): + assert sanitize_filename("文件名前缀_文件名_后缀.jpg") == "文件名前缀_文件名_后缀.jpg" + +def test_sanitize_filename_multiple_extensions(): + assert sanitize_filename("archive.tar.gz") == "archive.tar.gz" + +def test_sanitize_filename_no_extension(): + assert sanitize_filename("filename") == "filename" + +def test_sanitize_filename_only_special_chars(): + assert sanitize_filename("!@#$%^") == "______" + +def test_sanitize_filename_with_hyphens_and_underscores(): + assert sanitize_filename("file-name_with-hyphens_and_underscores.zip") == "file-name_with-hyphens_and_underscores.zip" + +# Tests for generate_path_from_pattern +def test_generate_path_basic(): + result = generate_path_from_pattern( + base_path="output", + pattern="{asset_name}/{map_type}/{filename}", + asset_name="MyAsset", + map_type="Diffuse", + filename="MyAsset_Diffuse.png", + source_rule_name="TestRule", + incrementing_value=None, + sha5_value=None + ) + expected = Path("output/MyAsset/Diffuse/MyAsset_Diffuse.png") + assert Path(result) == expected + +def test_generate_path_all_placeholders(): + result = generate_path_from_pattern( + base_path="project_files", + pattern="{source_rule_name}/{asset_name}/{map_type}_{incrementing_value}_{sha5_value}/{filename}", + asset_name="AnotherAsset", + map_type="Normal", + filename="NormalMap.tif", + source_rule_name="ComplexRule", + incrementing_value="001", + sha5_value="abcde" + ) + expected = Path("project_files/ComplexRule/AnotherAsset/Normal_001_abcde/NormalMap.tif") + assert Path(result) == expected + +def test_generate_path_optional_placeholders_none(): + result = generate_path_from_pattern( + base_path="data", + pattern="{asset_name}/{filename}", + asset_name="SimpleAsset", + map_type="Albedo", # map_type is in pattern but not used if not in string + filename="texture.jpg", + source_rule_name="Basic", + incrementing_value=None, + sha5_value=None + ) + expected = Path("data/SimpleAsset/texture.jpg") + assert Path(result) == expected + +def test_generate_path_optional_incrementing_value_present(): + result = generate_path_from_pattern( + base_path="assets", + pattern="{asset_name}/{map_type}/v{incrementing_value}/{filename}", + asset_name="VersionedAsset", + map_type="Specular", + filename="spec.png", + source_rule_name="VersioningRule", + incrementing_value="3", + sha5_value=None + ) + expected = Path("assets/VersionedAsset/Specular/v3/spec.png") + assert Path(result) == expected + +def test_generate_path_optional_sha5_value_present(): + result = generate_path_from_pattern( + base_path="cache", + pattern="{asset_name}/{sha5_value}/{filename}", + asset_name="HashedAsset", + map_type="Roughness", + filename="rough.exr", + source_rule_name="HashingRule", + incrementing_value=None, + sha5_value="f1234" + ) + expected = Path("cache/HashedAsset/f1234/rough.exr") + assert Path(result) == expected + +def test_generate_path_base_path_is_path_object(): + result = generate_path_from_pattern( + base_path=Path("output_path"), + pattern="{asset_name}/{filename}", + asset_name="ObjectAsset", + map_type="AO", + filename="ao.png", + source_rule_name="PathObjectRule", + incrementing_value=None, + sha5_value=None + ) + expected = Path("output_path/ObjectAsset/ao.png") + assert Path(result) == expected + +def test_generate_path_empty_pattern(): + result = generate_path_from_pattern( + base_path="output", + pattern="", # Empty pattern should just use base_path and filename + asset_name="MyAsset", + map_type="Diffuse", + filename="MyAsset_Diffuse.png", + source_rule_name="TestRule", + incrementing_value=None, + sha5_value=None + ) + expected = Path("output/MyAsset_Diffuse.png") + assert Path(result) == expected + +def test_generate_path_pattern_with_no_placeholders(): + result = generate_path_from_pattern( + base_path="fixed_output", + pattern="some/static/path", # Pattern has no placeholders + asset_name="MyAsset", + map_type="Diffuse", + filename="MyAsset_Diffuse.png", + source_rule_name="TestRule", + incrementing_value=None, + sha5_value=None + ) + expected = Path("fixed_output/some/static/path/MyAsset_Diffuse.png") + assert Path(result) == expected + +def test_generate_path_filename_with_subdirs_in_pattern(): + result = generate_path_from_pattern( + base_path="output", + pattern="{asset_name}", # Filename itself will be appended + asset_name="AssetWithSubdirFile", + map_type="Color", + filename="textures/variant1/color.png", # Filename contains subdirectories + source_rule_name="SubdirRule", + incrementing_value=None, + sha5_value=None + ) + # The function is expected to join pattern result with filename + expected = Path("output/AssetWithSubdirFile/textures/variant1/color.png") + assert Path(result) == expected + +def test_generate_path_no_filename_provided(): + # This test assumes that if filename is None or empty, it might raise an error + # or behave in a specific way, e.g. not append anything or use a default. + # Adjust based on actual function behavior for missing filename. + # For now, let's assume it might raise TypeError if filename is critical. + with pytest.raises(TypeError): # Or ValueError, depending on implementation + generate_path_from_pattern( + base_path="output", + pattern="{asset_name}/{map_type}", + asset_name="MyAsset", + map_type="Diffuse", + filename=None, # No filename + source_rule_name="TestRule", + incrementing_value=None, + sha5_value=None + ) + +def test_generate_path_all_values_are_empty_strings_or_none_where_applicable(): + result = generate_path_from_pattern( + base_path="", # Empty base_path + pattern="{asset_name}/{map_type}/{incrementing_value}/{sha5_value}", + asset_name="", # Empty asset_name + map_type="", # Empty map_type + filename="empty_test.file", + source_rule_name="", # Empty source_rule_name + incrementing_value="", # Empty incrementing_value + sha5_value="" # Empty sha5_value + ) + # Behavior with empty strings might vary. Assuming they are treated as literal empty segments. + # Path("///empty_test.file") might resolve to "/empty_test.file" on POSIX + # or just "empty_test.file" if base_path is current dir. + # Let's assume Path() handles normalization. + # If base_path is "", it means current directory. + # So, "//empty_test.file" relative to current dir. + # Path objects normalize this. e.g. Path('//a') -> Path('/a') on POSIX + # Path('a//b') -> Path('a/b') + # Path('/a//b') -> Path('/a/b') + # Path('//a//b') -> Path('/a/b') + # If base_path is empty, it's like Path('.////empty_test.file') + expected = Path("empty_test.file") # Simplified, actual result might be OS dependent or Path lib norm. + # More robust check: + # result_path = Path(result) + # expected_path = Path.cwd() / "" / "" / "" / "" / "empty_test.file" # This is not quite right + # Let's assume the function joins them: "" + "/" + "" + "/" + "" + "/" + "" + "/" + "empty_test.file" + # which becomes "////empty_test.file" + # Path("////empty_test.file") on Windows becomes "\\empty_test.file" (network path attempt) + # Path("////empty_test.file") on Linux becomes "/empty_test.file" + # Given the function likely uses os.path.join or Path.joinpath, + # and base_path="", asset_name="", map_type="", inc_val="", sha5_val="" + # pattern = "{asset_name}/{map_type}/{incrementing_value}/{sha5_value}" -> "///" + # result = base_path / pattern_result / filename + # result = "" / "///" / "empty_test.file" + # Path("") / "///" / "empty_test.file" -> Path("///empty_test.file") + # This is tricky. Let's assume the function is robust. + # If all path segments are empty, it should ideally resolve to just the filename relative to base_path. + # If base_path is also empty, then filename relative to CWD. + # Let's test the expected output based on typical os.path.join behavior: + # os.path.join("", "", "", "", "", "empty_test.file") -> "empty_test.file" on Windows + # os.path.join("", "", "", "", "", "empty_test.file") -> "empty_test.file" on Linux + assert Path(result) == Path("empty_test.file") + + +def test_generate_path_with_dots_in_placeholders(): + result = generate_path_from_pattern( + base_path="output", + pattern="{asset_name}/{map_type}", + asset_name="My.Asset.V1", + map_type="Diffuse.Main", + filename="texture.png", + source_rule_name="DotsRule", + incrementing_value=None, + sha5_value=None + ) + expected = Path("output/My.Asset.V1/Diffuse.Main/texture.png") + assert Path(result) == expected \ No newline at end of file diff --git a/utils/path_utils.py b/utils/path_utils.py index e20d3c2..b67929f 100644 --- a/utils/path_utils.py +++ b/utils/path_utils.py @@ -154,6 +154,15 @@ def get_next_incrementing_value(output_base_path: Path, output_directory_pattern logger.info(f"Determined next incrementing value: {next_value_str} (Max found: {max_value})") return next_value_str +def sanitize_filename(name: str) -> str: + """Removes or replaces characters invalid for filenames/directory names.""" + if not isinstance(name, str): name = str(name) + name = re.sub(r'[^\w.\-]+', '_', name) # Allow alphanumeric, underscore, hyphen, dot + name = re.sub(r'_+', '_', name) + name = name.strip('_') + if not name: name = "invalid_name" + return name + # --- Basic Unit Tests --- if __name__ == "__main__": print("Running basic tests for path_utils.generate_path_from_pattern...") -- 2.47.2 From deeb1595fdb5ef3b53403b8124f1d65d8c925d1f Mon Sep 17 00:00:00 2001 From: Rusfort Date: Fri, 9 May 2025 13:57:22 +0200 Subject: [PATCH 02/16] No crashes anymore :3 --- main.py | 21 + processing/pipeline/orchestrator.py | 32 +- .../stages/alpha_extraction_to_mask.py | 89 ++-- .../pipeline/stages/asset_skip_logic.py | 29 +- .../pipeline/stages/file_rule_filter.py | 56 ++- .../stages/gloss_to_rough_conversion.py | 57 ++- .../stages/individual_map_processing.py | 303 +++++++------ processing/pipeline/stages/map_merging.py | 397 ++++++++---------- .../stages/metadata_finalization_save.py | 81 ++-- .../stages/metadata_initialization.py | 38 +- .../stages/normal_map_green_channel.py | 117 +++--- .../pipeline/stages/output_organization.py | 233 ++++++---- .../pipeline/stages/supplier_determination.py | 37 +- processing/utils/image_processing_utils.py | 17 + processing_engine.py | 21 +- .../stages/test_alpha_extraction_to_mask.py | 2 +- .../stages/test_individual_map_processing.py | 2 +- .../pipeline/stages/test_map_merging.py | 2 +- 18 files changed, 857 insertions(+), 677 deletions(-) diff --git a/main.py b/main.py index 25f8049..1cc864a 100644 --- a/main.py +++ b/main.py @@ -25,18 +25,39 @@ from PySide6.QtWidgets import QApplication import sys import os sys.path.append(os.path.dirname(__file__)) +print(f"DEBUG: sys.path after append: {sys.path}") try: + print("DEBUG: Attempting to import Configuration...") from configuration import Configuration, ConfigurationError + print("DEBUG: Successfully imported Configuration.") + + print("DEBUG: Attempting to import ProcessingEngine...") from processing_engine import ProcessingEngine + print("DEBUG: Successfully imported ProcessingEngine.") + + print("DEBUG: Attempting to import SourceRule...") from rule_structure import SourceRule + print("DEBUG: Successfully imported SourceRule.") + + print("DEBUG: Attempting to import MainWindow...") from gui.main_window import MainWindow + print("DEBUG: Successfully imported MainWindow.") + + print("DEBUG: Attempting to import prepare_processing_workspace...") from utils.workspace_utils import prepare_processing_workspace + print("DEBUG: Successfully imported prepare_processing_workspace.") + except ImportError as e: script_dir = Path(__file__).parent.resolve() print(f"ERROR: Cannot import Configuration or rule_structure classes.") print(f"Ensure configuration.py and rule_structure.py are in the same directory or Python path.") print(f"ERROR: Failed to import necessary classes: {e}") + print(f"DEBUG: Exception type: {type(e)}") + print(f"DEBUG: Exception args: {e.args}") + import traceback + print("DEBUG: Full traceback of the ImportError:") + traceback.print_exc() print(f"Ensure 'configuration.py' and 'asset_processor.py' exist in the directory:") print(f" {script_dir}") print("Or that the directory is included in your PYTHONPATH.") diff --git a/processing/pipeline/orchestrator.py b/processing/pipeline/orchestrator.py index 6396f38..4d3cba7 100644 --- a/processing/pipeline/orchestrator.py +++ b/processing/pipeline/orchestrator.py @@ -61,15 +61,13 @@ class PipelineOrchestrator: # Create a temporary directory for this processing run if needed by any stage # This temp dir is for the entire source_rule processing, not per asset. # Individual stages might create their own sub-temp dirs if necessary. - temp_dir_path_str = tempfile.mkdtemp( - prefix="asset_processor_orchestrator_temp_", dir=self.config_obj.get_temp_directory_base() - ) + temp_dir_path_str = tempfile.mkdtemp(prefix=self.config_obj.temp_dir_prefix) engine_temp_dir_path = Path(temp_dir_path_str) - log.debug(f"PipelineOrchestrator created temporary directory: {engine_temp_dir_path}") + log.debug(f"PipelineOrchestrator created temporary directory: {engine_temp_dir_path} using prefix '{self.config_obj.temp_dir_prefix}'") for asset_rule in source_rule.assets: - log.debug(f"Orchestrator: Processing asset '{asset_rule.name}'") + log.debug(f"Orchestrator: Processing asset '{asset_rule.asset_name}'") context = AssetProcessingContext( source_rule=source_rule, asset_rule=asset_rule, @@ -85,43 +83,43 @@ class PipelineOrchestrator: config_obj=self.config_obj, status_flags={"skip_asset": False, "asset_failed": False}, # Initialize common flags incrementing_value=incrementing_value, - sha256_value=sha5_value # Parameter name in context is sha256_value + sha5_value=sha5_value ) for stage_idx, stage in enumerate(self.stages): - log.debug(f"Asset '{asset_rule.name}': Executing stage {stage_idx + 1}/{len(self.stages)}: {stage.__class__.__name__}") + log.debug(f"Asset '{asset_rule.asset_name}': Executing stage {stage_idx + 1}/{len(self.stages)}: {stage.__class__.__name__}") try: context = stage.execute(context) except Exception as e: - log.error(f"Asset '{asset_rule.name}': Error during stage '{stage.__class__.__name__}': {e}", exc_info=True) + log.error(f"Asset '{asset_rule.asset_name}': Error during stage '{stage.__class__.__name__}': {e}", exc_info=True) context.status_flags["asset_failed"] = True context.asset_metadata["status"] = f"Failed: Error in stage {stage.__class__.__name__}" context.asset_metadata["error_message"] = str(e) break # Stop processing stages for this asset on error if context.status_flags.get("skip_asset"): - log.info(f"Asset '{asset_rule.name}': Skipped by stage '{stage.__class__.__name__}'. Reason: {context.status_flags.get('skip_reason', 'N/A')}") + log.info(f"Asset '{asset_rule.asset_name}': Skipped by stage '{stage.__class__.__name__}'. Reason: {context.status_flags.get('skip_reason', 'N/A')}") break # Skip remaining stages for this asset # Refined status collection if context.status_flags.get('skip_asset'): - overall_status["skipped"].append(asset_rule.name) + overall_status["skipped"].append(asset_rule.asset_name) elif context.status_flags.get('asset_failed') or str(context.asset_metadata.get('status', '')).startswith("Failed"): - overall_status["failed"].append(asset_rule.name) + overall_status["failed"].append(asset_rule.asset_name) elif context.asset_metadata.get('status') == "Processed": - overall_status["processed"].append(asset_rule.name) + overall_status["processed"].append(asset_rule.asset_name) else: # Default or unknown state - log.warning(f"Asset '{asset_rule.name}': Unknown status after pipeline execution. Metadata status: '{context.asset_metadata.get('status')}'. Marking as failed.") - overall_status["failed"].append(f"{asset_rule.name} (Unknown Status: {context.asset_metadata.get('status')})") - log.debug(f"Asset '{asset_rule.name}' final status: {context.asset_metadata.get('status', 'N/A')}, Flags: {context.status_flags}") + log.warning(f"Asset '{asset_rule.asset_name}': Unknown status after pipeline execution. Metadata status: '{context.asset_metadata.get('status')}'. Marking as failed.") + overall_status["failed"].append(f"{asset_rule.asset_name} (Unknown Status: {context.asset_metadata.get('status')})") + log.debug(f"Asset '{asset_rule.asset_name}' final status: {context.asset_metadata.get('status', 'N/A')}, Flags: {context.status_flags}") except Exception as e: log.error(f"PipelineOrchestrator.process_source_rule failed: {e}", exc_info=True) # Mark all remaining assets as failed if a top-level error occurs processed_or_skipped_or_failed = set(overall_status["processed"] + overall_status["skipped"] + overall_status["failed"]) for asset_rule in source_rule.assets: - if asset_rule.name not in processed_or_skipped_or_failed: - overall_status["failed"].append(f"{asset_rule.name} (Orchestrator Error)") + if asset_rule.asset_name not in processed_or_skipped_or_failed: + overall_status["failed"].append(f"{asset_rule.asset_name} (Orchestrator Error)") finally: if engine_temp_dir_path and engine_temp_dir_path.exists(): try: diff --git a/processing/pipeline/stages/alpha_extraction_to_mask.py b/processing/pipeline/stages/alpha_extraction_to_mask.py index ca1ea38..8de310e 100644 --- a/processing/pipeline/stages/alpha_extraction_to_mask.py +++ b/processing/pipeline/stages/alpha_extraction_to_mask.py @@ -8,8 +8,8 @@ import numpy as np from .base_stage import ProcessingStage from ..asset_context import AssetProcessingContext from ...utils import image_processing_utils as ipu -from .....rule_structure import FileRule, TransformSettings -from .....utils.path_utils import sanitize_filename +from rule_structure import FileRule +from utils.path_utils import sanitize_filename logger = logging.getLogger(__name__) @@ -21,31 +21,34 @@ class AlphaExtractionToMaskStage(ProcessingStage): SUITABLE_SOURCE_MAP_TYPES = ["ALBEDO", "DIFFUSE", "BASE_COLOR"] # Map types likely to have alpha def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: - logger.debug(f"Asset '{context.asset_rule.name}': Running AlphaExtractionToMaskStage.") + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + logger.debug(f"Asset '{asset_name_for_log}': Running AlphaExtractionToMaskStage.") if context.status_flags.get('skip_asset'): - logger.debug(f"Asset '{context.asset_rule.name}': Skipping due to 'skip_asset' flag.") + logger.debug(f"Asset '{asset_name_for_log}': Skipping due to 'skip_asset' flag.") return context if not context.files_to_process or not context.processed_maps_details: logger.debug( - f"Asset '{context.asset_rule.name}': Skipping alpha extraction - " + f"Asset '{asset_name_for_log}': Skipping alpha extraction - " f"no files to process or no processed map details." ) return context # A. Check for Existing MASK Map for file_rule in context.files_to_process: - if file_rule.map_type == "MASK": + # Assuming file_rule has 'map_type' and 'file_path' (instead of filename_pattern) + if hasattr(file_rule, 'map_type') and file_rule.map_type == "MASK": + file_path_for_log = file_rule.file_path if hasattr(file_rule, 'file_path') else "Unknown file path" logger.info( - f"Asset '{context.asset_rule.name}': MASK map already defined by FileRule " - f"'{file_rule.filename_pattern}'. Skipping alpha extraction." + f"Asset '{asset_name_for_log}': MASK map already defined by FileRule " + f"for '{file_path_for_log}'. Skipping alpha extraction." ) return context # B. Find Suitable Source Map with Alpha source_map_details_for_alpha: Optional[Dict] = None - source_file_rule_id_for_alpha: Optional[str] = None + source_file_rule_id_for_alpha: Optional[str] = None # This ID comes from processed_maps_details keys for file_rule_id, details in context.processed_maps_details.items(): if details.get('status') == 'Processed' and \ @@ -54,33 +57,31 @@ class AlphaExtractionToMaskStage(ProcessingStage): temp_path = Path(details['temp_processed_file']) if not temp_path.exists(): logger.warning( - f"Asset '{context.asset_rule.name}': Temp file {temp_path} for map " + f"Asset '{asset_name_for_log}': Temp file {temp_path} for map " f"{details['map_type']} (ID: {file_rule_id}) does not exist. Cannot check for alpha." ) continue - # Load image header or minimal data to check for alpha if possible, - # otherwise load full image. ipu.load_image should handle this. - image_data = ipu.load_image(temp_path) + image_data = ipu.load_image(temp_path) if image_data is not None and image_data.ndim == 3 and image_data.shape[2] == 4: source_map_details_for_alpha = details source_file_rule_id_for_alpha = file_rule_id logger.info( - f"Asset '{context.asset_rule.name}': Found potential source for alpha extraction: " + f"Asset '{asset_name_for_log}': Found potential source for alpha extraction: " f"{temp_path} (MapType: {details['map_type']})" ) - break + break except Exception as e: logger.warning( - f"Asset '{context.asset_rule.name}': Error checking alpha for {details.get('temp_processed_file', 'N/A')}: {e}" + f"Asset '{asset_name_for_log}': Error checking alpha for {details.get('temp_processed_file', 'N/A')}: {e}" ) continue if source_map_details_for_alpha is None or source_file_rule_id_for_alpha is None: logger.info( - f"Asset '{context.asset_rule.name}': No suitable source map with alpha channel found " + f"Asset '{asset_name_for_log}': No suitable source map with alpha channel found " f"for MASK extraction." ) return context @@ -91,7 +92,7 @@ class AlphaExtractionToMaskStage(ProcessingStage): if full_image_data is None or not (full_image_data.ndim == 3 and full_image_data.shape[2] == 4): logger.error( - f"Asset '{context.asset_rule.name}': Failed to reload or verify alpha channel from " + f"Asset '{asset_name_for_log}': Failed to reload or verify alpha channel from " f"{source_image_path} for MASK extraction." ) return context @@ -99,15 +100,13 @@ class AlphaExtractionToMaskStage(ProcessingStage): alpha_channel: np.ndarray = full_image_data[:, :, 3] # Extract alpha (0-255) # D. Save New Temporary MASK Map - # Ensure the mask is a 2D grayscale image. If ipu.save_image expects 3 channels for grayscale, adapt. - # Assuming ipu.save_image can handle a 2D numpy array for a grayscale image. if alpha_channel.ndim == 2: # Expected pass elif alpha_channel.ndim == 3 and alpha_channel.shape[2] == 1: # (H, W, 1) alpha_channel = alpha_channel.squeeze(axis=2) else: logger.error( - f"Asset '{context.asset_rule.name}': Extracted alpha channel has unexpected dimensions: " + f"Asset '{asset_name_for_log}': Extracted alpha channel has unexpected dimensions: " f"{alpha_channel.shape}. Cannot save." ) return context @@ -122,54 +121,54 @@ class AlphaExtractionToMaskStage(ProcessingStage): if not save_success: logger.error( - f"Asset '{context.asset_rule.name}': Failed to save extracted alpha mask to {mask_temp_path}." + f"Asset '{asset_name_for_log}': Failed to save extracted alpha mask to {mask_temp_path}." ) return context logger.info( - f"Asset '{context.asset_rule.name}': Extracted alpha and saved as new MASK map: {mask_temp_path}" + f"Asset '{asset_name_for_log}': Extracted alpha and saved as new MASK map: {mask_temp_path}" ) # E. Create New FileRule for the MASK and Update Context - new_mask_file_rule_id_obj = uuid.uuid4() - new_mask_file_rule_id_str = str(new_mask_file_rule_id_obj) # Use string for FileRule.id - new_mask_file_rule_id_hex = new_mask_file_rule_id_obj.hex # Use hex for dict key - + # FileRule does not have id, active, transform_settings, source_map_ids_for_generation + # It has file_path, item_type, item_type_override, etc. new_mask_file_rule = FileRule( - id=new_mask_file_rule_id_str, - map_type="MASK", - filename_pattern=mask_temp_path.name, # Pattern matches the generated temp file - item_type="MAP_COL", # Considered a collected map post-generation - active=True, - transform_settings=TransformSettings(), # Default transform settings - source_map_ids_for_generation=[source_file_rule_id_for_alpha] # Link to original source - # Ensure other necessary FileRule fields are defaulted or set if required + file_path=mask_temp_path.name, # Use file_path + item_type="MAP_MASK", # This should be the item_type for a mask + map_type="MASK" # Explicitly set map_type if FileRule has it, or handle via item_type + # Other FileRule fields like item_type_override can be set if needed ) + # If FileRule needs a unique identifier, it should be handled differently, + # perhaps by generating one and storing it in common_metadata or a separate mapping. + # For now, we create a simple FileRule. context.files_to_process.append(new_mask_file_rule) + # For processed_maps_details, we need a unique key. Using a new UUID. + new_mask_processed_map_key = uuid.uuid4().hex + original_dims = source_map_details_for_alpha.get('original_dimensions') if original_dims is None and full_image_data is not None: # Fallback if not in details original_dims = (full_image_data.shape[1], full_image_data.shape[0]) - context.processed_maps_details[new_mask_file_rule_id_hex] = { + context.processed_maps_details[new_mask_processed_map_key] = { 'map_type': "MASK", - 'source_file': str(source_image_path), # Original RGBA map path - 'temp_processed_file': str(mask_temp_path), # Path to the new MASK map - 'original_dimensions': original_dims, # Dimensions of the source image - 'processed_dimensions': (alpha_channel.shape[1], alpha_channel.shape[0]), # Dimensions of MASK - 'status': 'Processed', # This map is now considered processed + 'source_file': str(source_image_path), + 'temp_processed_file': str(mask_temp_path), + 'original_dimensions': original_dims, + 'processed_dimensions': (alpha_channel.shape[1], alpha_channel.shape[0]), + 'status': 'Processed', 'notes': ( f"Generated from alpha of {source_map_details_for_alpha['map_type']} " - f"(Source Rule ID: {source_file_rule_id_for_alpha})" + f"(Source Detail ID: {source_file_rule_id_for_alpha})" # Changed from Source Rule ID ), - 'file_rule_id': new_mask_file_rule_id_str # Link back to the new FileRule ID + # 'file_rule_id': new_mask_file_rule_id_str # FileRule doesn't have an ID to link here directly } logger.info( - f"Asset '{context.asset_rule.name}': Added new FileRule for generated MASK " - f"(ID: {new_mask_file_rule_id_str}) and updated processed_maps_details." + f"Asset '{asset_name_for_log}': Added new FileRule for generated MASK " + f"and updated processed_maps_details with key '{new_mask_processed_map_key}'." ) return context \ No newline at end of file diff --git a/processing/pipeline/stages/asset_skip_logic.py b/processing/pipeline/stages/asset_skip_logic.py index afb5b3c..0c176fe 100644 --- a/processing/pipeline/stages/asset_skip_logic.py +++ b/processing/pipeline/stages/asset_skip_logic.py @@ -1,6 +1,6 @@ import logging -from ..base_stage import ProcessingStage -from ...asset_context import AssetProcessingContext +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext class AssetSkipLogicStage(ProcessingStage): """ @@ -17,31 +17,38 @@ class AssetSkipLogicStage(ProcessingStage): The updated asset processing context. """ context.status_flags['skip_asset'] = False # Initialize/reset skip flag + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" # 1. Check for Supplier Error # Assuming 'supplier_error' might be set by a previous stage (e.g., SupplierDeterminationStage) # or if effective_supplier is None after attempts to determine it. if context.effective_supplier is None or context.status_flags.get('supplier_error', False): - logging.info(f"Asset '{context.asset_rule.name}': Skipping due to missing or invalid supplier.") + logging.info(f"Asset '{asset_name_for_log}': Skipping due to missing or invalid supplier.") context.status_flags['skip_asset'] = True context.status_flags['skip_reason'] = "Invalid or missing supplier" return context - # 2. Check asset_rule.process_status - if context.asset_rule.process_status == "SKIP": - logging.info(f"Asset '{context.asset_rule.name}': Skipping as per process_status 'SKIP'.") + # 2. Check process_status in asset_rule.common_metadata + process_status = context.asset_rule.common_metadata.get('process_status') + + if process_status == "SKIP": + logging.info(f"Asset '{asset_name_for_log}': Skipping as per common_metadata.process_status 'SKIP'.") context.status_flags['skip_asset'] = True - context.status_flags['skip_reason'] = "Process status set to SKIP" + context.status_flags['skip_reason'] = "Process status set to SKIP in common_metadata" return context - if context.asset_rule.process_status == "PROCESSED" and \ - not context.config_obj.general_settings.overwrite_existing: + # Assuming context.config_obj.general_settings.overwrite_existing is a valid path. + # This might need adjustment if 'general_settings' or 'overwrite_existing' is not found. + # For now, we'll assume it's correct based on the original code's intent. + if process_status == "PROCESSED" and \ + hasattr(context.config_obj, 'general_settings') and \ + not getattr(context.config_obj.general_settings, 'overwrite_existing', True): # Default to True (allow overwrite) if not found logging.info( - f"Asset '{context.asset_rule.name}': Skipping as it's already 'PROCESSED' " + f"Asset '{asset_name_for_log}': Skipping as it's already 'PROCESSED' (from common_metadata) " f"and overwrite is disabled." ) context.status_flags['skip_asset'] = True - context.status_flags['skip_reason'] = "Already processed, overwrite disabled" + context.status_flags['skip_reason'] = "Already processed (common_metadata), overwrite disabled" return context # If none of the above conditions are met, skip_asset remains False. diff --git a/processing/pipeline/stages/file_rule_filter.py b/processing/pipeline/stages/file_rule_filter.py index b7ae7c3..b6785c8 100644 --- a/processing/pipeline/stages/file_rule_filter.py +++ b/processing/pipeline/stages/file_rule_filter.py @@ -2,9 +2,9 @@ import logging import fnmatch from typing import List, Set -from ..base_stage import ProcessingStage -from ...asset_context import AssetProcessingContext -from .....rule_structure import FileRule +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from rule_structure import FileRule class FileRuleFilterStage(ProcessingStage): @@ -23,46 +23,56 @@ class FileRuleFilterStage(ProcessingStage): Returns: The modified AssetProcessingContext. """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" if context.status_flags.get('skip_asset'): - logging.debug(f"Asset '{context.asset_rule.name}': Skipping FileRuleFilterStage due to 'skip_asset' flag.") + logging.debug(f"Asset '{asset_name_for_log}': Skipping FileRuleFilterStage due to 'skip_asset' flag.") return context context.files_to_process: List[FileRule] = [] ignore_patterns: Set[str] = set() # Step 1: Collect all FILE_IGNORE patterns - if context.asset_rule and context.asset_rule.file_rules: - for file_rule in context.asset_rule.file_rules: - if file_rule.item_type == "FILE_IGNORE" and file_rule.active: - ignore_patterns.add(file_rule.filename_pattern) - logging.debug( - f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{file_rule.filename_pattern}'" - ) + if context.asset_rule and context.asset_rule.files: + for file_rule in context.asset_rule.files: + if file_rule.item_type == "FILE_IGNORE": # Removed 'and file_rule.active' + if hasattr(file_rule, 'file_path') and file_rule.file_path: + ignore_patterns.add(file_rule.file_path) + logging.debug( + f"Asset '{asset_name_for_log}': Registering ignore pattern: '{file_rule.file_path}'" + ) + else: + logging.warning(f"Asset '{asset_name_for_log}': FILE_IGNORE rule found without a file_path. Skipping this ignore rule.") else: - logging.debug(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': No file rules to process or asset_rule is None.") + logging.debug(f"Asset '{asset_name_for_log}': No file rules (context.asset_rule.files) to process or asset_rule is None.") # Still need to return context even if there are no rules - logging.info(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': 0 file rules queued for processing after filtering.") + logging.info(f"Asset '{asset_name_for_log}': 0 file rules queued for processing after filtering.") return context # Step 2: Filter and add processable FileRules - for file_rule in context.asset_rule.file_rules: - if not file_rule.active: - logging.debug( - f"Asset '{context.asset_rule.name}': Skipping inactive file rule '{file_rule.filename_pattern}'." - ) - continue + for file_rule in context.asset_rule.files: # Iterate over .files + # Removed 'if not file_rule.active:' check if file_rule.item_type == "FILE_IGNORE": # Already processed, skip. continue is_ignored = False + # Ensure file_rule.file_path exists before using it with fnmatch + current_file_path = file_rule.file_path if hasattr(file_rule, 'file_path') else None + if not current_file_path: + logging.warning(f"Asset '{asset_name_for_log}': FileRule found without a file_path. Skipping this rule for ignore matching.") + # Decide if this rule should be added or skipped if it has no path + # For now, let's assume it might be an error and not add it if it can't be matched. + # If it should be added by default, this logic needs adjustment. + continue + + for ignore_pat in ignore_patterns: - if fnmatch.fnmatch(file_rule.filename_pattern, ignore_pat): + if fnmatch.fnmatch(current_file_path, ignore_pat): is_ignored = True logging.debug( - f"Asset '{context.asset_rule.name}': Skipping file rule '{file_rule.filename_pattern}' " + f"Asset '{asset_name_for_log}': Skipping file rule for '{current_file_path}' " f"due to matching ignore pattern '{ignore_pat}'." ) break @@ -70,11 +80,11 @@ class FileRuleFilterStage(ProcessingStage): if not is_ignored: context.files_to_process.append(file_rule) logging.debug( - f"Asset '{context.asset_rule.name}': Adding file rule '{file_rule.filename_pattern}' " + f"Asset '{asset_name_for_log}': Adding file rule for '{current_file_path}' " f"(type: {file_rule.item_type}) to processing queue." ) logging.info( - f"Asset '{context.asset_rule.name}': {len(context.files_to_process)} file rules queued for processing after filtering." + f"Asset '{asset_name_for_log}': {len(context.files_to_process)} file rules queued for processing after filtering." ) return context \ No newline at end of file diff --git a/processing/pipeline/stages/gloss_to_rough_conversion.py b/processing/pipeline/stages/gloss_to_rough_conversion.py index d99f06a..7c61919 100644 --- a/processing/pipeline/stages/gloss_to_rough_conversion.py +++ b/processing/pipeline/stages/gloss_to_rough_conversion.py @@ -5,9 +5,9 @@ from typing import List from .base_stage import ProcessingStage from ..asset_context import AssetProcessingContext -from ...rule_structure import FileRule -from ..utils import image_processing_utils as ipu -from ...utils.path_utils import sanitize_filename +from rule_structure import FileRule +from ...utils import image_processing_utils as ipu +from utils.path_utils import sanitize_filename logger = logging.getLogger(__name__) @@ -30,13 +30,14 @@ class GlossToRoughConversionStage(ProcessingStage): Returns: The updated AssetProcessingContext. """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" if context.status_flags.get('skip_asset'): - logger.debug(f"Asset '{context.asset_rule.name}': Skipping GlossToRoughConversionStage due to skip_asset flag.") + logger.debug(f"Asset '{asset_name_for_log}': Skipping GlossToRoughConversionStage due to skip_asset flag.") return context if not context.files_to_process or not context.processed_maps_details: logger.debug( - f"Asset '{context.asset_rule.name}': No files to process or processed_maps_details empty " + f"Asset '{asset_name_for_log}': No files to process or processed_maps_details empty " f"in GlossToRoughConversionStage. Skipping." ) return context @@ -44,14 +45,23 @@ class GlossToRoughConversionStage(ProcessingStage): new_files_to_process: List[FileRule] = [] processed_a_gloss_map = False - logger.info(f"Asset '{context.asset_rule.name}': Starting Gloss to Roughness Conversion Stage.") + logger.info(f"Asset '{asset_name_for_log}': Starting Gloss to Roughness Conversion Stage.") for idx, file_rule in enumerate(context.files_to_process): - if file_rule.map_type == "GLOSS": + # Assuming FileRule has 'map_type' and 'id' (with a .hex attribute) and 'source_file_path' + # These might need to be checked with hasattr if they are optional or could be missing + if hasattr(file_rule, 'map_type') and file_rule.map_type == "GLOSS": + if not hasattr(file_rule, 'id') or not hasattr(file_rule.id, 'hex'): + logger.warning(f"Asset '{asset_name_for_log}': GLOSS FileRule missing 'id.hex'. Skipping conversion for this rule: {file_rule}") + new_files_to_process.append(file_rule) + continue map_detail_key = file_rule.id.hex + + source_file_path_for_log = file_rule.source_file_path if hasattr(file_rule, 'source_file_path') else "Unknown source path" + if map_detail_key not in context.processed_maps_details: logger.warning( - f"Asset '{context.asset_rule.name}': GLOSS map '{file_rule.source_file_path}' " + f"Asset '{asset_name_for_log}': GLOSS map '{source_file_path_for_log}' " f"(ID: {map_detail_key}) found in files_to_process but not in processed_maps_details. " f"Adding original rule and skipping conversion for this map." ) @@ -62,7 +72,7 @@ class GlossToRoughConversionStage(ProcessingStage): if map_details.get('status') != 'Processed' or 'temp_processed_file' not in map_details: logger.warning( - f"Asset '{context.asset_rule.name}': GLOSS map '{file_rule.source_file_path}' " + f"Asset '{asset_name_for_log}': GLOSS map '{source_file_path_for_log}' " f"(ID: {map_detail_key}) not successfully processed by previous stage or temp file missing. " f"Status: {map_details.get('status')}. Adding original rule and skipping conversion." ) @@ -74,18 +84,18 @@ class GlossToRoughConversionStage(ProcessingStage): if not original_temp_path.exists(): logger.error( - f"Asset '{context.asset_rule.name}': Temporary file {original_temp_path_str} for GLOSS map " + f"Asset '{asset_name_for_log}': Temporary file {original_temp_path_str} for GLOSS map " f"(ID: {map_detail_key}) does not exist. Adding original rule and skipping conversion." ) new_files_to_process.append(file_rule) continue - logger.debug(f"Asset '{context.asset_rule.name}': Processing GLOSS map {original_temp_path} for conversion.") + logger.debug(f"Asset '{asset_name_for_log}': Processing GLOSS map {original_temp_path} for conversion.") image_data = ipu.load_image(original_temp_path) if image_data is None: logger.error( - f"Asset '{context.asset_rule.name}': Failed to load image data from {original_temp_path} " + f"Asset '{asset_name_for_log}': Failed to load image data from {original_temp_path} " f"for GLOSS map (ID: {map_detail_key}). Adding original rule and skipping conversion." ) new_files_to_process.append(file_rule) @@ -96,14 +106,14 @@ class GlossToRoughConversionStage(ProcessingStage): if np.issubdtype(image_data.dtype, np.floating): inverted_image_data = 1.0 - image_data inverted_image_data = np.clip(inverted_image_data, 0.0, 1.0) # Ensure range for floats - logger.debug(f"Asset '{context.asset_rule.name}': Inverted float image data for {original_temp_path}.") + logger.debug(f"Asset '{asset_name_for_log}': Inverted float image data for {original_temp_path}.") elif np.issubdtype(image_data.dtype, np.integer): max_val = np.iinfo(image_data.dtype).max inverted_image_data = max_val - image_data - logger.debug(f"Asset '{context.asset_rule.name}': Inverted integer image data (max_val: {max_val}) for {original_temp_path}.") + logger.debug(f"Asset '{asset_name_for_log}': Inverted integer image data (max_val: {max_val}) for {original_temp_path}.") else: logger.error( - f"Asset '{context.asset_rule.name}': Unsupported image data type {image_data.dtype} " + f"Asset '{asset_name_for_log}': Unsupported image data type {image_data.dtype} " f"for GLOSS map {original_temp_path}. Cannot invert. Adding original rule." ) new_files_to_process.append(file_rule) @@ -111,19 +121,22 @@ class GlossToRoughConversionStage(ProcessingStage): # Save New Temporary (Roughness) Map # Using original_temp_path.suffix ensures we keep the format (e.g., .png, .exr) - new_temp_filename = f"rough_from_gloss_{sanitize_filename(file_rule.map_type)}_{file_rule.id.hex}{original_temp_path.suffix}" + # Ensure file_rule.map_type exists before using sanitize_filename + map_type_for_filename = file_rule.map_type if hasattr(file_rule, 'map_type') else "unknownmaptype" + new_temp_filename = f"rough_from_gloss_{sanitize_filename(map_type_for_filename)}_{file_rule.id.hex}{original_temp_path.suffix}" new_temp_path = context.engine_temp_dir / new_temp_filename save_success = ipu.save_image(new_temp_path, inverted_image_data) if save_success: logger.info( - f"Asset '{context.asset_rule.name}': Converted GLOSS map {original_temp_path} " + f"Asset '{asset_name_for_log}': Converted GLOSS map {original_temp_path} " f"to ROUGHNESS map {new_temp_path}." ) - modified_file_rule = file_rule.model_copy(deep=True) - modified_file_rule.map_type = "ROUGHNESS" + # Assuming FileRule has model_copy method + modified_file_rule = file_rule.model_copy(deep=True) if hasattr(file_rule, 'model_copy') else file_rule + modified_file_rule.map_type = "ROUGHNESS" # Ensure map_type can be set # Update context.processed_maps_details for the original file_rule.id.hex context.processed_maps_details[map_detail_key]['temp_processed_file'] = str(new_temp_path) @@ -134,7 +147,7 @@ class GlossToRoughConversionStage(ProcessingStage): processed_a_gloss_map = True else: logger.error( - f"Asset '{context.asset_rule.name}': Failed to save inverted ROUGHNESS map to {new_temp_path} " + f"Asset '{asset_name_for_log}': Failed to save inverted ROUGHNESS map to {new_temp_path} " f"for original GLOSS map (ID: {map_detail_key}). Adding original rule." ) new_files_to_process.append(file_rule) @@ -145,11 +158,11 @@ class GlossToRoughConversionStage(ProcessingStage): if processed_a_gloss_map: logger.info( - f"Asset '{context.asset_rule.name}': Gloss to Roughness conversion stage successfully processed one or more maps and updated file list." + f"Asset '{asset_name_for_log}': Gloss to Roughness conversion stage successfully processed one or more maps and updated file list." ) else: logger.debug( - f"Asset '{context.asset_rule.name}': No gloss maps were successfully converted in GlossToRoughConversionStage. " + f"Asset '{asset_name_for_log}': No gloss maps were successfully converted in GlossToRoughConversionStage. " f"File list for next stage contains original non-gloss maps and any gloss maps that failed conversion." ) diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py index 72552c4..96a7ffa 100644 --- a/processing/pipeline/stages/individual_map_processing.py +++ b/processing/pipeline/stages/individual_map_processing.py @@ -1,3 +1,5 @@ +import uuid +import dataclasses import os import logging from pathlib import Path @@ -6,10 +8,10 @@ from typing import Optional, Tuple, Dict import cv2 import numpy as np -from ..base_stage import ProcessingStage +from .base_stage import ProcessingStage from ..asset_context import AssetProcessingContext -from ....rule_structure import FileRule, TransformSettings -from ....utils.path_utils import sanitize_filename +from rule_structure import FileRule +from utils.path_utils import sanitize_filename from ...utils import image_processing_utils as ipu logger = logging.getLogger(__name__) @@ -26,220 +28,245 @@ class IndividualMapProcessingStage(ProcessingStage): """ Executes the individual map processing logic. """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" if context.status_flags.get('skip_asset', False): - logger.info(f"Asset {context.asset_id}: Skipping individual map processing due to skip_asset flag.") + logger.info(f"Asset '{asset_name_for_log}': Skipping individual map processing due to skip_asset flag.") return context if not hasattr(context, 'processed_maps_details') or context.processed_maps_details is None: context.processed_maps_details = {} - logger.debug(f"Asset {context.asset_id}: Initialized processed_maps_details.") + logger.debug(f"Asset '{asset_name_for_log}': Initialized processed_maps_details.") if not context.files_to_process: - logger.info(f"Asset {context.asset_id}: No files to process in this stage.") + logger.info(f"Asset '{asset_name_for_log}': No files to process in this stage.") return context - source_base_path = Path(context.asset_rule.source_path) - if not source_base_path.is_dir(): - logger.error(f"Asset {context.asset_id}: Source path '{source_base_path}' is not a valid directory. Skipping individual map processing.") + # Source path for the asset group comes from SourceRule + if not context.source_rule or not context.source_rule.input_path: + logger.error(f"Asset '{asset_name_for_log}': SourceRule or SourceRule.input_path is not set. Cannot determine source base path.") context.status_flags['individual_map_processing_failed'] = True - # Potentially mark all file_rules as failed if source path is invalid - for file_rule in context.files_to_process: - if file_rule.item_type.startswith("MAP_"): # General check for map types - self._update_file_rule_status(context, file_rule.id.hex, 'Failed', details="Source path invalid") + # Mark all file_rules as failed + for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): + temp_id_for_fail = f"fr_fail_{fr_idx}" # Temporary ID for status update + map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" + self._update_file_rule_status(context, temp_id_for_fail, 'Failed', map_type=map_type_for_fail, details="SourceRule.input_path missing") + return context + + # The workspace_path in the context should be the directory where files are extracted/available. + source_base_path = context.workspace_path + if not source_base_path.is_dir(): + logger.error(f"Asset '{asset_name_for_log}': Workspace path '{source_base_path}' is not a valid directory.") + context.status_flags['individual_map_processing_failed'] = True + for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): + temp_id_for_fail = f"fr_fail_{fr_idx}" # Use a temporary unique ID for this status update + map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" + self._update_file_rule_status(context, temp_id_for_fail, 'Failed', map_type=map_type_for_fail, details="Workspace path invalid") return context - for file_rule in context.files_to_process: - # Primarily focus on "MAP_COL", "MAP_NORM", "MAP_ROUGH", etc. - # For now, let's assume any item_type starting with "MAP_" is a candidate - # unless it's specifically handled by another stage (e.g., "MAP_GEN" might be). - # The prompt mentions "MAP_COL" primarily. - # Let's be a bit more specific for now, focusing on types that are typically direct file mappings. - # This can be refined based on how `item_type` is used for generated maps. - # For now, we'll process any `FileRule` that isn't explicitly a generated map type - # that would be handled *after* individual processing (e.g. a composite map). - # A simple check for now: - if not file_rule.item_type or not file_rule.item_type.startswith("MAP_") or file_rule.item_type == "MAP_GEN_COMPOSITE": # Example exclusion - logger.debug(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Skipping, item_type '{file_rule.item_type}' not targeted for individual processing.") + for file_rule_idx, file_rule in enumerate(context.files_to_process): + # Generate a unique ID for this file_rule processing instance for processed_maps_details + current_map_id_hex = f"map_{file_rule_idx}_{uuid.uuid4().hex[:8]}" + + current_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" + + if not current_map_type or not current_map_type.startswith("MAP_") or current_map_type == "MAP_GEN_COMPOSITE": + logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}': Skipping, item_type '{current_map_type}' not targeted for individual processing.") continue - logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Starting individual processing.") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Type: {current_map_type}, ID: {current_map_id_hex}): Starting individual processing.") - # A. Find Source File - source_file_path = self._find_source_file(source_base_path, file_rule.filename_pattern, context.asset_id, file_rule.id.hex) + # A. Find Source File (using file_rule.file_path as the pattern relative to source_base_path) + # The _find_source_file might need adjustment if file_rule.file_path is absolute or needs complex globbing. + # For now, assume file_rule.file_path is a relative pattern or exact name. + source_file_path = self._find_source_file(source_base_path, file_rule.file_path, asset_name_for_log, current_map_id_hex) if not source_file_path: - logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Source file not found with pattern '{file_rule.filename_pattern}' in '{source_base_path}'.") - self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, details="Source file not found") + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Source file not found with path/pattern '{file_rule.file_path}' in '{source_base_path}'.") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, details="Source file not found") continue # B. Load and Transform Image image_data: Optional[np.ndarray] = ipu.load_image(str(source_file_path)) if image_data is None: - logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Failed to load image from '{source_file_path}'.") - self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, source_file=str(source_file_path), details="Image load failed") + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to load image from '{source_file_path}'.") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), details="Image load failed") continue original_height, original_width = image_data.shape[:2] - logger.debug(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Loaded image '{source_file_path}' with dimensions {original_width}x{original_height}.") + logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Loaded image '{source_file_path}' with dimensions {original_width}x{original_height}.") + + # Initialize transform settings with defaults + transform_settings = { + "target_width": 2048, + "target_height": None, + "resize_mode": "fit", + "ensure_pot": False, + "allow_upscale": False, + "resize_filter": "AREA", + "color_profile_management": False, + "target_color_profile": "sRGB", + "output_format_settings": None + } + + # Attempt to load transform settings from file_rule.channel_merge_instructions + if file_rule.channel_merge_instructions and 'transform' in file_rule.channel_merge_instructions: + custom_transform_settings = file_rule.channel_merge_instructions['transform'] + if isinstance(custom_transform_settings, dict): + transform_settings.update(custom_transform_settings) + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Loaded transform settings from file_rule.channel_merge_instructions.") + else: + logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): 'transform' in channel_merge_instructions is not a dictionary. Using defaults.") + # TODO: Implement fallback to context.config_obj for global/item_type specific transform settings + # else: + # # Example: config_transforms = context.config_obj.get_transform_settings(file_rule.item_type or file_rule.item_type_override) + # # if config_transforms: + # # transform_settings.update(config_transforms) - transform: TransformSettings = file_rule.transform_settings - target_width, target_height = ipu.calculate_target_dimensions( original_width, original_height, - transform.target_width, transform.target_height, - transform.resize_mode, - transform.ensure_pot, - transform.allow_upscale + transform_settings['target_width'], transform_settings['target_height'], + transform_settings['resize_mode'], + transform_settings['ensure_pot'], + transform_settings['allow_upscale'] ) - logger.debug(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Original dims: ({original_width},{original_height}), Calculated target dims: ({target_width},{target_height})") + logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Original dims: ({original_width},{original_height}), Calculated target dims: ({target_width},{target_height}) using sourced transforms.") - processed_image_data = image_data.copy() # Start with a copy + processed_image_data = image_data.copy() if (target_width, target_height) != (original_width, original_height): - logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Resizing from ({original_width},{original_height}) to ({target_width},{target_height}).") - # Map resize_filter string to cv2 interpolation constant - interpolation_map = { - "NEAREST": cv2.INTER_NEAREST, - "LINEAR": cv2.INTER_LINEAR, - "CUBIC": cv2.INTER_CUBIC, - "AREA": cv2.INTER_AREA, # Good for downscaling - "LANCZOS4": cv2.INTER_LANCZOS4 - } - interpolation = interpolation_map.get(transform.resize_filter.upper(), cv2.INTER_AREA) # Default to INTER_AREA + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Resizing from ({original_width},{original_height}) to ({target_width},{target_height}).") + interpolation_map = {"NEAREST": cv2.INTER_NEAREST, "LINEAR": cv2.INTER_LINEAR, "CUBIC": cv2.INTER_CUBIC, "AREA": cv2.INTER_AREA, "LANCZOS4": cv2.INTER_LANCZOS4} + interpolation = interpolation_map.get(transform_settings['resize_filter'].upper(), cv2.INTER_AREA) processed_image_data = ipu.resize_image(processed_image_data, target_width, target_height, interpolation=interpolation) - if processed_image_data is None: # Should not happen if resize_image handles errors, but good practice - logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Failed to resize image.") - self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), details="Image resize failed") + if processed_image_data is None: + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to resize image.") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), details="Image resize failed") continue - - # Color Space Conversion (simplified) - # Assuming ipu.load_image loads as BGR if color. - # This needs more robust handling of source color profiles if they are known. - if transform.color_profile_management and transform.target_color_profile == "RGB": - if len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 3: # Check if it's a color image - logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Converting BGR to RGB.") + if transform_settings['color_profile_management'] and transform_settings['target_color_profile'] == "RGB": + if len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 3: + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGR to RGB.") processed_image_data = ipu.convert_bgr_to_rgb(processed_image_data) - elif len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 4: # Check for BGRA - logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex}: Converting BGRA to RGBA.") + elif len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 4: + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGRA to RGBA.") processed_image_data = ipu.convert_bgra_to_rgba(processed_image_data) - - # C. Save Temporary Processed Map - # Ensure engine_temp_dir exists (orchestrator should handle this, but good to be safe) if not context.engine_temp_dir.exists(): try: context.engine_temp_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Asset {context.asset_id}: Created engine_temp_dir at '{context.engine_temp_dir}'") + logger.info(f"Asset '{asset_name_for_log}': Created engine_temp_dir at '{context.engine_temp_dir}'") except OSError as e: - logger.error(f"Asset {context.asset_id}: Failed to create engine_temp_dir '{context.engine_temp_dir}': {e}") - self._update_file_rule_status(context, file_rule.id.hex, 'Failed', map_type=file_rule.map_type, source_file=str(source_file_path), details="Failed to create temp directory") - continue # Or potentially fail the whole asset processing here + logger.error(f"Asset '{asset_name_for_log}': Failed to create engine_temp_dir '{context.engine_temp_dir}': {e}") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), details="Failed to create temp directory") + continue temp_filename_suffix = Path(source_file_path).suffix - # Use a more descriptive name if possible, including map_type - safe_map_type = sanitize_filename(file_rule.map_type if file_rule.map_type else "unknown_map") - temp_output_filename = f"processed_{safe_map_type}_{file_rule.id.hex}{temp_filename_suffix}" + safe_map_type_filename = sanitize_filename(current_map_type) + temp_output_filename = f"processed_{safe_map_type_filename}_{current_map_id_hex}{temp_filename_suffix}" temp_output_path = context.engine_temp_dir / temp_output_filename - # Consider output_format_settings from transform if they apply here - # For now, save_image handles basic saving. - # Example: cv2.imwrite params for quality for JPG, compression for PNG save_params = [] - if transform.output_format_settings: + if transform_settings['output_format_settings']: if temp_filename_suffix.lower() in ['.jpg', '.jpeg']: - quality = transform.output_format_settings.get('quality', 95) + quality = transform_settings['output_format_settings'].get('quality', 95) save_params = [cv2.IMWRITE_JPEG_QUALITY, quality] elif temp_filename_suffix.lower() == '.png': - compression = transform.output_format_settings.get('compression_level', 3) # 0-9, 3 is default + compression = transform_settings['output_format_settings'].get('compression_level', 3) save_params = [cv2.IMWRITE_PNG_COMPRESSION, compression] - # Add more formats as needed (e.g., EXR, TIFF) save_success = ipu.save_image(str(temp_output_path), processed_image_data, params=save_params) if not save_success: - logger.error(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Failed to save temporary image to '{temp_output_path}'.") - self._update_file_rule_status( - context, file_rule.id.hex, 'Failed', - map_type=file_rule.map_type, - source_file=str(source_file_path), - original_dimensions=(original_width, original_height), - processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]) if processed_image_data is not None else None, - details="Temporary image save failed" - ) + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to save temporary image to '{temp_output_path}'.") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]) if processed_image_data is not None else None, details="Temporary image save failed") continue - logger.info(f"Asset {context.asset_id}, FileRule {file_rule.id.hex} ({file_rule.map_type}): Successfully processed and saved temporary map to '{temp_output_path}'.") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Successfully processed and saved temporary map to '{temp_output_path}'.") - # D. Update Context - self._update_file_rule_status( - context, file_rule.id.hex, 'Processed', - map_type=file_rule.map_type, - source_file=str(source_file_path), - temp_processed_file=str(temp_output_path), - original_dimensions=(original_width, original_height), - processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]), - details="Successfully processed" - ) + self._update_file_rule_status(context, current_map_id_hex, 'Processed', map_type=current_map_type, source_file=str(source_file_path), temp_processed_file=str(temp_output_path), original_dimensions=(original_width, original_height), processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]), details="Successfully processed") - # Optional: Update context.asset_metadata['processed_files'] if 'processed_files' not in context.asset_metadata: context.asset_metadata['processed_files'] = [] context.asset_metadata['processed_files'].append({ - 'file_rule_id': file_rule.id.hex, + 'processed_map_key': current_map_id_hex, # Changed from file_rule_id 'path': str(temp_output_path), 'type': 'temporary_map', - 'map_type': file_rule.map_type + 'map_type': current_map_type }) - - logger.info(f"Asset {context.asset_id}: Finished individual map processing stage.") + logger.info(f"Asset '{asset_name_for_log}': Finished individual map processing stage.") return context - def _find_source_file(self, base_path: Path, pattern: str, asset_id: str, file_rule_id_hex: str) -> Optional[Path]: + def _find_source_file(self, base_path: Path, pattern: str, asset_name_for_log: str, current_map_id_hex: str) -> Optional[Path]: # asset_id -> asset_name_for_log, file_rule_id_hex -> current_map_id_hex """ Finds a single source file matching the pattern within the base_path. - Adapts logic from ProcessingEngine._find_source_file. """ - if not pattern: - logger.warning(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Empty filename pattern provided.") + if not pattern: # pattern is now file_rule.file_path + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Empty file_path provided in FileRule.") return None + # If pattern is an absolute path, use it directly + potential_abs_path = Path(pattern) + if potential_abs_path.is_absolute() and potential_abs_path.exists(): + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: file_path '{pattern}' is absolute and exists. Using it directly.") + return potential_abs_path + elif potential_abs_path.is_absolute(): + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: file_path '{pattern}' is absolute but does not exist.") + # Fall through to try resolving against base_path if it's just a name/relative pattern + + # Treat pattern as relative to base_path + # This could be an exact name or a glob pattern try: - # Using rglob for potentially nested structures, though original might have been simpler. - # If pattern is exact filename, it will also work. - # If pattern is a glob, it will search. - matched_files = list(base_path.rglob(pattern)) - - if not matched_files: - logger.debug(f"Asset {asset_id}, FileRule {file_rule_id_hex}: No files found matching pattern '{pattern}' in '{base_path}' (recursive).") - # Try non-recursive if rglob fails and pattern might be for top-level - matched_files_non_recursive = list(base_path.glob(pattern)) - if matched_files_non_recursive: - logger.debug(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Found {len(matched_files_non_recursive)} files non-recursively. Using first: {matched_files_non_recursive[0]}") - return matched_files_non_recursive[0] - return None - - if len(matched_files) > 1: - logger.warning(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Multiple files ({len(matched_files)}) found for pattern '{pattern}' in '{base_path}'. Using the first one: {matched_files[0]}. Files: {matched_files}") - - return matched_files[0] + # First, check if pattern is an exact relative path + exact_match_path = base_path / pattern + if exact_match_path.exists() and exact_match_path.is_file(): + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Found exact match for '{pattern}' at '{exact_match_path}'.") + return exact_match_path + # If not an exact match, try as a glob pattern (recursive) + matched_files_rglob = list(base_path.rglob(pattern)) + if matched_files_rglob: + if len(matched_files_rglob) > 1: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Multiple files ({len(matched_files_rglob)}) found for pattern '{pattern}' in '{base_path}' (recursive). Using first: {matched_files_rglob[0]}. Files: {matched_files_rglob}") + return matched_files_rglob[0] + + # Try non-recursive glob if rglob fails + matched_files_glob = list(base_path.glob(pattern)) + if matched_files_glob: + if len(matched_files_glob) > 1: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Multiple files ({len(matched_files_glob)}) found for pattern '{pattern}' in '{base_path}' (non-recursive). Using first: {matched_files_glob[0]}. Files: {matched_files_glob}") + return matched_files_glob[0] + + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: No files found matching pattern '{pattern}' in '{base_path}' (exact, recursive, or non-recursive).") + return None except Exception as e: - logger.error(f"Asset {asset_id}, FileRule {file_rule_id_hex}: Error searching for file with pattern '{pattern}' in '{base_path}': {e}") + logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Error searching for file with pattern '{pattern}' in '{base_path}': {e}") return None - def _update_file_rule_status(self, context: AssetProcessingContext, file_rule_id_hex: str, status: str, **kwargs): - """Helper to update processed_maps_details for a file_rule.""" - if file_rule_id_hex not in context.processed_maps_details: - context.processed_maps_details[file_rule_id_hex] = {} + def _update_file_rule_status(self, context: AssetProcessingContext, map_id_hex: str, status: str, **kwargs): # file_rule_id_hex -> map_id_hex + """Helper to update processed_maps_details for a map.""" + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + if map_id_hex not in context.processed_maps_details: + context.processed_maps_details[map_id_hex] = {} - context.processed_maps_details[file_rule_id_hex]['status'] = status + context.processed_maps_details[map_id_hex]['status'] = status for key, value in kwargs.items(): - context.processed_maps_details[file_rule_id_hex][key] = value + context.processed_maps_details[map_id_hex][key] = value - # Ensure essential keys are present even on failure, if known - if 'map_type' not in context.processed_maps_details[file_rule_id_hex] and 'map_type' in kwargs: - context.processed_maps_details[file_rule_id_hex]['map_type'] = kwargs['map_type'] + if 'map_type' not in context.processed_maps_details[map_id_hex] and 'map_type' in kwargs: + context.processed_maps_details[map_id_hex]['map_type'] = kwargs['map_type'] + + # Add formatted resolution names + if 'original_dimensions' in kwargs and isinstance(kwargs['original_dimensions'], tuple) and len(kwargs['original_dimensions']) == 2: + orig_w, orig_h = kwargs['original_dimensions'] + context.processed_maps_details[map_id_hex]['original_resolution_name'] = f"{orig_w}x{orig_h}" + + if status == 'Processed' and 'processed_dimensions' in kwargs and isinstance(kwargs['processed_dimensions'], tuple) and len(kwargs['processed_dimensions']) == 2: + proc_w, proc_h = kwargs['processed_dimensions'] + context.processed_maps_details[map_id_hex]['processed_resolution_name'] = f"{proc_w}x{proc_h}" + elif 'processed_dimensions' in kwargs: # If present but not as expected, log or handle + logger.warning(f"Asset '{asset_name_for_log}', Map ID {map_id_hex}: 'processed_dimensions' present but not a valid tuple: {kwargs['processed_dimensions']}") - logger.debug(f"Asset {context.asset_id}, FileRule {file_rule_id_hex}: Status updated to '{status}'. Details: {kwargs}") \ No newline at end of file + # Log all details being stored for clarity, including the newly added resolution names + log_details = context.processed_maps_details[map_id_hex].copy() + logger.debug(f"Asset '{asset_name_for_log}', Map ID {map_id_hex}: Status updated to '{status}'. Details: {log_details}") \ No newline at end of file diff --git a/processing/pipeline/stages/map_merging.py b/processing/pipeline/stages/map_merging.py index 6e0fd0f..c93093c 100644 --- a/processing/pipeline/stages/map_merging.py +++ b/processing/pipeline/stages/map_merging.py @@ -5,10 +5,10 @@ from typing import Dict, Optional, List, Tuple import numpy as np import cv2 # For potential direct cv2 operations if ipu doesn't cover all merge needs -from ..base_stage import ProcessingStage -from ...asset_context import AssetProcessingContext -from ....rule_structure import FileRule, MergeSettings, MergeInputChannel -from ....utils.path_utils import sanitize_filename +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from rule_structure import FileRule +from utils.path_utils import sanitize_filename from ...utils import image_processing_utils as ipu @@ -30,281 +30,244 @@ class MapMergingStage(ProcessingStage): Returns: The updated asset processing context. """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" if context.status_flags.get('skip_asset'): - logger.info(f"Skipping map merging for asset {context.asset_name} as skip_asset flag is set.") + logger.info(f"Skipping map merging for asset {asset_name_for_log} as skip_asset flag is set.") return context if not hasattr(context, 'merged_maps_details'): context.merged_maps_details = {} if not hasattr(context, 'processed_maps_details'): - logger.warning(f"Asset {context.asset_name}: 'processed_maps_details' not found in context. Cannot perform map merging.") + logger.warning(f"Asset {asset_name_for_log}: 'processed_maps_details' not found in context. Cannot perform map merging.") return context - if not context.files_to_process: - logger.info(f"Asset {context.asset_name}: No files_to_process defined. Skipping map merging.") - return context + if not context.files_to_process: # This list might not be relevant if merge rules are defined elsewhere or implicitly + logger.info(f"Asset {asset_name_for_log}: No files_to_process defined. This stage might rely on config or processed_maps_details directly for merge rules.") + # Depending on design, this might not be an error, so we don't return yet. - logger.info(f"Starting MapMergingStage for asset: {context.asset_name}") + logger.info(f"Starting MapMergingStage for asset: {asset_name_for_log}") - for merge_rule in context.files_to_process: + # TODO: The logic for identifying merge rules and their inputs needs significant rework + # as FileRule no longer has 'id' or 'merge_settings' directly in the way this stage expects. + # Merge rules are likely defined in the main configuration (context.config_obj.map_merge_rules) + # and need to be matched against available maps in context.processed_maps_details. + + # Placeholder for the loop that would iterate over context.config_obj.map_merge_rules + # For now, this stage will effectively do nothing until that logic is implemented. + + # Example of how one might start to adapt: + # for configured_merge_rule in context.config_obj.map_merge_rules: + # output_map_type = configured_merge_rule.get('output_map_type') + # inputs_config = configured_merge_rule.get('inputs') # e.g. {"R": "NORMAL", "G": "ROUGHNESS"} + # # ... then find these input map_types in context.processed_maps_details ... + # # ... and perform the merge ... + # # This is a complex change beyond simple attribute renaming. + + # The following is the original loop structure, which will likely fail due to missing attributes on FileRule. + # Keeping it commented out to show what was there. + """ + for merge_rule in context.files_to_process: # This iteration logic is likely incorrect for merge rules if not isinstance(merge_rule, FileRule) or merge_rule.item_type != "MAP_MERGE": continue - if not merge_rule.merge_settings: - logger.error(f"Asset {context.asset_name}, Rule ID {merge_rule.id.hex}: Merge rule for map_type '{merge_rule.map_type}' is missing merge_settings. Skipping this merge.") - context.merged_maps_details[merge_rule.id.hex] = { - 'map_type': merge_rule.map_type, - 'status': 'Failed', - 'reason': 'Missing merge_settings in FileRule.' - } + # FileRule does not have merge_settings or id.hex + # This entire block needs to be re-thought based on where merge rules are defined. + # Assuming merge_rule_id_hex would be a generated UUID for this operation. + merge_rule_id_hex = f"merge_op_{uuid.uuid4().hex[:8]}" + current_map_type = merge_rule.item_type_override or merge_rule.item_type + + logger.error(f"Asset {asset_name_for_log}, Potential Merge for {current_map_type}: Merge rule processing needs rework. FileRule lacks 'merge_settings' and 'id'. Skipping this rule.") + context.merged_maps_details[merge_rule_id_hex] = { + 'map_type': current_map_type, + 'status': 'Failed', + 'reason': 'Merge rule processing logic in MapMergingStage needs refactor due to FileRule changes.' + } + continue + """ + + # For now, let's assume no merge rules are processed until the logic is fixed. + num_merge_rules_attempted = 0 + # If context.config_obj.map_merge_rules exists, iterate it here. + # The original code iterated context.files_to_process looking for item_type "MAP_MERGE". + # This implies FileRule objects were being used to define merge operations, which is no longer the case + # if 'merge_settings' and 'id' were removed from FileRule. + + # The core merge rules are in context.config_obj.map_merge_rules + # Each rule in there defines an output_map_type and its inputs. + + config_merge_rules = context.config_obj.map_merge_rules + if not config_merge_rules: + logger.info(f"Asset {asset_name_for_log}: No map_merge_rules found in configuration. Skipping map merging.") + return context + + for rule_idx, configured_merge_rule in enumerate(config_merge_rules): + output_map_type = configured_merge_rule.get('output_map_type') + inputs_map_type_to_channel = configured_merge_rule.get('inputs') # e.g. {"R": "NRM", "G": "NRM", "B": "ROUGH"} + default_values = configured_merge_rule.get('defaults', {}) # e.g. {"R": 0.5, "G": 0.5, "B": 0.5} + # output_bit_depth_rule = configured_merge_rule.get('output_bit_depth', 'respect_inputs') # Not used yet + + if not output_map_type or not inputs_map_type_to_channel: + logger.warning(f"Asset {asset_name_for_log}: Invalid configured_merge_rule at index {rule_idx}. Missing 'output_map_type' or 'inputs'. Rule: {configured_merge_rule}") continue - merge_settings: MergeSettings = merge_rule.merge_settings - output_map_type = merge_rule.map_type - rule_id_hex = merge_rule.id.hex - logger.info(f"Processing MAP_MERGE rule for '{output_map_type}' (ID: {rule_id_hex})") + num_merge_rules_attempted +=1 + merge_op_id = f"merge_{sanitize_filename(output_map_type)}_{rule_idx}" + logger.info(f"Asset {asset_name_for_log}: Processing configured merge rule for '{output_map_type}' (Op ID: {merge_op_id})") - loaded_input_maps: Dict[str, np.ndarray] = {} - input_map_paths: Dict[str, str] = {} - target_dims: Optional[Tuple[int, int]] = None # width, height + loaded_input_maps: Dict[str, np.ndarray] = {} # Key: input_map_type (e.g. "NRM"), Value: image_data + input_map_paths: Dict[str, str] = {} # Key: input_map_type, Value: path_str + target_dims: Optional[Tuple[int, int]] = None all_inputs_valid = True - - # A. Load Input Maps for Merging - if not merge_settings.input_maps: - logger.warning(f"Asset {context.asset_name}, Rule ID {rule_id_hex}: No input_maps defined in merge_settings for '{output_map_type}'. Skipping this merge.") - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': 'No input_maps defined in merge_settings.' - } - continue - - for input_map_config in merge_settings.input_maps: - input_rule_id_hex = input_map_config.file_rule_id.hex - processed_detail = context.processed_maps_details.get(input_rule_id_hex) - - if not processed_detail or processed_detail.get('status') != 'Processed': - error_msg = f"Input map (Rule ID: {input_rule_id_hex}) for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}) not found or not processed. Details: {processed_detail}" - logger.error(error_msg) - all_inputs_valid = False - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': f"Input map {input_rule_id_hex} not processed or missing." - } - break + + # Find and load input maps from processed_maps_details + # This assumes one processed map per map_type. If multiple variants exist, this needs refinement. + required_input_map_types = set(inputs_map_type_to_channel.values()) + + for required_map_type in required_input_map_types: + found_processed_map = None + processed_map_key = None + for p_key, p_details in context.processed_maps_details.items(): + processed_map_type_in_details = p_details.get('map_type') + # Check for direct match or match with "MAP_" prefix + if (processed_map_type_in_details == required_map_type or \ + processed_map_type_in_details == f"MAP_{required_map_type}") and \ + p_details.get('status') == 'Processed': + found_processed_map = p_details + processed_map_key = p_key # The UUID hex key from individual processing + break - temp_processed_file_path = Path(processed_detail['temp_processed_file']) - if not temp_processed_file_path.exists(): - error_msg = f"Input map file {temp_processed_file_path} for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}) does not exist." - logger.error(error_msg) + if not found_processed_map: + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map_type '{required_map_type}' for output '{output_map_type}' not found or not processed in processed_maps_details.") + # Option: Use default value for the entire map if one could be constructed for this map_type + # For now, we fail the merge if a required map is missing. all_inputs_valid = False - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': f"Input map file {temp_processed_file_path} not found." - } + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Required input map_type '{required_map_type}' missing."} + break # Break from finding inputs for this merge rule + + temp_file_path = Path(found_processed_map['temp_processed_file']) + if not temp_file_path.exists(): + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Temp file {temp_file_path} for input map_type '{required_map_type}' does not exist.") + all_inputs_valid = False + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Temp file for input '{required_map_type}' missing."} break try: - image_data = ipu.load_image(temp_processed_file_path) + image_data = ipu.load_image(temp_file_path) + if image_data is None: raise ValueError("Loaded image is None") except Exception as e: - logger.error(f"Error loading image {temp_processed_file_path} for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}): {e}") + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error loading image {temp_file_path} for input map_type '{required_map_type}': {e}") all_inputs_valid = False - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': f"Error loading input image {temp_processed_file_path}." - } + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Error loading input '{required_map_type}'."} break - if image_data is None: - logger.error(f"Failed to load image data from {temp_processed_file_path} for merge rule '{output_map_type}' (Rule ID: {rule_id_hex}).") - all_inputs_valid = False - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': f"Failed to load image data from {temp_processed_file_path}." - } - break + loaded_input_maps[required_map_type] = image_data + input_map_paths[required_map_type] = str(temp_file_path) - loaded_input_maps[input_rule_id_hex] = image_data - input_map_paths[input_rule_id_hex] = str(temp_processed_file_path) - - current_dims = (image_data.shape[1], image_data.shape[0]) # width, height + current_dims = (image_data.shape[1], image_data.shape[0]) if target_dims is None: target_dims = current_dims - logger.debug(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Set target dimensions to {target_dims} from first input {temp_processed_file_path}.") elif current_dims != target_dims: - logger.warning(f"Input map {temp_processed_file_path} for merge rule '{output_map_type}' (ID: {rule_id_hex}) has dimensions {current_dims}, but target is {target_dims}. Resizing.") + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{required_map_type}' dims {current_dims} differ from target {target_dims}. Resizing.") try: image_data = ipu.resize_image(image_data, target_dims[0], target_dims[1]) - if image_data is None: - raise ValueError("Resize operation returned None.") - loaded_input_maps[input_rule_id_hex] = image_data + if image_data is None: raise ValueError("Resize returned None") + loaded_input_maps[required_map_type] = image_data except Exception as e: - logger.error(f"Failed to resize image {temp_processed_file_path} for merge rule '{output_map_type}' (ID: {rule_id_hex}): {e}") + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to resize '{required_map_type}': {e}") all_inputs_valid = False - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': f"Failed to resize input image {temp_processed_file_path}." - } + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Failed to resize input '{required_map_type}'."} break if not all_inputs_valid: - # Failure already logged and recorded in context.merged_maps_details - logger.warning(f"Skipping merge for '{output_map_type}' (ID: {rule_id_hex}) due to invalid inputs.") + logger.warning(f"Asset {asset_name_for_log}: Skipping merge for Op ID {merge_op_id} ('{output_map_type}') due to invalid inputs.") continue - if target_dims is None: # Should not happen if all_inputs_valid is true and there was at least one input map - logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Target dimensions not determined despite valid inputs. This indicates an issue with input map loading or an empty input_maps list that wasn't caught.") - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': 'Target dimensions could not be determined.' - } + if not loaded_input_maps or target_dims is None: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: No input maps loaded or target_dims not set for '{output_map_type}'. This shouldn't happen if all_inputs_valid was true.") + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'Internal error: input maps not loaded or target_dims missing.'} + continue + + # Determine output channels (e.g., 3 for RGB, 1 for Grayscale) + # This depends on the keys in inputs_map_type_to_channel (R,G,B,A) + output_channel_keys = sorted(list(inputs_map_type_to_channel.keys())) # e.g. ['B', 'G', 'R'] + num_output_channels = len(output_channel_keys) + + if num_output_channels == 0: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: No output channels defined in 'inputs' for '{output_map_type}'.") + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'No output channels defined.'} continue - # B. Perform Merge Operation try: - if merge_settings.output_channels == 1: + if num_output_channels == 1: # Grayscale output merged_image = np.zeros((target_dims[1], target_dims[0]), dtype=np.uint8) - else: - merged_image = np.zeros((target_dims[1], target_dims[0], merge_settings.output_channels), dtype=np.uint8) + else: # Color output + merged_image = np.zeros((target_dims[1], target_dims[0], num_output_channels), dtype=np.uint8) except Exception as e: - logger.error(f"Error creating empty merged image for '{output_map_type}' (ID: {rule_id_hex}) with dims {target_dims} and {merge_settings.output_channels} channels: {e}") - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': f'Error creating output image canvas: {e}' - } + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error creating empty merged image for '{output_map_type}': {e}") + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f'Error creating output canvas: {e}'} continue - merge_op_failed = False - for input_map_config in merge_settings.input_maps: - source_image = loaded_input_maps[input_map_config.file_rule_id.hex] - source_channel_index = input_map_config.source_channel - target_channel_index = input_map_config.target_channel + merge_op_failed_detail = False + for i, out_channel_char in enumerate(output_channel_keys): # e.g. R, G, B + input_map_type_for_this_channel = inputs_map_type_to_channel[out_channel_char] + source_image = loaded_input_maps.get(input_map_type_for_this_channel) - source_data = None - if source_image.ndim == 2: # Grayscale - source_data = source_image - elif source_image.ndim == 3: # Multi-channel (e.g. RGB, RGBA) - if source_channel_index >= source_image.shape[2]: - logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Source channel index {source_channel_index} out of bounds for source image with shape {source_image.shape} (from Rule ID {input_map_config.file_rule_id.hex}).") - merge_op_failed = True - break - source_data = source_image[:, :, source_channel_index] - else: - logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Source image (from Rule ID {input_map_config.file_rule_id.hex}) has unexpected dimensions: {source_image.ndim}. Shape: {source_image.shape}") - merge_op_failed = True - break + source_data_this_channel = None + if source_image is not None: + if source_image.ndim == 2: # Grayscale source + source_data_this_channel = source_image + elif source_image.ndim == 3: # Color source, take the first channel (assuming it's grayscale or R of RGB) + source_data_this_channel = source_image[:,:,0] + logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Taking channel 0 from {input_map_type_for_this_channel} for output {out_channel_char}.") + else: # Source map was not found, use default + default_val_for_channel = default_values.get(out_channel_char) + if default_val_for_channel is not None: + # Convert 0-1 float default to 0-255 uint8 + source_data_this_channel = np.full((target_dims[1], target_dims[0]), int(default_val_for_channel * 255), dtype=np.uint8) + logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Using default value {default_val_for_channel} for output channel '{out_channel_char}' as input map '{input_map_type_for_this_channel}' was missing.") + else: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{input_map_type_for_this_channel}' for output channel '{out_channel_char}' is missing and no default value provided.") + merge_op_failed_detail = True; break - if source_data is None: # Should be caught by previous checks - logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Failed to extract source_data for unknown reasons from input {input_map_config.file_rule_id.hex}.") - merge_op_failed = True - break + if source_data_this_channel is None: # Should be caught by default value logic or earlier checks + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to get source data for output channel '{out_channel_char}'.") + merge_op_failed_detail = True; break - # Assign to target channel try: - if merged_image.ndim == 2: # Output is grayscale - if merge_settings.output_channels != 1: - logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Mismatch - merged_image is 2D but output_channels is {merge_settings.output_channels}.") - merge_op_failed = True - break - merged_image = source_data # Overwrites if multiple inputs map to grayscale; consider blending or specific logic if needed - elif merged_image.ndim == 3: # Output is multi-channel - if target_channel_index >= merged_image.shape[2]: - logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Target channel index {target_channel_index} out of bounds for merged image with shape {merged_image.shape}.") - merge_op_failed = True - break - merged_image[:, :, target_channel_index] = source_data - else: # Should not happen - logger.error(f"Merge rule '{output_map_type}' (ID: {rule_id_hex}): Merged image has unexpected dimensions: {merged_image.ndim}. Shape: {merged_image.shape}") - merge_op_failed = True - break + if merged_image.ndim == 2: # Single channel output + merged_image = source_data_this_channel + else: # Multi-channel output + merged_image[:, :, i] = source_data_this_channel except Exception as e: - logger.error(f"Error assigning source data to target channel for '{output_map_type}' (ID: {rule_id_hex}): {e}. Source shape: {source_data.shape}, Target channel: {target_channel_index}, Merged image shape: {merged_image.shape}") - merge_op_failed = True - break - - if input_map_config.invert_source_channel: - if merged_image.ndim == 2: - merged_image = 255 - merged_image # Assumes uint8 - elif merged_image.ndim == 3: - # Ensure we are not inverting an alpha channel if that's not desired, - # but current spec inverts the target channel data. - merged_image[:, :, target_channel_index] = 255 - merged_image[:, :, target_channel_index] - - # input_map_config.default_value_if_missing: - # This was handled by all_inputs_valid check for file presence. - # If a channel is missing from a multi-channel source, that's an error in source_channel_index. - # If a file is entirely missing and a default color/value is needed for the *output channel*, - # that would be a different logic, perhaps pre-filling merged_image. - # For now, we assume if an input map is specified, it must be present and valid. - - if merge_op_failed: - logger.error(f"Merge operation failed for '{output_map_type}' (ID: {rule_id_hex}).") - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': 'Error during channel packing/merge operation.' - } - continue + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error assigning data to output channel '{out_channel_char}' (index {i}): {e}") + merge_op_failed_detail = True; break - # C. Save Temporary Merged Map - # Default to PNG, or use format from merge_settings if available (future enhancement) - output_format = getattr(merge_settings, 'output_format', 'png').lower() - if output_format not in ['png', 'jpg', 'jpeg', 'tif', 'tiff', 'exr']: # Add more as ipu supports - logger.warning(f"Unsupported output_format '{output_format}' in merge_settings for '{output_map_type}' (ID: {rule_id_hex}). Defaulting to PNG.") - output_format = 'png' - - temp_merged_filename = f"merged_{sanitize_filename(output_map_type)}_{rule_id_hex}.{output_format}" - - if not context.engine_temp_dir: - logger.error(f"Asset {context.asset_name}: engine_temp_dir is not set. Cannot save merged map.") - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': 'engine_temp_dir not set in context.' - } + if merge_op_failed_detail: + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'Error during channel assignment.'} continue + output_format = 'png' # Default, can be configured per rule later + temp_merged_filename = f"merged_{sanitize_filename(output_map_type)}_{merge_op_id}.{output_format}" temp_merged_path = context.engine_temp_dir / temp_merged_filename - + try: save_success = ipu.save_image(temp_merged_path, merged_image) + if not save_success: raise ValueError("Save image returned false") except Exception as e: - logger.error(f"Error saving merged image {temp_merged_path} for '{output_map_type}' (ID: {rule_id_hex}): {e}") - save_success = False - - if not save_success: - logger.error(f"Failed to save temporary merged map to {temp_merged_path} for '{output_map_type}' (ID: {rule_id_hex}).") - context.merged_maps_details[rule_id_hex] = { - 'map_type': output_map_type, - 'status': 'Failed', - 'reason': f'Failed to save merged image to {temp_merged_path}.' - } + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error saving merged image {temp_merged_path}: {e}") + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f'Failed to save merged image: {e}'} continue - - logger.info(f"Successfully merged and saved '{output_map_type}' (ID: {rule_id_hex}) to {temp_merged_path}") - - # D. Update Context - context.merged_maps_details[rule_id_hex] = { + + logger.info(f"Asset {asset_name_for_log}: Successfully merged and saved '{output_map_type}' (Op ID: {merge_op_id}) to {temp_merged_path}") + context.merged_maps_details[merge_op_id] = { 'map_type': output_map_type, 'temp_merged_file': str(temp_merged_path), - 'input_map_ids_used': [mc.file_rule_id.hex for mc in merge_settings.input_maps], - 'input_map_files_used': input_map_paths, # Dict[rule_id_hex, path_str] - 'merged_dimensions': target_dims, # (width, height) - 'status': 'Processed', - 'file_rule_id': rule_id_hex # For easier reverse lookup if needed + 'input_map_types_used': list(inputs_map_type_to_channel.values()), + 'input_map_files_used': input_map_paths, + 'merged_dimensions': target_dims, + 'status': 'Processed' } - - # Optional: Update context.asset_metadata['processed_files'] or similar - # This might be better handled by a later stage that finalizes files. - # For now, merged_maps_details is the primary record. - logger.info(f"Finished MapMergingStage for asset: {context.asset_name}. Merged maps: {len(context.merged_maps_details)}") + logger.info(f"Finished MapMergingStage for asset: {asset_name_for_log}. Merged maps operations attempted: {num_merge_rules_attempted}, Succeeded: {len([d for d in context.merged_maps_details.values() if d.get('status') == 'Processed'])}") return context \ No newline at end of file diff --git a/processing/pipeline/stages/metadata_finalization_save.py b/processing/pipeline/stages/metadata_finalization_save.py index d18bfc4..1fb763f 100644 --- a/processing/pipeline/stages/metadata_finalization_save.py +++ b/processing/pipeline/stages/metadata_finalization_save.py @@ -6,7 +6,7 @@ from typing import Any, Dict from ..asset_context import AssetProcessingContext from .base_stage import ProcessingStage -from ....utils.path_utils import generate_path_from_pattern +from utils.path_utils import generate_path_from_pattern, sanitize_filename logger = logging.getLogger(__name__) @@ -21,29 +21,34 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): """ Finalizes metadata, determines output path, and saves the metadata JSON file. """ + asset_name_for_log = "Unknown Asset" + if hasattr(context, 'asset_rule') and context.asset_rule and hasattr(context.asset_rule, 'asset_name'): + asset_name_for_log = context.asset_rule.asset_name + if not hasattr(context, 'asset_metadata') or not context.asset_metadata: if context.status_flags.get('skip_asset'): logger.info( - f"Asset '{context.asset_rule.name if hasattr(context, 'asset_rule') and context.asset_rule else 'Unknown'}': " + f"Asset '{asset_name_for_log}': " f"Skipped before metadata initialization. No metadata file will be saved." ) else: logger.warning( - f"Asset '{context.asset_rule.name if hasattr(context, 'asset_rule') and context.asset_rule else 'Unknown'}': " + f"Asset '{asset_name_for_log}': " f"asset_metadata not initialized. Skipping metadata finalization and save." ) return context - + # Check Skip Flag if context.status_flags.get('skip_asset'): context.asset_metadata['status'] = "Skipped" context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() context.asset_metadata['notes'] = context.status_flags.get('skip_reason', 'Skipped early in pipeline') logger.info( - f"Asset '{context.asset_rule.name}': Marked as skipped. Reason: {context.asset_metadata['notes']}" + f"Asset '{asset_name_for_log}': Marked as skipped. Reason: {context.asset_metadata['notes']}" ) # Assuming we save metadata for skipped assets if it was initialized. # If not, the logic to skip saving would be here or before path generation. + # However, if we are here, asset_metadata IS initialized. # A. Finalize Metadata context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() @@ -52,7 +57,8 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): if context.asset_metadata.get('status') != "Skipped": has_errors = any( context.status_flags.get(error_flag) - for error_flag in ['file_processing_error', 'merge_error', 'critical_error'] # Added critical_error + for error_flag in ['file_processing_error', 'merge_error', 'critical_error', + 'individual_map_processing_failed', 'metadata_save_error'] # Added more flags ) if has_errors: context.asset_metadata['status'] = "Failed" @@ -64,31 +70,54 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): context.asset_metadata['merged_map_details'] = getattr(context, 'merged_maps_details', {}) # (Optional) Add a list of all temporary files - context.asset_metadata['temporary_files'] = getattr(context, 'temporary_files', []) + # context.asset_metadata['temporary_files'] = getattr(context, 'temporary_files', []) # Assuming this is populated elsewhere # B. Determine Metadata Output Path - # Ensure asset_rule and source_rule exist before accessing their names - asset_name = context.asset_rule.name if hasattr(context, 'asset_rule') and context.asset_rule else "unknown_asset" - source_rule_name = context.source_rule.name if hasattr(context, 'source_rule') and context.source_rule else "unknown_source" + # asset_name_for_log is defined at the top of the function if asset_metadata exists + + source_rule_identifier_for_path = "unknown_source" + if hasattr(context, 'source_rule') and context.source_rule: + if hasattr(context.source_rule, 'supplier_identifier') and context.source_rule.supplier_identifier: + source_rule_identifier_for_path = context.source_rule.supplier_identifier + elif hasattr(context.source_rule, 'input_path') and context.source_rule.input_path: + source_rule_identifier_for_path = Path(context.source_rule.input_path).stem # Use stem of input path if no identifier + else: + source_rule_identifier_for_path = "unknown_source_details" - metadata_filename = f"{asset_name}_metadata.json" - output_path_pattern = context.asset_rule.output_path_pattern if hasattr(context, 'asset_rule') and context.asset_rule else "" + # Use the configured metadata filename from config_obj + metadata_filename_from_config = getattr(context.config_obj, 'metadata_filename', "metadata.json") + # Ensure asset_name_for_log is safe for filenames + safe_asset_name = sanitize_filename(asset_name_for_log) # asset_name_for_log is defined at the top + final_metadata_filename = f"{safe_asset_name}_{metadata_filename_from_config}" - # Handle potential missing sha5_value, defaulting to None or an empty string - sha_value = getattr(context, 'sha5_value', getattr(context, 'sha_value', None)) + # Output path pattern should come from config_obj, not asset_rule + output_path_pattern_from_config = getattr(context.config_obj, 'output_directory_pattern', "[supplier]/[assetname]") + + sha_value = getattr(context, 'sha5_value', None) # Prefer sha5_value if explicitly set on context + if sha_value is None: # Fallback to sha256_value if that was the intended attribute + sha_value = getattr(context, 'sha256_value', None) + token_data = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier if context.effective_supplier else source_rule_identifier_for_path, + "sourcerulename": source_rule_identifier_for_path, + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": sha_value, # Assuming pattern uses [sha5] or similar for sha_value + "maptype": "metadata", # Added maptype to token_data + "filename": final_metadata_filename # Added filename to token_data + # Add other tokens if your output_path_pattern_from_config expects them + } + # Clean None values, as generate_path_from_pattern might not handle them well for all tokens + token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} - full_output_path = generate_path_from_pattern( - base_path=str(context.output_base_path), # Ensure base_path is a string - pattern=output_path_pattern, - asset_name=asset_name, - map_type="metadata", # Special map_type for metadata - filename=metadata_filename, - source_rule_name=source_rule_name, - incrementing_value=getattr(context, 'incrementing_value', None), - sha_value=sha_value # Changed from sha5_value to sha_value for more generality + # Generate the relative directory path using the pattern and tokens + relative_dir_path_str = generate_path_from_pattern( + pattern_string=output_path_pattern_from_config, # This pattern should resolve to a directory + token_data=token_data_cleaned ) - metadata_save_path = Path(full_output_path) + + # Construct the full path by joining the base output path, the generated relative directory, and the final filename + metadata_save_path = Path(context.output_base_path) / Path(relative_dir_path_str) / Path(final_metadata_filename) # C. Save Metadata File try: @@ -109,10 +138,10 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): with open(metadata_save_path, 'w') as f: json.dump(serializable_metadata, f, indent=4) - logger.info(f"Asset '{asset_name}': Metadata saved to {metadata_save_path}") + logger.info(f"Asset '{asset_name_for_log}': Metadata saved to {metadata_save_path}") # Use asset_name_for_log context.asset_metadata['metadata_file_path'] = str(metadata_save_path) except Exception as e: - logger.error(f"Asset '{asset_name}': Failed to save metadata to {metadata_save_path}. Error: {e}") + logger.error(f"Asset '{asset_name_for_log}': Failed to save metadata to {metadata_save_path}. Error: {e}") # Use asset_name_for_log context.asset_metadata['status'] = "Failed (Metadata Save Error)" context.status_flags['metadata_save_error'] = True diff --git a/processing/pipeline/stages/metadata_initialization.py b/processing/pipeline/stages/metadata_initialization.py index 4d5fbf5..f938ff5 100644 --- a/processing/pipeline/stages/metadata_initialization.py +++ b/processing/pipeline/stages/metadata_initialization.py @@ -1,8 +1,8 @@ import datetime import logging -from ..base_stage import ProcessingStage -from ...asset_context import AssetProcessingContext # Adjusted import path assuming asset_context is in processing.pipeline +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext # Adjusted import path assuming asset_context is in processing.pipeline # If AssetProcessingContext is directly under 'processing', the import would be: # from ...asset_context import AssetProcessingContext # Based on the provided file structure, asset_context.py is in processing/pipeline/ @@ -74,8 +74,6 @@ from ...asset_context import AssetProcessingContext # Adjusted import path assum # I will use the imports that align with the provided file structure. -from .base_stage import ProcessingStage -from ..asset_context import AssetProcessingContext logger = logging.getLogger(__name__) @@ -97,10 +95,10 @@ class MetadataInitializationStage(ProcessingStage): The modified AssetProcessingContext. """ if context.status_flags.get('skip_asset', False): - logger.debug(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': Skipping metadata initialization as 'skip_asset' is True.") + logger.debug(f"Asset '{context.asset_rule.asset_name if context.asset_rule else 'Unknown'}': Skipping metadata initialization as 'skip_asset' is True.") return context - logger.debug(f"Asset '{context.asset_rule.name}': Initializing metadata.") + logger.debug(f"Asset '{context.asset_rule.asset_name if context.asset_rule else 'Unknown'}': Initializing metadata.") context.asset_metadata = {} context.processed_maps_details = {} @@ -108,12 +106,19 @@ class MetadataInitializationStage(ProcessingStage): # Populate Initial asset_metadata if context.asset_rule: - context.asset_metadata['asset_name'] = context.asset_rule.name - context.asset_metadata['asset_id'] = str(context.asset_rule.id) - context.asset_metadata['source_path'] = str(context.asset_rule.source_path) - context.asset_metadata['output_path_pattern'] = context.asset_rule.output_path_pattern - context.asset_metadata['tags'] = list(context.asset_rule.tags) if context.asset_rule.tags else [] - context.asset_metadata['custom_fields'] = dict(context.asset_rule.custom_fields) if context.asset_rule.custom_fields else {} + context.asset_metadata['asset_name'] = context.asset_rule.asset_name + # Attempt to get 'id' from common_metadata or use asset_name as a fallback + asset_id_val = context.asset_rule.common_metadata.get('id', context.asset_rule.common_metadata.get('asset_id')) + if asset_id_val is None: + logger.warning(f"Asset '{context.asset_rule.asset_name}': No 'id' or 'asset_id' found in common_metadata. Using asset_name as asset_id.") + asset_id_val = context.asset_rule.asset_name + context.asset_metadata['asset_id'] = str(asset_id_val) + + # Assuming source_path, output_path_pattern, tags, custom_fields might also be in common_metadata + context.asset_metadata['source_path'] = str(context.asset_rule.common_metadata.get('source_path', 'N/A')) + context.asset_metadata['output_path_pattern'] = context.asset_rule.common_metadata.get('output_path_pattern', 'N/A') + context.asset_metadata['tags'] = list(context.asset_rule.common_metadata.get('tags', [])) + context.asset_metadata['custom_fields'] = dict(context.asset_rule.common_metadata.get('custom_fields', {})) else: # Handle cases where asset_rule might be None, though typically it should be set logger.warning("AssetRule is not set in context during metadata initialization.") @@ -126,8 +131,13 @@ class MetadataInitializationStage(ProcessingStage): if context.source_rule: - context.asset_metadata['source_rule_name'] = context.source_rule.name - context.asset_metadata['source_rule_id'] = str(context.source_rule.id) + # SourceRule also doesn't have 'name' or 'id' directly. + # Using 'input_path' as a proxy for name, and a placeholder for id. + source_rule_name_val = context.source_rule.input_path if context.source_rule.input_path else "Unknown Source Rule Path" + source_rule_id_val = context.source_rule.high_level_sorting_parameters.get('id', "N/A_SR_ID") # Check high_level_sorting_parameters + logger.debug(f"SourceRule: using input_path '{source_rule_name_val}' as name, and '{source_rule_id_val}' as id.") + context.asset_metadata['source_rule_name'] = source_rule_name_val + context.asset_metadata['source_rule_id'] = str(source_rule_id_val) else: logger.warning("SourceRule is not set in context during metadata initialization.") context.asset_metadata['source_rule_name'] = "Unknown Source Rule" diff --git a/processing/pipeline/stages/normal_map_green_channel.py b/processing/pipeline/stages/normal_map_green_channel.py index ca7984b..38d9034 100644 --- a/processing/pipeline/stages/normal_map_green_channel.py +++ b/processing/pipeline/stages/normal_map_green_channel.py @@ -3,11 +3,11 @@ import numpy as np from pathlib import Path from typing import List -from ..base_stage import ProcessingStage -from ...asset_context import AssetProcessingContext -from .....rule_structure import FileRule +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from rule_structure import FileRule from ...utils import image_processing_utils as ipu -from .....utils.path_utils import sanitize_filename +from utils.path_utils import sanitize_filename logger = logging.getLogger(__name__) @@ -23,71 +23,70 @@ class NormalMapGreenChannelStage(ProcessingStage): performs inversion if needed, saves a new temporary file, and updates the AssetProcessingContext. """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" if context.status_flags.get('skip_asset'): - logger.debug(f"Asset '{context.asset_rule.name}': Skipping NormalMapGreenChannelStage due to skip_asset flag.") + logger.debug(f"Asset '{asset_name_for_log}': Skipping NormalMapGreenChannelStage due to skip_asset flag.") return context - if not context.files_to_process or not context.processed_maps_details: + if not context.processed_maps_details: # Check processed_maps_details primarily logger.debug( - f"Asset '{context.asset_rule.name}': No files to process or processed_maps_details empty in NormalMapGreenChannelStage. Skipping." + f"Asset '{asset_name_for_log}': No processed_maps_details in NormalMapGreenChannelStage. Skipping." ) return context - new_files_to_process: List[FileRule] = [] processed_a_normal_map = False - for file_rule in context.files_to_process: - if file_rule.map_type == "NORMAL": + # Iterate through processed maps, as FileRule objects don't have IDs directly + for map_id_hex, map_details in context.processed_maps_details.items(): + if map_details.get('map_type') == "NORMAL" and map_details.get('status') == 'Processed': + # Check configuration for inversion - # Assuming a global setting for now. - # This key should exist in the Configuration object's general_settings. - should_invert = context.config_obj.general_settings.get('invert_normal_map_green_channel_globally', False) + # Assuming general_settings is an attribute of config_obj and might be a dict or an object + should_invert = False + if hasattr(context.config_obj, 'general_settings'): + if isinstance(context.config_obj.general_settings, dict): + should_invert = context.config_obj.general_settings.get('invert_normal_map_green_channel_globally', False) + elif hasattr(context.config_obj.general_settings, 'invert_normal_map_green_channel_globally'): + should_invert = getattr(context.config_obj.general_settings, 'invert_normal_map_green_channel_globally', False) + + original_temp_path_str = map_details.get('temp_processed_file') + if not original_temp_path_str: + logger.warning(f"Asset '{asset_name_for_log}': Normal map (ID: {map_id_hex}) missing 'temp_processed_file' in details. Skipping.") + continue + + original_temp_path = Path(original_temp_path_str) + original_filename_for_log = original_temp_path.name if not should_invert: logger.debug( - f"Asset '{context.asset_rule.name}': Normal map green channel inversion not enabled globally. " - f"Skipping for {file_rule.filename_pattern} (ID: {file_rule.id.hex})." + f"Asset '{asset_name_for_log}': Normal map green channel inversion not enabled. " + f"Skipping for {original_filename_for_log} (ID: {map_id_hex})." ) - new_files_to_process.append(file_rule) continue - # Get the temporary processed file path - map_details = context.processed_maps_details.get(file_rule.id.hex) - if not map_details or map_details.get('status') != 'Processed' or not map_details.get('temp_processed_file'): - logger.warning( - f"Asset '{context.asset_rule.name}': Normal map {file_rule.filename_pattern} (ID: {file_rule.id.hex}) " - f"not found in processed_maps_details or not marked as 'Processed'. Cannot invert green channel." - ) - new_files_to_process.append(file_rule) - continue - - original_temp_path = Path(map_details['temp_processed_file']) if not original_temp_path.exists(): logger.error( - f"Asset '{context.asset_rule.name}': Temporary file {original_temp_path} for normal map " - f"{file_rule.filename_pattern} (ID: {file_rule.id.hex}) does not exist. Cannot invert green channel." + f"Asset '{asset_name_for_log}': Temporary file {original_temp_path} for normal map " + f"{original_filename_for_log} (ID: {map_id_hex}) does not exist. Cannot invert green channel." ) - new_files_to_process.append(file_rule) continue image_data = ipu.load_image(original_temp_path) if image_data is None: logger.error( - f"Asset '{context.asset_rule.name}': Failed to load image from {original_temp_path} " - f"for normal map {file_rule.filename_pattern} (ID: {file_rule.id.hex})." + f"Asset '{asset_name_for_log}': Failed to load image from {original_temp_path} " + f"for normal map {original_filename_for_log} (ID: {map_id_hex})." ) - new_files_to_process.append(file_rule) continue if image_data.ndim != 3 or image_data.shape[2] < 2: # Must have at least R, G channels logger.error( - f"Asset '{context.asset_rule.name}': Image {original_temp_path} for normal map " - f"{file_rule.filename_pattern} (ID: {file_rule.id.hex}) is not a valid RGB/normal map " + f"Asset '{asset_name_for_log}': Image {original_temp_path} for normal map " + f"{original_filename_for_log} (ID: {map_id_hex}) is not a valid RGB/normal map " f"(ndim={image_data.ndim}, channels={image_data.shape[2] if image_data.ndim == 3 else 'N/A'}) " f"for green channel inversion." ) - new_files_to_process.append(file_rule) continue # Perform Green Channel Inversion @@ -100,55 +99,55 @@ class NormalMapGreenChannelStage(ProcessingStage): modified_image_data[:, :, 1] = max_val - modified_image_data[:, :, 1] else: logger.error( - f"Asset '{context.asset_rule.name}': Unsupported image data type " + f"Asset '{asset_name_for_log}': Unsupported image data type " f"{modified_image_data.dtype} for normal map {original_temp_path}. Cannot invert green channel." ) - new_files_to_process.append(file_rule) continue except IndexError: logger.error( - f"Asset '{context.asset_rule.name}': Image {original_temp_path} for normal map " - f"{file_rule.filename_pattern} (ID: {file_rule.id.hex}) does not have a green channel (index 1) " + f"Asset '{asset_name_for_log}': Image {original_temp_path} for normal map " + f"{original_filename_for_log} (ID: {map_id_hex}) does not have a green channel (index 1) " f"or has unexpected dimensions ({modified_image_data.shape}). Cannot invert." ) - new_files_to_process.append(file_rule) continue - # Save New Temporary (Modified Normal) Map - new_temp_filename = f"normal_g_inv_{sanitize_filename(file_rule.map_type)}_{file_rule.id.hex}{original_temp_path.suffix}" + # Sanitize map_details.get('map_type') in case it's missing, though it should be 'NORMAL' here + map_type_for_filename = sanitize_filename(map_details.get('map_type', 'NORMAL')) + new_temp_filename = f"normal_g_inv_{map_type_for_filename}_{map_id_hex}{original_temp_path.suffix}" new_temp_path = context.engine_temp_dir / new_temp_filename save_success = ipu.save_image(new_temp_path, modified_image_data) if save_success: logger.info( - f"Asset '{context.asset_rule.name}': Inverted green channel for NORMAL map " - f"{original_temp_path.name}, saved to {new_temp_path.name}." + f"Asset '{asset_name_for_log}': Inverted green channel for NORMAL map " + f"{original_filename_for_log}, saved to {new_temp_path.name}." ) - # Update processed_maps_details - context.processed_maps_details[file_rule.id.hex]['temp_processed_file'] = str(new_temp_path) - current_notes = context.processed_maps_details[file_rule.id.hex].get('notes', '') - context.processed_maps_details[file_rule.id.hex]['notes'] = \ + # Update processed_maps_details for this map_id_hex + context.processed_maps_details[map_id_hex]['temp_processed_file'] = str(new_temp_path) + current_notes = context.processed_maps_details[map_id_hex].get('notes', '') + context.processed_maps_details[map_id_hex]['notes'] = \ f"{current_notes}; Green channel inverted by NormalMapGreenChannelStage".strip('; ') - new_files_to_process.append(file_rule) # Add original rule, it now points to modified data processed_a_normal_map = True else: logger.error( - f"Asset '{context.asset_rule.name}': Failed to save inverted normal map to {new_temp_path} " - f"for original {original_temp_path.name}." + f"Asset '{asset_name_for_log}': Failed to save inverted normal map to {new_temp_path} " + f"for original {original_filename_for_log}." ) - new_files_to_process.append(file_rule) # Add original rule, as processing failed - else: - # Not a normal map, just pass it through - new_files_to_process.append(file_rule) + # No need to explicitly manage new_files_to_process list in this loop, + # as we are modifying the temp_processed_file path within processed_maps_details. + # The existing FileRule objects in context.files_to_process (if any) would + # be linked to these details by a previous stage (e.g. IndividualMapProcessing) + # if that stage populates a 'file_rule_id' in map_details. - context.files_to_process = new_files_to_process + # context.files_to_process remains unchanged by this stage directly, + # as we modify the data pointed to by processed_maps_details. if processed_a_normal_map: - logger.info(f"Asset '{context.asset_rule.name}': NormalMapGreenChannelStage processed relevant normal maps.") + logger.info(f"Asset '{asset_name_for_log}': NormalMapGreenChannelStage processed relevant normal maps.") else: - logger.debug(f"Asset '{context.asset_rule.name}': No normal maps found or processed in NormalMapGreenChannelStage.") + logger.debug(f"Asset '{asset_name_for_log}': No normal maps found or processed in NormalMapGreenChannelStage.") return context \ No newline at end of file diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index 358d3e1..a6edda9 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -3,10 +3,10 @@ import shutil from pathlib import Path from typing import List, Dict, Optional -from ..base_stage import ProcessingStage -from ...asset_context import AssetProcessingContext -from ....utils.path_utils import generate_path_from_pattern, sanitize_filename -from ....config import FileRule, MergeRule # Assuming these are needed for type hints if not directly in context +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from utils.path_utils import generate_path_from_pattern, sanitize_filename +from rule_structure import FileRule # Assuming these are needed for type hints if not directly in context logger = logging.getLogger(__name__) @@ -21,135 +21,218 @@ class OutputOrganizationStage(ProcessingStage): Copies temporary processed and merged files to their final output locations based on path patterns and updates AssetProcessingContext. """ - logger.debug(f"Asset '{context.asset_rule.name}': Starting output organization stage.") + asset_name_for_log = context.asset_rule.asset_name if hasattr(context, 'asset_rule') and context.asset_rule else "Unknown Asset" + logger.debug(f"Asset '{asset_name_for_log}': Starting output organization stage.") if context.status_flags.get('skip_asset'): - logger.info(f"Asset '{context.asset_rule.name}': Output organization skipped as 'skip_asset' is True.") + logger.info(f"Asset '{asset_name_for_log}': Output organization skipped as 'skip_asset' is True.") return context current_status = context.asset_metadata.get('status', '') if current_status.startswith("Failed") or current_status == "Skipped": - logger.info(f"Asset '{context.asset_rule.name}': Output organization skipped due to prior status: {current_status}.") + logger.info(f"Asset '{asset_name_for_log}': Output organization skipped due to prior status: {current_status}.") return context final_output_files: List[str] = [] - # Ensure config_obj and general_settings are present, provide default for overwrite_existing if not overwrite_existing = False - if context.config_obj and hasattr(context.config_obj, 'general_settings'): - overwrite_existing = context.config_obj.general_settings.overwrite_existing + # Correctly access general_settings and overwrite_existing from config_obj + if hasattr(context.config_obj, 'general_settings'): + if isinstance(context.config_obj.general_settings, dict): + overwrite_existing = context.config_obj.general_settings.get('overwrite_existing', False) + elif hasattr(context.config_obj.general_settings, 'overwrite_existing'): # If general_settings is an object + overwrite_existing = getattr(context.config_obj.general_settings, 'overwrite_existing', False) else: - logger.warning(f"Asset '{context.asset_rule.name}': config_obj.general_settings not found, defaulting overwrite_existing to False.") + logger.warning(f"Asset '{asset_name_for_log}': config_obj.general_settings not found, defaulting overwrite_existing to False.") + + output_dir_pattern = getattr(context.config_obj, 'output_directory_pattern', "[supplier]/[assetname]") + output_filename_pattern_config = getattr(context.config_obj, 'output_filename_pattern', "[assetname]_[maptype]_[resolution].[ext]") # A. Organize Processed Individual Maps if context.processed_maps_details: - logger.debug(f"Asset '{context.asset_rule.name}': Organizing {len(context.processed_maps_details)} processed individual map(s).") - for file_rule_id, details in context.processed_maps_details.items(): + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.processed_maps_details)} processed individual map(s).") + for processed_map_key, details in context.processed_maps_details.items(): # Use processed_map_key if details.get('status') != 'Processed' or not details.get('temp_processed_file'): - logger.debug(f"Asset '{context.asset_rule.name}': Skipping file_rule_id '{file_rule_id}' due to status '{details.get('status')}' or missing temp file.") + logger.debug(f"Asset '{asset_name_for_log}': Skipping processed map key '{processed_map_key}' due to status '{details.get('status')}' or missing temp file.") continue temp_file_path = Path(details['temp_processed_file']) - map_type = details['map_type'] + map_type = details.get('map_type', 'unknown_map_type') + resolution_str = details.get('processed_resolution_name', details.get('original_resolution_name', 'resX')) - output_filename = f"{context.asset_rule.name}_{sanitize_filename(map_type)}{temp_file_path.suffix}" - if context.asset_rule and context.asset_rule.file_rules: - current_file_rule: Optional[FileRule] = next( - (fr for fr in context.asset_rule.file_rules if fr.id == file_rule_id), None - ) - if current_file_rule and current_file_rule.output_filename_pattern: - output_filename = current_file_rule.output_filename_pattern + + # Construct token_data for path generation + token_data = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": map_type, + "resolution": resolution_str, + "ext": temp_file_path.suffix.lstrip('.'), # Get extension without dot + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} + # Generate filename first using its pattern + # output_filename = f"{asset_name_for_log}_{sanitize_filename(map_type)}{temp_file_path.suffix}" # Old way + output_filename = generate_path_from_pattern(output_filename_pattern_config, token_data_cleaned) + + try: - final_path_str = generate_path_from_pattern( - base_path=str(context.output_base_path), - pattern=context.asset_rule.output_path_pattern, - asset_name=context.asset_rule.name, - map_type=map_type, - filename=output_filename, - source_rule_name=context.source_rule.name if context.source_rule else "DefaultSource", - incrementing_value=str(context.incrementing_value) if context.incrementing_value is not None else None, - sha5_value=context.sha5_value + relative_dir_path_str = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_cleaned ) - final_path = Path(final_path_str) + final_path = Path(context.output_base_path) / Path(relative_dir_path_str) / Path(output_filename) final_path.parent.mkdir(parents=True, exist_ok=True) if final_path.exists() and not overwrite_existing: - logger.info(f"Asset '{context.asset_rule.name}': Output file {final_path} exists and overwrite is disabled. Skipping copy.") + logger.info(f"Asset '{asset_name_for_log}': Output file {final_path} exists and overwrite is disabled. Skipping copy.") else: shutil.copy2(temp_file_path, final_path) - logger.info(f"Asset '{context.asset_rule.name}': Copied {temp_file_path} to {final_path}") + logger.info(f"Asset '{asset_name_for_log}': Copied {temp_file_path} to {final_path}") final_output_files.append(str(final_path)) - context.processed_maps_details[file_rule_id]['final_output_path'] = str(final_path) - context.processed_maps_details[file_rule_id]['status'] = 'Organized' # Or some other status indicating completion + context.processed_maps_details[processed_map_key]['final_output_path'] = str(final_path) + context.processed_maps_details[processed_map_key]['status'] = 'Organized' except Exception as e: - logger.error(f"Asset '{context.asset_rule.name}': Failed to copy {temp_file_path} to {final_path_str if 'final_path_str' in locals() else 'unknown destination'} for file_rule_id '{file_rule_id}'. Error: {e}", exc_info=True) + logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} to destination for processed map key '{processed_map_key}'. Error: {e}", exc_info=True) context.status_flags['output_organization_error'] = True context.asset_metadata['status'] = "Failed (Output Organization Error)" - # Optionally update status in details as well - context.processed_maps_details[file_rule_id]['status'] = 'Organization Failed' + context.processed_maps_details[processed_map_key]['status'] = 'Organization Failed' else: - logger.debug(f"Asset '{context.asset_rule.name}': No processed individual maps to organize.") + logger.debug(f"Asset '{asset_name_for_log}': No processed individual maps to organize.") # B. Organize Merged Maps if context.merged_maps_details: - logger.debug(f"Asset '{context.asset_rule.name}': Organizing {len(context.merged_maps_details)} merged map(s).") - for merge_rule_id, details in context.merged_maps_details.items(): + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.merged_maps_details)} merged map(s).") + for merge_op_id, details in context.merged_maps_details.items(): # Use merge_op_id if details.get('status') != 'Processed' or not details.get('temp_merged_file'): - logger.debug(f"Asset '{context.asset_rule.name}': Skipping merge_rule_id '{merge_rule_id}' due to status '{details.get('status')}' or missing temp file.") + logger.debug(f"Asset '{asset_name_for_log}': Skipping merge op id '{merge_op_id}' due to status '{details.get('status')}' or missing temp file.") continue temp_file_path = Path(details['temp_merged_file']) - map_type = details['map_type'] # This is the output_map_type of the merge rule + map_type = details.get('map_type', 'unknown_merged_map') # This is the output_map_type of the merge rule + # Merged maps might not have a simple 'resolution' token like individual maps. + # We'll use a placeholder or derive if possible. + resolution_str = details.get('merged_resolution_name', 'mergedRes') - output_filename = f"{context.asset_rule.name}_{sanitize_filename(map_type)}{temp_file_path.suffix}" - if context.asset_rule and context.asset_rule.merge_rules: - current_merge_rule: Optional[MergeRule] = next( - (mr for mr in context.asset_rule.merge_rules if mr.id == merge_rule_id), None - ) - if current_merge_rule and current_merge_rule.output_filename_pattern: - output_filename = current_merge_rule.output_filename_pattern + + token_data_merged = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": map_type, + "resolution": resolution_str, + "ext": temp_file_path.suffix.lstrip('.'), + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_merged_cleaned = {k: v for k, v in token_data_merged.items() if v is not None} + + output_filename_merged = generate_path_from_pattern(output_filename_pattern_config, token_data_merged_cleaned) try: - final_path_str = generate_path_from_pattern( - base_path=str(context.output_base_path), - pattern=context.asset_rule.output_path_pattern, - asset_name=context.asset_rule.name, - map_type=map_type, - filename=output_filename, - source_rule_name=context.source_rule.name if context.source_rule else "DefaultSource", - incrementing_value=str(context.incrementing_value) if context.incrementing_value is not None else None, - sha5_value=context.sha5_value + relative_dir_path_str_merged = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_merged_cleaned ) - final_path = Path(final_path_str) - final_path.parent.mkdir(parents=True, exist_ok=True) + final_path_merged = Path(context.output_base_path) / Path(relative_dir_path_str_merged) / Path(output_filename_merged) + final_path_merged.parent.mkdir(parents=True, exist_ok=True) - if final_path.exists() and not overwrite_existing: - logger.info(f"Asset '{context.asset_rule.name}': Output file {final_path} exists and overwrite is disabled. Skipping copy for merged map.") + if final_path_merged.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': Output file {final_path_merged} exists and overwrite is disabled. Skipping copy for merged map.") else: - shutil.copy2(temp_file_path, final_path) - logger.info(f"Asset '{context.asset_rule.name}': Copied merged map {temp_file_path} to {final_path}") - final_output_files.append(str(final_path)) + shutil.copy2(temp_file_path, final_path_merged) + logger.info(f"Asset '{asset_name_for_log}': Copied merged map {temp_file_path} to {final_path_merged}") + final_output_files.append(str(final_path_merged)) - context.merged_maps_details[merge_rule_id]['final_output_path'] = str(final_path) - context.merged_maps_details[merge_rule_id]['status'] = 'Organized' + context.merged_maps_details[merge_op_id]['final_output_path'] = str(final_path_merged) + context.merged_maps_details[merge_op_id]['status'] = 'Organized' except Exception as e: - logger.error(f"Asset '{context.asset_rule.name}': Failed to copy merged map {temp_file_path} to {final_path_str if 'final_path_str' in locals() else 'unknown destination'} for merge_rule_id '{merge_rule_id}'. Error: {e}", exc_info=True) + logger.error(f"Asset '{asset_name_for_log}': Failed to copy merged map {temp_file_path} to destination for merge op id '{merge_op_id}'. Error: {e}", exc_info=True) context.status_flags['output_organization_error'] = True context.asset_metadata['status'] = "Failed (Output Organization Error)" - context.merged_maps_details[merge_rule_id]['status'] = 'Organization Failed' + context.merged_maps_details[merge_op_id]['status'] = 'Organization Failed' else: - logger.debug(f"Asset '{context.asset_rule.name}': No merged maps to organize.") + logger.debug(f"Asset '{asset_name_for_log}': No merged maps to organize.") + + # C. Organize Extra Files (e.g., previews, text files) + logger.debug(f"Asset '{asset_name_for_log}': Checking for EXTRA files to organize.") + extra_files_organized_count = 0 + if hasattr(context, 'files_to_process') and context.files_to_process: + extra_subdir_name = getattr(context.config_obj, 'extra_files_subdir', 'Extra') # Default to 'Extra' + + for file_rule in context.files_to_process: + if file_rule.item_type == 'EXTRA': + source_file_path = context.workspace_path / file_rule.file_path + if not source_file_path.is_file(): + logger.warning(f"Asset '{asset_name_for_log}': EXTRA file '{source_file_path}' not found. Skipping.") + continue + + # Basic token data for the asset's base output directory + # We don't use map_type, resolution, or ext for the base directory of extras. + # However, generate_path_from_pattern might expect them or handle their absence. + # For the base asset directory, only assetname and supplier are typically primary. + base_token_data = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + # Add other tokens if your output_directory_pattern uses them at the asset level + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + base_token_data_cleaned = {k: v for k, v in base_token_data.items() if v is not None} + + try: + asset_base_output_dir_str = generate_path_from_pattern( + pattern_string=output_dir_pattern, # Uses the same pattern as other maps for base dir + token_data=base_token_data_cleaned + ) + # Destination: /// + final_dest_path = (Path(context.output_base_path) / + Path(asset_base_output_dir_str) / + Path(extra_subdir_name) / + source_file_path.name) # Use original filename + + final_dest_path.parent.mkdir(parents=True, exist_ok=True) + + if final_dest_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': EXTRA file destination {final_dest_path} exists and overwrite is disabled. Skipping copy.") + else: + shutil.copy2(source_file_path, final_dest_path) + logger.info(f"Asset '{asset_name_for_log}': Copied EXTRA file {source_file_path} to {final_dest_path}") + final_output_files.append(str(final_dest_path)) + extra_files_organized_count += 1 + + # Optionally, add more detailed tracking for extra files in context.asset_metadata + # For example: + # if 'extra_files_details' not in context.asset_metadata: + # context.asset_metadata['extra_files_details'] = [] + # context.asset_metadata['extra_files_details'].append({ + # 'source_path': str(source_file_path), + # 'destination_path': str(final_dest_path), + # 'status': 'Organized' + # }) + + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy EXTRA file {source_file_path} to destination. Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error - Extra Files)" + # Optionally, update status for the specific file_rule if tracked + + if extra_files_organized_count > 0: + logger.info(f"Asset '{asset_name_for_log}': Successfully organized {extra_files_organized_count} EXTRA file(s).") + else: + logger.debug(f"Asset '{asset_name_for_log}': No EXTRA files were processed or found to organize.") + context.asset_metadata['final_output_files'] = final_output_files if context.status_flags.get('output_organization_error'): - logger.error(f"Asset '{context.asset_rule.name}': Output organization encountered errors. Status: {context.asset_metadata['status']}") + logger.error(f"Asset '{asset_name_for_log}': Output organization encountered errors. Status: {context.asset_metadata['status']}") else: - logger.info(f"Asset '{context.asset_rule.name}': Output organization complete. {len(final_output_files)} files placed.") + logger.info(f"Asset '{asset_name_for_log}': Output organization complete. {len(final_output_files)} files placed.") - logger.debug(f"Asset '{context.asset_rule.name}': Output organization stage finished.") + logger.debug(f"Asset '{asset_name_for_log}': Output organization stage finished.") return context \ No newline at end of file diff --git a/processing/pipeline/stages/supplier_determination.py b/processing/pipeline/stages/supplier_determination.py index ff60722..27a1a5e 100644 --- a/processing/pipeline/stages/supplier_determination.py +++ b/processing/pipeline/stages/supplier_determination.py @@ -20,29 +20,29 @@ class SupplierDeterminationStage(ProcessingStage): """ effective_supplier = None logger = logging.getLogger(__name__) # Using a logger specific to this module + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" - # 1. Check asset_rule.supplier_override - if context.asset_rule and context.asset_rule.supplier_override: - effective_supplier = context.asset_rule.supplier_override - logger.debug(f"Asset '{context.asset_rule.name}': Supplier override found: '{effective_supplier}'.") - - # 2. If not overridden, check source_rule.supplier - if not effective_supplier and context.source_rule and context.source_rule.supplier: - effective_supplier = context.source_rule.supplier - logger.debug(f"Asset '{context.asset_rule.name if context.asset_rule else 'Unknown'}': Source rule supplier found: '{effective_supplier}'.") + # 1. Check source_rule.supplier_override (highest precedence) + if context.source_rule and context.source_rule.supplier_override: + effective_supplier = context.source_rule.supplier_override + logger.debug(f"Asset '{asset_name_for_log}': Supplier override from source_rule found: '{effective_supplier}'.") + # 2. If not overridden, check source_rule.supplier_identifier + elif context.source_rule and context.source_rule.supplier_identifier: + effective_supplier = context.source_rule.supplier_identifier + logger.debug(f"Asset '{asset_name_for_log}': Supplier identifier from source_rule found: '{effective_supplier}'.") # 3. Validation if not effective_supplier: - asset_name = context.asset_rule.name if context.asset_rule else "Unknown Asset" - logger.error(f"Asset '{asset_name}': No supplier defined in asset rule or source rule.") + logger.error(f"Asset '{asset_name_for_log}': No supplier defined in source_rule (override or identifier).") context.effective_supplier = None if 'status_flags' not in context: # Ensure status_flags exists context.status_flags = {} context.status_flags['supplier_error'] = True - elif context.config_obj and effective_supplier not in context.config_obj.suppliers: - asset_name = context.asset_rule.name if context.asset_rule else "Unknown Asset" + # Assuming context.config_obj.suppliers is a valid way to get the list of configured suppliers. + # This might need further investigation if errors occur here later. + elif context.config_obj and hasattr(context.config_obj, 'suppliers') and effective_supplier not in context.config_obj.suppliers: logger.warning( - f"Asset '{asset_name}': Supplier '{effective_supplier}' not found in global supplier configuration. " + f"Asset '{asset_name_for_log}': Determined supplier '{effective_supplier}' not found in global supplier configuration. " f"Available: {list(context.config_obj.suppliers.keys()) if context.config_obj.suppliers else 'None'}" ) context.effective_supplier = None @@ -51,11 +51,10 @@ class SupplierDeterminationStage(ProcessingStage): context.status_flags['supplier_error'] = True else: context.effective_supplier = effective_supplier - asset_name = context.asset_rule.name if context.asset_rule else "Unknown Asset" - logger.info(f"Asset '{asset_name}': Effective supplier set to '{effective_supplier}'.") - # Optionally clear the error flag if previously set and now resolved, though current logic doesn't show this path. - # if 'status_flags' in context and 'supplier_error' in context.status_flags: - # del context.status_flags['supplier_error'] + logger.info(f"Asset '{asset_name_for_log}': Effective supplier set to '{effective_supplier}'.") + # Optionally clear the error flag if previously set and now resolved. + if 'supplier_error' in context.status_flags: + del context.status_flags['supplier_error'] return context \ No newline at end of file diff --git a/processing/utils/image_processing_utils.py b/processing/utils/image_processing_utils.py index 46768a8..0ea5cbf 100644 --- a/processing/utils/image_processing_utils.py +++ b/processing/utils/image_processing_utils.py @@ -25,6 +25,23 @@ def get_nearest_pot(value: int) -> int: else: return upper_pot +def get_nearest_power_of_two_downscale(value: int) -> int: + """ + Finds the nearest power of two that is less than or equal to the given value. + If the value is already a power of two, it returns the value itself. + Returns 1 if the value is less than 1. + """ + if value < 1: + return 1 + if is_power_of_two(value): + return value + # Find the largest power of two strictly less than value, + # unless value itself is POT. + # (1 << (value.bit_length() - 1)) achieves this. + # Example: value=7 (0111, bl=3), 1<<2 = 4. + # Example: value=8 (1000, bl=4), 1<<3 = 8. + # Example: value=9 (1001, bl=4), 1<<3 = 8. + return 1 << (value.bit_length() - 1) # --- Dimension Calculation --- def calculate_target_dimensions( diff --git a/processing_engine.py b/processing_engine.py index 779f6f8..968cb6c 100644 --- a/processing_engine.py +++ b/processing_engine.py @@ -12,7 +12,8 @@ from typing import List, Dict, Tuple, Optional, Set try: import cv2 import numpy as np -except ImportError: +except ImportError as e: + log.error(f"Failed to import cv2 or numpy in processing_engine.py: {e}", exc_info=True) print("ERROR: Missing required image processing libraries. Please install opencv-python and numpy:") print("pip install opencv-python numpy") # Allow import to fail but log error; execution will likely fail later @@ -25,8 +26,11 @@ try: from configuration import Configuration, ConfigurationError from rule_structure import SourceRule, AssetRule, FileRule from utils.path_utils import generate_path_from_pattern, sanitize_filename - from utils import image_processing_utils as ipu # Added import -except ImportError: + from processing.utils import image_processing_utils as ipu # Corrected import +except ImportError as e: + # Temporarily print to console as log might not be initialized yet + print(f"ERROR during initial imports in processing_engine.py: {e}") + # log.error(f"Failed to import Configuration or rule_structure classes in processing_engine.py: {e}", exc_info=True) # Log will be used after init print("ERROR: Cannot import Configuration or rule_structure classes.") print("Ensure configuration.py and rule_structure.py are in the same directory or Python path.") # Allow import to fail but log error; execution will likely fail later @@ -36,6 +40,12 @@ except ImportError: FileRule = None +# Initialize logger early +log = logging.getLogger(__name__) +# Basic config if logger hasn't been set up elsewhere (e.g., during testing) +if not log.hasHandlers(): + logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') + # Use logger defined in main.py (or configure one here if run standalone) from processing.pipeline.orchestrator import PipelineOrchestrator @@ -51,11 +61,6 @@ from processing.pipeline.stages.individual_map_processing import IndividualMapPr from processing.pipeline.stages.map_merging import MapMergingStage from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage from processing.pipeline.stages.output_organization import OutputOrganizationStage -log = logging.getLogger(__name__) -# Basic config if logger hasn't been set up elsewhere (e.g., during testing) -if not log.hasHandlers(): - logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') - # --- Custom Exception --- class ProcessingEngineError(Exception): diff --git a/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py b/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py index 0589585..198097b 100644 --- a/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py +++ b/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py @@ -6,7 +6,7 @@ import numpy as np from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage from processing.pipeline.asset_context import AssetProcessingContext -from rule_structure import AssetRule, SourceRule, FileRule, TransformSettings +from rule_structure import AssetRule, SourceRule, FileRule from configuration import Configuration, GeneralSettings import processing.utils.image_processing_utils as ipu # Ensure ipu is available for mocking diff --git a/tests/processing/pipeline/stages/test_individual_map_processing.py b/tests/processing/pipeline/stages/test_individual_map_processing.py index 2d78c00..8af2d19 100644 --- a/tests/processing/pipeline/stages/test_individual_map_processing.py +++ b/tests/processing/pipeline/stages/test_individual_map_processing.py @@ -7,7 +7,7 @@ from typing import Optional # Added for type hinting in helper functions from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage from processing.pipeline.asset_context import AssetProcessingContext -from rule_structure import AssetRule, SourceRule, FileRule, TransformSettings # Key models +from rule_structure import AssetRule, SourceRule, FileRule # Key models from configuration import Configuration, GeneralSettings # cv2 might be imported by the stage for interpolation constants, ensure it's mockable if so. # For now, assume ipu handles interpolation details. diff --git a/tests/processing/pipeline/stages/test_map_merging.py b/tests/processing/pipeline/stages/test_map_merging.py index d222f62..f7c0c56 100644 --- a/tests/processing/pipeline/stages/test_map_merging.py +++ b/tests/processing/pipeline/stages/test_map_merging.py @@ -7,7 +7,7 @@ from typing import Optional # Added Optional for type hinting from processing.pipeline.stages.map_merging import MapMergingStage from processing.pipeline.asset_context import AssetProcessingContext -from rule_structure import AssetRule, SourceRule, FileRule, MergeSettings, MergeInputChannel +from rule_structure import AssetRule, SourceRule, FileRule from configuration import Configuration # Mock Helper Functions -- 2.47.2 From beb8640085a34346503d13c763758e8e07a12942 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Fri, 9 May 2025 20:47:44 +0200 Subject: [PATCH 03/16] Futher changes to bring refactor up to feature parity + Updated Docs --- .vscode/settings.json | 2 +- Documentation/00_Overview.md | 4 +- .../02_Developer_Guide/01_Architecture.md | 51 +- .../02_Developer_Guide/03_Key_Components.md | 78 ++- .../05_Processing_Pipeline.md | 109 ++-- Presets/Dinesen.json | 2 +- Presets/Poliigon.json | 2 +- config/app_settings.json | 11 +- .../stages/individual_map_processing.py | 547 +++++++++++++++--- processing/pipeline/stages/map_merging.py | 25 +- .../stages/metadata_finalization_save.py | 22 +- .../pipeline/stages/output_organization.py | 241 ++++++-- processing/utils/image_processing_utils.py | 74 ++- 13 files changed, 898 insertions(+), 270 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index c21d2a5..33d89d2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,6 +8,6 @@ ".vscode": true, ".vs": true, ".lh": true, - "__pycache__": true, + "__pycache__": true } } \ No newline at end of file diff --git a/Documentation/00_Overview.md b/Documentation/00_Overview.md index 897442c..177ecf0 100644 --- a/Documentation/00_Overview.md +++ b/Documentation/00_Overview.md @@ -12,9 +12,9 @@ This documentation strictly excludes details on environment setup, dependency in ## Architecture and Codebase Summary -For developers interested in contributing, the tool's architecture centers on a **Core Processing Engine** (`processing_engine.py`) executing a pipeline based on a **Hierarchical Rule System** (`rule_structure.py`) and a **Configuration System** (`configuration.py` loading `config/app_settings.json` and `Presets/*.json`). The **Graphical User Interface** (`gui/`) has been significantly refactored: `MainWindow` (`main_window.py`) acts as a coordinator, delegating tasks to specialized widgets (`MainPanelWidget`, `PresetEditorWidget`, `LogConsoleWidget`) and background handlers (`RuleBasedPredictionHandler`, `LLMPredictionHandler`, `LLMInteractionHandler`, `AssetRestructureHandler`). The **Directory Monitor** (`monitor.py`) now processes archives asynchronously using a thread pool and utility functions (`utils/prediction_utils.py`, `utils/workspace_utils.py`). The **Command-Line Interface** entry point (`main.py`) primarily launches the GUI, with core CLI functionality currently non-operational. Optional **Blender Integration** (`blenderscripts/`) remains. A new `utils/` directory houses shared helper functions. +For developers interested in contributing, the tool's architecture centers on a **Core Processing Engine** (`processing_engine.py`) which initializes and runs a **Pipeline Orchestrator** (`processing/pipeline/orchestrator.py::PipelineOrchestrator`). This orchestrator executes a defined sequence of **Processing Stages** (located in `processing/pipeline/stages/`) based on a **Hierarchical Rule System** (`rule_structure.py`) and a **Configuration System** (`configuration.py` loading `config/app_settings.json` and `Presets/*.json`). The **Graphical User Interface** (`gui/`) has been significantly refactored: `MainWindow` (`main_window.py`) acts as a coordinator, delegating tasks to specialized widgets (`MainPanelWidget`, `PresetEditorWidget`, `LogConsoleWidget`) and background handlers (`RuleBasedPredictionHandler`, `LLMPredictionHandler`, `LLMInteractionHandler`, `AssetRestructureHandler`). The **Directory Monitor** (`monitor.py`) now processes archives asynchronously using a thread pool and utility functions (`utils/prediction_utils.py`, `utils/workspace_utils.py`). The **Command-Line Interface** entry point (`main.py`) primarily launches the GUI, with core CLI functionality currently non-operational. Optional **Blender Integration** (`blenderscripts/`) remains. A new `utils/` directory houses shared helper functions. -The codebase reflects this structure. The `gui/` directory contains the refactored UI components, `utils/` holds shared utilities, `Presets/` contains JSON presets, and `blenderscripts/` holds Blender scripts. Core logic resides in `processing_engine.py`, `configuration.py`, `rule_structure.py`, `monitor.py`, and `main.py`. The processing pipeline, executed by `processing_engine.py`, relies entirely on the input `SourceRule` and static configuration for steps like map processing, channel merging, and metadata generation. +The codebase reflects this structure. The `gui/` directory contains the refactored UI components, `utils/` holds shared utilities, `processing/pipeline/` contains the orchestrator and individual processing stages, `Presets/` contains JSON presets, and `blenderscripts/` holds Blender scripts. Core logic resides in `processing_engine.py`, `processing/pipeline/orchestrator.py`, `configuration.py`, `rule_structure.py`, `monitor.py`, and `main.py`. The processing pipeline, initiated by `processing_engine.py` and executed by the `PipelineOrchestrator`, relies entirely on the input `SourceRule` and static configuration. Each stage in the pipeline operates on an `AssetProcessingContext` object (`processing/pipeline/asset_context.py`) to perform specific tasks like map processing, channel merging, and metadata generation. ## Table of Contents diff --git a/Documentation/02_Developer_Guide/01_Architecture.md b/Documentation/02_Developer_Guide/01_Architecture.md index 4ff04e9..24d888d 100644 --- a/Documentation/02_Developer_Guide/01_Architecture.md +++ b/Documentation/02_Developer_Guide/01_Architecture.md @@ -6,17 +6,19 @@ This document provides a high-level overview of the Asset Processor Tool's archi The Asset Processor Tool is designed to process 3D asset source files into a standardized library format. Its high-level architecture consists of: -1. **Core Processing Engine (`processing_engine.py`):** The primary component responsible for executing the asset processing pipeline for a single input asset based on a provided `SourceRule` object and static configuration. The previous `asset_processor.py` has been removed. -2. **Prediction System:** Responsible for analyzing input files and generating the initial `SourceRule` hierarchy with predicted values. This system utilizes a base handler (`gui/base_prediction_handler.py::BasePredictionHandler`) with specific implementations: +1. **Core Processing Initiation (`processing_engine.py`):** The `ProcessingEngine` class acts as the entry point for an asset processing task. It initializes and runs a `PipelineOrchestrator`. +2. **Pipeline Orchestration (`processing/pipeline/orchestrator.py`):** The `PipelineOrchestrator` manages a sequence of discrete processing stages. It creates an `AssetProcessingContext` for each asset and passes this context through each stage. +3. **Processing Stages (`processing/pipeline/stages/`):** Individual modules, each responsible for a specific task in the pipeline (e.g., filtering files, processing maps, merging channels, organizing output). They operate on the `AssetProcessingContext`. +4. **Prediction System:** Responsible for analyzing input files and generating the initial `SourceRule` hierarchy with predicted values. This system utilizes a base handler (`gui/base_prediction_handler.py::BasePredictionHandler`) with specific implementations: * **Rule-Based Predictor (`gui/prediction_handler.py::RuleBasedPredictionHandler`):** Uses predefined rules from presets to classify files and determine initial processing parameters. * **LLM Predictor (`gui/llm_prediction_handler.py::LLMPredictionHandler`):** An experimental alternative that uses a Large Language Model (LLM) to interpret file contents and context to predict processing parameters. -3. **Configuration System (`Configuration`):** Handles loading core settings (including centralized type definitions and LLM-specific configuration) and merging them with supplier-specific rules defined in JSON presets and the persistent `config/suppliers.json` file. -4. **Multiple Interfaces:** Provides different ways to interact with the tool: +5. **Configuration System (`Configuration`):** Handles loading core settings (including centralized type definitions and LLM-specific configuration) and merging them with supplier-specific rules defined in JSON presets and the persistent `config/suppliers.json` file. +6. **Multiple Interfaces:** Provides different ways to interact with the tool: * Graphical User Interface (GUI) * Command-Line Interface (CLI) - *Note: The primary CLI execution logic (`run_cli` in `main.py`) is currently non-functional/commented out post-refactoring.* * Directory Monitor for automated processing. -The GUI acts as the primary source of truth for processing rules, coordinating the generation and management of the `SourceRule` hierarchy before sending it to the processing engine. It accumulates prediction results from multiple input sources before updating the view. The Monitor interface can also generate `SourceRule` objects (using `utils/prediction_utils.py`) to bypass the GUI for automated workflows. -5. **Optional Integration:** Includes scripts (`blenderscripts/`) for integrating with Blender. Logic for executing these scripts was intended to be centralized in `utils/blender_utils.py`, but this utility has not yet been implemented. +The GUI acts as the primary source of truth for processing rules, coordinating the generation and management of the `SourceRule` hierarchy before sending it to the `ProcessingEngine`. It accumulates prediction results from multiple input sources before updating the view. The Monitor interface can also generate `SourceRule` objects (using `utils/prediction_utils.py`) to bypass the GUI for automated workflows. +7. **Optional Integration:** Includes scripts (`blenderscripts/`) for integrating with Blender. Logic for executing these scripts was intended to be centralized in `utils/blender_utils.py`, but this utility has not yet been implemented. ## Hierarchical Rule System @@ -26,14 +28,14 @@ A key addition to the architecture is the **Hierarchical Rule System**, which pr * **AssetRule:** Represents rules applied to a specific asset within a source (a source can contain multiple assets). * **FileRule:** Represents rules applied to individual files within an asset. -This hierarchy allows for fine-grained control over processing parameters. The GUI's prediction logic generates this hierarchy with initial predicted values for overridable fields based on presets and file analysis. The processing engine then operates *solely* on the explicit values provided in this `SourceRule` object and static configuration, without internal prediction or fallback logic. +This hierarchy allows for fine-grained control over processing parameters. The GUI's prediction logic generates this hierarchy with initial predicted values for overridable fields based on presets and file analysis. The `ProcessingEngine` (via the `PipelineOrchestrator` and its stages) then operates *solely* on the explicit values provided in this `SourceRule` object and static configuration, without internal prediction or fallback logic. ## Core Components * `config/app_settings.json`: Defines core, global settings, constants, and centralized definitions for allowed asset and file types (`ASSET_TYPE_DEFINITIONS`, `FILE_TYPE_DEFINITIONS`), including metadata like colors and descriptions. This replaces the old `config.py` file. * `config/suppliers.json`: A persistent JSON file storing known supplier names for GUI auto-completion. * `Presets/*.json`: Supplier-specific JSON files defining rules for file interpretation and initial prediction. -* `configuration.py` (`Configuration` class): Loads `config/app_settings.json` settings and merges them with a selected preset, pre-compiling regex patterns for efficiency. This static configuration is used by the processing engine. +* `configuration.py` (`Configuration` class): Loads `config/app_settings.json` settings and merges them with a selected preset, pre-compiling regex patterns for efficiency. This static configuration is used by the processing pipeline. * `rule_structure.py`: Defines the `SourceRule`, `AssetRule`, and `FileRule` dataclasses used to represent the hierarchical processing rules. * `gui/`: Directory containing modules for the Graphical User Interface (GUI), built with PySide6. The `MainWindow` (`main_window.py`) acts as a coordinator, orchestrating interactions between various components. Key GUI components include: * `main_panel_widget.py::MainPanelWidget`: Contains the primary controls for loading sources, selecting presets, viewing/editing rules, and initiating processing. @@ -47,7 +49,10 @@ This hierarchy allows for fine-grained control over processing parameters. The G * `prediction_handler.py::RuleBasedPredictionHandler`: Generates the initial `SourceRule` hierarchy based on presets and file analysis. Inherits from `BasePredictionHandler`. * `llm_prediction_handler.py::LLMPredictionHandler`: Experimental predictor using an LLM. Inherits from `BasePredictionHandler`. * `llm_interaction_handler.py::LLMInteractionHandler`: Manages communication with the LLM service for the LLM predictor. -* `processing_engine.py` (`ProcessingEngine` class): The core component that executes the processing pipeline for a single `SourceRule` object using the static `Configuration`. A new instance is created per task for state isolation. +* `processing_engine.py` (`ProcessingEngine` class): The entry-point class that initializes and runs the `PipelineOrchestrator` for a given `SourceRule` and `Configuration`. +* `processing/pipeline/orchestrator.py` (`PipelineOrchestrator` class): Manages the sequence of processing stages, creating and passing an `AssetProcessingContext` through them. +* `processing/pipeline/asset_context.py` (`AssetProcessingContext` class): A dataclass holding all data and state for the processing of a single asset, passed between stages. +* `processing/pipeline/stages/`: Directory containing individual processing stage modules, each handling a specific part of the pipeline (e.g., `IndividualMapProcessingStage`, `MapMergingStage`). * `main.py`: The main entry point for the application. Primarily launches the GUI. Contains commented-out/non-functional CLI logic (`run_cli`). * `monitor.py`: Implements the directory monitoring feature using `watchdog`. It now processes archives asynchronously using a `ThreadPoolExecutor`, leveraging `utils.prediction_utils.py` for rule generation and `utils.workspace_utils.py` for workspace management before invoking the `ProcessingEngine`. * `blenderscripts/`: Contains Python scripts designed to be executed *within* Blender for post-processing tasks. @@ -56,19 +61,21 @@ This hierarchy allows for fine-grained control over processing parameters. The G * `prediction_utils.py`: Contains functions like `generate_source_rule_from_archive` used by the monitor for rule-based prediction. * `blender_utils.py`: (Intended location for Blender script execution logic, currently not implemented). -## Processing Pipeline (Simplified) +## Processing Pipeline (Simplified Overview) -The primary processing engine (`processing_engine.py`) executes a series of steps for each asset based on the provided `SourceRule` object and static configuration: +The asset processing pipeline, initiated by `processing_engine.py` and managed by `PipelineOrchestrator`, executes a series of stages for each asset defined in the `SourceRule`. An `AssetProcessingContext` object carries data between stages. The typical sequence is: -1. Extraction of input to a temporary workspace (using `utils.workspace_utils.py`). -2. Classification of files (map, model, extra, ignored, unrecognised) based *only* on the provided `SourceRule` object (classification/prediction happens *before* the engine is called). -3. Determination of base metadata (asset name, category, archetype). -4. Skip check if output exists and overwrite is not forced. -5. Processing of maps (resize, format/bit depth conversion, inversion, stats calculation). -6. Merging of channels based on rules. -7. Generation of `metadata.json` file. -8. Organization of processed files into the final output structure. -9. Cleanup of the temporary workspace. -10. (Optional) Execution of Blender scripts (currently triggered directly, intended to use `utils.blender_utils.py`). +1. **Supplier Determination**: Identify the effective supplier. +2. **Asset Skip Logic**: Check if the asset should be skipped. +3. **Metadata Initialization**: Set up initial asset metadata. +4. **File Rule Filtering**: Determine which files to process. +5. **Pre-Map Processing**: + * Gloss-to-Roughness Conversion. + * Alpha Channel Extraction. + * Normal Map Green Channel Inversion. +6. **Individual Map Processing**: Handle individual maps (scaling, variants, stats, naming). +7. **Map Merging**: Combine channels from different maps. +8. **Metadata Finalization & Save**: Generate and save `metadata.json` (temporarily). +9. **Output Organization**: Copy all processed files to final output locations. -This architecture allows for a modular design, separating configuration, rule generation/management (GUI, Monitor utilities), and core processing execution. The `SourceRule` object serves as a clear data contract between the rule generation layer and the processing engine. Parallel processing (in Monitor) and background threads (in GUI) are utilized for efficiency and responsiveness. \ No newline at end of file +External steps like workspace preparation/cleanup and optional Blender script execution bracket this core pipeline. This architecture allows for a modular design, separating configuration, rule generation/management, and core processing execution. \ No newline at end of file diff --git a/Documentation/02_Developer_Guide/03_Key_Components.md b/Documentation/02_Developer_Guide/03_Key_Components.md index 11fb683..95f1c17 100644 --- a/Documentation/02_Developer_Guide/03_Key_Components.md +++ b/Documentation/02_Developer_Guide/03_Key_Components.md @@ -2,17 +2,65 @@ This document describes the major classes and modules that form the core of the Asset Processor Tool. -## `ProcessingEngine` (`processing_engine.py`) +## Core Processing Architecture -The `ProcessingEngine` class is the new core component responsible for executing the asset processing pipeline for a *single* input asset. Unlike the older `AssetProcessor`, this engine operates *solely* based on a complete `SourceRule` object provided to its `process()` method and the static `Configuration` object passed during initialization. It contains no internal prediction, classification, or fallback logic. Its key responsibilities include: +The asset processing pipeline has been refactored into a staged architecture, managed by an orchestrator. -* Setting up and cleaning up a temporary workspace for processing (potentially using `utils.workspace_utils`). -* Extracting or copying input files to the workspace. -* Processing files based on the explicit rules and predicted values contained within the input `SourceRule`. -* Processing texture maps (resizing, format/bit depth conversion, inversion, stats calculation) using parameters from the `SourceRule` or static `Configuration`. -* Merging channels based on rules defined in the static `Configuration` and parameters from the `SourceRule`. -* Generating the `metadata.json` file containing details about the processed asset, incorporating information from the `SourceRule`. -* Organizing the final output files into the structured library directory. +### `ProcessingEngine` (`processing_engine.py`) + +The `ProcessingEngine` class serves as the primary entry point for initiating an asset processing task. Its main responsibilities are: + +* Initializing a `PipelineOrchestrator` instance. +* Providing the `PipelineOrchestrator` with the global `Configuration` object and a predefined list of processing stages. +* Invoking the orchestrator's `process_source_rule()` method with the input `SourceRule`, workspace path, output path, and other processing parameters. +* Managing a top-level temporary directory for the engine's operations if needed, though individual stages might also use sub-temporary directories via the `AssetProcessingContext`. + +It no longer contains the detailed logic for each processing step (like map manipulation, merging, etc.) directly. Instead, it delegates these tasks to the orchestrator and its stages. + +### `PipelineOrchestrator` (`processing/pipeline/orchestrator.py`) + +The `PipelineOrchestrator` class is responsible for managing the execution of the asset processing pipeline. Its key functions include: + +* Receiving a `SourceRule` object, `Configuration`, and a list of `ProcessingStage` objects. +* For each `AssetRule` within the `SourceRule`: + * Creating an `AssetProcessingContext` instance. + * Sequentially executing each registered `ProcessingStage`, passing the `AssetProcessingContext` to each stage. + * Handling exceptions that occur within stages and managing the overall status of asset processing (processed, skipped, failed). +* Managing a temporary directory for the duration of a `SourceRule` processing, which is made available to stages via the `AssetProcessingContext`. + +### `AssetProcessingContext` (`processing/pipeline/asset_context.py`) + +The `AssetProcessingContext` is a dataclass that acts as a stateful container for all data related to the processing of a single `AssetRule`. An instance of this context is created by the `PipelineOrchestrator` for each asset and is passed through each processing stage. Key information it holds includes: + +* The input `SourceRule` and the current `AssetRule`. +* Paths: `workspace_path`, `engine_temp_dir`, `output_base_path`. +* The `Configuration` object. +* `effective_supplier`: Determined by an early stage. +* `asset_metadata`: A dictionary to accumulate metadata about the asset. +* `processed_maps_details`: Stores details about individually processed maps (paths, dimensions, etc.). +* `merged_maps_details`: Stores details about merged maps. +* `files_to_process`: A list of `FileRule` objects to be processed for the current asset. +* `loaded_data_cache`: For caching loaded image data within an asset's processing. +* `status_flags`: For signaling conditions like `skip_asset` or `asset_failed`. +* `incrementing_value`, `sha5_value`: Optional values for path generation. + +Each stage reads from and writes to this context, allowing data and state to flow through the pipeline. + +### `Processing Stages` (`processing/pipeline/stages/`) + +The actual processing logic is broken down into a series of discrete stages, each inheriting from `ProcessingStage` (`processing/pipeline/stages/base_stage.py`). Each stage implements an `execute(context: AssetProcessingContext)` method. Key stages include (in typical execution order): + +* **`SupplierDeterminationStage`**: Determines the effective supplier. +* **`AssetSkipLogicStage`**: Checks if the asset processing should be skipped. +* **`MetadataInitializationStage`**: Initializes basic asset metadata. +* **`FileRuleFilterStage`**: Filters `FileRule`s to decide which files to process. +* **`GlossToRoughConversionStage`**: Handles gloss-to-roughness map inversion. +* **`AlphaExtractionToMaskStage`**: Extracts alpha channels to create masks. +* **`NormalMapGreenChannelStage`**: Inverts normal map green channels if required. +* **`IndividualMapProcessingStage`**: Processes individual maps (POT scaling, resolution variants, color conversion, stats, aspect ratio, filename conventions). +* **`MapMergingStage`**: Merges map channels based on rules. +* **`MetadataFinalizationAndSaveStage`**: Collects all metadata and saves `metadata.json` to a temporary location. +* **`OutputOrganizationStage`**: Copies all processed files and metadata to the final output directory structure. ## `Rule Structure` (`rule_structure.py`) @@ -22,19 +70,19 @@ This module defines the data structures used to represent the hierarchical proce * `AssetRule`: A dataclass representing rules applied at the asset level. It contains nested `FileRule` objects. * `FileRule`: A dataclass representing rules applied at the file level. -These classes hold specific rule parameters (e.g., `supplier_identifier`, `asset_type`, `asset_type_override`, `item_type`, `item_type_override`, `target_asset_name_override`). Attributes like `asset_type` and `item_type_override` now use string types, which are validated against centralized lists in `config/app_settings.json`. These structures support serialization (Pickle, JSON) to allow them to be passed between different parts of the application, including across process boundaries. +These classes hold specific rule parameters (e.g., `supplier_identifier`, `asset_type`, `asset_type_override`, `item_type`, `item_type_override`, `target_asset_name_override`, `resolution_override`, `channel_merge_instructions`). Attributes like `asset_type` and `item_type_override` now use string types, which are validated against centralized lists in `config/app_settings.json`. These structures support serialization (Pickle, JSON) to allow them to be passed between different parts of theapplication, including across process boundaries. The `PipelineOrchestrator` and its stages heavily rely on the information within these rule objects, passed via the `AssetProcessingContext`. ## `Configuration` (`configuration.py`) The `Configuration` class manages the tool's settings. It is responsible for: -* Loading the core default settings defined in `config/app_settings.json`. +* Loading the core default settings defined in `config/app_settings.json` (e.g., `FILE_TYPE_DEFINITIONS`, `ASSET_TYPE_DEFINITIONS`, `image_resolutions`, `map_merge_rules`, `output_filename_pattern`). * Loading the supplier-specific rules from a selected preset JSON file (`Presets/*.json`). * Merging the core settings and preset rules into a single, unified configuration object. * Validating the loaded configuration to ensure required settings are present. -* Pre-compiling regular expression patterns defined in the preset for efficient file classification by the `PredictionHandler`. +* Pre-compiling regular expression patterns defined in the preset for efficient file classification by the prediction handlers. -An instance of the `Configuration` class is typically created once per application run (or per processing batch) and passed to the `ProcessingEngine`. +An instance of the `Configuration` class is typically created once per application run (or per processing batch) and passed to the `ProcessingEngine`, which then makes it available to the `PipelineOrchestrator` and subsequently to each stage via the `AssetProcessingContext`. ## GUI Components (`gui/`) @@ -191,10 +239,10 @@ The `monitor.py` script implements the directory monitoring feature. It has been * Loads the necessary `Configuration`. * Calls `utils.prediction_utils.generate_source_rule_from_archive` to get the `SourceRule`. * Calls `utils.workspace_utils.prepare_processing_workspace` to set up the workspace. - * Instantiates and runs the `ProcessingEngine`. + * Instantiates and runs the `ProcessingEngine` (which in turn uses the `PipelineOrchestrator`). * Handles moving the source archive to 'processed' or 'error' directories. * Cleans up the workspace. ## Summary -These key components, along with the refactored GUI structure and new utility modules, work together to provide the tool's functionality. The architecture emphasizes separation of concerns (configuration, rule generation, processing, UI), utilizes background processing for responsiveness (GUI prediction, Monitor tasks), and relies on the `SourceRule` object as the central data structure passed between different stages of the workflow. \ No newline at end of file +These key components, along with the refactored GUI structure and new utility modules, work together to provide the tool's functionality. The architecture emphasizes separation of concerns (configuration, rule generation, processing, UI), utilizes background processing for responsiveness (GUI prediction, Monitor tasks), and relies on the `SourceRule` object as the central data structure passed between different stages of the workflow. The processing core is now a staged pipeline managed by the `PipelineOrchestrator`, enhancing modularity and maintainability. \ No newline at end of file diff --git a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md index 14d801c..ccff23a 100644 --- a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md +++ b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md @@ -1,72 +1,69 @@ # Developer Guide: Processing Pipeline -This document details the step-by-step technical process executed by the `ProcessingEngine` class (`processing_engine.py`) when processing a single asset. A new instance of `ProcessingEngine` is created for each processing task to ensure state isolation. +This document details the step-by-step technical process executed by the asset processing pipeline, which is initiated by the `ProcessingEngine` class (`processing_engine.py`) and orchestrated by the `PipelineOrchestrator` (`processing/pipeline/orchestrator.py`). -The `ProcessingEngine.process()` method orchestrates the following pipeline based *solely* on the provided `SourceRule` object and the static `Configuration` object passed during engine initialization. It contains no internal prediction, classification, or fallback logic. All necessary overrides and static configuration values are accessed directly from these inputs. +The `ProcessingEngine.process()` method serves as the main entry point. It initializes a `PipelineOrchestrator` instance, providing it with the application's `Configuration` object and a predefined list of processing stages. The `PipelineOrchestrator.process_source_rule()` method then manages the execution of these stages for each asset defined in the input `SourceRule`. -The pipeline steps are: +A crucial component in this architecture is the `AssetProcessingContext` (`processing/pipeline/asset_context.py`). An instance of this dataclass is created for each `AssetRule` being processed. It acts as a stateful container, carrying all relevant data (source files, rules, configuration, intermediate results, metadata) and is passed sequentially through each stage. Each stage can read from and write to the context, allowing data to flow and be modified throughout the pipeline. -1. **Workspace Preparation (External)**: - * Before the `ProcessingEngine` is invoked, the calling code (e.g., `main.ProcessingTask`, `monitor._process_archive_task`) is responsible for setting up a temporary workspace. - * This typically involves using `utils.workspace_utils.prepare_processing_workspace`, which creates a temporary directory and extracts the input source (archive or folder) into it. - * The path to this prepared workspace is passed to the `ProcessingEngine` during initialization. +The pipeline stages are executed in the following order: -2. **Prediction and Rule Generation (External)**: - * Also handled before the `ProcessingEngine` is invoked. - * Either the `RuleBasedPredictionHandler`, `LLMPredictionHandler` (triggered by the GUI), or `utils.prediction_utils.generate_source_rule_from_archive` (used by the Monitor) analyzes the input files and generates a `SourceRule` object. - * This `SourceRule` contains predicted classifications and initial overrides. - * If using the GUI, the user can modify these rules. - * The final `SourceRule` object is the primary input to the `ProcessingEngine.process()` method. +1. **`SupplierDeterminationStage` (`processing/pipeline/stages/supplier_determination.py`)**: + * **Responsibility**: Determines the effective supplier for the asset based on the `SourceRule`'s `supplier_identifier`, `supplier_override`, and supplier definitions in the `Configuration`. + * **Context Interaction**: Updates `AssetProcessingContext.effective_supplier` and potentially `AssetProcessingContext.asset_metadata` with supplier information. -3. **File Inventory (`_inventory_and_classify_files`)**: - * Scans the contents of the *already prepared* temporary workspace. - * This step primarily inventories the files present. The *classification* (determining `item_type`, etc.) is taken directly from the input `SourceRule`. The `item_type` for each file (within the `FileRule` objects of the `SourceRule`) is expected to be a key from `Configuration.FILE_TYPE_DEFINITIONS`. - * Stores the file paths and their associated rules from the `SourceRule` in `self.classified_files`. +2. **`AssetSkipLogicStage` (`processing/pipeline/stages/asset_skip_logic.py`)**: + * **Responsibility**: Checks if the asset should be skipped, typically if the output already exists and overwriting is not forced. + * **Context Interaction**: Sets `AssetProcessingContext.status_flags['skip_asset']` to `True` if the asset should be skipped, halting further processing for this asset by the orchestrator. -4. **Base Metadata Determination (`_determine_base_metadata`, `_determine_single_asset_metadata`)**: - * Determines the base asset name, category, and archetype using the explicit values provided in the input `SourceRule` and the static `Configuration`. Overrides (like `supplier_identifier`, `asset_type`, `asset_name_override`) are taken directly from the `SourceRule`. The `asset_type` (within the `AssetRule` object of the `SourceRule`) is expected to be a key from `Configuration.ASSET_TYPE_DEFINITIONS`. +3. **`MetadataInitializationStage` (`processing/pipeline/stages/metadata_initialization.py`)**: + * **Responsibility**: Initializes the `AssetProcessingContext.asset_metadata` dictionary with base information derived from the `AssetRule`, `SourceRule`, and `Configuration`. This includes asset name, type, and any common metadata. + * **Context Interaction**: Populates `AssetProcessingContext.asset_metadata`. -5. **Skip Check**: - * If the `overwrite` flag is `False`, checks if the final output directory already exists and contains `metadata.json`. - * If so, processing for this asset is skipped. +4. **`FileRuleFilterStage` (`processing/pipeline/stages/file_rule_filter.py`)**: + * **Responsibility**: Filters the `FileRule` objects from the `AssetRule` to determine which files should actually be processed. It respects `FILE_IGNORE` rules. + * **Context Interaction**: Populates `AssetProcessingContext.files_to_process` with the list of `FileRule` objects that passed the filter. -6. **Map Processing (`_process_maps`)**: - * Iterates through files classified as maps in the `SourceRule`. - * Loads images (`cv2.imread`). - * **Glossiness-to-Roughness Inversion**: - * The system identifies a map as a gloss map if its input filename contains "MAP_GLOSS" (case-insensitive) and is intended to become a roughness map (e.g., its `item_type` or `item_type_override` in the `SourceRule` effectively designates it as roughness). - * If these conditions are met, its colors are inverted. - * After inversion, the map is treated as a "MAP_ROUGH" type for subsequent processing steps. - * The fact that a map was derived from a gloss source and inverted is recorded in the output `metadata.json` for that map type using the `derived_from_gloss_filename: true` flag. This replaces the previous reliance on an internal `is_gloss_source` flag within the `FileRule` structure. - * Resizes images based on `Configuration`. - * Determines output bit depth and format based on `Configuration` and `SourceRule`. - * Converts data types and saves images (`cv2.imwrite`). -* The output filename uses the `standard_type` alias (e.g., `COL`, `NRM`) retrieved from the `Configuration.FILE_TYPE_DEFINITIONS` based on the file's effective `item_type`. - * Calculates image statistics. - * Stores processed map details. +5. **`GlossToRoughConversionStage` (`processing/pipeline/stages/gloss_to_rough_conversion.py`)**: + * **Responsibility**: Identifies gloss maps (based on `FileRule` properties and filename conventions) that are intended to be used as roughness maps. If found, it loads the image, inverts its colors, and saves a temporary inverted version. + * **Context Interaction**: Modifies `FileRule` objects in `AssetProcessingContext.files_to_process` (e.g., updates `file_path` to point to the temporary inverted map, sets flags indicating inversion). Updates `AssetProcessingContext.processed_maps_details` with information about the conversion. -7. **Map Merging (`_merge_maps_from_source`)**: - * Iterates through `MAP_MERGE_RULES` in `Configuration`. - * Identifies required source maps by checking the `item_type_override` within the `SourceRule` (specifically in the `FileRule` for each file). Both `item_type` and `item_type_override` are expected to be keys from `Configuration.FILE_TYPE_DEFINITIONS`. Files with a base `item_type` of `"FILE_IGNORE"` are explicitly excluded from consideration. - * Loads source channels, handling missing inputs with defaults from `Configuration` or `SourceRule`. - * Merges channels (`cv2.merge`). - * Determines output format/bit depth and saves the merged map. - * Stores merged map details. +6. **`AlphaExtractionToMaskStage` (`processing/pipeline/stages/alpha_extraction_to_mask.py`)**: + * **Responsibility**: If a `FileRule` specifies alpha channel extraction (e.g., from a diffuse map to create an opacity mask), this stage loads the source image, extracts its alpha channel, and saves it as a new temporary grayscale map. + * **Context Interaction**: May add new `FileRule`-like entries or details to `AssetProcessingContext.processed_maps_details` representing the extracted mask. -8. **Metadata File Generation (`_generate_metadata_file`)**: - * Collects asset metadata, processed/merged map details, ignored files list, etc., primarily from the `SourceRule` and internal processing results. - * Writes data to `metadata.json` in the temporary workspace. +7. **`NormalMapGreenChannelStage` (`processing/pipeline/stages/normal_map_green_channel.py`)**: + * **Responsibility**: Checks `FileRule`s for normal maps and, based on configuration (e.g., `invert_normal_map_green_channel` for a specific supplier), potentially inverts the green channel of the normal map image. + * **Context Interaction**: Modifies the image data for normal maps if inversion is needed, saving a new temporary version. Updates `AssetProcessingContext.processed_maps_details`. -9. **Output Organization (`_organize_output_files`)**: -* Determines the final output directory using the global `OUTPUT_DIRECTORY_PATTERN` and the final filename using the global `OUTPUT_FILENAME_PATTERN` (both from the `Configuration` object). The `utils.path_utils` module combines these with the base output directory and asset-specific data (like asset name, map type, resolution, etc.) to construct the full path for each file. - * Creates the final structured output directory (`///`), using the supplier name from the `SourceRule`. - * Moves processed maps, merged maps, models, metadata, and other classified files from the temporary workspace to the final output directory. +8. **`IndividualMapProcessingStage` (`processing/pipeline/stages/individual_map_processing.py`)**: + * **Responsibility**: Processes individual texture map files. This includes: + * Loading the source image. + * Applying Power-of-Two (POT) scaling. + * Generating multiple resolution variants based on configuration. + * Handling color space conversions (e.g., BGR to RGB). + * Calculating image statistics (min, max, mean, median). + * Determining and storing aspect ratio change information. + * Saving processed temporary map files. + * Applying name variant suffixing and using standard type aliases for filenames. + * **Context Interaction**: Heavily populates `AssetProcessingContext.processed_maps_details` with paths to temporary processed files, dimensions, and other metadata for each map and its variants. Updates `AssetProcessingContext.asset_metadata` with image stats and aspect ratio info. -10. **Workspace Cleanup (External)**: - * After the `ProcessingEngine.process()` method completes (successfully or with errors), the *calling code* is responsible for cleaning up the temporary workspace directory created in Step 1. This is often done in a `finally` block where `utils.workspace_utils.prepare_processing_workspace` was called. +9. **`MapMergingStage` (`processing/pipeline/stages/map_merging.py`)**: + * **Responsibility**: Performs channel packing and other merge operations based on `map_merge_rules` defined in the `Configuration`. + * **Context Interaction**: Reads source map details and temporary file paths from `AssetProcessingContext.processed_maps_details`. Saves new temporary merged maps and records their details in `AssetProcessingContext.merged_maps_details`. -11. **(Optional) Blender Script Execution (External)**: - * If triggered (e.g., via CLI arguments or GUI controls), the orchestrating code (e.g., `main.ProcessingTask`) executes the corresponding Blender scripts (`blenderscripts/*.py`) using `subprocess.run` *after* the `ProcessingEngine.process()` call completes successfully. - * *Note: Centralized logic for this was intended for `utils/blender_utils.py`, but this utility has not yet been implemented.* See `Developer Guide: Blender Integration Internals` for more details. +10. **`MetadataFinalizationAndSaveStage` (`processing/pipeline/stages/metadata_finalization_save.py`)**: + * **Responsibility**: Collects all accumulated metadata from `AssetProcessingContext.asset_metadata`, `AssetProcessingContext.processed_maps_details`, and `AssetProcessingContext.merged_maps_details`. It structures this information and saves it as the `metadata.json` file in a temporary location within the engine's temporary directory. + * **Context Interaction**: Reads from various context fields and writes the `metadata.json` file. Stores the path to this temporary metadata file in the context (e.g., `AssetProcessingContext.asset_metadata['temp_metadata_path']`). -This pipeline, executed by the `ProcessingEngine`, provides a clear and explicit processing flow based on the complete rule set provided by the GUI or other interfaces. \ No newline at end of file +11. **`OutputOrganizationStage` (`processing/pipeline/stages/output_organization.py`)**: + * **Responsibility**: Determines final output paths for all processed maps, merged maps, the metadata file, and any other asset files (like models). It then copies these files from their temporary locations to the final structured output directory. + * **Context Interaction**: Reads temporary file paths from `AssetProcessingContext.processed_maps_details`, `AssetProcessingContext.merged_maps_details`, and the temporary metadata file path. Uses `Configuration` for output path patterns. Updates `AssetProcessingContext.asset_metadata` with final file paths and status. + +**External Steps (Not part of `PipelineOrchestrator`'s direct loop but integral to the overall process):** + +* **Workspace Preparation and Cleanup**: Handled by the code that invokes `ProcessingEngine.process()` (e.g., `main.ProcessingTask`, `monitor._process_archive_task`), typically using `utils.workspace_utils`. The engine itself creates a sub-temporary directory (`engine_temp_dir`) within the workspace provided to it by the orchestrator, which it cleans up. +* **Prediction and Rule Generation**: Also external, performed before `ProcessingEngine` is called. Generates the `SourceRule`. +* **Optional Blender Script Execution**: Triggered externally after successful processing. + +This staged pipeline provides a modular and extensible architecture for asset processing, with clear separation of concerns for each step. The `AssetProcessingContext` ensures that data flows consistently between these stages.r \ No newline at end of file diff --git a/Presets/Dinesen.json b/Presets/Dinesen.json index 654eec1..f91cf36 100644 --- a/Presets/Dinesen.json +++ b/Presets/Dinesen.json @@ -56,7 +56,7 @@ ] }, { - "target_type": "MAP_ROUGH", + "target_type": "MAP_GLOSS", "keywords": [ "GLOSS" ] diff --git a/Presets/Poliigon.json b/Presets/Poliigon.json index 58560fd..efa3076 100644 --- a/Presets/Poliigon.json +++ b/Presets/Poliigon.json @@ -54,7 +54,7 @@ ] }, { - "target_type": "MAP_ROUGH", + "target_type": "MAP_GLOSS", "keywords": [ "GLOSS" ], diff --git a/config/app_settings.json b/config/app_settings.json index ac3a894..cec7393 100644 --- a/config/app_settings.json +++ b/config/app_settings.json @@ -246,7 +246,7 @@ ], "EXTRA_FILES_SUBDIR": "Extra", "OUTPUT_BASE_DIR": "../Asset_Processor_Output_Tests", - "OUTPUT_DIRECTORY_PATTERN": "[supplier]/[sha5]_[assetname]", + "OUTPUT_DIRECTORY_PATTERN": "[supplier]_[assetname]", "OUTPUT_FILENAME_PATTERN": "[assetname]_[maptype]_[resolution].[ext]", "METADATA_FILENAME": "metadata.json", "DEFAULT_NODEGROUP_BLEND_PATH": "G:/02 Content/10-19 Content/19 Catalogs/19.01 Blender Asset Catalogue/_CustomLibraries/Nodes-Linked/PBRSET-Nodes-Testing.blend", @@ -259,7 +259,8 @@ "8K": 8192, "4K": 4096, "2K": 2048, - "1K": 1024 + "1K": 1024, + "PREVIEW": 128 }, "ASPECT_RATIO_DECIMALS": 2, "OUTPUT_FORMAT_16BIT_PRIMARY": "exr", @@ -269,9 +270,9 @@ { "output_map_type": "NRMRGH", "inputs": { - "R": "NRM", - "G": "NRM", - "B": "ROUGH" + "R": "MAP_NRM", + "G": "MAP_NRM", + "B": "MAP_ROUGH" }, "defaults": { "R": 0.5, diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py index 96a7ffa..71614e6 100644 --- a/processing/pipeline/stages/individual_map_processing.py +++ b/processing/pipeline/stages/individual_map_processing.py @@ -1,5 +1,6 @@ import uuid import dataclasses +import re import os import logging from pathlib import Path @@ -63,17 +64,136 @@ class IndividualMapProcessingStage(ProcessingStage): self._update_file_rule_status(context, temp_id_for_fail, 'Failed', map_type=map_type_for_fail, details="Workspace path invalid") return context + # Fetch config settings once before the loop + respect_variant_map_types = getattr(context.config_obj, "respect_variant_map_types", []) + image_resolutions = getattr(context.config_obj, "image_resolutions", {}) + output_filename_pattern = getattr(context.config_obj, "output_filename_pattern", "[assetname]_[maptype]_[resolution].[ext]") + for file_rule_idx, file_rule in enumerate(context.files_to_process): # Generate a unique ID for this file_rule processing instance for processed_maps_details current_map_id_hex = f"map_{file_rule_idx}_{uuid.uuid4().hex[:8]}" - current_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" + initial_current_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" + + # --- START NEW SUFFIXING LOGIC --- + final_current_map_type = initial_current_map_type # Default to initial + + # 1. Determine Base Map Type from initial_current_map_type + base_map_type_match = re.match(r"(MAP_[A-Z]{3})", initial_current_map_type) + + if base_map_type_match and context.asset_rule: + true_base_map_type = base_map_type_match.group(1) # This is "MAP_XXX" + + # 2. Count Occurrences and Find Index of current_file_rule in context.asset_rule.files + peers_of_same_base_type_in_asset_rule = [] + for fr_asset in context.asset_rule.files: + fr_asset_item_type = fr_asset.item_type_override or fr_asset.item_type or "UnknownMapType" + fr_asset_base_map_type_match = re.match(r"(MAP_[A-Z]{3})", fr_asset_item_type) + + if fr_asset_base_map_type_match: + fr_asset_base_map_type = fr_asset_base_map_type_match.group(1) + if fr_asset_base_map_type == true_base_map_type: + peers_of_same_base_type_in_asset_rule.append(fr_asset) + + num_occurrences_of_base_type = len(peers_of_same_base_type_in_asset_rule) + current_instance_index = 0 # 1-based + + try: + current_instance_index = peers_of_same_base_type_in_asset_rule.index(file_rule) + 1 + except ValueError: + logger.warning( + f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Initial Type: '{initial_current_map_type}', Base: '{true_base_map_type}'): " + f"Could not find its own instance in the list of peers from asset_rule.files. " + f"Number of peers found: {num_occurrences_of_base_type}. Suffixing may be affected." + ) + + # 3. Determine Suffix + map_type_for_respect_check = true_base_map_type.replace("MAP_", "") # e.g., "COL" + is_in_respect_list = map_type_for_respect_check in respect_variant_map_types + + suffix_to_append = "" + if num_occurrences_of_base_type > 1: + if current_instance_index > 0: + suffix_to_append = f"-{current_instance_index}" + else: + logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}': Index for multi-occurrence map type '{true_base_map_type}' (count: {num_occurrences_of_base_type}) not determined. Omitting numeric suffix.") + elif num_occurrences_of_base_type == 1 and is_in_respect_list: + suffix_to_append = "-1" + + # 4. Form the final_current_map_type + if suffix_to_append: + final_current_map_type = true_base_map_type + suffix_to_append + else: + final_current_map_type = initial_current_map_type + + current_map_type = final_current_map_type + # --- END NEW SUFFIXING LOGIC --- + + # --- START: Filename-friendly map type derivation --- + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: --- Starting Filename-Friendly Map Type Logic for: {current_map_type} ---") + filename_friendly_map_type = current_map_type # Fallback + + # 1. Access FILE_TYPE_DEFINITIONS + file_type_definitions = None + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Attempting to access context.config_obj.FILE_TYPE_DEFINITIONS.") + try: + file_type_definitions = context.config_obj.FILE_TYPE_DEFINITIONS + if not file_type_definitions: # Check if it's None or empty + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS is present but empty or None.") + else: + sample_defs_log = {k: file_type_definitions[k] for k in list(file_type_definitions.keys())[:2]} # Log first 2 for brevity + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Accessed FILE_TYPE_DEFINITIONS. Sample: {sample_defs_log}, Total keys: {len(file_type_definitions)}.") + except AttributeError: + logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Could not access context.config_obj.FILE_TYPE_DEFINITIONS via direct attribute.") + + base_map_key = None + suffix_part = "" + + if file_type_definitions and isinstance(file_type_definitions, dict) and len(file_type_definitions) > 0: + base_map_key = None + suffix_part = "" + + sorted_known_base_keys = sorted(list(file_type_definitions.keys()), key=len, reverse=True) + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Sorted known base keys for parsing: {sorted_known_base_keys}") + + for known_key in sorted_known_base_keys: + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Checking if '{current_map_type}' starts with '{known_key}'") + if current_map_type.startswith(known_key): + base_map_key = known_key + suffix_part = current_map_type[len(known_key):] + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Match found! current_map_type: '{current_map_type}', base_map_key: '{base_map_key}', suffix_part: '{suffix_part}'") + break + + if base_map_key is None: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Could not parse base_map_key from '{current_map_type}' using known keys. Fallback: filename_friendly_map_type = '{filename_friendly_map_type}'.") + else: + definition = file_type_definitions.get(base_map_key) + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Definition for '{base_map_key}': {definition}") + if definition and isinstance(definition, dict): + standard_type_alias = definition.get("standard_type") + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Standard type alias for '{base_map_key}': '{standard_type_alias}'") + if standard_type_alias and isinstance(standard_type_alias, str) and standard_type_alias.strip(): + filename_friendly_map_type = standard_type_alias.strip() + suffix_part + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Successfully transformed map type: '{current_map_type}' -> '{filename_friendly_map_type}' (standard_type_alias: '{standard_type_alias}', suffix_part: '{suffix_part}').") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Standard type alias for '{base_map_key}' is missing, empty, or not a string (value: '{standard_type_alias}'). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: No definition or invalid definition for '{base_map_key}' (value: {definition}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + elif file_type_definitions is None: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS not available for lookup (was None). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + elif not isinstance(file_type_definitions, dict): + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS is not a dictionary (type: {type(file_type_definitions)}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS is an empty dictionary. Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Final filename_friendly_map_type: '{filename_friendly_map_type}'") + # --- END: Filename-friendly map type derivation --- if not current_map_type or not current_map_type.startswith("MAP_") or current_map_type == "MAP_GEN_COMPOSITE": - logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}': Skipping, item_type '{current_map_type}' not targeted for individual processing.") + logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}': Skipping, item_type '{current_map_type}' (initial: '{initial_current_map_type}') not targeted for individual processing.") continue - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Type: {current_map_type}, ID: {current_map_id_hex}): Starting individual processing.") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Type: {current_map_type}, Initial Type: {initial_current_map_type}, ID: {current_map_id_hex}): Starting individual processing.") # A. Find Source File (using file_rule.file_path as the pattern relative to source_base_path) # The _find_source_file might need adjustment if file_rule.file_path is absolute or needs complex globbing. @@ -81,117 +201,343 @@ class IndividualMapProcessingStage(ProcessingStage): source_file_path = self._find_source_file(source_base_path, file_rule.file_path, asset_name_for_log, current_map_id_hex) if not source_file_path: logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Source file not found with path/pattern '{file_rule.file_path}' in '{source_base_path}'.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, details="Source file not found") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, details="Source file not found") continue # B. Load and Transform Image image_data: Optional[np.ndarray] = ipu.load_image(str(source_file_path)) if image_data is None: logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to load image from '{source_file_path}'.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), details="Image load failed") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), details="Image load failed") continue original_height, original_width = image_data.shape[:2] logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Loaded image '{source_file_path}' with dimensions {original_width}x{original_height}.") - # Initialize transform settings with defaults - transform_settings = { - "target_width": 2048, - "target_height": None, - "resize_mode": "fit", - "ensure_pot": False, - "allow_upscale": False, - "resize_filter": "AREA", - "color_profile_management": False, - "target_color_profile": "sRGB", - "output_format_settings": None - } + # 1. Initial Power-of-Two (POT) Downscaling + pot_width = ipu.get_nearest_power_of_two_downscale(original_width) + pot_height = ipu.get_nearest_power_of_two_downscale(original_height) - # Attempt to load transform settings from file_rule.channel_merge_instructions + # Maintain aspect ratio for initial POT scaling, using the smaller of the scaled dimensions + # This ensures we only downscale. + if original_width > 0 and original_height > 0 : # Avoid division by zero + aspect_ratio = original_width / original_height + + # Calculate new dimensions based on POT width, then POT height, and pick the one that results in downscale or same size + pot_h_from_w = int(pot_width / aspect_ratio) + pot_w_from_h = int(pot_height * aspect_ratio) + + # Option 1: Scale by width, adjust height + candidate1_w, candidate1_h = pot_width, ipu.get_nearest_power_of_two_downscale(pot_h_from_w) + # Option 2: Scale by height, adjust width + candidate2_w, candidate2_h = ipu.get_nearest_power_of_two_downscale(pot_w_from_h), pot_height + + # Ensure candidates are not upscaling + if candidate1_w > original_width or candidate1_h > original_height: + candidate1_w, candidate1_h = original_width, original_height # Fallback to original if upscaling + if candidate2_w > original_width or candidate2_h > original_height: + candidate2_w, candidate2_h = original_width, original_height # Fallback to original if upscaling + + # Choose the candidate that results in a larger area (preferring less downscaling if multiple POT options) + # but still respects the POT downscale logic for each dimension individually. + # The actual POT dimensions are already calculated by get_nearest_power_of_two_downscale. + # We need to decide if we base the aspect ratio calc on pot_width or pot_height. + # The goal is to make one dimension POT and the other POT while maintaining aspect as much as possible, only downscaling. + + final_pot_width = ipu.get_nearest_power_of_two_downscale(original_width) + final_pot_height = ipu.get_nearest_power_of_two_downscale(original_height) + + # If original aspect is not 1:1, one of the POT dimensions might need further adjustment to maintain aspect + # after the other dimension is set to its POT. + # We prioritize fitting within the *downscaled* POT dimensions. + + # Scale to fit within final_pot_width, adjust height, then make height POT (downscale) + scaled_h_for_pot_w = max(1, round(final_pot_width / aspect_ratio)) + h1 = ipu.get_nearest_power_of_two_downscale(scaled_h_for_pot_w) + w1 = final_pot_width + if h1 > final_pot_height: # If this adjustment made height too big, re-evaluate + h1 = final_pot_height + w1 = ipu.get_nearest_power_of_two_downscale(max(1, round(h1 * aspect_ratio))) + + + # Scale to fit within final_pot_height, adjust width, then make width POT (downscale) + scaled_w_for_pot_h = max(1, round(final_pot_height * aspect_ratio)) + w2 = ipu.get_nearest_power_of_two_downscale(scaled_w_for_pot_h) + h2 = final_pot_height + if w2 > final_pot_width: # If this adjustment made width too big, re-evaluate + w2 = final_pot_width + h2 = ipu.get_nearest_power_of_two_downscale(max(1, round(w2 / aspect_ratio))) + + # Choose the option that results in larger area (less aggressive downscaling) + # while ensuring both dimensions are POT and not upscaled from original. + if w1 * h1 >= w2 * h2: + base_pot_width, base_pot_height = w1, h1 + else: + base_pot_width, base_pot_height = w2, h2 + + # Final check to ensure no upscaling from original dimensions + base_pot_width = min(base_pot_width, original_width) + base_pot_height = min(base_pot_height, original_height) + # And ensure they are POT + base_pot_width = ipu.get_nearest_power_of_two_downscale(base_pot_width) + base_pot_height = ipu.get_nearest_power_of_two_downscale(base_pot_height) + + else: # Handle cases like 0-dim images, though load_image should prevent this + base_pot_width, base_pot_height = 1, 1 + + + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Original dims: ({original_width},{original_height}), Initial POT Scaled Dims: ({base_pot_width},{base_pot_height}).") + + # Calculate and store aspect ratio change string + if original_width > 0 and original_height > 0 and base_pot_width > 0 and base_pot_height > 0: + aspect_change_str = ipu.normalize_aspect_ratio_change( + original_width, original_height, + base_pot_width, base_pot_height + ) + if aspect_change_str: + # This will overwrite if multiple maps are processed; specified by requirements. + context.asset_metadata['aspect_ratio_change_string'] = aspect_change_str + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type {current_map_type}: Calculated aspect ratio change string: '{aspect_change_str}' (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}). Stored in asset_metadata.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type {current_map_type}: Failed to calculate aspect ratio change string.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type {current_map_type}: Skipping aspect ratio change string calculation due to invalid dimensions (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}).") + + base_pot_image_data = image_data.copy() + if (base_pot_width, base_pot_height) != (original_width, original_height): + interpolation = cv2.INTER_AREA # Good for downscaling + base_pot_image_data = ipu.resize_image(base_pot_image_data, base_pot_width, base_pot_height, interpolation=interpolation) + if base_pot_image_data is None: + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to resize image to base POT dimensions.") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), details="Base POT resize failed") + continue + + # Color Profile Management (after initial POT resize, before multi-res saving) + # Initialize transform settings with defaults for color management + transform_settings = { + "color_profile_management": False, # Default, can be overridden by FileRule + "target_color_profile": "sRGB", # Default + "output_format_settings": None # For JPG quality, PNG compression + } if file_rule.channel_merge_instructions and 'transform' in file_rule.channel_merge_instructions: custom_transform_settings = file_rule.channel_merge_instructions['transform'] if isinstance(custom_transform_settings, dict): transform_settings.update(custom_transform_settings) - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Loaded transform settings from file_rule.channel_merge_instructions.") - else: - logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): 'transform' in channel_merge_instructions is not a dictionary. Using defaults.") - # TODO: Implement fallback to context.config_obj for global/item_type specific transform settings - # else: - # # Example: config_transforms = context.config_obj.get_transform_settings(file_rule.item_type or file_rule.item_type_override) - # # if config_transforms: - # # transform_settings.update(config_transforms) - - target_width, target_height = ipu.calculate_target_dimensions( - original_width, original_height, - transform_settings['target_width'], transform_settings['target_height'], - transform_settings['resize_mode'], - transform_settings['ensure_pot'], - transform_settings['allow_upscale'] - ) - logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Original dims: ({original_width},{original_height}), Calculated target dims: ({target_width},{target_height}) using sourced transforms.") - - processed_image_data = image_data.copy() - - if (target_width, target_height) != (original_width, original_height): - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Resizing from ({original_width},{original_height}) to ({target_width},{target_height}).") - interpolation_map = {"NEAREST": cv2.INTER_NEAREST, "LINEAR": cv2.INTER_LINEAR, "CUBIC": cv2.INTER_CUBIC, "AREA": cv2.INTER_AREA, "LANCZOS4": cv2.INTER_LANCZOS4} - interpolation = interpolation_map.get(transform_settings['resize_filter'].upper(), cv2.INTER_AREA) - processed_image_data = ipu.resize_image(processed_image_data, target_width, target_height, interpolation=interpolation) - if processed_image_data is None: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to resize image.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), details="Image resize failed") - continue - + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Loaded transform settings for color/output from file_rule.") + if transform_settings['color_profile_management'] and transform_settings['target_color_profile'] == "RGB": - if len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 3: - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGR to RGB.") - processed_image_data = ipu.convert_bgr_to_rgb(processed_image_data) - elif len(processed_image_data.shape) == 3 and processed_image_data.shape[2] == 4: - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGRA to RGBA.") - processed_image_data = ipu.convert_bgra_to_rgba(processed_image_data) - + if len(base_pot_image_data.shape) == 3 and base_pot_image_data.shape[2] == 3: # BGR to RGB + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGR to RGB for base POT image.") + base_pot_image_data = ipu.convert_bgr_to_rgb(base_pot_image_data) + elif len(base_pot_image_data.shape) == 3 and base_pot_image_data.shape[2] == 4: # BGRA to RGBA + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGRA to RGBA for base POT image.") + base_pot_image_data = ipu.convert_bgra_to_rgba(base_pot_image_data) + + # Ensure engine_temp_dir exists before saving base POT if not context.engine_temp_dir.exists(): try: context.engine_temp_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Asset '{asset_name_for_log}': Created engine_temp_dir at '{context.engine_temp_dir}'") except OSError as e: logger.error(f"Asset '{asset_name_for_log}': Failed to create engine_temp_dir '{context.engine_temp_dir}': {e}") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), details="Failed to create temp directory") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), details="Failed to create temp directory for base POT") continue - + temp_filename_suffix = Path(source_file_path).suffix - safe_map_type_filename = sanitize_filename(current_map_type) - temp_output_filename = f"processed_{safe_map_type_filename}_{current_map_id_hex}{temp_filename_suffix}" - temp_output_path = context.engine_temp_dir / temp_output_filename - - save_params = [] - if transform_settings['output_format_settings']: - if temp_filename_suffix.lower() in ['.jpg', '.jpeg']: - quality = transform_settings['output_format_settings'].get('quality', 95) - save_params = [cv2.IMWRITE_JPEG_QUALITY, quality] - elif temp_filename_suffix.lower() == '.png': - compression = transform_settings['output_format_settings'].get('compression_level', 3) - save_params = [cv2.IMWRITE_PNG_COMPRESSION, compression] + base_pot_temp_filename = f"{current_map_id_hex}_basePOT{temp_filename_suffix}" + base_pot_temp_path = context.engine_temp_dir / base_pot_temp_filename - save_success = ipu.save_image(str(temp_output_path), processed_image_data, params=save_params) + # Determine save parameters for base POT image (can be different from variants if needed) + base_save_params = [] + base_output_ext = temp_filename_suffix.lstrip('.') # Default to original, can be overridden by format rules + # TODO: Add logic here to determine base_output_ext and base_save_params based on bit depth and config, similar to variants. + # For now, using simple save. - if not save_success: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to save temporary image to '{temp_output_path}'.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=current_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]) if processed_image_data is not None else None, details="Temporary image save failed") + if not ipu.save_image(str(base_pot_temp_path), base_pot_image_data, params=base_save_params): + logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Failed to save base POT image to '{base_pot_temp_path}'.") + self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), base_pot_dimensions=(base_pot_width, base_pot_height), details="Base POT image save failed") continue - - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Successfully processed and saved temporary map to '{temp_output_path}'.") - - self._update_file_rule_status(context, current_map_id_hex, 'Processed', map_type=current_map_type, source_file=str(source_file_path), temp_processed_file=str(temp_output_path), original_dimensions=(original_width, original_height), processed_dimensions=(processed_image_data.shape[1], processed_image_data.shape[0]), details="Successfully processed") - if 'processed_files' not in context.asset_metadata: - context.asset_metadata['processed_files'] = [] - context.asset_metadata['processed_files'].append({ - 'processed_map_key': current_map_id_hex, # Changed from file_rule_id - 'path': str(temp_output_path), - 'type': 'temporary_map', - 'map_type': current_map_type - }) + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Successfully saved base POT image to '{base_pot_temp_path}' with dims ({base_pot_width}x{base_pot_height}).") + + # Initialize/update the status for this map in processed_maps_details + self._update_file_rule_status( + context, + current_map_id_hex, + 'BasePOTSaved', # Intermediate status, will be updated after variant check + map_type=filename_friendly_map_type, + source_file=str(source_file_path), + original_dimensions=(original_width, original_height), + base_pot_dimensions=(base_pot_width, base_pot_height), + temp_processed_file=str(base_pot_temp_path) # Store path to the saved base POT + ) + + # 2. Multiple Resolution Output (Variants) + processed_at_least_one_resolution_variant = False + # Resolution variants are attempted for all map types individually processed. + # The filter at the beginning of the loop (around line 72) ensures only relevant maps reach this stage. + generate_variants_for_this_map_type = True + + if generate_variants_for_this_map_type: # This will now always be true if code execution reaches here + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Map type '{current_map_type}' is eligible for individual processing. Attempting to generate resolution variants.") + # Sort resolutions from largest to smallest + sorted_resolutions = sorted(image_resolutions.items(), key=lambda item: item[1], reverse=True) + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Sorted resolutions for variant processing: {sorted_resolutions}") + + for res_key, res_max_dim in sorted_resolutions: + current_w, current_h = base_pot_image_data.shape[1], base_pot_image_data.shape[0] + + if current_w <= 0 or current_h <=0: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Base POT image has zero dimension ({current_w}x{current_h}). Skipping this resolution variant.") + continue + + if max(current_w, current_h) >= res_max_dim: + target_w_res, target_h_res = current_w, current_h + if max(current_w, current_h) > res_max_dim: + if current_w >= current_h: + target_w_res = res_max_dim + target_h_res = max(1, round(target_w_res / (current_w / current_h))) + else: + target_h_res = res_max_dim + target_w_res = max(1, round(target_h_res * (current_w / current_h))) + else: + logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Base POT image ({current_w}x{current_h}) is smaller than target max dim {res_max_dim}. Skipping this resolution variant.") + continue + + target_w_res = min(target_w_res, current_w) + target_h_res = min(target_h_res, current_h) + + if target_w_res <=0 or target_h_res <=0: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Calculated target variant dims are zero or negative ({target_w_res}x{target_h_res}). Skipping.") + continue + + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Processing variant for {res_max_dim}. Base POT Dims: ({current_w}x{current_h}), Target Dims for {res_key}: ({target_w_res}x{target_h_res}).") + + output_image_data_for_res = base_pot_image_data + if (target_w_res, target_h_res) != (current_w, current_h): + interpolation_res = cv2.INTER_AREA + output_image_data_for_res = ipu.resize_image(base_pot_image_data, target_w_res, target_h_res, interpolation=interpolation_res) + if output_image_data_for_res is None: + logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Failed to resize image for resolution variant {res_key}.") + continue + + assetname_placeholder = context.asset_rule.asset_name if context.asset_rule else "UnknownAsset" + resolution_placeholder = res_key + + # TODO: Implement proper output format/extension determination for variants + output_ext_variant = temp_filename_suffix.lstrip('.') + + temp_output_filename_variant = output_filename_pattern.replace("[assetname]", sanitize_filename(assetname_placeholder)) \ + .replace("[maptype]", sanitize_filename(filename_friendly_map_type)) \ + .replace("[resolution]", sanitize_filename(resolution_placeholder)) \ + .replace("[ext]", output_ext_variant) + temp_output_filename_variant = f"{current_map_id_hex}_variant_{temp_output_filename_variant}" # Distinguish variant temp files + temp_output_path_variant = context.engine_temp_dir / temp_output_filename_variant + + save_params_variant = [] + if transform_settings.get('output_format_settings'): + if output_ext_variant.lower() in ['jpg', 'jpeg']: + quality = transform_settings['output_format_settings'].get('quality', context.config_obj.get("JPG_QUALITY", 95)) + save_params_variant = [cv2.IMWRITE_JPEG_QUALITY, quality] + elif output_ext_variant.lower() == 'png': + compression = transform_settings['output_format_settings'].get('compression_level', context.config_obj.get("PNG_COMPRESSION_LEVEL", 6)) + save_params_variant = [cv2.IMWRITE_PNG_COMPRESSION, compression] + + save_success_variant = ipu.save_image(str(temp_output_path_variant), output_image_data_for_res, params=save_params_variant) + + if not save_success_variant: + logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Failed to save temporary variant image to '{temp_output_path_variant}'.") + continue + + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Successfully saved temporary variant map to '{temp_output_path_variant}' with dims ({target_w_res}x{target_h_res}).") + processed_at_least_one_resolution_variant = True + + if 'variants' not in context.processed_maps_details[current_map_id_hex]: + context.processed_maps_details[current_map_id_hex]['variants'] = [] + + context.processed_maps_details[current_map_id_hex]['variants'].append({ + 'resolution_key': res_key, + 'temp_path': str(temp_output_path_variant), # Changed 'path' to 'temp_path' + 'dimensions': (target_w_res, target_h_res), + 'resolution_name': f"{target_w_res}x{target_h_res}" # Retain for potential use + }) + + if 'processed_files' not in context.asset_metadata: + context.asset_metadata['processed_files'] = [] + context.asset_metadata['processed_files'].append({ + 'processed_map_key': current_map_id_hex, + 'resolution_key': res_key, + 'path': str(temp_output_path_variant), + 'type': 'temporary_map_variant', + 'map_type': current_map_type, + 'dimensions_w': target_w_res, + 'dimensions_h': target_h_res + }) + # Calculate and store image statistics for the lowest resolution output + lowest_res_image_data_for_stats = None + image_to_stat_path_for_log = "N/A" + source_of_stats_image = "unknown" + + if processed_at_least_one_resolution_variant and \ + current_map_id_hex in context.processed_maps_details and \ + 'variants' in context.processed_maps_details[current_map_id_hex] and \ + context.processed_maps_details[current_map_id_hex]['variants']: + + variants_list = context.processed_maps_details[current_map_id_hex]['variants'] + valid_variants_for_stats = [ + v for v in variants_list + if isinstance(v.get('dimensions'), tuple) and len(v['dimensions']) == 2 and v['dimensions'][0] > 0 and v['dimensions'][1] > 0 + ] + + if valid_variants_for_stats: + smallest_variant = min(valid_variants_for_stats, key=lambda v: v['dimensions'][0] * v['dimensions'][1]) + + if smallest_variant and 'temp_path' in smallest_variant and smallest_variant.get('dimensions'): + smallest_res_w, smallest_res_h = smallest_variant['dimensions'] + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Identified smallest variant for stats: {smallest_variant.get('resolution_key', 'N/A')} ({smallest_res_w}x{smallest_res_h}) at {smallest_variant['temp_path']}") + lowest_res_image_data_for_stats = ipu.load_image(smallest_variant['temp_path']) + image_to_stat_path_for_log = smallest_variant['temp_path'] + source_of_stats_image = f"variant {smallest_variant.get('resolution_key', 'N/A')}" + if lowest_res_image_data_for_stats is None: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Failed to load smallest variant image '{smallest_variant['temp_path']}' for stats.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Could not determine smallest variant for stats from valid variants list (details missing).") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: No valid variants found to determine the smallest one for stats.") + + if lowest_res_image_data_for_stats is None: + if base_pot_image_data is not None: + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Using base POT image for stats (dimensions: {base_pot_width}x{base_pot_height}). Smallest variant not available/loaded or no variants generated.") + lowest_res_image_data_for_stats = base_pot_image_data + image_to_stat_path_for_log = f"In-memory base POT image (dims: {base_pot_width}x{base_pot_height})" + source_of_stats_image = "base POT" + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Base POT image data is also None. Cannot calculate stats.") + + if lowest_res_image_data_for_stats is not None: + stats_dict = ipu.calculate_image_stats(lowest_res_image_data_for_stats) + if stats_dict and "error" not in stats_dict: + if 'image_stats_lowest_res' not in context.asset_metadata: + context.asset_metadata['image_stats_lowest_res'] = {} + + context.asset_metadata['image_stats_lowest_res'][current_map_type] = stats_dict + logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': Calculated and stored image stats from '{source_of_stats_image}' (source ref: '{image_to_stat_path_for_log}').") + elif stats_dict and "error" in stats_dict: + logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': Error calculating image stats from '{source_of_stats_image}': {stats_dict['error']}.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': Failed to calculate image stats from '{source_of_stats_image}' (result was None or empty).") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': No image data available (from variant or base POT) to calculate stats.") + + # Final status update based on whether variants were generated (and expected) + if generate_variants_for_this_map_type: + if processed_at_least_one_resolution_variant: + self._update_file_rule_status(context, current_map_id_hex, 'Processed_With_Variants', map_type=filename_friendly_map_type, details="Successfully processed with multiple resolution variants.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Variants were expected for map type '{current_map_type}', but none were generated (e.g., base POT too small for any variant tier).") + self._update_file_rule_status(context, current_map_id_hex, 'Processed_No_Variants', map_type=filename_friendly_map_type, details="Variants expected but none generated (e.g., base POT too small).") + else: # No variants were expected for this map type + self._update_file_rule_status(context, current_map_id_hex, 'Processed_No_Variants', map_type=filename_friendly_map_type, details="Processed to base POT; variants not applicable for this map type.") logger.info(f"Asset '{asset_name_for_log}': Finished individual map processing stage.") return context @@ -260,13 +606,34 @@ class IndividualMapProcessingStage(ProcessingStage): orig_w, orig_h = kwargs['original_dimensions'] context.processed_maps_details[map_id_hex]['original_resolution_name'] = f"{orig_w}x{orig_h}" - if status == 'Processed' and 'processed_dimensions' in kwargs and isinstance(kwargs['processed_dimensions'], tuple) and len(kwargs['processed_dimensions']) == 2: - proc_w, proc_h = kwargs['processed_dimensions'] - context.processed_maps_details[map_id_hex]['processed_resolution_name'] = f"{proc_w}x{proc_h}" - elif 'processed_dimensions' in kwargs: # If present but not as expected, log or handle - logger.warning(f"Asset '{asset_name_for_log}', Map ID {map_id_hex}: 'processed_dimensions' present but not a valid tuple: {kwargs['processed_dimensions']}") + # Determine the correct dimensions to use for 'processed_resolution_name' + # This name refers to the base POT scaled image dimensions before variant generation. + dims_to_log_as_base_processed = None + if 'base_pot_dimensions' in kwargs and isinstance(kwargs['base_pot_dimensions'], tuple) and len(kwargs['base_pot_dimensions']) == 2: + # This key is used when status is 'Processed_With_Variants' + dims_to_log_as_base_processed = kwargs['base_pot_dimensions'] + elif 'processed_dimensions' in kwargs and isinstance(kwargs['processed_dimensions'], tuple) and len(kwargs['processed_dimensions']) == 2: + # This key is used when status is 'Processed_No_Variants' (and potentially others) + dims_to_log_as_base_processed = kwargs['processed_dimensions'] + + if dims_to_log_as_base_processed: + proc_w, proc_h = dims_to_log_as_base_processed + resolution_name_str = f"{proc_w}x{proc_h}" + context.processed_maps_details[map_id_hex]['base_pot_resolution_name'] = resolution_name_str + # Ensure 'processed_resolution_name' is also set for OutputOrganizationStage compatibility + context.processed_maps_details[map_id_hex]['processed_resolution_name'] = resolution_name_str + elif 'processed_dimensions' in kwargs or 'base_pot_dimensions' in kwargs: + details_for_warning = kwargs.get('processed_dimensions', kwargs.get('base_pot_dimensions')) + logger.warning(f"Asset '{asset_name_for_log}', Map ID {map_id_hex}: 'processed_dimensions' or 'base_pot_dimensions' key present but its value is not a valid 2-element tuple: {details_for_warning}") + + # If temp_processed_file was passed, ensure it's in the details + if 'temp_processed_file' in kwargs: + context.processed_maps_details[map_id_hex]['temp_processed_file'] = kwargs['temp_processed_file'] # Log all details being stored for clarity, including the newly added resolution names log_details = context.processed_maps_details[map_id_hex].copy() + # Avoid logging full image data if it accidentally gets into kwargs + if 'image_data' in log_details: del log_details['image_data'] + if 'base_pot_image_data' in log_details: del log_details['base_pot_image_data'] logger.debug(f"Asset '{asset_name_for_log}', Map ID {map_id_hex}: Status updated to '{status}'. Details: {log_details}") \ No newline at end of file diff --git a/processing/pipeline/stages/map_merging.py b/processing/pipeline/stages/map_merging.py index c93093c..791f7b9 100644 --- a/processing/pipeline/stages/map_merging.py +++ b/processing/pipeline/stages/map_merging.py @@ -217,9 +217,28 @@ class MapMergingStage(ProcessingStage): if source_image is not None: if source_image.ndim == 2: # Grayscale source source_data_this_channel = source_image - elif source_image.ndim == 3: # Color source, take the first channel (assuming it's grayscale or R of RGB) - source_data_this_channel = source_image[:,:,0] - logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Taking channel 0 from {input_map_type_for_this_channel} for output {out_channel_char}.") + elif source_image.ndim == 3 or source_image.ndim == 4: # Color source (3-channel BGR or 4-channel BGRA), assumed loaded by ipu.load_image + # Standard BGR(A) channel indexing: B=0, G=1, R=2, A=3 (if present) + # This map helps get NRM's Red data for 'R' output, NRM's Green for 'G' output etc. + # based on the semantic meaning of out_channel_char. + semantic_to_bgr_idx = {'R': 2, 'G': 1, 'B': 0, 'A': 3} + + if input_map_type_for_this_channel == "NRM": + idx_to_extract = semantic_to_bgr_idx.get(out_channel_char) + + if idx_to_extract is not None and idx_to_extract < source_image.shape[2]: + source_data_this_channel = source_image[:, :, idx_to_extract] + logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: For output '{out_channel_char}', using NRM's semantic '{out_channel_char}' channel (BGR(A) index {idx_to_extract}).") + else: + # Fallback if out_channel_char isn't R,G,B,A or NRM doesn't have the channel (e.g. 3-channel NRM and 'A' requested) + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Could not map output '{out_channel_char}' to a specific BGR(A) channel of NRM (shape {source_image.shape}). Defaulting to NRM's channel 0 (Blue).") + source_data_this_channel = source_image[:, :, 0] + else: + # For other multi-channel sources (e.g., ROUGH as RGB, or other color maps not "NRM") + # Default to taking the first channel (Blue in BGR). + # This covers "Roughness map's greyscale data" if ROUGH is RGB (by taking one of its channels as a proxy). + source_data_this_channel = source_image[:, :, 0] + logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: For output '{out_channel_char}', source {input_map_type_for_this_channel} (shape {source_image.shape}) is multi-channel but not NRM. Using its channel 0 (Blue).") else: # Source map was not found, use default default_val_for_channel = default_values.get(out_channel_char) if default_val_for_channel is not None: diff --git a/processing/pipeline/stages/metadata_finalization_save.py b/processing/pipeline/stages/metadata_finalization_save.py index 1fb763f..8f3a555 100644 --- a/processing/pipeline/stages/metadata_finalization_save.py +++ b/processing/pipeline/stages/metadata_finalization_save.py @@ -125,7 +125,27 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): def make_serializable(data: Any) -> Any: if isinstance(data, Path): - return str(data) + # metadata_save_path is available from the outer scope + metadata_dir = metadata_save_path.parent + try: + # Attempt to make the path relative if it's absolute and under the same root + if data.is_absolute(): + # Check if the path can be made relative (e.g., same drive on Windows) + # This check might need to be more robust depending on os.path.relpath behavior + # For pathlib, relative_to will raise ValueError if not possible. + return str(data.relative_to(metadata_dir)) + else: + # If it's already relative, assume it's correct or handle as needed + return str(data) + except ValueError: + # If paths are on different drives or cannot be made relative, + # log a warning and return the absolute path as a string. + # This can happen if an output path was explicitly set to an unrelated directory. + logger.warning( + f"Asset '{asset_name_for_log}': Could not make path {data} " + f"relative to {metadata_dir}. Storing as absolute." + ) + return str(data) if isinstance(data, datetime.datetime): # Ensure datetime is serializable return data.isoformat() if isinstance(data, dict): diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index a6edda9..5087bd6 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -50,57 +50,202 @@ class OutputOrganizationStage(ProcessingStage): # A. Organize Processed Individual Maps if context.processed_maps_details: - logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.processed_maps_details)} processed individual map(s).") - for processed_map_key, details in context.processed_maps_details.items(): # Use processed_map_key - if details.get('status') != 'Processed' or not details.get('temp_processed_file'): - logger.debug(f"Asset '{asset_name_for_log}': Skipping processed map key '{processed_map_key}' due to status '{details.get('status')}' or missing temp file.") - continue + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.processed_maps_details)} processed individual map entries.") + for processed_map_key, details in context.processed_maps_details.items(): + map_status = details.get('status') + base_map_type = details.get('map_type', 'unknown_map_type') # Original map type - temp_file_path = Path(details['temp_processed_file']) - map_type = details.get('map_type', 'unknown_map_type') - resolution_str = details.get('processed_resolution_name', details.get('original_resolution_name', 'resX')) - - - # Construct token_data for path generation - token_data = { - "assetname": asset_name_for_log, - "supplier": context.effective_supplier or "DefaultSupplier", - "maptype": map_type, - "resolution": resolution_str, - "ext": temp_file_path.suffix.lstrip('.'), # Get extension without dot - "incrementingvalue": getattr(context, 'incrementing_value', None), - "sha5": getattr(context, 'sha5_value', None) - } - token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} - - # Generate filename first using its pattern - # output_filename = f"{asset_name_for_log}_{sanitize_filename(map_type)}{temp_file_path.suffix}" # Old way - output_filename = generate_path_from_pattern(output_filename_pattern_config, token_data_cleaned) - - - try: - relative_dir_path_str = generate_path_from_pattern( - pattern_string=output_dir_pattern, - token_data=token_data_cleaned - ) - final_path = Path(context.output_base_path) / Path(relative_dir_path_str) / Path(output_filename) - final_path.parent.mkdir(parents=True, exist_ok=True) - - if final_path.exists() and not overwrite_existing: - logger.info(f"Asset '{asset_name_for_log}': Output file {final_path} exists and overwrite is disabled. Skipping copy.") - else: - shutil.copy2(temp_file_path, final_path) - logger.info(f"Asset '{asset_name_for_log}': Copied {temp_file_path} to {final_path}") - final_output_files.append(str(final_path)) + if map_status in ['Processed', 'Processed_No_Variants']: + if not details.get('temp_processed_file'): + logger.debug(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status '{map_status}') due to missing 'temp_processed_file'.") + details['status'] = 'Organization Skipped (Missing Temp File)' + continue - context.processed_maps_details[processed_map_key]['final_output_path'] = str(final_path) - context.processed_maps_details[processed_map_key]['status'] = 'Organized' + temp_file_path = Path(details['temp_processed_file']) + resolution_str = details.get('processed_resolution_name', details.get('original_resolution_name', 'resX')) - except Exception as e: - logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} to destination for processed map key '{processed_map_key}'. Error: {e}", exc_info=True) - context.status_flags['output_organization_error'] = True - context.asset_metadata['status'] = "Failed (Output Organization Error)" - context.processed_maps_details[processed_map_key]['status'] = 'Organization Failed' + token_data = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": base_map_type, + "resolution": resolution_str, + "ext": temp_file_path.suffix.lstrip('.'), + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} + + output_filename = generate_path_from_pattern(output_filename_pattern_config, token_data_cleaned) + + try: + relative_dir_path_str = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_cleaned + ) + final_path = Path(context.output_base_path) / Path(relative_dir_path_str) / Path(output_filename) + final_path.parent.mkdir(parents=True, exist_ok=True) + + if final_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': Output file {final_path} for map '{processed_map_key}' exists and overwrite is disabled. Skipping copy.") + else: + shutil.copy2(temp_file_path, final_path) + logger.info(f"Asset '{asset_name_for_log}': Copied {temp_file_path} to {final_path} for map '{processed_map_key}'.") + final_output_files.append(str(final_path)) + + details['final_output_path'] = str(final_path) + details['status'] = 'Organized' + + # Update asset_metadata for metadata.json + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) + map_metadata_entry['map_type'] = base_map_type + map_metadata_entry['path'] = str(Path(relative_dir_path_str) / Path(output_filename)) # Store relative path + + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} for map key '{processed_map_key}'. Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error)" + details['status'] = 'Organization Failed' + + elif map_status == 'Processed_With_Variants': + variants = details.get('variants') + if not variants: # No variants list, or it's empty + logger.warning(f"Asset '{asset_name_for_log}': Map key '{processed_map_key}' (status '{map_status}') has no 'variants' list or it is empty. Attempting fallback to base file.") + if not details.get('temp_processed_file'): + logger.error(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (fallback) as 'temp_processed_file' is also missing.") + details['status'] = 'Organization Failed (No Variants, No Temp File)' + continue # Skip to next map key + + # Fallback: Process the base temp_processed_file + temp_file_path = Path(details['temp_processed_file']) + resolution_str = details.get('processed_resolution_name', details.get('original_resolution_name', 'baseRes')) + + token_data = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": base_map_type, + "resolution": resolution_str, + "ext": temp_file_path.suffix.lstrip('.'), + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} + output_filename = generate_path_from_pattern(output_filename_pattern_config, token_data_cleaned) + + try: + relative_dir_path_str = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_cleaned + ) + final_path = Path(context.output_base_path) / Path(relative_dir_path_str) / Path(output_filename) + final_path.parent.mkdir(parents=True, exist_ok=True) + + if final_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': Output file {final_path} for map '{processed_map_key}' (fallback) exists and overwrite is disabled. Skipping copy.") + else: + shutil.copy2(temp_file_path, final_path) + logger.info(f"Asset '{asset_name_for_log}': Copied {temp_file_path} to {final_path} for map '{processed_map_key}' (fallback).") + final_output_files.append(str(final_path)) + + details['final_output_path'] = str(final_path) + details['status'] = 'Organized (Base File Fallback)' + + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) + map_metadata_entry['map_type'] = base_map_type + map_metadata_entry['path'] = str(Path(relative_dir_path_str) / Path(output_filename)) + if 'variant_paths' in map_metadata_entry: # Clean up if it was somehow set + del map_metadata_entry['variant_paths'] + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} (fallback) for map key '{processed_map_key}'. Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error - Fallback)" + details['status'] = 'Organization Failed (Fallback)' + continue # Finished with this map key due to fallback + + # If we are here, 'variants' list exists and is not empty. Proceed with variant processing. + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(variants)} variants for map key '{processed_map_key}' (map type: {base_map_type}).") + + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) + map_metadata_entry['map_type'] = base_map_type + map_metadata_entry.setdefault('variant_paths', {}) # Initialize if not present + + processed_any_variant_successfully = False + failed_any_variant = False + + for variant_index, variant_detail in enumerate(variants): + temp_variant_path_str = variant_detail.get('temp_path') + if not temp_variant_path_str: + logger.warning(f"Asset '{asset_name_for_log}': Variant {variant_index} for map '{processed_map_key}' is missing 'temp_path'. Skipping.") + variant_detail['status'] = 'Organization Skipped (Missing Temp Path)' + continue + + temp_variant_path = Path(temp_variant_path_str) + variant_resolution_key = variant_detail.get('resolution_key', f"varRes{variant_index}") + variant_ext = temp_variant_path.suffix.lstrip('.') + + token_data_variant = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": base_map_type, + "resolution": variant_resolution_key, + "ext": variant_ext, + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_variant_cleaned = {k: v for k, v in token_data_variant.items() if v is not None} + output_filename_variant = generate_path_from_pattern(output_filename_pattern_config, token_data_variant_cleaned) + + try: + relative_dir_path_str_variant = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_variant_cleaned + ) + final_variant_path = Path(context.output_base_path) / Path(relative_dir_path_str_variant) / Path(output_filename_variant) + final_variant_path.parent.mkdir(parents=True, exist_ok=True) + + if final_variant_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': Output variant file {final_variant_path} for map '{processed_map_key}' (res: {variant_resolution_key}) exists and overwrite is disabled. Skipping copy.") + variant_detail['status'] = 'Organized (Exists, Skipped Copy)' + else: + shutil.copy2(temp_variant_path, final_variant_path) + logger.info(f"Asset '{asset_name_for_log}': Copied variant {temp_variant_path} to {final_variant_path} for map '{processed_map_key}'.") + final_output_files.append(str(final_variant_path)) + variant_detail['status'] = 'Organized' + + variant_detail['final_output_path'] = str(final_variant_path) + relative_final_variant_path_str = str(Path(relative_dir_path_str_variant) / Path(output_filename_variant)) + map_metadata_entry['variant_paths'][variant_resolution_key] = relative_final_variant_path_str + processed_any_variant_successfully = True + + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy variant {temp_variant_path} for map key '{processed_map_key}' (res: {variant_resolution_key}). Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error - Variant)" + variant_detail['status'] = 'Organization Failed' + failed_any_variant = True + + # Update parent map detail status based on variant outcomes + if failed_any_variant: + details['status'] = 'Organization Failed (Variants)' + elif processed_any_variant_successfully: + # Check if all processable variants were organized + all_attempted_organized = True + for v_detail in variants: + if v_detail.get('temp_path') and not v_detail.get('status', '').startswith('Organized'): + all_attempted_organized = False + break + if all_attempted_organized: + details['status'] = 'Organized (All Attempted Variants)' + else: + details['status'] = 'Partially Organized (Variants)' + elif not any(v.get('temp_path') for v in variants): # No variants had temp_paths to begin with + details['status'] = 'Processed_With_Variants (No Valid Variants to Organize)' + else: # Variants list existed, items had temp_paths, but none were successfully organized (e.g., all skipped due to existing file and no overwrite) + details['status'] = 'Organization Skipped (No Variants Copied/Needed)' + + + else: # Other statuses like 'Skipped', 'Failed', 'Organization Failed' etc. + logger.debug(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status: '{map_status}') for organization as it's not 'Processed', 'Processed_No_Variants', or 'Processed_With_Variants'.") + continue else: logger.debug(f"Asset '{asset_name_for_log}': No processed individual maps to organize.") diff --git a/processing/utils/image_processing_utils.py b/processing/utils/image_processing_utils.py index 0ea5cbf..81e3e08 100644 --- a/processing/utils/image_processing_utils.py +++ b/processing/utils/image_processing_utils.py @@ -31,17 +31,17 @@ def get_nearest_power_of_two_downscale(value: int) -> int: If the value is already a power of two, it returns the value itself. Returns 1 if the value is less than 1. """ - if value < 1: + if value < 1: return 1 if is_power_of_two(value): return value # Find the largest power of two strictly less than value, # unless value itself is POT. - # (1 << (value.bit_length() - 1)) achieves this. - # Example: value=7 (0111, bl=3), 1<<2 = 4. - # Example: value=8 (1000, bl=4), 1<<3 = 8. - # Example: value=9 (1001, bl=4), 1<<3 = 8. - return 1 << (value.bit_length() - 1) + # (1 << (value.bit_length() - 1)) achieves this. + # Example: value=7 (0111, bl=3), 1<<2 = 4. + # Example: value=8 (1000, bl=4), 1<<3 = 8. + # Example: value=9 (1001, bl=4), 1<<3 = 8. + return 1 << (value.bit_length() - 1) # --- Dimension Calculation --- def calculate_target_dimensions( @@ -184,10 +184,12 @@ def calculate_image_stats(image_data: np.ndarray) -> Optional[Dict]: stats["min"] = float(np.min(data_float)) stats["max"] = float(np.max(data_float)) stats["mean"] = float(np.mean(data_float)) + stats["median"] = float(np.median(data_float)) elif len(data_float.shape) == 3: # Color (H, W, C) stats["min"] = [float(v) for v in np.min(data_float, axis=(0, 1))] stats["max"] = [float(v) for v in np.max(data_float, axis=(0, 1))] stats["mean"] = [float(v) for v in np.mean(data_float, axis=(0, 1))] + stats["median"] = [float(v) for v in np.median(data_float, axis=(0, 1))] else: return None # Unsupported shape return stats @@ -235,46 +237,67 @@ def normalize_aspect_ratio_change(original_width: int, original_height: int, res if abs(output_width - 1.0) < epsilon: output_width = 1 if abs(output_height - 1.0) < epsilon: output_height = 1 + # Helper to format the number part + def format_value(val, dec): + # Multiply by 10^decimals, convert to int to keep trailing zeros in effect + # e.g. val=1.1, dec=2 -> 1.1 * 100 = 110 + # e.g. val=1.0, dec=2 -> 1.0 * 100 = 100 (though this might become "1" if it's exactly 1.0 before this) + # The existing logic already handles output_width/height being 1.0 to produce "EVEN" or skip a component. + # This formatting is for when output_width/height is NOT 1.0. + return str(int(round(val * (10**dec)))) + if abs(output_width - output_height) < epsilon: # Handles original square or aspect maintained output = "EVEN" elif output_width != 1 and abs(output_height - 1.0) < epsilon : # Width changed, height maintained relative to width - output = f"X{str(output_width).replace('.', '')}" + output = f"X{format_value(output_width, decimals)}" elif output_height != 1 and abs(output_width - 1.0) < epsilon: # Height changed, width maintained relative to height - output = f"Y{str(output_height).replace('.', '')}" + output = f"Y{format_value(output_height, decimals)}" else: # Both changed relative to each other - output = f"X{str(output_width).replace('.', '')}Y{str(output_height).replace('.', '')}" + output = f"X{format_value(output_width, decimals)}Y{format_value(output_height, decimals)}" return output # --- Image Loading, Conversion, Resizing --- def load_image(image_path: Union[str, Path], read_flag: int = cv2.IMREAD_UNCHANGED) -> Optional[np.ndarray]: - """Loads an image from the specified path.""" + """Loads an image from the specified path. Converts BGR/BGRA to RGB/RGBA if color.""" try: img = cv2.imread(str(image_path), read_flag) if img is None: # print(f"Warning: Failed to load image: {image_path}") # Optional: for debugging utils return None + + # Ensure RGB/RGBA for color images + if len(img.shape) == 3: + if img.shape[2] == 4: # BGRA from OpenCV + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + elif img.shape[2] == 3: # BGR from OpenCV + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) return img except Exception: # as e: # print(f"Error loading image {image_path}: {e}") # Optional: for debugging utils return None def convert_bgr_to_rgb(image: np.ndarray) -> np.ndarray: - """Converts an image from BGR to RGB color space.""" + """Converts an image from BGR/BGRA to RGB/RGBA color space.""" if image is None or len(image.shape) < 3: return image # Return as is if not a color image or None if image.shape[2] == 4: # BGRA - return cv2.cvtColor(image, cv2.COLOR_BGRA2RGB) + return cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA) # Keep alpha, convert to RGBA elif image.shape[2] == 3: # BGR return cv2.cvtColor(image, cv2.COLOR_BGR2RGB) return image # Return as is if not 3 or 4 channels def convert_rgb_to_bgr(image: np.ndarray) -> np.ndarray: - """Converts an image from RGB to BGR color space.""" - if image is None or len(image.shape) < 3 or image.shape[2] != 3: # Only for 3-channel RGB - return image # Return as is if not a 3-channel color image or None - return cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + """Converts an image from RGB/RGBA to BGR/BGRA color space.""" + if image is None or len(image.shape) < 3: + return image # Return as is if not a color image or None + + if image.shape[2] == 4: # RGBA + return cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA) + elif image.shape[2] == 3: # RGB + return cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + return image # Return as is if not 3 or 4 channels def resize_image(image: np.ndarray, target_width: int, target_height: int, interpolation: Optional[int] = None) -> np.ndarray: @@ -349,18 +372,19 @@ def save_image( elif img_to_save.dtype == np.float16: img_to_save = img_to_save.astype(np.float32) - # 2. Color Space Conversion (RGB -> BGR) - # Typically, OpenCV expects BGR for formats like PNG, JPG. EXR usually expects RGB. - # The `convert_to_bgr_before_save` flag controls this. - # If output_format is exr, this should generally be False. + # 2. Color Space Conversion (Internal RGB/RGBA -> BGR/BGRA for OpenCV) + # Input `image_data` is assumed to be in RGB/RGBA format (due to `load_image` changes). + # OpenCV's `imwrite` typically expects BGR/BGRA for formats like PNG, JPG. + # EXR format usually expects RGB/RGBA. + # The `convert_to_bgr_before_save` flag controls this behavior. current_format = output_format if output_format else path_obj.suffix.lower().lstrip('.') if convert_to_bgr_before_save and current_format != 'exr': - if len(img_to_save.shape) == 3 and img_to_save.shape[2] == 3: - img_to_save = convert_rgb_to_bgr(img_to_save) - # BGRA is handled by OpenCV imwrite for PNGs, no explicit conversion needed if saving as RGBA. - # If it's 4-channel and not PNG/TIFF with alpha, it might need stripping or specific handling. - # For simplicity, this function assumes 3-channel RGB input if BGR conversion is active. + # If image is 3-channel (RGB) or 4-channel (RGBA), convert to BGR/BGRA. + if len(img_to_save.shape) == 3 and (img_to_save.shape[2] == 3 or img_to_save.shape[2] == 4): + img_to_save = convert_rgb_to_bgr(img_to_save) # Handles RGB->BGR and RGBA->BGRA + # If `convert_to_bgr_before_save` is False or format is 'exr', + # the image (assumed RGB/RGBA) is saved as is. # 3. Save Image try: -- 2.47.2 From 5bf53f036cccdd186e5fe2489ece4c5db419cddc Mon Sep 17 00:00:00 2001 From: Rusfort Date: Fri, 9 May 2025 21:48:45 +0200 Subject: [PATCH 04/16] More Refactor Fixes, Issuetracker updated --- .../stages/gloss_to_rough_conversion.py | 242 ++++++++------- .../stages/individual_map_processing.py | 289 +++++++++++------- processing/pipeline/stages/map_merging.py | 233 ++++++++------ .../stages/metadata_finalization_save.py | 51 +++- .../pipeline/stages/output_organization.py | 12 +- 5 files changed, 509 insertions(+), 318 deletions(-) diff --git a/processing/pipeline/stages/gloss_to_rough_conversion.py b/processing/pipeline/stages/gloss_to_rough_conversion.py index 7c61919..2de863c 100644 --- a/processing/pipeline/stages/gloss_to_rough_conversion.py +++ b/processing/pipeline/stages/gloss_to_rough_conversion.py @@ -2,6 +2,7 @@ import logging from pathlib import Path import numpy as np from typing import List +import dataclasses from .base_stage import ProcessingStage from ..asset_context import AssetProcessingContext @@ -35,135 +36,158 @@ class GlossToRoughConversionStage(ProcessingStage): logger.debug(f"Asset '{asset_name_for_log}': Skipping GlossToRoughConversionStage due to skip_asset flag.") return context - if not context.files_to_process or not context.processed_maps_details: + if not context.processed_maps_details: # files_to_process might be empty if only gloss maps existed and all are converted logger.debug( - f"Asset '{asset_name_for_log}': No files to process or processed_maps_details empty " - f"in GlossToRoughConversionStage. Skipping." + f"Asset '{asset_name_for_log}': processed_maps_details is empty in GlossToRoughConversionStage. Skipping." ) return context - new_files_to_process: List[FileRule] = [] + # Start with a copy of the current file rules. We will modify this list. + new_files_to_process: List[FileRule] = list(context.files_to_process) if context.files_to_process else [] processed_a_gloss_map = False + successful_conversion_statuses = ['BasePOTSaved', 'Processed_With_Variants', 'Processed_No_Variants'] - logger.info(f"Asset '{asset_name_for_log}': Starting Gloss to Roughness Conversion Stage.") + logger.info(f"Asset '{asset_name_for_log}': Starting Gloss to Roughness Conversion Stage. Examining {len(context.processed_maps_details)} processed map entries.") - for idx, file_rule in enumerate(context.files_to_process): - # Assuming FileRule has 'map_type' and 'id' (with a .hex attribute) and 'source_file_path' - # These might need to be checked with hasattr if they are optional or could be missing - if hasattr(file_rule, 'map_type') and file_rule.map_type == "GLOSS": - if not hasattr(file_rule, 'id') or not hasattr(file_rule.id, 'hex'): - logger.warning(f"Asset '{asset_name_for_log}': GLOSS FileRule missing 'id.hex'. Skipping conversion for this rule: {file_rule}") - new_files_to_process.append(file_rule) - continue - map_detail_key = file_rule.id.hex + # Iterate using the index (map_key_index) as the key, which is now standard. + for map_key_index, map_details in context.processed_maps_details.items(): + processing_map_type = map_details.get('processing_map_type', '') + map_status = map_details.get('status') + original_temp_path_str = map_details.get('temp_processed_file') + # source_file_rule_idx from details should align with map_key_index. + # We primarily use map_key_index for accessing FileRule from context.files_to_process. + source_file_rule_idx_from_details = map_details.get('source_file_rule_index') + processing_tag = map_details.get('processing_tag') + + if map_key_index != source_file_rule_idx_from_details: + logger.warning( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Mismatch between map key index and 'source_file_rule_index' ({source_file_rule_idx_from_details}) in details. " + f"Using map_key_index ({map_key_index}) for FileRule lookup. This might indicate a data consistency issue from previous stage." + ) + + if not processing_tag: + logger.warning(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: 'processing_tag' is missing in map_details. Using a fallback for temp filename. This is unexpected.") + processing_tag = f"mki_{map_key_index}_fallback_tag" + + + if not processing_map_type.startswith("MAP_GLOSS"): + # logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Type '{processing_map_type}' is not GLOSS. Skipping.") + continue + + logger.info(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Identified potential GLOSS map (Type: {processing_map_type}).") + + if map_status not in successful_conversion_statuses: + logger.warning( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): Status '{map_status}' is not one of {successful_conversion_statuses}. " + f"Skipping conversion for this map." + ) + continue + + if not original_temp_path_str: + logger.warning( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): 'temp_processed_file' missing in details. " + f"Skipping conversion." + ) + continue + + original_temp_path = Path(original_temp_path_str) + if not original_temp_path.exists(): + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): Temporary file {original_temp_path_str} " + f"does not exist. Skipping conversion." + ) + continue + + # Use map_key_index directly to access the FileRule + # Ensure map_key_index is a valid index for context.files_to_process + if not isinstance(map_key_index, int) or map_key_index < 0 or map_key_index >= len(context.files_to_process): + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): Invalid map_key_index ({map_key_index}) for accessing files_to_process (len: {len(context.files_to_process)}). " + f"Skipping conversion." + ) + continue + + original_file_rule = context.files_to_process[map_key_index] + source_file_path_for_log = original_file_rule.file_path if hasattr(original_file_rule, 'file_path') else "Unknown source path" + logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Processing GLOSS map from '{original_temp_path_str}' (Original FileRule path: '{source_file_path_for_log}') for conversion.") + + image_data = ipu.load_image(str(original_temp_path)) + if image_data is None: + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Failed to load image data from {original_temp_path_str}. " + f"Skipping conversion." + ) + continue + + # Perform Inversion + inverted_image_data: np.ndarray + if np.issubdtype(image_data.dtype, np.floating): + inverted_image_data = 1.0 - image_data + inverted_image_data = np.clip(inverted_image_data, 0.0, 1.0) + logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Inverted float image data.") + elif np.issubdtype(image_data.dtype, np.integer): + max_val = np.iinfo(image_data.dtype).max + inverted_image_data = max_val - image_data + logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Inverted integer image data (max_val: {max_val}).") + else: + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Unsupported image data type {image_data.dtype} " + f"for GLOSS map. Cannot invert. Skipping conversion." + ) + continue + + # Save New Temporary (Roughness) Map + new_temp_filename = f"rough_from_gloss_{processing_tag}{original_temp_path.suffix}" + new_temp_path = context.engine_temp_dir / new_temp_filename + + save_success = ipu.save_image(str(new_temp_path), inverted_image_data) + + if save_success: + logger.info( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Converted GLOSS map {original_temp_path_str} " + f"to ROUGHNESS map {new_temp_path}." + ) - source_file_path_for_log = file_rule.source_file_path if hasattr(file_rule, 'source_file_path') else "Unknown source path" - - if map_detail_key not in context.processed_maps_details: - logger.warning( - f"Asset '{asset_name_for_log}': GLOSS map '{source_file_path_for_log}' " - f"(ID: {map_detail_key}) found in files_to_process but not in processed_maps_details. " - f"Adding original rule and skipping conversion for this map." - ) - new_files_to_process.append(file_rule) - continue - - map_details = context.processed_maps_details[map_detail_key] + update_dict = {'item_type': "MAP_ROUGH", 'item_type_override': "MAP_ROUGH"} - if map_details.get('status') != 'Processed' or 'temp_processed_file' not in map_details: - logger.warning( - f"Asset '{asset_name_for_log}': GLOSS map '{source_file_path_for_log}' " - f"(ID: {map_detail_key}) not successfully processed by previous stage or temp file missing. " - f"Status: {map_details.get('status')}. Adding original rule and skipping conversion." - ) - new_files_to_process.append(file_rule) - continue - - original_temp_path_str = map_details['temp_processed_file'] - original_temp_path = Path(original_temp_path_str) - - if not original_temp_path.exists(): - logger.error( - f"Asset '{asset_name_for_log}': Temporary file {original_temp_path_str} for GLOSS map " - f"(ID: {map_detail_key}) does not exist. Adding original rule and skipping conversion." - ) - new_files_to_process.append(file_rule) - continue - - logger.debug(f"Asset '{asset_name_for_log}': Processing GLOSS map {original_temp_path} for conversion.") - image_data = ipu.load_image(original_temp_path) - - if image_data is None: - logger.error( - f"Asset '{asset_name_for_log}': Failed to load image data from {original_temp_path} " - f"for GLOSS map (ID: {map_detail_key}). Adding original rule and skipping conversion." - ) - new_files_to_process.append(file_rule) - continue - - # Perform Inversion - inverted_image_data: np.ndarray - if np.issubdtype(image_data.dtype, np.floating): - inverted_image_data = 1.0 - image_data - inverted_image_data = np.clip(inverted_image_data, 0.0, 1.0) # Ensure range for floats - logger.debug(f"Asset '{asset_name_for_log}': Inverted float image data for {original_temp_path}.") - elif np.issubdtype(image_data.dtype, np.integer): - max_val = np.iinfo(image_data.dtype).max - inverted_image_data = max_val - image_data - logger.debug(f"Asset '{asset_name_for_log}': Inverted integer image data (max_val: {max_val}) for {original_temp_path}.") + modified_file_rule: Optional[FileRule] = None + if hasattr(original_file_rule, 'model_copy') and callable(original_file_rule.model_copy): # Pydantic + modified_file_rule = original_file_rule.model_copy(update=update_dict) + elif dataclasses.is_dataclass(original_file_rule): # Dataclass + modified_file_rule = dataclasses.replace(original_file_rule, **update_dict) else: - logger.error( - f"Asset '{asset_name_for_log}': Unsupported image data type {image_data.dtype} " - f"for GLOSS map {original_temp_path}. Cannot invert. Adding original rule." - ) - new_files_to_process.append(file_rule) + logger.error(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Original FileRule is neither Pydantic nor dataclass. Cannot modify. Skipping update for this rule.") continue - # Save New Temporary (Roughness) Map - # Using original_temp_path.suffix ensures we keep the format (e.g., .png, .exr) - # Ensure file_rule.map_type exists before using sanitize_filename - map_type_for_filename = file_rule.map_type if hasattr(file_rule, 'map_type') else "unknownmaptype" - new_temp_filename = f"rough_from_gloss_{sanitize_filename(map_type_for_filename)}_{file_rule.id.hex}{original_temp_path.suffix}" - new_temp_path = context.engine_temp_dir / new_temp_filename + new_files_to_process[map_key_index] = modified_file_rule # Replace using map_key_index - save_success = ipu.save_image(new_temp_path, inverted_image_data) - - if save_success: - logger.info( - f"Asset '{asset_name_for_log}': Converted GLOSS map {original_temp_path} " - f"to ROUGHNESS map {new_temp_path}." - ) - - # Assuming FileRule has model_copy method - modified_file_rule = file_rule.model_copy(deep=True) if hasattr(file_rule, 'model_copy') else file_rule - modified_file_rule.map_type = "ROUGHNESS" # Ensure map_type can be set - - # Update context.processed_maps_details for the original file_rule.id.hex - context.processed_maps_details[map_detail_key]['temp_processed_file'] = str(new_temp_path) - context.processed_maps_details[map_detail_key]['original_map_type_before_conversion'] = "GLOSS" - context.processed_maps_details[map_detail_key]['notes'] = "Converted from GLOSS by GlossToRoughConversionStage" - - new_files_to_process.append(modified_file_rule) - processed_a_gloss_map = True - else: - logger.error( - f"Asset '{asset_name_for_log}': Failed to save inverted ROUGHNESS map to {new_temp_path} " - f"for original GLOSS map (ID: {map_detail_key}). Adding original rule." - ) - new_files_to_process.append(file_rule) - else: # Not a gloss map - new_files_to_process.append(file_rule) + # Update context.processed_maps_details for this map_key_index + map_details['temp_processed_file'] = str(new_temp_path) + map_details['original_map_type_before_conversion'] = processing_map_type + map_details['processing_map_type'] = "MAP_ROUGH" + map_details['map_type'] = "Roughness" + map_details['status'] = "Converted_To_Rough" + map_details['notes'] = map_details.get('notes', '') + "; Converted from GLOSS by GlossToRoughConversionStage" + if 'base_pot_resolution_name' in map_details: + map_details['processed_resolution_name'] = map_details['base_pot_resolution_name'] + processed_a_gloss_map = True + else: + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Failed to save inverted ROUGHNESS map to {new_temp_path}. " + f"Original GLOSS FileRule remains." + ) + context.files_to_process = new_files_to_process if processed_a_gloss_map: logger.info( - f"Asset '{asset_name_for_log}': Gloss to Roughness conversion stage successfully processed one or more maps and updated file list." + f"Asset '{asset_name_for_log}': Gloss to Roughness conversion stage finished. Processed one or more maps and updated file list and map details." ) else: - logger.debug( - f"Asset '{asset_name_for_log}': No gloss maps were successfully converted in GlossToRoughConversionStage. " - f"File list for next stage contains original non-gloss maps and any gloss maps that failed conversion." + logger.info( + f"Asset '{asset_name_for_log}': No gloss maps were converted in GlossToRoughConversionStage. " + f"File list for next stage contains original non-gloss maps and any gloss maps that failed or were ineligible for conversion." ) return context \ No newline at end of file diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py index 71614e6..acbe8bd 100644 --- a/processing/pipeline/stages/individual_map_processing.py +++ b/processing/pipeline/stages/individual_map_processing.py @@ -48,9 +48,9 @@ class IndividualMapProcessingStage(ProcessingStage): context.status_flags['individual_map_processing_failed'] = True # Mark all file_rules as failed for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): - temp_id_for_fail = f"fr_fail_{fr_idx}" # Temporary ID for status update + # Use fr_idx as the key for status update for these early failures map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" - self._update_file_rule_status(context, temp_id_for_fail, 'Failed', map_type=map_type_for_fail, details="SourceRule.input_path missing") + self._update_file_rule_status(context, fr_idx, 'Failed', map_type=map_type_for_fail, details="SourceRule.input_path missing") return context # The workspace_path in the context should be the directory where files are extracted/available. @@ -59,9 +59,9 @@ class IndividualMapProcessingStage(ProcessingStage): logger.error(f"Asset '{asset_name_for_log}': Workspace path '{source_base_path}' is not a valid directory.") context.status_flags['individual_map_processing_failed'] = True for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): - temp_id_for_fail = f"fr_fail_{fr_idx}" # Use a temporary unique ID for this status update + # Use fr_idx as the key for status update map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" - self._update_file_rule_status(context, temp_id_for_fail, 'Failed', map_type=map_type_for_fail, details="Workspace path invalid") + self._update_file_rule_status(context, fr_idx, 'Failed', map_type=map_type_for_fail, details="Workspace path invalid") return context # Fetch config settings once before the loop @@ -70,8 +70,17 @@ class IndividualMapProcessingStage(ProcessingStage): output_filename_pattern = getattr(context.config_obj, "output_filename_pattern", "[assetname]_[maptype]_[resolution].[ext]") for file_rule_idx, file_rule in enumerate(context.files_to_process): - # Generate a unique ID for this file_rule processing instance for processed_maps_details - current_map_id_hex = f"map_{file_rule_idx}_{uuid.uuid4().hex[:8]}" + # file_rule_idx will be the key for processed_maps_details. + # processing_instance_tag is for unique temp files and detailed logging for this specific run. + processing_instance_tag = f"map_{file_rule_idx}_{uuid.uuid4().hex[:8]}" + current_map_key = file_rule_idx # Key for processed_maps_details + + if not file_rule.file_path: # Ensure file_path exists, critical for later stages if they rely on it from FileRule + logger.error(f"Asset '{asset_name_for_log}', FileRule at index {file_rule_idx} has an empty or None file_path. Skipping this rule.") + self._update_file_rule_status(context, current_map_key, 'Failed', + processing_tag=processing_instance_tag, + details="FileRule has no file_path") + continue initial_current_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" @@ -130,89 +139,98 @@ class IndividualMapProcessingStage(ProcessingStage): # --- END NEW SUFFIXING LOGIC --- # --- START: Filename-friendly map type derivation --- - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: --- Starting Filename-Friendly Map Type Logic for: {current_map_type} ---") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: --- Starting Filename-Friendly Map Type Logic for: {current_map_type} ---") filename_friendly_map_type = current_map_type # Fallback # 1. Access FILE_TYPE_DEFINITIONS file_type_definitions = None - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Attempting to access context.config_obj.FILE_TYPE_DEFINITIONS.") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Attempting to access context.config_obj.FILE_TYPE_DEFINITIONS.") try: file_type_definitions = context.config_obj.FILE_TYPE_DEFINITIONS if not file_type_definitions: # Check if it's None or empty - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS is present but empty or None.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS is present but empty or None.") else: sample_defs_log = {k: file_type_definitions[k] for k in list(file_type_definitions.keys())[:2]} # Log first 2 for brevity - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Accessed FILE_TYPE_DEFINITIONS. Sample: {sample_defs_log}, Total keys: {len(file_type_definitions)}.") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Accessed FILE_TYPE_DEFINITIONS. Sample: {sample_defs_log}, Total keys: {len(file_type_definitions)}.") except AttributeError: - logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Could not access context.config_obj.FILE_TYPE_DEFINITIONS via direct attribute.") + logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Could not access context.config_obj.FILE_TYPE_DEFINITIONS via direct attribute.") - base_map_key = None + base_map_key_val = None # Renamed from base_map_key to avoid conflict with current_map_key suffix_part = "" if file_type_definitions and isinstance(file_type_definitions, dict) and len(file_type_definitions) > 0: - base_map_key = None + base_map_key_val = None suffix_part = "" sorted_known_base_keys = sorted(list(file_type_definitions.keys()), key=len, reverse=True) - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Sorted known base keys for parsing: {sorted_known_base_keys}") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Sorted known base keys for parsing: {sorted_known_base_keys}") for known_key in sorted_known_base_keys: - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Checking if '{current_map_type}' starts with '{known_key}'") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Checking if '{current_map_type}' starts with '{known_key}'") if current_map_type.startswith(known_key): - base_map_key = known_key + base_map_key_val = known_key suffix_part = current_map_type[len(known_key):] - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Match found! current_map_type: '{current_map_type}', base_map_key: '{base_map_key}', suffix_part: '{suffix_part}'") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Match found! current_map_type: '{current_map_type}', base_map_key_val: '{base_map_key_val}', suffix_part: '{suffix_part}'") break - if base_map_key is None: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Could not parse base_map_key from '{current_map_type}' using known keys. Fallback: filename_friendly_map_type = '{filename_friendly_map_type}'.") + if base_map_key_val is None: + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Could not parse base_map_key_val from '{current_map_type}' using known keys. Fallback: filename_friendly_map_type = '{filename_friendly_map_type}'.") else: - definition = file_type_definitions.get(base_map_key) - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Definition for '{base_map_key}': {definition}") + definition = file_type_definitions.get(base_map_key_val) + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Definition for '{base_map_key_val}': {definition}") if definition and isinstance(definition, dict): standard_type_alias = definition.get("standard_type") - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Standard type alias for '{base_map_key}': '{standard_type_alias}'") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Standard type alias for '{base_map_key_val}': '{standard_type_alias}'") if standard_type_alias and isinstance(standard_type_alias, str) and standard_type_alias.strip(): filename_friendly_map_type = standard_type_alias.strip() + suffix_part - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Successfully transformed map type: '{current_map_type}' -> '{filename_friendly_map_type}' (standard_type_alias: '{standard_type_alias}', suffix_part: '{suffix_part}').") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Successfully transformed map type: '{current_map_type}' -> '{filename_friendly_map_type}' (standard_type_alias: '{standard_type_alias}', suffix_part: '{suffix_part}').") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Standard type alias for '{base_map_key}' is missing, empty, or not a string (value: '{standard_type_alias}'). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Standard type alias for '{base_map_key_val}' is missing, empty, or not a string (value: '{standard_type_alias}'). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: No definition or invalid definition for '{base_map_key}' (value: {definition}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: No definition or invalid definition for '{base_map_key_val}' (value: {definition}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") elif file_type_definitions is None: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS not available for lookup (was None). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS not available for lookup (was None). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") elif not isinstance(file_type_definitions, dict): - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS is not a dictionary (type: {type(file_type_definitions)}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS is not a dictionary (type: {type(file_type_definitions)}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: FILE_TYPE_DEFINITIONS is an empty dictionary. Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS is an empty dictionary. Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Final filename_friendly_map_type: '{filename_friendly_map_type}'") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Final filename_friendly_map_type: '{filename_friendly_map_type}'") # --- END: Filename-friendly map type derivation --- if not current_map_type or not current_map_type.startswith("MAP_") or current_map_type == "MAP_GEN_COMPOSITE": logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}': Skipping, item_type '{current_map_type}' (initial: '{initial_current_map_type}') not targeted for individual processing.") continue - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Type: {current_map_type}, Initial Type: {initial_current_map_type}, ID: {current_map_id_hex}): Starting individual processing.") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Type: {current_map_type}, Initial Type: {initial_current_map_type}, Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Starting individual processing.") # A. Find Source File (using file_rule.file_path as the pattern relative to source_base_path) - # The _find_source_file might need adjustment if file_rule.file_path is absolute or needs complex globbing. - # For now, assume file_rule.file_path is a relative pattern or exact name. - source_file_path = self._find_source_file(source_base_path, file_rule.file_path, asset_name_for_log, current_map_id_hex) + source_file_path = self._find_source_file(source_base_path, file_rule.file_path, asset_name_for_log, processing_instance_tag) if not source_file_path: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Source file not found with path/pattern '{file_rule.file_path}' in '{source_base_path}'.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, details="Source file not found") + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Source file not found with path/pattern '{file_rule.file_path}' in '{source_base_path}'.") + self._update_file_rule_status(context, current_map_key, 'Failed', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + details="Source file not found") continue # B. Load and Transform Image image_data: Optional[np.ndarray] = ipu.load_image(str(source_file_path)) if image_data is None: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to load image from '{source_file_path}'.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), details="Image load failed") + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Failed to load image from '{source_file_path}'.") + self._update_file_rule_status(context, current_map_key, 'Failed', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + source_file=str(source_file_path), + details="Image load failed") continue original_height, original_width = image_data.shape[:2] - logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Loaded image '{source_file_path}' with dimensions {original_width}x{original_height}.") + logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Loaded image '{source_file_path}' with dimensions {original_width}x{original_height}.") # 1. Initial Power-of-Two (POT) Downscaling pot_width = ipu.get_nearest_power_of_two_downscale(original_width) @@ -286,7 +304,7 @@ class IndividualMapProcessingStage(ProcessingStage): base_pot_width, base_pot_height = 1, 1 - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Original dims: ({original_width},{original_height}), Initial POT Scaled Dims: ({base_pot_width},{base_pot_height}).") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Original dims: ({original_width},{original_height}), Initial POT Scaled Dims: ({base_pot_width},{base_pot_height}).") # Calculate and store aspect ratio change string if original_width > 0 and original_height > 0 and base_pot_width > 0 and base_pot_height > 0: @@ -297,19 +315,26 @@ class IndividualMapProcessingStage(ProcessingStage): if aspect_change_str: # This will overwrite if multiple maps are processed; specified by requirements. context.asset_metadata['aspect_ratio_change_string'] = aspect_change_str - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type {current_map_type}: Calculated aspect ratio change string: '{aspect_change_str}' (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}). Stored in asset_metadata.") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type {current_map_type}: Calculated aspect ratio change string: '{aspect_change_str}' (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}). Stored in asset_metadata.") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type {current_map_type}: Failed to calculate aspect ratio change string.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type {current_map_type}: Failed to calculate aspect ratio change string.") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type {current_map_type}: Skipping aspect ratio change string calculation due to invalid dimensions (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}).") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type {current_map_type}: Skipping aspect ratio change string calculation due to invalid dimensions (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}).") base_pot_image_data = image_data.copy() if (base_pot_width, base_pot_height) != (original_width, original_height): interpolation = cv2.INTER_AREA # Good for downscaling base_pot_image_data = ipu.resize_image(base_pot_image_data, base_pot_width, base_pot_height, interpolation=interpolation) if base_pot_image_data is None: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Failed to resize image to base POT dimensions.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), details="Base POT resize failed") + logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Failed to resize image to base POT dimensions.") + self._update_file_rule_status(context, current_map_key, 'Failed', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + source_file=str(source_file_path), + original_dimensions=(original_width, original_height), + details="Base POT resize failed") continue # Color Profile Management (after initial POT resize, before multi-res saving) @@ -323,14 +348,14 @@ class IndividualMapProcessingStage(ProcessingStage): custom_transform_settings = file_rule.channel_merge_instructions['transform'] if isinstance(custom_transform_settings, dict): transform_settings.update(custom_transform_settings) - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Loaded transform settings for color/output from file_rule.") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Loaded transform settings for color/output from file_rule.") if transform_settings['color_profile_management'] and transform_settings['target_color_profile'] == "RGB": if len(base_pot_image_data.shape) == 3 and base_pot_image_data.shape[2] == 3: # BGR to RGB - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGR to RGB for base POT image.") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Converting BGR to RGB for base POT image.") base_pot_image_data = ipu.convert_bgr_to_rgb(base_pot_image_data) elif len(base_pot_image_data.shape) == 3 and base_pot_image_data.shape[2] == 4: # BGRA to RGBA - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (ID: {current_map_id_hex}): Converting BGRA to RGBA for base POT image.") + logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Converting BGRA to RGBA for base POT image.") base_pot_image_data = ipu.convert_bgra_to_rgba(base_pot_image_data) # Ensure engine_temp_dir exists before saving base POT @@ -340,11 +365,17 @@ class IndividualMapProcessingStage(ProcessingStage): logger.info(f"Asset '{asset_name_for_log}': Created engine_temp_dir at '{context.engine_temp_dir}'") except OSError as e: logger.error(f"Asset '{asset_name_for_log}': Failed to create engine_temp_dir '{context.engine_temp_dir}': {e}") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), details="Failed to create temp directory for base POT") + self._update_file_rule_status(context, current_map_key, 'Failed', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + source_file=str(source_file_path), + details="Failed to create temp directory for base POT") continue temp_filename_suffix = Path(source_file_path).suffix - base_pot_temp_filename = f"{current_map_id_hex}_basePOT{temp_filename_suffix}" + base_pot_temp_filename = f"{processing_instance_tag}_basePOT{temp_filename_suffix}" # Use processing_instance_tag base_pot_temp_path = context.engine_temp_dir / base_pot_temp_filename # Determine save parameters for base POT image (can be different from variants if needed) @@ -354,18 +385,29 @@ class IndividualMapProcessingStage(ProcessingStage): # For now, using simple save. if not ipu.save_image(str(base_pot_temp_path), base_pot_image_data, params=base_save_params): - logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Failed to save base POT image to '{base_pot_temp_path}'.") - self._update_file_rule_status(context, current_map_id_hex, 'Failed', map_type=filename_friendly_map_type, source_file=str(source_file_path), original_dimensions=(original_width, original_height), base_pot_dimensions=(base_pot_width, base_pot_height), details="Base POT image save failed") + logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Failed to save base POT image to '{base_pot_temp_path}'.") + self._update_file_rule_status(context, current_map_key, 'Failed', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + source_file=str(source_file_path), + original_dimensions=(original_width, original_height), + base_pot_dimensions=(base_pot_width, base_pot_height), + details="Base POT image save failed") continue - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Successfully saved base POT image to '{base_pot_temp_path}' with dims ({base_pot_width}x{base_pot_height}).") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Successfully saved base POT image to '{base_pot_temp_path}' with dims ({base_pot_width}x{base_pot_height}).") # Initialize/update the status for this map in processed_maps_details self._update_file_rule_status( context, - current_map_id_hex, + current_map_key, # Use file_rule_idx as key 'BasePOTSaved', # Intermediate status, will be updated after variant check map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, # Store the tag source_file=str(source_file_path), original_dimensions=(original_width, original_height), base_pot_dimensions=(base_pot_width, base_pot_height), @@ -375,20 +417,20 @@ class IndividualMapProcessingStage(ProcessingStage): # 2. Multiple Resolution Output (Variants) processed_at_least_one_resolution_variant = False # Resolution variants are attempted for all map types individually processed. - # The filter at the beginning of the loop (around line 72) ensures only relevant maps reach this stage. + # The filter at the beginning of the loop ensures only relevant maps reach this stage. generate_variants_for_this_map_type = True if generate_variants_for_this_map_type: # This will now always be true if code execution reaches here - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Map type '{current_map_type}' is eligible for individual processing. Attempting to generate resolution variants.") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Map type '{current_map_type}' is eligible for individual processing. Attempting to generate resolution variants.") # Sort resolutions from largest to smallest sorted_resolutions = sorted(image_resolutions.items(), key=lambda item: item[1], reverse=True) - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Sorted resolutions for variant processing: {sorted_resolutions}") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Sorted resolutions for variant processing: {sorted_resolutions}") for res_key, res_max_dim in sorted_resolutions: current_w, current_h = base_pot_image_data.shape[1], base_pot_image_data.shape[0] if current_w <= 0 or current_h <=0: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Base POT image has zero dimension ({current_w}x{current_h}). Skipping this resolution variant.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Base POT image has zero dimension ({current_w}x{current_h}). Skipping this resolution variant.") continue if max(current_w, current_h) >= res_max_dim: @@ -401,24 +443,24 @@ class IndividualMapProcessingStage(ProcessingStage): target_h_res = res_max_dim target_w_res = max(1, round(target_h_res * (current_w / current_h))) else: - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Base POT image ({current_w}x{current_h}) is smaller than target max dim {res_max_dim}. Skipping this resolution variant.") + logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Base POT image ({current_w}x{current_h}) is smaller than target max dim {res_max_dim}. Skipping this resolution variant.") continue target_w_res = min(target_w_res, current_w) target_h_res = min(target_h_res, current_h) if target_w_res <=0 or target_h_res <=0: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Calculated target variant dims are zero or negative ({target_w_res}x{target_h_res}). Skipping.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Calculated target variant dims are zero or negative ({target_w_res}x{target_h_res}). Skipping.") continue - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Processing variant for {res_max_dim}. Base POT Dims: ({current_w}x{current_h}), Target Dims for {res_key}: ({target_w_res}x{target_h_res}).") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Processing variant for {res_max_dim}. Base POT Dims: ({current_w}x{current_h}), Target Dims for {res_key}: ({target_w_res}x{target_h_res}).") output_image_data_for_res = base_pot_image_data if (target_w_res, target_h_res) != (current_w, current_h): interpolation_res = cv2.INTER_AREA output_image_data_for_res = ipu.resize_image(base_pot_image_data, target_w_res, target_h_res, interpolation=interpolation_res) if output_image_data_for_res is None: - logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Failed to resize image for resolution variant {res_key}.") + logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Failed to resize image for resolution variant {res_key}.") continue assetname_placeholder = context.asset_rule.asset_name if context.asset_rule else "UnknownAsset" @@ -431,7 +473,7 @@ class IndividualMapProcessingStage(ProcessingStage): .replace("[maptype]", sanitize_filename(filename_friendly_map_type)) \ .replace("[resolution]", sanitize_filename(resolution_placeholder)) \ .replace("[ext]", output_ext_variant) - temp_output_filename_variant = f"{current_map_id_hex}_variant_{temp_output_filename_variant}" # Distinguish variant temp files + temp_output_filename_variant = f"{processing_instance_tag}_variant_{temp_output_filename_variant}" # Use processing_instance_tag temp_output_path_variant = context.engine_temp_dir / temp_output_filename_variant save_params_variant = [] @@ -446,26 +488,26 @@ class IndividualMapProcessingStage(ProcessingStage): save_success_variant = ipu.save_image(str(temp_output_path_variant), output_image_data_for_res, params=save_params_variant) if not save_success_variant: - logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Failed to save temporary variant image to '{temp_output_path_variant}'.") + logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Failed to save temporary variant image to '{temp_output_path_variant}'.") continue - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Res {res_key}: Successfully saved temporary variant map to '{temp_output_path_variant}' with dims ({target_w_res}x{target_h_res}).") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Successfully saved temporary variant map to '{temp_output_path_variant}' with dims ({target_w_res}x{target_h_res}).") processed_at_least_one_resolution_variant = True - if 'variants' not in context.processed_maps_details[current_map_id_hex]: - context.processed_maps_details[current_map_id_hex]['variants'] = [] + if 'variants' not in context.processed_maps_details[current_map_key]: # Use current_map_key (file_rule_idx) + context.processed_maps_details[current_map_key]['variants'] = [] - context.processed_maps_details[current_map_id_hex]['variants'].append({ + context.processed_maps_details[current_map_key]['variants'].append({ # Use current_map_key (file_rule_idx) 'resolution_key': res_key, - 'temp_path': str(temp_output_path_variant), # Changed 'path' to 'temp_path' + 'temp_path': str(temp_output_path_variant), 'dimensions': (target_w_res, target_h_res), - 'resolution_name': f"{target_w_res}x{target_h_res}" # Retain for potential use + 'resolution_name': f"{target_w_res}x{target_h_res}" }) if 'processed_files' not in context.asset_metadata: context.asset_metadata['processed_files'] = [] context.asset_metadata['processed_files'].append({ - 'processed_map_key': current_map_id_hex, + 'processed_map_key': current_map_key, # Use current_map_key (file_rule_idx) 'resolution_key': res_key, 'path': str(temp_output_path_variant), 'type': 'temporary_map_variant', @@ -479,11 +521,11 @@ class IndividualMapProcessingStage(ProcessingStage): source_of_stats_image = "unknown" if processed_at_least_one_resolution_variant and \ - current_map_id_hex in context.processed_maps_details and \ - 'variants' in context.processed_maps_details[current_map_id_hex] and \ - context.processed_maps_details[current_map_id_hex]['variants']: + current_map_key in context.processed_maps_details and \ + 'variants' in context.processed_maps_details[current_map_key] and \ + context.processed_maps_details[current_map_key]['variants']: - variants_list = context.processed_maps_details[current_map_id_hex]['variants'] + variants_list = context.processed_maps_details[current_map_key]['variants'] valid_variants_for_stats = [ v for v in variants_list if isinstance(v.get('dimensions'), tuple) and len(v['dimensions']) == 2 and v['dimensions'][0] > 0 and v['dimensions'][1] > 0 @@ -494,25 +536,25 @@ class IndividualMapProcessingStage(ProcessingStage): if smallest_variant and 'temp_path' in smallest_variant and smallest_variant.get('dimensions'): smallest_res_w, smallest_res_h = smallest_variant['dimensions'] - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Identified smallest variant for stats: {smallest_variant.get('resolution_key', 'N/A')} ({smallest_res_w}x{smallest_res_h}) at {smallest_variant['temp_path']}") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Identified smallest variant for stats: {smallest_variant.get('resolution_key', 'N/A')} ({smallest_res_w}x{smallest_res_h}) at {smallest_variant['temp_path']}") lowest_res_image_data_for_stats = ipu.load_image(smallest_variant['temp_path']) image_to_stat_path_for_log = smallest_variant['temp_path'] source_of_stats_image = f"variant {smallest_variant.get('resolution_key', 'N/A')}" if lowest_res_image_data_for_stats is None: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Failed to load smallest variant image '{smallest_variant['temp_path']}' for stats.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Failed to load smallest variant image '{smallest_variant['temp_path']}' for stats.") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Could not determine smallest variant for stats from valid variants list (details missing).") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Could not determine smallest variant for stats from valid variants list (details missing).") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: No valid variants found to determine the smallest one for stats.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: No valid variants found to determine the smallest one for stats.") if lowest_res_image_data_for_stats is None: if base_pot_image_data is not None: - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Using base POT image for stats (dimensions: {base_pot_width}x{base_pot_height}). Smallest variant not available/loaded or no variants generated.") + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Using base POT image for stats (dimensions: {base_pot_width}x{base_pot_height}). Smallest variant not available/loaded or no variants generated.") lowest_res_image_data_for_stats = base_pot_image_data image_to_stat_path_for_log = f"In-memory base POT image (dims: {base_pot_width}x{base_pot_height})" source_of_stats_image = "base POT" else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Base POT image data is also None. Cannot calculate stats.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Base POT image data is also None. Cannot calculate stats.") if lowest_res_image_data_for_stats is not None: stats_dict = ipu.calculate_image_stats(lowest_res_image_data_for_stats) @@ -520,43 +562,59 @@ class IndividualMapProcessingStage(ProcessingStage): if 'image_stats_lowest_res' not in context.asset_metadata: context.asset_metadata['image_stats_lowest_res'] = {} - context.asset_metadata['image_stats_lowest_res'][current_map_type] = stats_dict - logger.info(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': Calculated and stored image stats from '{source_of_stats_image}' (source ref: '{image_to_stat_path_for_log}').") + context.asset_metadata['image_stats_lowest_res'][current_map_type] = stats_dict # Keyed by map_type + logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': Calculated and stored image stats from '{source_of_stats_image}' (source ref: '{image_to_stat_path_for_log}').") elif stats_dict and "error" in stats_dict: - logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': Error calculating image stats from '{source_of_stats_image}': {stats_dict['error']}.") + logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': Error calculating image stats from '{source_of_stats_image}': {stats_dict['error']}.") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': Failed to calculate image stats from '{source_of_stats_image}' (result was None or empty).") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': Failed to calculate image stats from '{source_of_stats_image}' (result was None or empty).") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}, Map Type '{current_map_type}': No image data available (from variant or base POT) to calculate stats.") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': No image data available (from variant or base POT) to calculate stats.") # Final status update based on whether variants were generated (and expected) if generate_variants_for_this_map_type: if processed_at_least_one_resolution_variant: - self._update_file_rule_status(context, current_map_id_hex, 'Processed_With_Variants', map_type=filename_friendly_map_type, details="Successfully processed with multiple resolution variants.") + self._update_file_rule_status(context, current_map_key, 'Processed_With_Variants', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + details="Successfully processed with multiple resolution variants.") else: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Variants were expected for map type '{current_map_type}', but none were generated (e.g., base POT too small for any variant tier).") - self._update_file_rule_status(context, current_map_id_hex, 'Processed_No_Variants', map_type=filename_friendly_map_type, details="Variants expected but none generated (e.g., base POT too small).") + logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Variants were expected for map type '{current_map_type}', but none were generated (e.g., base POT too small for any variant tier).") + self._update_file_rule_status(context, current_map_key, 'Processed_No_Variants', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + details="Variants expected but none generated (e.g., base POT too small).") else: # No variants were expected for this map type - self._update_file_rule_status(context, current_map_id_hex, 'Processed_No_Variants', map_type=filename_friendly_map_type, details="Processed to base POT; variants not applicable for this map type.") + self._update_file_rule_status(context, current_map_key, 'Processed_No_Variants', + map_type=filename_friendly_map_type, + processing_map_type=current_map_type, + source_file_rule_index=file_rule_idx, + processing_tag=processing_instance_tag, + details="Processed to base POT; variants not applicable for this map type.") logger.info(f"Asset '{asset_name_for_log}': Finished individual map processing stage.") return context - def _find_source_file(self, base_path: Path, pattern: str, asset_name_for_log: str, current_map_id_hex: str) -> Optional[Path]: # asset_id -> asset_name_for_log, file_rule_id_hex -> current_map_id_hex + def _find_source_file(self, base_path: Path, pattern: str, asset_name_for_log: str, processing_instance_tag: str) -> Optional[Path]: """ Finds a single source file matching the pattern within the base_path. + Logs use processing_instance_tag for specific run tracing. """ - if not pattern: # pattern is now file_rule.file_path - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Empty file_path provided in FileRule.") + if not pattern: + logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Empty file_path provided in FileRule.") return None # If pattern is an absolute path, use it directly potential_abs_path = Path(pattern) if potential_abs_path.is_absolute() and potential_abs_path.exists(): - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: file_path '{pattern}' is absolute and exists. Using it directly.") + logger.debug(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: file_path '{pattern}' is absolute and exists. Using it directly.") return potential_abs_path elif potential_abs_path.is_absolute(): - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: file_path '{pattern}' is absolute but does not exist.") + logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: file_path '{pattern}' is absolute but does not exist.") # Fall through to try resolving against base_path if it's just a name/relative pattern # Treat pattern as relative to base_path @@ -565,46 +623,49 @@ class IndividualMapProcessingStage(ProcessingStage): # First, check if pattern is an exact relative path exact_match_path = base_path / pattern if exact_match_path.exists() and exact_match_path.is_file(): - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Found exact match for '{pattern}' at '{exact_match_path}'.") + logger.debug(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Found exact match for '{pattern}' at '{exact_match_path}'.") return exact_match_path # If not an exact match, try as a glob pattern (recursive) matched_files_rglob = list(base_path.rglob(pattern)) if matched_files_rglob: if len(matched_files_rglob) > 1: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Multiple files ({len(matched_files_rglob)}) found for pattern '{pattern}' in '{base_path}' (recursive). Using first: {matched_files_rglob[0]}. Files: {matched_files_rglob}") + logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Multiple files ({len(matched_files_rglob)}) found for pattern '{pattern}' in '{base_path}' (recursive). Using first: {matched_files_rglob[0]}. Files: {matched_files_rglob}") return matched_files_rglob[0] # Try non-recursive glob if rglob fails matched_files_glob = list(base_path.glob(pattern)) if matched_files_glob: if len(matched_files_glob) > 1: - logger.warning(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Multiple files ({len(matched_files_glob)}) found for pattern '{pattern}' in '{base_path}' (non-recursive). Using first: {matched_files_glob[0]}. Files: {matched_files_glob}") + logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Multiple files ({len(matched_files_glob)}) found for pattern '{pattern}' in '{base_path}' (non-recursive). Using first: {matched_files_glob[0]}. Files: {matched_files_glob}") return matched_files_glob[0] - logger.debug(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: No files found matching pattern '{pattern}' in '{base_path}' (exact, recursive, or non-recursive).") + logger.debug(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: No files found matching pattern '{pattern}' in '{base_path}' (exact, recursive, or non-recursive).") return None except Exception as e: - logger.error(f"Asset '{asset_name_for_log}', Map ID {current_map_id_hex}: Error searching for file with pattern '{pattern}' in '{base_path}': {e}") + logger.error(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Error searching for file with pattern '{pattern}' in '{base_path}': {e}") return None - def _update_file_rule_status(self, context: AssetProcessingContext, map_id_hex: str, status: str, **kwargs): # file_rule_id_hex -> map_id_hex - """Helper to update processed_maps_details for a map.""" + def _update_file_rule_status(self, context: AssetProcessingContext, map_key_index: int, status: str, **kwargs): # Renamed map_id_hex to map_key_index + """Helper to update processed_maps_details for a map, keyed by file_rule_idx.""" asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" - if map_id_hex not in context.processed_maps_details: - context.processed_maps_details[map_id_hex] = {} + if map_key_index not in context.processed_maps_details: + context.processed_maps_details[map_key_index] = {} - context.processed_maps_details[map_id_hex]['status'] = status + context.processed_maps_details[map_key_index]['status'] = status for key, value in kwargs.items(): - context.processed_maps_details[map_id_hex][key] = value + # Ensure source_file_rule_id_hex is not added if it was somehow passed (it shouldn't be) + if key == 'source_file_rule_id_hex': + continue + context.processed_maps_details[map_key_index][key] = value - if 'map_type' not in context.processed_maps_details[map_id_hex] and 'map_type' in kwargs: - context.processed_maps_details[map_id_hex]['map_type'] = kwargs['map_type'] + if 'map_type' not in context.processed_maps_details[map_key_index] and 'map_type' in kwargs: + context.processed_maps_details[map_key_index]['map_type'] = kwargs['map_type'] # Add formatted resolution names if 'original_dimensions' in kwargs and isinstance(kwargs['original_dimensions'], tuple) and len(kwargs['original_dimensions']) == 2: orig_w, orig_h = kwargs['original_dimensions'] - context.processed_maps_details[map_id_hex]['original_resolution_name'] = f"{orig_w}x{orig_h}" + context.processed_maps_details[map_key_index]['original_resolution_name'] = f"{orig_w}x{orig_h}" # Determine the correct dimensions to use for 'processed_resolution_name' # This name refers to the base POT scaled image dimensions before variant generation. @@ -619,21 +680,21 @@ class IndividualMapProcessingStage(ProcessingStage): if dims_to_log_as_base_processed: proc_w, proc_h = dims_to_log_as_base_processed resolution_name_str = f"{proc_w}x{proc_h}" - context.processed_maps_details[map_id_hex]['base_pot_resolution_name'] = resolution_name_str + context.processed_maps_details[map_key_index]['base_pot_resolution_name'] = resolution_name_str # Ensure 'processed_resolution_name' is also set for OutputOrganizationStage compatibility - context.processed_maps_details[map_id_hex]['processed_resolution_name'] = resolution_name_str + context.processed_maps_details[map_key_index]['processed_resolution_name'] = resolution_name_str elif 'processed_dimensions' in kwargs or 'base_pot_dimensions' in kwargs: details_for_warning = kwargs.get('processed_dimensions', kwargs.get('base_pot_dimensions')) - logger.warning(f"Asset '{asset_name_for_log}', Map ID {map_id_hex}: 'processed_dimensions' or 'base_pot_dimensions' key present but its value is not a valid 2-element tuple: {details_for_warning}") + logger.warning(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: 'processed_dimensions' or 'base_pot_dimensions' key present but its value is not a valid 2-element tuple: {details_for_warning}") # If temp_processed_file was passed, ensure it's in the details if 'temp_processed_file' in kwargs: - context.processed_maps_details[map_id_hex]['temp_processed_file'] = kwargs['temp_processed_file'] + context.processed_maps_details[map_key_index]['temp_processed_file'] = kwargs['temp_processed_file'] # Log all details being stored for clarity, including the newly added resolution names - log_details = context.processed_maps_details[map_id_hex].copy() + log_details = context.processed_maps_details[map_key_index].copy() # Avoid logging full image data if it accidentally gets into kwargs if 'image_data' in log_details: del log_details['image_data'] if 'base_pot_image_data' in log_details: del log_details['base_pot_image_data'] - logger.debug(f"Asset '{asset_name_for_log}', Map ID {map_id_hex}: Status updated to '{status}'. Details: {log_details}") \ No newline at end of file + logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Status updated to '{status}'. Details: {log_details}") \ No newline at end of file diff --git a/processing/pipeline/stages/map_merging.py b/processing/pipeline/stages/map_merging.py index 791f7b9..5dacc73 100644 --- a/processing/pipeline/stages/map_merging.py +++ b/processing/pipeline/stages/map_merging.py @@ -125,72 +125,120 @@ class MapMergingStage(ProcessingStage): required_input_map_types = set(inputs_map_type_to_channel.values()) for required_map_type in required_input_map_types: - found_processed_map = None - processed_map_key = None - for p_key, p_details in context.processed_maps_details.items(): - processed_map_type_in_details = p_details.get('map_type') - # Check for direct match or match with "MAP_" prefix - if (processed_map_type_in_details == required_map_type or \ - processed_map_type_in_details == f"MAP_{required_map_type}") and \ - p_details.get('status') == 'Processed': - found_processed_map = p_details - processed_map_key = p_key # The UUID hex key from individual processing + found_processed_map_details = None + # The key `p_key_idx` is the file_rule_idx from the IndividualMapProcessingStage + for p_key_idx, p_details in context.processed_maps_details.items(): # p_key_idx is an int + processed_map_identifier = p_details.get('processing_map_type', p_details.get('map_type')) + + # Comprehensive list of valid statuses for an input map to be used in merging + valid_input_statuses = ['BasePOTSaved', 'Processed_With_Variants', 'Processed_No_Variants', 'Converted_To_Rough'] + + is_match = False + if processed_map_identifier == required_map_type: + is_match = True + elif required_map_type.startswith("MAP_") and processed_map_identifier == required_map_type.split("MAP_")[-1]: + is_match = True + elif not required_map_type.startswith("MAP_") and processed_map_identifier == f"MAP_{required_map_type}": + is_match = True + + if is_match and p_details.get('status') in valid_input_statuses: + found_processed_map_details = p_details + # The key `p_key_idx` (which is the FileRule index) is implicitly associated with these details. break - if not found_processed_map: - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map_type '{required_map_type}' for output '{output_map_type}' not found or not processed in processed_maps_details.") - # Option: Use default value for the entire map if one could be constructed for this map_type - # For now, we fail the merge if a required map is missing. - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Required input map_type '{required_map_type}' missing."} - break # Break from finding inputs for this merge rule + if not found_processed_map_details: + can_be_fully_defaulted = True + channels_requiring_this_map = [ + ch_key for ch_key, map_type_val in inputs_map_type_to_channel.items() + if map_type_val == required_map_type + ] - temp_file_path = Path(found_processed_map['temp_processed_file']) - if not temp_file_path.exists(): - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Temp file {temp_file_path} for input map_type '{required_map_type}' does not exist.") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Temp file for input '{required_map_type}' missing."} - break - - try: - image_data = ipu.load_image(temp_file_path) - if image_data is None: raise ValueError("Loaded image is None") - except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error loading image {temp_file_path} for input map_type '{required_map_type}': {e}") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Error loading input '{required_map_type}'."} - break - - loaded_input_maps[required_map_type] = image_data - input_map_paths[required_map_type] = str(temp_file_path) - - current_dims = (image_data.shape[1], image_data.shape[0]) - if target_dims is None: - target_dims = current_dims - elif current_dims != target_dims: - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{required_map_type}' dims {current_dims} differ from target {target_dims}. Resizing.") - try: - image_data = ipu.resize_image(image_data, target_dims[0], target_dims[1]) - if image_data is None: raise ValueError("Resize returned None") - loaded_input_maps[required_map_type] = image_data - except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to resize '{required_map_type}': {e}") + if not channels_requiring_this_map: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Internal logic error. Required map_type '{required_map_type}' is not actually used by any output channel. Configuration: {inputs_map_type_to_channel}") all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Failed to resize input '{required_map_type}'."} + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Internal error: required map_type '{required_map_type}' not in use."} break + + for channel_char_needing_default in channels_requiring_this_map: + if default_values.get(channel_char_needing_default) is None: + can_be_fully_defaulted = False + break + + if can_be_fully_defaulted: + logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map_type '{required_map_type}' for output '{output_map_type}' not found or not in usable state. Will attempt to use default values for its channels: {channels_requiring_this_map}.") + else: + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map_type '{required_map_type}' for output '{output_map_type}' not found/unusable, AND not all its required channels ({channels_requiring_this_map}) have defaults. Failing merge op.") + all_inputs_valid = False + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Input '{required_map_type}' missing and defaults incomplete."} + break + + if found_processed_map_details: + temp_file_path_str = found_processed_map_details.get('temp_processed_file') + if not temp_file_path_str: + # Log with p_key_idx if available, or just the map type if not (though it should be if found_processed_map_details is set) + log_key_info = f"(Associated Key Index: {p_key_idx})" if 'p_key_idx' in locals() and found_processed_map_details else "" # Use locals() to check if p_key_idx is defined in this scope + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: 'temp_processed_file' missing in details for found map_type '{required_map_type}' {log_key_info}.") + all_inputs_valid = False + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Temp file path missing for input '{required_map_type}'."} + break + + temp_file_path = Path(temp_file_path_str) + if not temp_file_path.exists(): + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Temp file {temp_file_path} for input map_type '{required_map_type}' does not exist.") + all_inputs_valid = False + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Temp file for input '{required_map_type}' missing."} + break + + try: + image_data = ipu.load_image(str(temp_file_path)) + if image_data is None: raise ValueError("Loaded image is None") + except Exception as e: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error loading image {temp_file_path} for input map_type '{required_map_type}': {e}") + all_inputs_valid = False + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Error loading input '{required_map_type}'."} + break + + loaded_input_maps[required_map_type] = image_data + input_map_paths[required_map_type] = str(temp_file_path) + + current_dims = (image_data.shape[1], image_data.shape[0]) + if target_dims is None: + target_dims = current_dims + elif current_dims != target_dims: + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{required_map_type}' dims {current_dims} differ from target {target_dims}. Resizing.") + try: + image_data_resized = ipu.resize_image(image_data, target_dims[0], target_dims[1]) + if image_data_resized is None: raise ValueError("Resize returned None") + loaded_input_maps[required_map_type] = image_data_resized + except Exception as e: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to resize '{required_map_type}': {e}") + all_inputs_valid = False + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Failed to resize input '{required_map_type}'."} + break if not all_inputs_valid: logger.warning(f"Asset {asset_name_for_log}: Skipping merge for Op ID {merge_op_id} ('{output_map_type}') due to invalid inputs.") continue - if not loaded_input_maps or target_dims is None: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: No input maps loaded or target_dims not set for '{output_map_type}'. This shouldn't happen if all_inputs_valid was true.") - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'Internal error: input maps not loaded or target_dims missing.'} - continue + if not loaded_input_maps and not any(default_values.get(ch) is not None for ch in inputs_map_type_to_channel.keys()): + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: No input maps loaded and no defaults available for any channel for '{output_map_type}'. Cannot proceed.") + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'No input maps loaded and no defaults available.'} + continue - # Determine output channels (e.g., 3 for RGB, 1 for Grayscale) - # This depends on the keys in inputs_map_type_to_channel (R,G,B,A) - output_channel_keys = sorted(list(inputs_map_type_to_channel.keys())) # e.g. ['B', 'G', 'R'] + if target_dims is None: + default_res_key = context.config_obj.get("default_output_resolution_key_for_merge", "1K") + image_resolutions_cfg = getattr(context.config_obj, "image_resolutions", {}) + default_max_dim = image_resolutions_cfg.get(default_res_key) + + if default_max_dim: + target_dims = (default_max_dim, default_max_dim) + logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Target dimensions not set by inputs (all defaulted). Using configured default resolution '{default_res_key}': {target_dims}.") + else: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Target dimensions could not be determined for '{output_map_type}' (all inputs defaulted and no default output resolution configured).") + context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'Target dimensions undetermined for fully defaulted merge.'} + continue + + output_channel_keys = sorted(list(inputs_map_type_to_channel.keys())) num_output_channels = len(output_channel_keys) if num_output_channels == 0: @@ -199,79 +247,86 @@ class MapMergingStage(ProcessingStage): continue try: - if num_output_channels == 1: # Grayscale output - merged_image = np.zeros((target_dims[1], target_dims[0]), dtype=np.uint8) - else: # Color output - merged_image = np.zeros((target_dims[1], target_dims[0], num_output_channels), dtype=np.uint8) + output_dtype = np.uint8 + + if num_output_channels == 1: + merged_image = np.zeros((target_dims[1], target_dims[0]), dtype=output_dtype) + else: + merged_image = np.zeros((target_dims[1], target_dims[0], num_output_channels), dtype=output_dtype) except Exception as e: logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error creating empty merged image for '{output_map_type}': {e}") context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f'Error creating output canvas: {e}'} continue merge_op_failed_detail = False - for i, out_channel_char in enumerate(output_channel_keys): # e.g. R, G, B + for i, out_channel_char in enumerate(output_channel_keys): input_map_type_for_this_channel = inputs_map_type_to_channel[out_channel_char] source_image = loaded_input_maps.get(input_map_type_for_this_channel) source_data_this_channel = None if source_image is not None: - if source_image.ndim == 2: # Grayscale source + if source_image.dtype != np.uint8: + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{input_map_type_for_this_channel}' has dtype {source_image.dtype}, expected uint8. Attempting conversion.") + source_image = ipu.convert_to_uint8(source_image) + if source_image is None: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to convert input '{input_map_type_for_this_channel}' to uint8.") + merge_op_failed_detail = True; break + + + if source_image.ndim == 2: source_data_this_channel = source_image - elif source_image.ndim == 3 or source_image.ndim == 4: # Color source (3-channel BGR or 4-channel BGRA), assumed loaded by ipu.load_image - # Standard BGR(A) channel indexing: B=0, G=1, R=2, A=3 (if present) - # This map helps get NRM's Red data for 'R' output, NRM's Green for 'G' output etc. - # based on the semantic meaning of out_channel_char. + elif source_image.ndim == 3: semantic_to_bgr_idx = {'R': 2, 'G': 1, 'B': 0, 'A': 3} - if input_map_type_for_this_channel == "NRM": - idx_to_extract = semantic_to_bgr_idx.get(out_channel_char) - - if idx_to_extract is not None and idx_to_extract < source_image.shape[2]: - source_data_this_channel = source_image[:, :, idx_to_extract] - logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: For output '{out_channel_char}', using NRM's semantic '{out_channel_char}' channel (BGR(A) index {idx_to_extract}).") - else: - # Fallback if out_channel_char isn't R,G,B,A or NRM doesn't have the channel (e.g. 3-channel NRM and 'A' requested) - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Could not map output '{out_channel_char}' to a specific BGR(A) channel of NRM (shape {source_image.shape}). Defaulting to NRM's channel 0 (Blue).") - source_data_this_channel = source_image[:, :, 0] + idx_to_extract = semantic_to_bgr_idx.get(out_channel_char.upper()) + + if idx_to_extract is not None and idx_to_extract < source_image.shape[2]: + source_data_this_channel = source_image[:, :, idx_to_extract] + logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: For output '{out_channel_char}', using source '{input_map_type_for_this_channel}' semantic '{out_channel_char}' (BGR(A) index {idx_to_extract}).") else: - # For other multi-channel sources (e.g., ROUGH as RGB, or other color maps not "NRM") - # Default to taking the first channel (Blue in BGR). - # This covers "Roughness map's greyscale data" if ROUGH is RGB (by taking one of its channels as a proxy). + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Could not map output '{out_channel_char}' to a specific BGR(A) channel of '{input_map_type_for_this_channel}' (shape {source_image.shape}). Defaulting to its channel 0 (Blue).") source_data_this_channel = source_image[:, :, 0] - logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: For output '{out_channel_char}', source {input_map_type_for_this_channel} (shape {source_image.shape}) is multi-channel but not NRM. Using its channel 0 (Blue).") - else: # Source map was not found, use default + else: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Source image '{input_map_type_for_this_channel}' has unexpected dimensions: {source_image.ndim} (shape {source_image.shape}).") + merge_op_failed_detail = True; break + + else: default_val_for_channel = default_values.get(out_channel_char) if default_val_for_channel is not None: - # Convert 0-1 float default to 0-255 uint8 - source_data_this_channel = np.full((target_dims[1], target_dims[0]), int(default_val_for_channel * 255), dtype=np.uint8) - logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Using default value {default_val_for_channel} for output channel '{out_channel_char}' as input map '{input_map_type_for_this_channel}' was missing.") + try: + scaled_default_val = int(float(default_val_for_channel) * 255) + source_data_this_channel = np.full((target_dims[1], target_dims[0]), scaled_default_val, dtype=np.uint8) + logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Using default value {default_val_for_channel} (scaled to {scaled_default_val}) for output channel '{out_channel_char}' as input map '{input_map_type_for_this_channel}' was missing.") + except ValueError: + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Default value '{default_val_for_channel}' for channel '{out_channel_char}' is not a valid float. Cannot scale.") + merge_op_failed_detail = True; break else: logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{input_map_type_for_this_channel}' for output channel '{out_channel_char}' is missing and no default value provided.") merge_op_failed_detail = True; break - if source_data_this_channel is None: # Should be caught by default value logic or earlier checks + if source_data_this_channel is None: logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to get source data for output channel '{out_channel_char}'.") merge_op_failed_detail = True; break try: - if merged_image.ndim == 2: # Single channel output + if merged_image.ndim == 2: merged_image = source_data_this_channel - else: # Multi-channel output + else: merged_image[:, :, i] = source_data_this_channel except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error assigning data to output channel '{out_channel_char}' (index {i}): {e}") + logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error assigning data to output channel '{out_channel_char}' (index {i}): {e}. Merged shape: {merged_image.shape}, Source data shape: {source_data_this_channel.shape}") merge_op_failed_detail = True; break if merge_op_failed_detail: context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'Error during channel assignment.'} continue - output_format = 'png' # Default, can be configured per rule later + output_format = 'png' temp_merged_filename = f"merged_{sanitize_filename(output_map_type)}_{merge_op_id}.{output_format}" temp_merged_path = context.engine_temp_dir / temp_merged_filename try: - save_success = ipu.save_image(temp_merged_path, merged_image) + save_success = ipu.save_image(str(temp_merged_path), merged_image) if not save_success: raise ValueError("Save image returned false") except Exception as e: logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error saving merged image {temp_merged_path}: {e}") diff --git a/processing/pipeline/stages/metadata_finalization_save.py b/processing/pipeline/stages/metadata_finalization_save.py index 8f3a555..f2adb70 100644 --- a/processing/pipeline/stages/metadata_finalization_save.py +++ b/processing/pipeline/stages/metadata_finalization_save.py @@ -66,7 +66,56 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): context.asset_metadata['status'] = "Processed" # Add details of processed and merged maps - context.asset_metadata['processed_map_details'] = getattr(context, 'processed_maps_details', {}) + # Restructure processed_map_details before assigning + restructured_processed_maps = {} + # getattr(context, 'processed_maps_details', {}) is the source (plural 'maps') + original_processed_maps = getattr(context, 'processed_maps_details', {}) + + # Define keys to remove at the top level of each map entry + map_keys_to_remove = [ + "status", "source_file_path", "temp_processed_file", # Assuming "source_file_path" is the correct key + "original_resolution_name", "base_pot_resolution_name", "processed_resolution_name" + ] + # Define keys to remove from each variant + variant_keys_to_remove = ["temp_path", "dimensions"] + + for map_key, map_detail_original in original_processed_maps.items(): + # Create a new dictionary for the modified map entry + new_map_entry = {} + for key, value in map_detail_original.items(): + if key not in map_keys_to_remove: + new_map_entry[key] = value + + if "variants" in map_detail_original and isinstance(map_detail_original["variants"], dict): + new_variants_dict = {} + for variant_name, variant_data_original in map_detail_original["variants"].items(): + new_variant_entry = {} + for key, value in variant_data_original.items(): + if key not in variant_keys_to_remove: + new_variant_entry[key] = value + + # Add 'path_to_file' + # This path is expected to be set by OutputOrganizationStage in the context. + # It should be a Path object representing the path relative to the metadata directory, + # or an absolute Path that make_serializable can convert. + # Using 'final_output_path_for_metadata' as the key from context. + if 'final_output_path_for_metadata' in variant_data_original: + new_variant_entry['path_to_file'] = variant_data_original['final_output_path_for_metadata'] + else: + # Log a warning if the expected path is not found + logger.warning( + f"Asset '{asset_name_for_log}': 'final_output_path_for_metadata' " + f"missing for variant '{variant_name}' in map '{map_key}'. " + f"Metadata will be incomplete for this variant's path." + ) + new_variant_entry['path_to_file'] = "ERROR_PATH_NOT_FOUND" # Placeholder + new_variants_dict[variant_name] = new_variant_entry + new_map_entry["variants"] = new_variants_dict + + restructured_processed_maps[map_key] = new_map_entry + + # Assign the restructured details. Note: 'processed_map_details' (singular 'map') is the key in asset_metadata. + context.asset_metadata['processed_map_details'] = restructured_processed_maps context.asset_metadata['merged_map_details'] = getattr(context, 'merged_maps_details', {}) # (Optional) Add a list of all temporary files diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index 5087bd6..69fe625 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -210,11 +210,13 @@ class OutputOrganizationStage(ProcessingStage): logger.info(f"Asset '{asset_name_for_log}': Copied variant {temp_variant_path} to {final_variant_path} for map '{processed_map_key}'.") final_output_files.append(str(final_variant_path)) variant_detail['status'] = 'Organized' - - variant_detail['final_output_path'] = str(final_variant_path) - relative_final_variant_path_str = str(Path(relative_dir_path_str_variant) / Path(output_filename_variant)) - map_metadata_entry['variant_paths'][variant_resolution_key] = relative_final_variant_path_str - processed_any_variant_successfully = True + + variant_detail['final_output_path'] = str(final_variant_path) + # Store the Path object for metadata stage to make it relative later + variant_detail['final_output_path_for_metadata'] = final_variant_path + relative_final_variant_path_str = str(Path(relative_dir_path_str_variant) / Path(output_filename_variant)) + map_metadata_entry['variant_paths'][variant_resolution_key] = relative_final_variant_path_str + processed_any_variant_successfully = True except Exception as e: logger.error(f"Asset '{asset_name_for_log}': Failed to copy variant {temp_variant_path} for map key '{processed_map_key}' (res: {variant_resolution_key}). Error: {e}", exc_info=True) -- 2.47.2 From 4ffb2ff78c4de076126168769fac55b0e83eb158 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Mon, 12 May 2025 13:31:58 +0200 Subject: [PATCH 05/16] Pipeline simplification - Needs testing! --- ProjectNotes/PipelineRefactoringPlan.md | 154 +++ ProjectNotes/ProcessingEngineRefactorPlan.md | 181 --- config/app_settings.json | 7 +- .../stages/individual_map_processing.py | 1169 ++++++++--------- processing/pipeline/stages/map_merging.py | 303 +---- processing/utils/image_processing_utils.py | 31 + processing/utils/image_saving_utils.py | 250 ++++ 7 files changed, 1046 insertions(+), 1049 deletions(-) create mode 100644 ProjectNotes/PipelineRefactoringPlan.md delete mode 100644 ProjectNotes/ProcessingEngineRefactorPlan.md create mode 100644 processing/utils/image_saving_utils.py diff --git a/ProjectNotes/PipelineRefactoringPlan.md b/ProjectNotes/PipelineRefactoringPlan.md new file mode 100644 index 0000000..18b8df8 --- /dev/null +++ b/ProjectNotes/PipelineRefactoringPlan.md @@ -0,0 +1,154 @@ +# Revised Refactoring Plan: Processing Pipeline + +**Overall Goal:** To simplify the processing pipeline by refactoring the map merging process, consolidating map transformations (Gloss-to-Rough, Normal Green Invert), and creating a unified, configurable image saving utility. This plan aims to improve clarity, significantly reduce I/O by favoring in-memory operations, and make Power-of-Two (POT) scaling an optional, integrated step. + +**I. Map Merging Stage (`processing/pipeline/stages/map_merging.py`)** + +* **Objective:** Transform this stage from performing merges to generating tasks for merged images. +* **Changes to `MapMergingStage.execute()`:** + 1. Iterate through `context.config_obj.map_merge_rules`. + 2. Identify required input map types and find their corresponding source file paths (potentially original paths or outputs of prior essential stages if any). + 3. Create "merged image tasks" and add them to `context.merged_image_tasks`. + 4. Each task entry will contain: + * `output_map_type`: Target map type (e.g., "MAP_NRMRGH"). + * `input_map_sources`: Details of source map types and file paths. + * `merge_rule_config`: Complete merge rule configuration (including fallback values). + * `source_dimensions`: Dimensions for the high-resolution merged map basis. + * `source_bit_depths`: Information about the bit depth of original source maps (needed for "respect_inputs" rule in save utility). + +**II. Individual Map Processing Stage (`processing/pipeline/stages/individual_map_processing.py`)** + +* **Objective:** Adapt this stage to handle both individual raw maps and `merged_image_tasks`. It will perform necessary in-memory transformations (Gloss-to-Rough, Normal Green Invert) and prepare a single "high-resolution" source image (in memory) to be passed to the `UnifiedSaveUtility`. +* **Changes to `IndividualMapProcessingStage.execute()`:** + 1. **Input Handling Loop:** Iterate through `context.files_to_process` (regular maps) and `context.merged_image_tasks`. + 2. **Image Data Preparation:** + * **For regular maps:** Load the source image file into memory (`current_image_data`). Determine `base_map_type` from the `FileRule`. Determine source bit depth. + * **For `merged_image_tasks`:** + * Attempt to load input map files specified in `input_map_sources`. If a file is missing, log a warning and generate placeholder data using fallback values from `merge_rule_config`. Handle other load errors. + * Check dimensions of loaded/fallback data. Apply `MERGE_DIMENSION_MISMATCH_STRATEGY` (e.g., resize, log warning) or handle "ERROR_SKIP" strategy (log error, mark task failed, continue). + * Perform the merge operation in memory according to `merge_rule_config`. Result is `current_image_data`. `base_map_type` is the task's `output_map_type`. + 3. **In-Memory Transformations:** + * **Gloss-to-Rough Conversion:** + * If `base_map_type` starts with "MAP_GLOSS": + * Perform inversion on `current_image_data` (in memory). + * Update `base_map_type` to "MAP_ROUGH". + * Log the conversion. + * **Normal Map Green Channel Inversion:** + * If `base_map_type` is "NORMAL" *and* `context.config_obj.general_settings.invert_normal_map_green_channel_globally` is true: + * Perform green channel inversion on `current_image_data` (in memory). + * Log the inversion. + 4. **Optional Initial Scaling (POT or other):** + * Check `INITIAL_SCALING_MODE` from config. + * If `"POT_DOWNSCALE"`: Perform POT downscaling on `current_image_data` (in memory) -> `image_to_save`. + * If `"NONE"`: `image_to_save` = `current_image_data`. + * *(Note: `image_to_save` now reflects any prior transformations)*. + 5. **Color Management:** Apply necessary color management to `image_to_save`. + 6. **Pass to Save Utility:** Pass `image_to_save`, the (potentially updated) `base_map_type`, original source bit depth info (for "respect_inputs" rule), and other necessary details (like specific config values) to the `UnifiedSaveUtility`. + 7. **Remove Old Logic:** Remove old save logic, separate Gloss/Normal stage calls. + 8. **Context Update:** Update `context.processed_maps_details` with results from the `UnifiedSaveUtility`, including notes about any conversions/inversions performed or merge task failures. + +**III. Unified Image Save Utility (New file: `processing/utils/image_saving_utils.py`)** + +* **Objective:** Centralize all image saving logic (resolution variants, format, bit depth, compression). +* **Interface (e.g., `save_image_variants` function):** + * **Inputs:** + * `source_image_data (np.ndarray)`: High-res image data (in memory, potentially transformed). + * `base_map_type (str)`: Final map type (e.g., "COL", "ROUGH", "NORMAL", "MAP_NRMRGH"). + * `source_bit_depth_info (list)`: List of original source bit depth(s). + * Specific config values (e.g., `image_resolutions: dict`, `file_type_defs: dict`, `output_format_8bit: str`, etc.). + * `output_filename_pattern_tokens (dict)`. + * `output_base_directory (Path)`. + * **Core Functionality:** + 1. Use provided configuration inputs. + 2. Determine Target Bit Depth: + * Use `bit_depth_rule` for `base_map_type` from `file_type_defs`. + * If "force_8bit": target 8-bit. + * If "respect_inputs": If `any(depth > 8 for depth in source_bit_depth_info)`, target 16-bit, else 8-bit. + 3. Determine Output File Format(s) (based on target bit depth, config). + 4. Generate and Save Resolution Variants: + * Iterate through `image_resolutions`. + * Resize `source_image_data` (in memory) for each variant (no upscaling). + * Construct filename and path. + * Prepare save parameters. + * Convert variant data to target bit depth/color space just before saving. + * Save variant using `cv2.imwrite` or similar. + * Discard in-memory variant after saving. + 5. Return List of Saved File Details: `{'path': str, 'resolution_key': str, 'format': str, 'bit_depth': int, 'dimensions': (w,h)}`. + * **Memory Management:** Holds `source_image_data` + one variant in memory at a time. + +**IV. Configuration Changes (`config/app_settings.json`)** + +1. **Add/Confirm Settings:** + * `"INITIAL_SCALING_MODE": "POT_DOWNSCALE"` (Options: "POT_DOWNSCALE", "NONE"). + * `"MERGE_DIMENSION_MISMATCH_STRATEGY": "USE_LARGEST"` (Options: "USE_LARGEST", "USE_FIRST", "ERROR_SKIP"). + * Ensure `general_settings.invert_normal_map_green_channel_globally` exists (boolean). +2. **Review/Confirm Existing Settings:** + * Ensure `IMAGE_RESOLUTIONS`, `FILE_TYPE_DEFINITIONS` (`bit_depth_rule`), `MAP_MERGE_RULES` (`output_bit_depth`, fallback values), format settings, quality settings are comprehensive. +3. **Remove Obsolete Setting:** + * `RESPECT_VARIANT_MAP_TYPES`. + +**V. Data Flow Diagram (Mermaid)** + +```mermaid +graph TD + A[Start Asset Processing] --> B[File Rules Filter]; + B --> STAGE_INDIVIDUAL_MAP_PROCESSING[Individual Map Processing Stage]; + + subgraph STAGE_INDIVIDUAL_MAP_PROCESSING [Individual Map Processing Stage] + direction LR + C1{Is it a regular map or merged task?} + C1 -- Regular Map --> C2[Load Source Image File into Memory (current_image_data)]; + C1 -- Merged Task (from Map Merging Stage) --> C3[Load Inputs (Handle Missing w/ Fallbacks) & Merge in Memory (Handle Dim Mismatch) (current_image_data)]; + + C2 --> C4[current_image_data]; + C3 --> C4; + + C4 --> C4_TRANSFORM{Transformations?}; + C4_TRANSFORM -- Gloss Map? --> C4a[Invert Data (in memory), Update base_map_type to ROUGH]; + C4_TRANSFORM -- Normal Map & Invert Config? --> C4b[Invert Green Channel (in memory)]; + C4_TRANSFORM -- No Transformation Needed --> C4_POST_TRANSFORM; + C4a --> C4_POST_TRANSFORM; + C4b --> C4_POST_TRANSFORM; + + C4_POST_TRANSFORM[current_image_data (potentially transformed)] --> C5{INITIAL_SCALING_MODE}; + C5 -- "POT_DOWNSCALE" --> C6[Perform POT Scale (in memory) --> image_to_save]; + C5 -- "NONE" --> C7[image_to_save = current_image_data]; + + C6 --> C8[Apply Color Management to image_to_save (in memory)]; + C7 --> C8; + + C8 --> UNIFIED_SAVE_UTILITY[Call Unified Save Utility with image_to_save, final base_map_type, source bit depth info, config]; + end + + UNIFIED_SAVE_UTILITY --> H[Update context.processed_maps_details with list of saved files & notes]; + H --> STAGE_METADATA_SAVE[Metadata Finalization & Save Stage]; + + STAGE_MAP_MERGING[Map Merging Stage] --> N{Identify Merge Rules}; + N --> O[Create Merged Image Tasks (incl. inputs, config, source bit depths)]; + O --> STAGE_INDIVIDUAL_MAP_PROCESSING; %% Feed tasks + + A --> STAGE_OTHER_INITIAL[Other Initial Stages] + STAGE_OTHER_INITIAL --> STAGE_MAP_MERGING; + + STAGE_METADATA_SAVE --> Z[End Asset Processing]; + + subgraph UNIFIED_SAVE_UTILITY_DETAILS [Unified Save Utility (processing.utils.image_saving_utils)] + direction TB + INPUTS[Input: in-memory image_to_save, final base_map_type, source_bit_depth_info, config_params, tokens, out_base_dir] + INPUTS --> CONFIG_LOAD[1. Use Provided Config Params] + CONFIG_LOAD --> DETERMINE_BIT_DEPTH[2. Determine Target Bit Depth (using rule & source_bit_depth_info)] + DETERMINE_BIT_DEPTH --> DETERMINE_FORMAT[3. Determine Output Format] + DETERMINE_FORMAT --> LOOP_VARIANTS[4. For each Resolution:] + LOOP_VARIANTS --> RESIZE_VARIANT[4a. Resize image_to_save to Variant (in memory)] + RESIZE_VARIANT --> PREPARE_SAVE[4b. Prepare Filename & Save Params] + PREPARE_SAVE --> SAVE_IMAGE[4c. Convert & Save Variant to Disk] + SAVE_IMAGE --> LOOP_VARIANTS; + LOOP_VARIANTS --> OUTPUT_LIST[5. Return List of Saved File Details] + end + + style STAGE_INDIVIDUAL_MAP_PROCESSING fill:#f9f,stroke:#333,stroke-width:2px; + style STAGE_MAP_MERGING fill:#f9f,stroke:#333,stroke-width:2px; + style UNIFIED_SAVE_UTILITY fill:#ccf,stroke:#333,stroke-width:2px; + style UNIFIED_SAVE_UTILITY_DETAILS fill:#ccf,stroke:#333,stroke-width:1px,dashed; + style O fill:#lightgrey,stroke:#333,stroke-width:2px; + style C4_POST_TRANSFORM fill:#e6ffe6,stroke:#333,stroke-width:1px; \ No newline at end of file diff --git a/ProjectNotes/ProcessingEngineRefactorPlan.md b/ProjectNotes/ProcessingEngineRefactorPlan.md deleted file mode 100644 index 1364ab6..0000000 --- a/ProjectNotes/ProcessingEngineRefactorPlan.md +++ /dev/null @@ -1,181 +0,0 @@ -# Project Plan: Modularizing the Asset Processing Engine - -**Last Updated:** May 9, 2025 - -**1. Project Vision & Goals** - -* **Vision:** Transform the asset processing pipeline into a highly modular, extensible, and testable system. -* **Primary Goals:** - 1. Decouple processing steps into independent, reusable stages. - 2. Simplify the addition of new processing capabilities (e.g., GLOSS > ROUGH conversion, Alpha to MASK, Normal Map Green Channel inversion). - 3. Improve code maintainability and readability. - 4. Enhance unit and integration testing capabilities for each processing component. - 5. Centralize common utility functions (image manipulation, path generation). - -**2. Proposed Architecture Overview** - -* **Core Concept:** A `PipelineOrchestrator` will manage a sequence of `ProcessingStage`s. Each stage will operate on an `AssetProcessingContext` object, which carries all necessary data and state for a single asset through the pipeline. -* **Key Components:** - * `AssetProcessingContext`: Data class holding asset-specific data, configuration, temporary paths, and status. - * `PipelineOrchestrator`: Class to manage the overall processing flow for a `SourceRule`, iterating through assets and executing the pipeline of stages for each. - * `ProcessingStage` (Base Class/Interface): Defines the contract for all individual processing stages (e.g., `execute(context)` method). - * Specific Stage Classes: (e.g., `SupplierDeterminationStage`, `IndividualMapProcessingStage`, etc.) - * Utility Modules: `image_processing_utils.py`, enhancements to `utils/path_utils.py`. - -**3. Proposed File Structure** - -* `processing/` - * `pipeline/` - * `__init__.py` - * `asset_context.py` (Defines `AssetProcessingContext`) - * `orchestrator.py` (Defines `PipelineOrchestrator`) - * `stages/` - * `__init__.py` - * `base_stage.py` (Defines `ProcessingStage` interface) - * `supplier_determination.py` - * `asset_skip_logic.py` - * `metadata_initialization.py` - * `file_rule_filter.py` - * `gloss_to_rough_conversion.py` - * `alpha_extraction_to_mask.py` - * `normal_map_green_channel.py` - * `individual_map_processing.py` - * `map_merging.py` - * `metadata_finalization.py` - * `output_organization.py` - * `utils/` - * `__init__.py` - * `image_processing_utils.py` (New module for image functions) -* `utils/` (Top-level existing directory) - * `path_utils.py` (To be enhanced with `sanitize_filename` from `processing_engine.py`) - -**4. Detailed Phases and Tasks** - -**Phase 0: Setup & Core Structures Definition** -*Goal: Establish the foundational classes for the new pipeline.* -* **Task 0.1: Define `AssetProcessingContext`** - * Create `processing/pipeline/asset_context.py`. - * Define the `AssetProcessingContext` data class with fields: `source_rule: SourceRule`, `asset_rule: AssetRule`, `workspace_path: Path`, `engine_temp_dir: Path`, `output_base_path: Path`, `effective_supplier: Optional[str]`, `asset_metadata: Dict`, `processed_maps_details: Dict[str, Dict[str, Dict]]`, `merged_maps_details: Dict[str, Dict[str, Dict]]`, `files_to_process: List[FileRule]`, `loaded_data_cache: Dict`, `config_obj: Configuration`, `status_flags: Dict`, `incrementing_value: Optional[str]`, `sha5_value: Optional[str]`. - * Ensure proper type hinting. -* **Task 0.2: Define `ProcessingStage` Base Class/Interface** - * Create `processing/pipeline/stages/base_stage.py`. - * Define an abstract base class `ProcessingStage` with an abstract method `execute(self, context: AssetProcessingContext) -> AssetProcessingContext`. -* **Task 0.3: Implement Initial `PipelineOrchestrator`** - * Create `processing/pipeline/orchestrator.py`. - * Define the `PipelineOrchestrator` class. - * Implement `__init__(self, config_obj: Configuration, stages: List[ProcessingStage])`. - * Implement `process_source_rule(self, source_rule: SourceRule, workspace_path: Path, output_base_path: Path, overwrite: bool, incrementing_value: Optional[str], sha5_value: Optional[str]) -> Dict[str, List[str]]`. - * Handles creation/cleanup of the main engine temporary directory. - * Loops through `source_rule.assets`, initializes `AssetProcessingContext` for each. - * Iterates `self.stages`, calling `stage.execute(context)`. - * Collects overall status. - -**Phase 1: Utility Module Refactoring** -*Goal: Consolidate and centralize common utility functions.* -* **Task 1.1: Refactor Path Utilities** - * Move `_sanitize_filename` from `processing_engine.py` to `utils/path_utils.py`. - * Update uses to call the new utility function. -* **Task 1.2: Create `image_processing_utils.py`** - * Create `processing/utils/image_processing_utils.py`. - * Move general-purpose image functions from `processing_engine.py`: - * `is_power_of_two` - * `get_nearest_pot` - * `calculate_target_dimensions` - * `calculate_image_stats` - * `normalize_aspect_ratio_change` - * Core image loading, BGR<>RGB conversion, generic resizing (from `_load_and_transform_source`). - * Core data type conversion for saving, color conversion for saving, `cv2.imwrite` call (from `_save_image`). - * Ensure functions are pure and testable. - -**Phase 2: Implementing Core Processing Stages (Migrating Existing Logic)** -*Goal: Migrate existing functionalities from `processing_engine.py` into the new stage-based architecture.* -(For each task: create stage file, implement class, move logic, adapt to `AssetProcessingContext`) -* **Task 2.1: Implement `SupplierDeterminationStage`** -* **Task 2.2: Implement `AssetSkipLogicStage`** -* **Task 2.3: Implement `MetadataInitializationStage`** -* **Task 2.4: Implement `FileRuleFilterStage`** (New logic for `item_type == "FILE_IGNORE"`) -* **Task 2.5: Implement `IndividualMapProcessingStage`** (Adapts `_process_individual_maps`, uses `image_processing_utils.py`) -* **Task 2.6: Implement `MapMergingStage`** (Adapts `_merge_maps`, uses `image_processing_utils.py`) -* **Task 2.7: Implement `MetadataFinalizationAndSaveStage`** (Adapts `_generate_metadata_file`, uses `utils.path_utils.generate_path_from_pattern`) -* **Task 2.8: Implement `OutputOrganizationStage`** (Adapts `_organize_output_files`) - -**Phase 3: Implementing New Feature Stages** -*Goal: Add the new desired processing capabilities as distinct stages.* -* **Task 3.1: Implement `GlossToRoughConversionStage`** (Identify gloss, convert, invert, save temp, update `FileRule`) -* **Task 3.2: Implement `AlphaExtractionToMaskStage`** (Check existing mask, find MAP_COL with alpha, extract, save temp, add new `FileRule`) -* **Task 3.3: Implement `NormalMapGreenChannelStage`** (Identify normal maps, invert green based on config, save temp, update `FileRule`) - -**Phase 4: Integration, Testing & Finalization** -*Goal: Assemble the pipeline, test thoroughly, and deprecate old code.* -* **Task 4.1: Configure `PipelineOrchestrator`** - * Instantiate `PipelineOrchestrator` in main application logic with the ordered list of stage instances. -* **Task 4.2: Unit Testing** - * Unit tests for each `ProcessingStage` (mocking `AssetProcessingContext`). - * Unit tests for `image_processing_utils.py` and `utils/path_utils.py` functions. -* **Task 4.3: Integration Testing** - * Test `PipelineOrchestrator` end-to-end with sample data. - * Compare outputs with the existing engine for consistency. -* **Task 4.4: Documentation Update** - * Update developer documentation (e.g., `Documentation/02_Developer_Guide/05_Processing_Pipeline.md`). - * Document `AssetProcessingContext` and stage responsibilities. -* **Task 4.5: Deprecate/Remove Old `ProcessingEngine` Code** - * Gradually remove refactored logic from `processing_engine.py`. - -**5. Workflow Diagram** - -```mermaid -graph TD - AA[Load SourceRule & Config] --> BA(PipelineOrchestrator: process_source_rule); - BA --> CA{For Each Asset in SourceRule}; - CA -- Yes --> DA(Orchestrator: Create AssetProcessingContext); - DA --> EA(SupplierDeterminationStage); - EA -- context --> FA(AssetSkipLogicStage); - FA -- context --> GA{context.skip_asset?}; - GA -- Yes --> HA(Orchestrator: Record Skipped); - HA --> CA; - GA -- No --> IA(MetadataInitializationStage); - IA -- context --> JA(FileRuleFilterStage); - JA -- context --> KA(GlossToRoughConversionStage); - KA -- context --> LA(AlphaExtractionToMaskStage); - LA -- context --> MA(NormalMapGreenChannelStage); - MA -- context --> NA(IndividualMapProcessingStage); - NA -- context --> OA(MapMergingStage); - OA -- context --> PA(MetadataFinalizationAndSaveStage); - PA -- context --> QA(OutputOrganizationStage); - QA -- context --> RA(Orchestrator: Record Processed/Failed); - RA --> CA; - CA -- No --> SA(Orchestrator: Cleanup Engine Temp Dir); - SA --> TA[Processing Complete]; - - subgraph Stages - direction LR - EA - FA - IA - JA - KA - LA - MA - NA - OA - PA - QA - end - - subgraph Utils - direction LR - U1[image_processing_utils.py] - U2[utils/path_utils.py] - end - - NA -.-> U1; - OA -.-> U1; - KA -.-> U1; - LA -.-> U1; - MA -.-> U1; - - PA -.-> U2; - QA -.-> U2; - - classDef context fill:#f9f,stroke:#333,stroke-width:2px; - class DA,EA,FA,IA,JA,KA,LA,MA,NA,OA,PA,QA context; \ No newline at end of file diff --git a/config/app_settings.json b/config/app_settings.json index cec7393..efe09ac 100644 --- a/config/app_settings.json +++ b/config/app_settings.json @@ -284,5 +284,10 @@ ], "CALCULATE_STATS_RESOLUTION": "1K", "DEFAULT_ASSET_CATEGORY": "Surface", - "TEMP_DIR_PREFIX": "_PROCESS_ASSET_" + "TEMP_DIR_PREFIX": "_PROCESS_ASSET_", + "INITIAL_SCALING_MODE": "POT_DOWNSCALE", + "MERGE_DIMENSION_MISMATCH_STRATEGY": "USE_LARGEST", + "general_settings": { + "invert_normal_map_green_channel_globally": false + } } \ No newline at end of file diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py index acbe8bd..21b6d47 100644 --- a/processing/pipeline/stages/individual_map_processing.py +++ b/processing/pipeline/stages/individual_map_processing.py @@ -4,7 +4,7 @@ import re import os import logging from pathlib import Path -from typing import Optional, Tuple, Dict +from typing import Optional, Tuple, Dict, List, Any, Union import cv2 import numpy as np @@ -13,21 +13,58 @@ from .base_stage import ProcessingStage from ..asset_context import AssetProcessingContext from rule_structure import FileRule from utils.path_utils import sanitize_filename -from ...utils import image_processing_utils as ipu +from ...utils import image_processing_utils as ipu # Includes get_image_bit_depth implicitly now +from ...utils.image_saving_utils import save_image_variants # Added import logger = logging.getLogger(__name__) +# Helper function to get filename-friendly map type (adapted from old logic) +def get_filename_friendly_map_type(internal_map_type: str, file_type_definitions: Optional[Dict[str, Dict]]) -> str: + """Derives a filename-friendly map type from the internal map type.""" + filename_friendly_map_type = internal_map_type # Fallback + if not file_type_definitions or not isinstance(file_type_definitions, dict) or not file_type_definitions: + logger.warning(f"Filename-friendly lookup: FILE_TYPE_DEFINITIONS not available or invalid. Falling back to internal type: {internal_map_type}") + return filename_friendly_map_type + + base_map_key_val = None + suffix_part = "" + sorted_known_base_keys = sorted(list(file_type_definitions.keys()), key=len, reverse=True) + + for known_key in sorted_known_base_keys: + if internal_map_type.startswith(known_key): + base_map_key_val = known_key + suffix_part = internal_map_type[len(known_key):] + break + + if base_map_key_val: + definition = file_type_definitions.get(base_map_key_val) + if definition and isinstance(definition, dict): + standard_type_alias = definition.get("standard_type") + if standard_type_alias and isinstance(standard_type_alias, str) and standard_type_alias.strip(): + filename_friendly_map_type = standard_type_alias.strip() + suffix_part + logger.debug(f"Filename-friendly lookup: Transformed '{internal_map_type}' -> '{filename_friendly_map_type}'") + else: + logger.warning(f"Filename-friendly lookup: Standard type alias for '{base_map_key_val}' is missing or invalid. Falling back.") + else: + logger.warning(f"Filename-friendly lookup: No valid definition for '{base_map_key_val}'. Falling back.") + else: + logger.warning(f"Filename-friendly lookup: Could not parse base key from '{internal_map_type}'. Falling back.") + + return filename_friendly_map_type + + class IndividualMapProcessingStage(ProcessingStage): """ - Processes individual texture map files based on FileRules. - This stage finds the source file, loads it, applies transformations - (resize, color space), saves a temporary processed version, and updates - the AssetProcessingContext with details. + Processes individual texture maps and merged map tasks. + This stage loads source images (or merges inputs for tasks), performs + in-memory transformations (Gloss-to-Rough, Normal Green Invert, optional scaling), + and passes the result to the UnifiedSaveUtility for final output generation. + It updates the AssetProcessingContext with detailed results. """ def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: """ - Executes the individual map processing logic. + Executes the individual map and merged task processing logic. """ asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" if context.status_flags.get('skip_asset', False): @@ -38,663 +75,549 @@ class IndividualMapProcessingStage(ProcessingStage): context.processed_maps_details = {} logger.debug(f"Asset '{asset_name_for_log}': Initialized processed_maps_details.") - if not context.files_to_process: - logger.info(f"Asset '{asset_name_for_log}': No files to process in this stage.") - return context - - # Source path for the asset group comes from SourceRule - if not context.source_rule or not context.source_rule.input_path: - logger.error(f"Asset '{asset_name_for_log}': SourceRule or SourceRule.input_path is not set. Cannot determine source base path.") - context.status_flags['individual_map_processing_failed'] = True - # Mark all file_rules as failed - for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): - # Use fr_idx as the key for status update for these early failures - map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" - self._update_file_rule_status(context, fr_idx, 'Failed', map_type=map_type_for_fail, details="SourceRule.input_path missing") - return context - - # The workspace_path in the context should be the directory where files are extracted/available. - source_base_path = context.workspace_path - if not source_base_path.is_dir(): - logger.error(f"Asset '{asset_name_for_log}': Workspace path '{source_base_path}' is not a valid directory.") - context.status_flags['individual_map_processing_failed'] = True - for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): - # Use fr_idx as the key for status update - map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" - self._update_file_rule_status(context, fr_idx, 'Failed', map_type=map_type_for_fail, details="Workspace path invalid") - return context + # --- Configuration Fetching --- + config = context.config_obj + file_type_definitions = getattr(config, "FILE_TYPE_DEFINITIONS", {}) + respect_variant_map_types = getattr(config, "respect_variant_map_types", []) # Needed for suffixing logic + initial_scaling_mode = getattr(config, "INITIAL_SCALING_MODE", "NONE") + merge_dimension_mismatch_strategy = getattr(config, "MERGE_DIMENSION_MISMATCH_STRATEGY", "USE_LARGEST") + invert_normal_green = getattr(config.general_settings, "invert_normal_map_green_channel_globally", False) + output_base_dir = context.output_dir # Assuming output_dir is set in context + asset_name = context.asset_rule.asset_name if context.asset_rule else "UnknownAsset" + output_filename_pattern_tokens = {'asset_name': asset_name, 'output_base_directory': str(output_base_dir)} - # Fetch config settings once before the loop - respect_variant_map_types = getattr(context.config_obj, "respect_variant_map_types", []) - image_resolutions = getattr(context.config_obj, "image_resolutions", {}) - output_filename_pattern = getattr(context.config_obj, "output_filename_pattern", "[assetname]_[maptype]_[resolution].[ext]") + # --- Prepare Items to Process --- + items_to_process: List[Union[Tuple[int, FileRule], Tuple[str, Dict]]] = [] - for file_rule_idx, file_rule in enumerate(context.files_to_process): - # file_rule_idx will be the key for processed_maps_details. - # processing_instance_tag is for unique temp files and detailed logging for this specific run. - processing_instance_tag = f"map_{file_rule_idx}_{uuid.uuid4().hex[:8]}" - current_map_key = file_rule_idx # Key for processed_maps_details + # Add regular files + if context.files_to_process: + # Validate source path early for regular files + if not context.source_rule or not context.source_rule.input_path: + logger.error(f"Asset '{asset_name_for_log}': SourceRule or SourceRule.input_path is not set. Cannot process regular files.") + context.status_flags['individual_map_processing_failed'] = True + # Mark all file_rules as failed if source path is missing + for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): + map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" + ff_map_type = get_filename_friendly_map_type(map_type_for_fail, file_type_definitions) + context.processed_maps_details[fr_idx] = { + 'status': 'Failed', + 'map_type': ff_map_type, + 'processing_map_type': map_type_for_fail, + 'notes': "SourceRule.input_path missing", + 'saved_files_info': [] + } + # Don't add regular files if source path is bad + elif not context.workspace_path or not context.workspace_path.is_dir(): + logger.error(f"Asset '{asset_name_for_log}': Workspace path '{context.workspace_path}' is not a valid directory. Cannot process regular files.") + context.status_flags['individual_map_processing_failed'] = True + for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): + map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" + ff_map_type = get_filename_friendly_map_type(map_type_for_fail, file_type_definitions) + context.processed_maps_details[fr_idx] = { + 'status': 'Failed', + 'map_type': ff_map_type, + 'processing_map_type': map_type_for_fail, + 'notes': "Workspace path invalid", + 'saved_files_info': [] + } + # Don't add regular files if workspace path is bad + else: + for idx, file_rule in enumerate(context.files_to_process): + items_to_process.append((idx, file_rule)) - if not file_rule.file_path: # Ensure file_path exists, critical for later stages if they rely on it from FileRule - logger.error(f"Asset '{asset_name_for_log}', FileRule at index {file_rule_idx} has an empty or None file_path. Skipping this rule.") - self._update_file_rule_status(context, current_map_key, 'Failed', - processing_tag=processing_instance_tag, - details="FileRule has no file_path") - continue - - initial_current_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" + # Add merged tasks + if hasattr(context, 'merged_image_tasks') and context.merged_image_tasks: + for task_idx, task_data in enumerate(context.merged_image_tasks): + task_key = f"merged_task_{task_idx}" + items_to_process.append((task_key, task_data)) - # --- START NEW SUFFIXING LOGIC --- - final_current_map_type = initial_current_map_type # Default to initial - - # 1. Determine Base Map Type from initial_current_map_type - base_map_type_match = re.match(r"(MAP_[A-Z]{3})", initial_current_map_type) - - if base_map_type_match and context.asset_rule: - true_base_map_type = base_map_type_match.group(1) # This is "MAP_XXX" + if not items_to_process: + logger.info(f"Asset '{asset_name_for_log}': No regular files or merged tasks to process in this stage.") + return context - # 2. Count Occurrences and Find Index of current_file_rule in context.asset_rule.files - peers_of_same_base_type_in_asset_rule = [] - for fr_asset in context.asset_rule.files: - fr_asset_item_type = fr_asset.item_type_override or fr_asset.item_type or "UnknownMapType" - fr_asset_base_map_type_match = re.match(r"(MAP_[A-Z]{3})", fr_asset_item_type) - - if fr_asset_base_map_type_match: - fr_asset_base_map_type = fr_asset_base_map_type_match.group(1) - if fr_asset_base_map_type == true_base_map_type: - peers_of_same_base_type_in_asset_rule.append(fr_asset) - - num_occurrences_of_base_type = len(peers_of_same_base_type_in_asset_rule) - current_instance_index = 0 # 1-based + # --- Unified Processing Loop --- + for item_key, item_data in items_to_process: + current_image_data: Optional[np.ndarray] = None + base_map_type: str = "Unknown" # Filename-friendly + processing_map_type: str = "Unknown" # Internal MAP_XXX type + source_bit_depth_info_for_save_util: List[int] = [] + is_merged_task: bool = False + status_notes: List[str] = [] + processing_status: str = "Started" + saved_files_details_list: List[Dict] = [] + original_dimensions: Optional[Tuple[int, int]] = None + source_file_path_regular: Optional[Path] = None # For regular maps + merge_task_config_output_type: Optional[str] = None # For merged tasks + inputs_used_for_merge: Optional[Dict[str, str]] = None # For merged tasks + processing_instance_tag = f"item_{item_key}_{uuid.uuid4().hex[:8]}" # Unique tag for logging this item - try: - current_instance_index = peers_of_same_base_type_in_asset_rule.index(file_rule) + 1 - except ValueError: - logger.warning( - f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Initial Type: '{initial_current_map_type}', Base: '{true_base_map_type}'): " - f"Could not find its own instance in the list of peers from asset_rule.files. " - f"Number of peers found: {num_occurrences_of_base_type}. Suffixing may be affected." - ) - - # 3. Determine Suffix - map_type_for_respect_check = true_base_map_type.replace("MAP_", "") # e.g., "COL" - is_in_respect_list = map_type_for_respect_check in respect_variant_map_types - - suffix_to_append = "" - if num_occurrences_of_base_type > 1: - if current_instance_index > 0: - suffix_to_append = f"-{current_instance_index}" - else: - logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}': Index for multi-occurrence map type '{true_base_map_type}' (count: {num_occurrences_of_base_type}) not determined. Omitting numeric suffix.") - elif num_occurrences_of_base_type == 1 and is_in_respect_list: - suffix_to_append = "-1" - - # 4. Form the final_current_map_type - if suffix_to_append: - final_current_map_type = true_base_map_type + suffix_to_append - else: - final_current_map_type = initial_current_map_type - - current_map_type = final_current_map_type - # --- END NEW SUFFIXING LOGIC --- - - # --- START: Filename-friendly map type derivation --- - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: --- Starting Filename-Friendly Map Type Logic for: {current_map_type} ---") - filename_friendly_map_type = current_map_type # Fallback - - # 1. Access FILE_TYPE_DEFINITIONS - file_type_definitions = None - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Attempting to access context.config_obj.FILE_TYPE_DEFINITIONS.") try: - file_type_definitions = context.config_obj.FILE_TYPE_DEFINITIONS - if not file_type_definitions: # Check if it's None or empty - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS is present but empty or None.") - else: - sample_defs_log = {k: file_type_definitions[k] for k in list(file_type_definitions.keys())[:2]} # Log first 2 for brevity - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Accessed FILE_TYPE_DEFINITIONS. Sample: {sample_defs_log}, Total keys: {len(file_type_definitions)}.") - except AttributeError: - logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Could not access context.config_obj.FILE_TYPE_DEFINITIONS via direct attribute.") - - base_map_key_val = None # Renamed from base_map_key to avoid conflict with current_map_key - suffix_part = "" + # --- A. Regular Map Processing --- + if isinstance(item_data, FileRule): + file_rule: FileRule = item_data + file_rule_idx: int = item_key # Key is the index for regular maps + is_merged_task = False + logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Processing Regular Map from FileRule: {file_rule.file_path}") - if file_type_definitions and isinstance(file_type_definitions, dict) and len(file_type_definitions) > 0: - base_map_key_val = None - suffix_part = "" - - sorted_known_base_keys = sorted(list(file_type_definitions.keys()), key=len, reverse=True) - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Sorted known base keys for parsing: {sorted_known_base_keys}") + if not file_rule.file_path: + logger.error(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: FileRule has an empty or None file_path. Skipping.") + processing_status = "Failed" + status_notes.append("FileRule has no file_path") + continue # To finally block - for known_key in sorted_known_base_keys: - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Checking if '{current_map_type}' starts with '{known_key}'") - if current_map_type.startswith(known_key): - base_map_key_val = known_key - suffix_part = current_map_type[len(known_key):] - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Match found! current_map_type: '{current_map_type}', base_map_key_val: '{base_map_key_val}', suffix_part: '{suffix_part}'") - break + # Determine internal map type (MAP_XXX) with suffixing + initial_internal_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" + processing_map_type = self._get_suffixed_internal_map_type(context, file_rule, initial_internal_map_type, respect_variant_map_types) + base_map_type = get_filename_friendly_map_type(processing_map_type, file_type_definitions) # Get filename friendly version - if base_map_key_val is None: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Could not parse base_map_key_val from '{current_map_type}' using known keys. Fallback: filename_friendly_map_type = '{filename_friendly_map_type}'.") - else: - definition = file_type_definitions.get(base_map_key_val) - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Definition for '{base_map_key_val}': {definition}") - if definition and isinstance(definition, dict): - standard_type_alias = definition.get("standard_type") - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Standard type alias for '{base_map_key_val}': '{standard_type_alias}'") - if standard_type_alias and isinstance(standard_type_alias, str) and standard_type_alias.strip(): - filename_friendly_map_type = standard_type_alias.strip() + suffix_part - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Successfully transformed map type: '{current_map_type}' -> '{filename_friendly_map_type}' (standard_type_alias: '{standard_type_alias}', suffix_part: '{suffix_part}').") + # Skip types not meant for individual processing (e.g., composites handled elsewhere) + if not processing_map_type or not processing_map_type.startswith("MAP_") or processing_map_type == "MAP_GEN_COMPOSITE": + logger.debug(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Skipping, type '{processing_map_type}' (Filename: '{base_map_type}') not targeted for individual processing.") + processing_status = "Skipped" + status_notes.append(f"Type '{processing_map_type}' not processed individually.") + continue # To finally block + + # Find source file (relative to workspace_path) + source_base_path = context.workspace_path + # Use the file_rule.file_path directly as it should be relative now + potential_source_path = source_base_path / file_rule.file_path + if potential_source_path.is_file(): + source_file_path_regular = potential_source_path + logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Found source file: {source_file_path_regular}") + else: + # Attempt globbing as a fallback if direct path fails (optional, based on previous logic) + found_files = list(source_base_path.glob(file_rule.file_path)) + if len(found_files) == 1: + source_file_path_regular = found_files[0] + logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Found source file via glob: {source_file_path_regular}") + elif len(found_files) > 1: + logger.warning(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Multiple files found for pattern '{file_rule.file_path}' in '{source_base_path}'. Using first: {found_files[0]}") + source_file_path_regular = found_files[0] else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Standard type alias for '{base_map_key_val}' is missing, empty, or not a string (value: '{standard_type_alias}'). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: No definition or invalid definition for '{base_map_key_val}' (value: {definition}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") - elif file_type_definitions is None: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS not available for lookup (was None). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") - elif not isinstance(file_type_definitions, dict): - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS is not a dictionary (type: {type(file_type_definitions)}). Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: FILE_TYPE_DEFINITIONS is an empty dictionary. Using fallback. filename_friendly_map_type = '{filename_friendly_map_type}'.") - - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Final filename_friendly_map_type: '{filename_friendly_map_type}'") - # --- END: Filename-friendly map type derivation --- + logger.error(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Source file not found using path/pattern '{file_rule.file_path}' in '{source_base_path}'.") + processing_status = "Failed" + status_notes.append("Source file not found") + continue # To finally block - if not current_map_type or not current_map_type.startswith("MAP_") or current_map_type == "MAP_GEN_COMPOSITE": - logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}': Skipping, item_type '{current_map_type}' (initial: '{initial_current_map_type}') not targeted for individual processing.") - continue + # Load image + source_image_data = ipu.load_image(str(source_file_path_regular)) + if source_image_data is None: + logger.error(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Failed to load image from '{source_file_path_regular}'.") + processing_status = "Failed" + status_notes.append("Image load failed") + continue # To finally block - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Type: {current_map_type}, Initial Type: {initial_current_map_type}, Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Starting individual processing.") + original_height, original_width = source_image_data.shape[:2] + original_dimensions = (original_width, original_height) + logger.debug(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Loaded image with dimensions {original_width}x{original_height}.") - # A. Find Source File (using file_rule.file_path as the pattern relative to source_base_path) - source_file_path = self._find_source_file(source_base_path, file_rule.file_path, asset_name_for_log, processing_instance_tag) - if not source_file_path: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Source file not found with path/pattern '{file_rule.file_path}' in '{source_base_path}'.") - self._update_file_rule_status(context, current_map_key, 'Failed', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - details="Source file not found") - continue + # Get original bit depth + try: + original_source_bit_depth = ipu.get_image_bit_depth(str(source_file_path_regular)) + source_bit_depth_info_for_save_util = [original_source_bit_depth] + logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Determined source bit depth: {original_source_bit_depth}") + except Exception as e: + logger.warning(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Could not determine source bit depth for {source_file_path_regular}: {e}. Using default [8].") + source_bit_depth_info_for_save_util = [8] # Default fallback + status_notes.append("Could not determine source bit depth, defaulted to 8.") - # B. Load and Transform Image - image_data: Optional[np.ndarray] = ipu.load_image(str(source_file_path)) - if image_data is None: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Failed to load image from '{source_file_path}'.") - self._update_file_rule_status(context, current_map_key, 'Failed', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - source_file=str(source_file_path), - details="Image load failed") - continue - - original_height, original_width = image_data.shape[:2] - logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Loaded image '{source_file_path}' with dimensions {original_width}x{original_height}.") + current_image_data = source_image_data.copy() - # 1. Initial Power-of-Two (POT) Downscaling - pot_width = ipu.get_nearest_power_of_two_downscale(original_width) - pot_height = ipu.get_nearest_power_of_two_downscale(original_height) + # --- B. Merged Image Task Processing --- + elif isinstance(item_data, dict): + task: Dict = item_data + task_key: str = item_key # Key is the generated string for merged tasks + is_merged_task = True + merge_task_config_output_type = task.get('output_map_type', 'UnknownMergeOutput') + logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Processing Merged Task for output type: {merge_task_config_output_type}") - # Maintain aspect ratio for initial POT scaling, using the smaller of the scaled dimensions - # This ensures we only downscale. - if original_width > 0 and original_height > 0 : # Avoid division by zero - aspect_ratio = original_width / original_height - - # Calculate new dimensions based on POT width, then POT height, and pick the one that results in downscale or same size - pot_h_from_w = int(pot_width / aspect_ratio) - pot_w_from_h = int(pot_height * aspect_ratio) + processing_map_type = merge_task_config_output_type # Internal type is the output type from config + base_map_type = get_filename_friendly_map_type(processing_map_type, file_type_definitions) # Get filename friendly version + source_bit_depth_info_for_save_util = task.get('source_bit_depths', []) + merge_rule_config = task.get('merge_rule_config', {}) + input_map_sources = task.get('input_map_sources', {}) + target_dimensions = task.get('source_dimensions') # Expected dimensions (h, w) - # Option 1: Scale by width, adjust height - candidate1_w, candidate1_h = pot_width, ipu.get_nearest_power_of_two_downscale(pot_h_from_w) - # Option 2: Scale by height, adjust width - candidate2_w, candidate2_h = ipu.get_nearest_power_of_two_downscale(pot_w_from_h), pot_height - - # Ensure candidates are not upscaling - if candidate1_w > original_width or candidate1_h > original_height: - candidate1_w, candidate1_h = original_width, original_height # Fallback to original if upscaling - if candidate2_w > original_width or candidate2_h > original_height: - candidate2_w, candidate2_h = original_width, original_height # Fallback to original if upscaling - - # Choose the candidate that results in a larger area (preferring less downscaling if multiple POT options) - # but still respects the POT downscale logic for each dimension individually. - # The actual POT dimensions are already calculated by get_nearest_power_of_two_downscale. - # We need to decide if we base the aspect ratio calc on pot_width or pot_height. - # The goal is to make one dimension POT and the other POT while maintaining aspect as much as possible, only downscaling. + if not merge_rule_config or not input_map_sources or not target_dimensions: + logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Merge task data is incomplete (missing config, sources, or dimensions). Skipping.") + processing_status = "Failed" + status_notes.append("Incomplete merge task data") + continue # To finally block - final_pot_width = ipu.get_nearest_power_of_two_downscale(original_width) - final_pot_height = ipu.get_nearest_power_of_two_downscale(original_height) + loaded_inputs_for_merge: Dict[str, np.ndarray] = {} + actual_input_dimensions: List[Tuple[int, int]] = [] # List of (h, w) + inputs_used_for_merge = {} # Track actual files/fallbacks used - # If original aspect is not 1:1, one of the POT dimensions might need further adjustment to maintain aspect - # after the other dimension is set to its POT. - # We prioritize fitting within the *downscaled* POT dimensions. - - # Scale to fit within final_pot_width, adjust height, then make height POT (downscale) - scaled_h_for_pot_w = max(1, round(final_pot_width / aspect_ratio)) - h1 = ipu.get_nearest_power_of_two_downscale(scaled_h_for_pot_w) - w1 = final_pot_width - if h1 > final_pot_height: # If this adjustment made height too big, re-evaluate - h1 = final_pot_height - w1 = ipu.get_nearest_power_of_two_downscale(max(1, round(h1 * aspect_ratio))) + # Load/Prepare Inputs for Merge + merge_inputs_config = merge_rule_config.get('inputs', {}) + merge_defaults = merge_rule_config.get('defaults', {}) + for channel_char, required_map_type_from_rule in merge_inputs_config.items(): + input_info = input_map_sources.get(required_map_type_from_rule) + input_image_data = None + input_source_desc = f"Fallback for {required_map_type_from_rule}" - # Scale to fit within final_pot_height, adjust width, then make width POT (downscale) - scaled_w_for_pot_h = max(1, round(final_pot_height * aspect_ratio)) - w2 = ipu.get_nearest_power_of_two_downscale(scaled_w_for_pot_h) - h2 = final_pot_height - if w2 > final_pot_width: # If this adjustment made width too big, re-evaluate - w2 = final_pot_width - h2 = ipu.get_nearest_power_of_two_downscale(max(1, round(w2 / aspect_ratio))) - - # Choose the option that results in larger area (less aggressive downscaling) - # while ensuring both dimensions are POT and not upscaled from original. - if w1 * h1 >= w2 * h2: - base_pot_width, base_pot_height = w1, h1 - else: - base_pot_width, base_pot_height = w2, h2 - - # Final check to ensure no upscaling from original dimensions - base_pot_width = min(base_pot_width, original_width) - base_pot_height = min(base_pot_height, original_height) - # And ensure they are POT - base_pot_width = ipu.get_nearest_power_of_two_downscale(base_pot_width) - base_pot_height = ipu.get_nearest_power_of_two_downscale(base_pot_height) - - else: # Handle cases like 0-dim images, though load_image should prevent this - base_pot_width, base_pot_height = 1, 1 - - - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Original dims: ({original_width},{original_height}), Initial POT Scaled Dims: ({base_pot_width},{base_pot_height}).") - - # Calculate and store aspect ratio change string - if original_width > 0 and original_height > 0 and base_pot_width > 0 and base_pot_height > 0: - aspect_change_str = ipu.normalize_aspect_ratio_change( - original_width, original_height, - base_pot_width, base_pot_height - ) - if aspect_change_str: - # This will overwrite if multiple maps are processed; specified by requirements. - context.asset_metadata['aspect_ratio_change_string'] = aspect_change_str - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type {current_map_type}: Calculated aspect ratio change string: '{aspect_change_str}' (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}). Stored in asset_metadata.") - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type {current_map_type}: Failed to calculate aspect ratio change string.") - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type {current_map_type}: Skipping aspect ratio change string calculation due to invalid dimensions (Original: {original_width}x{original_height}, Base POT: {base_pot_width}x{base_pot_height}).") - - base_pot_image_data = image_data.copy() - if (base_pot_width, base_pot_height) != (original_width, original_height): - interpolation = cv2.INTER_AREA # Good for downscaling - base_pot_image_data = ipu.resize_image(base_pot_image_data, base_pot_width, base_pot_height, interpolation=interpolation) - if base_pot_image_data is None: - logger.error(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Failed to resize image to base POT dimensions.") - self._update_file_rule_status(context, current_map_key, 'Failed', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - source_file=str(source_file_path), - original_dimensions=(original_width, original_height), - details="Base POT resize failed") - continue - - # Color Profile Management (after initial POT resize, before multi-res saving) - # Initialize transform settings with defaults for color management - transform_settings = { - "color_profile_management": False, # Default, can be overridden by FileRule - "target_color_profile": "sRGB", # Default - "output_format_settings": None # For JPG quality, PNG compression - } - if file_rule.channel_merge_instructions and 'transform' in file_rule.channel_merge_instructions: - custom_transform_settings = file_rule.channel_merge_instructions['transform'] - if isinstance(custom_transform_settings, dict): - transform_settings.update(custom_transform_settings) - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Loaded transform settings for color/output from file_rule.") - - if transform_settings['color_profile_management'] and transform_settings['target_color_profile'] == "RGB": - if len(base_pot_image_data.shape) == 3 and base_pot_image_data.shape[2] == 3: # BGR to RGB - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Converting BGR to RGB for base POT image.") - base_pot_image_data = ipu.convert_bgr_to_rgb(base_pot_image_data) - elif len(base_pot_image_data.shape) == 3 and base_pot_image_data.shape[2] == 4: # BGRA to RGBA - logger.info(f"Asset '{asset_name_for_log}', FileRule path '{file_rule.file_path}' (Key: {current_map_key}, Proc. Tag: {processing_instance_tag}): Converting BGRA to RGBA for base POT image.") - base_pot_image_data = ipu.convert_bgra_to_rgba(base_pot_image_data) - - # Ensure engine_temp_dir exists before saving base POT - if not context.engine_temp_dir.exists(): - try: - context.engine_temp_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Asset '{asset_name_for_log}': Created engine_temp_dir at '{context.engine_temp_dir}'") - except OSError as e: - logger.error(f"Asset '{asset_name_for_log}': Failed to create engine_temp_dir '{context.engine_temp_dir}': {e}") - self._update_file_rule_status(context, current_map_key, 'Failed', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - source_file=str(source_file_path), - details="Failed to create temp directory for base POT") - continue - - temp_filename_suffix = Path(source_file_path).suffix - base_pot_temp_filename = f"{processing_instance_tag}_basePOT{temp_filename_suffix}" # Use processing_instance_tag - base_pot_temp_path = context.engine_temp_dir / base_pot_temp_filename - - # Determine save parameters for base POT image (can be different from variants if needed) - base_save_params = [] - base_output_ext = temp_filename_suffix.lstrip('.') # Default to original, can be overridden by format rules - # TODO: Add logic here to determine base_output_ext and base_save_params based on bit depth and config, similar to variants. - # For now, using simple save. - - if not ipu.save_image(str(base_pot_temp_path), base_pot_image_data, params=base_save_params): - logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Failed to save base POT image to '{base_pot_temp_path}'.") - self._update_file_rule_status(context, current_map_key, 'Failed', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - source_file=str(source_file_path), - original_dimensions=(original_width, original_height), - base_pot_dimensions=(base_pot_width, base_pot_height), - details="Base POT image save failed") - continue - - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Successfully saved base POT image to '{base_pot_temp_path}' with dims ({base_pot_width}x{base_pot_height}).") - - # Initialize/update the status for this map in processed_maps_details - self._update_file_rule_status( - context, - current_map_key, # Use file_rule_idx as key - 'BasePOTSaved', # Intermediate status, will be updated after variant check - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, # Store the tag - source_file=str(source_file_path), - original_dimensions=(original_width, original_height), - base_pot_dimensions=(base_pot_width, base_pot_height), - temp_processed_file=str(base_pot_temp_path) # Store path to the saved base POT - ) - - # 2. Multiple Resolution Output (Variants) - processed_at_least_one_resolution_variant = False - # Resolution variants are attempted for all map types individually processed. - # The filter at the beginning of the loop ensures only relevant maps reach this stage. - generate_variants_for_this_map_type = True - - if generate_variants_for_this_map_type: # This will now always be true if code execution reaches here - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Map type '{current_map_type}' is eligible for individual processing. Attempting to generate resolution variants.") - # Sort resolutions from largest to smallest - sorted_resolutions = sorted(image_resolutions.items(), key=lambda item: item[1], reverse=True) - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Sorted resolutions for variant processing: {sorted_resolutions}") - - for res_key, res_max_dim in sorted_resolutions: - current_w, current_h = base_pot_image_data.shape[1], base_pot_image_data.shape[0] - - if current_w <= 0 or current_h <=0: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Base POT image has zero dimension ({current_w}x{current_h}). Skipping this resolution variant.") - continue - - if max(current_w, current_h) >= res_max_dim: - target_w_res, target_h_res = current_w, current_h - if max(current_w, current_h) > res_max_dim: - if current_w >= current_h: - target_w_res = res_max_dim - target_h_res = max(1, round(target_w_res / (current_w / current_h))) + if input_info and input_info.get('file_path'): + # Paths in merged tasks should ideally be absolute or relative to a known base (e.g., workspace) + # Assuming they are resolvable as is for now. + input_file_path = Path(input_info['file_path']) + if input_file_path.is_file(): + try: + input_image_data = ipu.load_image(str(input_file_path)) + if input_image_data is not None: + logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Loaded input '{required_map_type_from_rule}' for channel '{channel_char}' from: {input_file_path}") + actual_input_dimensions.append(input_image_data.shape[:2]) # (h, w) + input_source_desc = str(input_file_path) + else: + logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Failed to load input '{required_map_type_from_rule}' from {input_file_path}. Attempting fallback.") + except Exception as e: + logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Error loading input '{required_map_type_from_rule}' from {input_file_path}: {e}. Attempting fallback.") else: - target_h_res = res_max_dim - target_w_res = max(1, round(target_h_res * (current_w / current_h))) + logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Input file path for '{required_map_type_from_rule}' not found: {input_file_path}. Attempting fallback.") + else: + logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: No file path provided for required input '{required_map_type_from_rule}'. Attempting fallback.") + + # Fallback if load failed or no path + if input_image_data is None: + fallback_value = merge_defaults.get(channel_char) + if fallback_value is not None: + try: + # Determine shape and dtype for fallback + h, w = target_dimensions + # Infer channels needed based on typical usage or config (e.g., RGB default, single channel for masks) + # This might need refinement based on how defaults are structured. Assuming uint8 for now. + # If fallback_value is a single number, assume grayscale, else assume color based on length? + num_channels = 1 if isinstance(fallback_value, (int, float)) else len(fallback_value) if isinstance(fallback_value, (list, tuple)) else 3 # Default to 3? Risky. + dtype = np.uint8 # Default dtype, might need adjustment based on context + shape = (h, w) if num_channels == 1 else (h, w, num_channels) + + input_image_data = np.full(shape, fallback_value, dtype=dtype) + logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Using fallback value {fallback_value} for channel '{channel_char}' (Target Dims: {target_dimensions}).") + # Fallback uses target dimensions, don't add to actual_input_dimensions for mismatch check unless required + # actual_input_dimensions.append(target_dimensions) # Optional: Treat fallback as having target dims + status_notes.append(f"Used fallback for {required_map_type_from_rule}") + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Error creating fallback for channel '{channel_char}': {e}. Cannot proceed with merge.") + processing_status = "Failed" + status_notes.append(f"Fallback creation failed for {required_map_type_from_rule}") + break # Break inner loop + else: + logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Missing input '{required_map_type_from_rule}' and no fallback default provided for channel '{channel_char}'. Cannot proceed.") + processing_status = "Failed" + status_notes.append(f"Missing input {required_map_type_from_rule} and no fallback") + break # Break inner loop + + if processing_status == "Failed": break # Exit outer loop if inner loop failed + + loaded_inputs_for_merge[channel_char] = input_image_data + inputs_used_for_merge[required_map_type_from_rule] = input_source_desc + + if processing_status == "Failed": continue # To finally block + + # Dimension Mismatch Handling + unique_dimensions = set(actual_input_dimensions) + target_merge_dims = target_dimensions # Default + if len(unique_dimensions) > 1: + logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Mismatched dimensions found among loaded inputs: {unique_dimensions}. Applying strategy: {merge_dimension_mismatch_strategy}") + status_notes.append(f"Mismatched input dimensions ({unique_dimensions}), applied {merge_dimension_mismatch_strategy}") + + if merge_dimension_mismatch_strategy == "ERROR_SKIP": + logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Dimension mismatch strategy is ERROR_SKIP. Failing task.") + processing_status = "Failed" + status_notes.append("Dimension mismatch (ERROR_SKIP)") + continue # To finally block + elif merge_dimension_mismatch_strategy == "USE_LARGEST": + max_h = max(h for h, w in unique_dimensions) + max_w = max(w for h, w in unique_dimensions) + target_merge_dims = (max_h, max_w) + elif merge_dimension_mismatch_strategy == "USE_FIRST": + target_merge_dims = actual_input_dimensions[0] if actual_input_dimensions else target_dimensions + else: # Default or unknown: Use largest + logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Unknown dimension mismatch strategy '{merge_dimension_mismatch_strategy}'. Defaulting to USE_LARGEST.") + max_h = max(h for h, w in unique_dimensions) + max_w = max(w for h, w in unique_dimensions) + target_merge_dims = (max_h, max_w) + + logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Resizing inputs to target merge dimensions: {target_merge_dims}") + # Resize loaded inputs (not fallbacks unless they were added to actual_input_dimensions) + for channel_char, img_data in loaded_inputs_for_merge.items(): + # Only resize if it was a loaded input that contributed to the mismatch check + if img_data.shape[:2] in unique_dimensions and img_data.shape[:2] != target_merge_dims: + resized_img = ipu.resize_image(img_data, target_merge_dims[1], target_merge_dims[0]) # w, h + if resized_img is None: + logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Failed to resize input for channel '{channel_char}' to {target_merge_dims}. Failing task.") + processing_status = "Failed" + status_notes.append(f"Input resize failed for {channel_char}") + break + loaded_inputs_for_merge[channel_char] = resized_img + if processing_status == "Failed": continue # To finally block + + # Perform Merge (Example: Simple Channel Packing - Adapt as needed) + # This needs to be robust based on merge_rule_config structure + try: + merge_channels_order = merge_rule_config.get('channel_order', 'RGB') # e.g., 'RGB', 'BGR', 'R', 'RGBA' etc. + output_channels = len(merge_channels_order) + h, w = target_merge_dims # Use the potentially adjusted dimensions + + if output_channels == 1: + # Assume the first channel in order is the one to use + channel_char_to_use = merge_channels_order[0] + source_img = loaded_inputs_for_merge[channel_char_to_use] + # Ensure it's grayscale (take first channel if it's multi-channel) + if len(source_img.shape) == 3: + current_image_data = source_img[:, :, 0].copy() + else: + current_image_data = source_img.copy() + elif output_channels > 1: + # Assume uint8 dtype for merged output unless specified otherwise + merged_image = np.zeros((h, w, output_channels), dtype=np.uint8) + for i, channel_char in enumerate(merge_channels_order): + source_img = loaded_inputs_for_merge.get(channel_char) + if source_img is not None: + # Extract the correct channel (e.g., R from RGB, or use grayscale directly) + if len(source_img.shape) == 3: + # Assuming standard RGB/BGR order in source based on channel_char? Needs clear definition. + # Example: If source is RGB and channel_char is 'R', take channel 0. + # This mapping needs to be defined in merge_rule_config or conventions. + # Simple approach: take the first channel if source is color. + merged_image[:, :, i] = source_img[:, :, 0] + else: # Grayscale source + merged_image[:, :, i] = source_img + else: + # This case should have been caught by fallback logic earlier + logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Missing prepared input for channel '{channel_char}' during final merge assembly. This shouldn't happen.") + processing_status = "Failed" + status_notes.append(f"Internal error: Missing input '{channel_char}' at merge assembly") + break + if processing_status != "Failed": + current_image_data = merged_image + else: + logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Invalid channel_order '{merge_channels_order}' in merge config.") + processing_status = "Failed" + status_notes.append("Invalid merge channel_order") + + if processing_status != "Failed": + logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Successfully merged inputs into image with shape {current_image_data.shape}") + original_dimensions = (current_image_data.shape[1], current_image_data.shape[0]) # Set original dims after merge + + except Exception as e: + logger.exception(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Error during merge operation: {e}") + processing_status = "Failed" + status_notes.append(f"Merge operation failed: {e}") + continue # To finally block + + else: + logger.error(f"Asset '{asset_name_for_log}', Key {item_key}: Unknown item type in processing loop: {type(item_data)}. Skipping.") + processing_status = "Failed" + status_notes.append("Unknown item type in loop") + continue # To finally block + + # --- C. Common Processing Path --- + if current_image_data is None: + logger.error(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: current_image_data is None before common processing. Status: {processing_status}. Skipping common path.") + # Status should already be Failed or Skipped from A or B + if processing_status not in ["Failed", "Skipped"]: + processing_status = "Failed" + status_notes.append("Internal error: Image data missing before common processing") + continue # To finally block + + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Entering common processing path for '{base_map_type}' (Internal: '{processing_map_type}')") + + # In-Memory Transformations + transformation_applied = False + # Gloss-to-Rough + # Use filename-friendly 'GLOSS' or internal 'MAP_GLOSS' + if base_map_type == "GLOSS" or processing_map_type.startswith("MAP_GLOSS"): + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying Gloss-to-Rough conversion.") + current_image_data = ipu.invert_image_colors(current_image_data) + # Update map types + new_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") + new_base_map_type = get_filename_friendly_map_type(new_processing_map_type, file_type_definitions) + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Map type updated: '{processing_map_type}' -> '{new_processing_map_type}', Filename type: '{base_map_type}' -> '{new_base_map_type}'") + processing_map_type = new_processing_map_type + base_map_type = new_base_map_type + status_notes.append("Gloss-to-Rough applied") + transformation_applied = True + + # Normal Green Invert + # Use filename-friendly 'NRM' or internal 'MAP_NRM' + if (base_map_type == "NRM" or processing_map_type == "MAP_NRM") and invert_normal_green: + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying Normal Map Green Channel Inversion (Global Setting).") + current_image_data = ipu.invert_normal_map_green_channel(current_image_data) + status_notes.append("Normal Green Inverted (Global)") + transformation_applied = True + + # Optional Initial Scaling (In Memory) + image_to_save = None + scaling_applied = False + h_pre_scale, w_pre_scale = current_image_data.shape[:2] + + if initial_scaling_mode == "POT_DOWNSCALE": + pot_w = ipu.get_nearest_power_of_two_downscale(w_pre_scale) + pot_h = ipu.get_nearest_power_of_two_downscale(h_pre_scale) + if (pot_w, pot_h) != (w_pre_scale, h_pre_scale): + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying Initial Scaling: POT Downscale from ({w_pre_scale},{h_pre_scale}) to ({pot_w},{pot_h}).") + # Use aspect ratio preserving POT logic if needed, or simple independent POT per dim? Plan implies simple POT. + # Let's use the more robust aspect-preserving POT downscale logic from ipu if available, otherwise simple resize. + # Simple resize for now based on calculated pot_w, pot_h: + resized_img = ipu.resize_image(current_image_data, pot_w, pot_h, interpolation=cv2.INTER_AREA) + if resized_img is not None: + image_to_save = resized_img + scaling_applied = True + status_notes.append(f"Initial POT Downscale applied ({pot_w}x{pot_h})") + else: + logger.warning(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: POT Downscale resize failed. Using original data for saving.") + image_to_save = current_image_data.copy() + status_notes.append("Initial POT Downscale failed, used original") else: - logger.debug(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Base POT image ({current_w}x{current_h}) is smaller than target max dim {res_max_dim}. Skipping this resolution variant.") - continue + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Initial Scaling: POT Downscale - Image already POT or smaller. No scaling needed.") + image_to_save = current_image_data.copy() + elif initial_scaling_mode == "NONE": + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Initial Scaling: Mode is NONE.") + image_to_save = current_image_data.copy() + else: + logger.warning(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unknown INITIAL_SCALING_MODE '{initial_scaling_mode}'. Defaulting to NONE.") + image_to_save = current_image_data.copy() + status_notes.append(f"Unknown initial scale mode '{initial_scaling_mode}', used original") - target_w_res = min(target_w_res, current_w) - target_h_res = min(target_h_res, current_h) - - if target_w_res <=0 or target_h_res <=0: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Calculated target variant dims are zero or negative ({target_w_res}x{target_h_res}). Skipping.") - continue + if image_to_save is None: # Should not happen if logic above is correct + logger.error(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: image_to_save is None after scaling block. This indicates an error. Failing.") + processing_status = "Failed" + status_notes.append("Internal error: image_to_save is None post-scaling") + continue # To finally block - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Processing variant for {res_max_dim}. Base POT Dims: ({current_w}x{current_h}), Target Dims for {res_key}: ({target_w_res}x{target_h_res}).") + # Color Management (Example: BGR to RGB if needed) + # This logic might need refinement based on actual requirements and ipu capabilities + # Assuming save_image_variants expects RGB by default if color conversion is needed. + # Let's assume save_image_variants handles color internally based on format/config for now. + # If specific BGR->RGB conversion is needed *before* saving based on map type: + # if base_map_type in ["COL", "DIFF", "ALB"] and len(image_to_save.shape) == 3 and image_to_save.shape[2] == 3: + # logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying BGR to RGB conversion before saving.") + # image_to_save = ipu.convert_bgr_to_rgb(image_to_save) + # status_notes.append("BGR->RGB applied") - output_image_data_for_res = base_pot_image_data - if (target_w_res, target_h_res) != (current_w, current_h): - interpolation_res = cv2.INTER_AREA - output_image_data_for_res = ipu.resize_image(base_pot_image_data, target_w_res, target_h_res, interpolation=interpolation_res) - if output_image_data_for_res is None: - logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Failed to resize image for resolution variant {res_key}.") - continue - - assetname_placeholder = context.asset_rule.asset_name if context.asset_rule else "UnknownAsset" - resolution_placeholder = res_key - - # TODO: Implement proper output format/extension determination for variants - output_ext_variant = temp_filename_suffix.lstrip('.') + # Call Unified Save Utility + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Calling Unified Save Utility for map type '{base_map_type}' (Internal: '{processing_map_type}')") - temp_output_filename_variant = output_filename_pattern.replace("[assetname]", sanitize_filename(assetname_placeholder)) \ - .replace("[maptype]", sanitize_filename(filename_friendly_map_type)) \ - .replace("[resolution]", sanitize_filename(resolution_placeholder)) \ - .replace("[ext]", output_ext_variant) - temp_output_filename_variant = f"{processing_instance_tag}_variant_{temp_output_filename_variant}" # Use processing_instance_tag - temp_output_path_variant = context.engine_temp_dir / temp_output_filename_variant + try: + # Prepare arguments for save_image_variants + save_args = { + "source_image_data": image_to_save, + "base_map_type": base_map_type, # Filename-friendly + "source_bit_depth_info": source_bit_depth_info_for_save_util, + "output_filename_pattern_tokens": output_filename_pattern_tokens, + "config_obj": config, # Pass the whole config object + "asset_name_for_log": asset_name_for_log, # Pass asset name for logging within save util + "processing_instance_tag": processing_instance_tag # Pass tag for logging within save util + } - save_params_variant = [] - if transform_settings.get('output_format_settings'): - if output_ext_variant.lower() in ['jpg', 'jpeg']: - quality = transform_settings['output_format_settings'].get('quality', context.config_obj.get("JPG_QUALITY", 95)) - save_params_variant = [cv2.IMWRITE_JPEG_QUALITY, quality] - elif output_ext_variant.lower() == 'png': - compression = transform_settings['output_format_settings'].get('compression_level', context.config_obj.get("PNG_COMPRESSION_LEVEL", 6)) - save_params_variant = [cv2.IMWRITE_PNG_COMPRESSION, compression] - - save_success_variant = ipu.save_image(str(temp_output_path_variant), output_image_data_for_res, params=save_params_variant) + saved_files_details_list = save_image_variants(**save_args) - if not save_success_variant: - logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Failed to save temporary variant image to '{temp_output_path_variant}'.") - continue - - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Res {res_key}: Successfully saved temporary variant map to '{temp_output_path_variant}' with dims ({target_w_res}x{target_h_res}).") - processed_at_least_one_resolution_variant = True - - if 'variants' not in context.processed_maps_details[current_map_key]: # Use current_map_key (file_rule_idx) - context.processed_maps_details[current_map_key]['variants'] = [] - - context.processed_maps_details[current_map_key]['variants'].append({ # Use current_map_key (file_rule_idx) - 'resolution_key': res_key, - 'temp_path': str(temp_output_path_variant), - 'dimensions': (target_w_res, target_h_res), - 'resolution_name': f"{target_w_res}x{target_h_res}" - }) - - if 'processed_files' not in context.asset_metadata: - context.asset_metadata['processed_files'] = [] - context.asset_metadata['processed_files'].append({ - 'processed_map_key': current_map_key, # Use current_map_key (file_rule_idx) - 'resolution_key': res_key, - 'path': str(temp_output_path_variant), - 'type': 'temporary_map_variant', - 'map_type': current_map_type, - 'dimensions_w': target_w_res, - 'dimensions_h': target_h_res - }) - # Calculate and store image statistics for the lowest resolution output - lowest_res_image_data_for_stats = None - image_to_stat_path_for_log = "N/A" - source_of_stats_image = "unknown" - - if processed_at_least_one_resolution_variant and \ - current_map_key in context.processed_maps_details and \ - 'variants' in context.processed_maps_details[current_map_key] and \ - context.processed_maps_details[current_map_key]['variants']: - - variants_list = context.processed_maps_details[current_map_key]['variants'] - valid_variants_for_stats = [ - v for v in variants_list - if isinstance(v.get('dimensions'), tuple) and len(v['dimensions']) == 2 and v['dimensions'][0] > 0 and v['dimensions'][1] > 0 - ] - - if valid_variants_for_stats: - smallest_variant = min(valid_variants_for_stats, key=lambda v: v['dimensions'][0] * v['dimensions'][1]) - - if smallest_variant and 'temp_path' in smallest_variant and smallest_variant.get('dimensions'): - smallest_res_w, smallest_res_h = smallest_variant['dimensions'] - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Identified smallest variant for stats: {smallest_variant.get('resolution_key', 'N/A')} ({smallest_res_w}x{smallest_res_h}) at {smallest_variant['temp_path']}") - lowest_res_image_data_for_stats = ipu.load_image(smallest_variant['temp_path']) - image_to_stat_path_for_log = smallest_variant['temp_path'] - source_of_stats_image = f"variant {smallest_variant.get('resolution_key', 'N/A')}" - if lowest_res_image_data_for_stats is None: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Failed to load smallest variant image '{smallest_variant['temp_path']}' for stats.") + if saved_files_details_list: + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unified Save Utility completed successfully. Saved {len(saved_files_details_list)} variants.") + processing_status = "Processed_Via_Save_Utility" else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Could not determine smallest variant for stats from valid variants list (details missing).") - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: No valid variants found to determine the smallest one for stats.") + logger.warning(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unified Save Utility returned no saved file details. Check utility logs.") + processing_status = "Processed_Save_Utility_No_Output" # Or potentially "Failed" depending on severity + status_notes.append("Save utility reported no files saved") - if lowest_res_image_data_for_stats is None: - if base_pot_image_data is not None: - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Using base POT image for stats (dimensions: {base_pot_width}x{base_pot_height}). Smallest variant not available/loaded or no variants generated.") - lowest_res_image_data_for_stats = base_pot_image_data - image_to_stat_path_for_log = f"In-memory base POT image (dims: {base_pot_width}x{base_pot_height})" - source_of_stats_image = "base POT" - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Base POT image data is also None. Cannot calculate stats.") + except Exception as e: + logger.exception(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Error calling or executing save_image_variants: {e}") + processing_status = "Failed" + status_notes.append(f"Save utility call failed: {e}") + # saved_files_details_list remains empty - if lowest_res_image_data_for_stats is not None: - stats_dict = ipu.calculate_image_stats(lowest_res_image_data_for_stats) - if stats_dict and "error" not in stats_dict: - if 'image_stats_lowest_res' not in context.asset_metadata: - context.asset_metadata['image_stats_lowest_res'] = {} - - context.asset_metadata['image_stats_lowest_res'][current_map_type] = stats_dict # Keyed by map_type - logger.info(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': Calculated and stored image stats from '{source_of_stats_image}' (source ref: '{image_to_stat_path_for_log}').") - elif stats_dict and "error" in stats_dict: - logger.error(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': Error calculating image stats from '{source_of_stats_image}': {stats_dict['error']}.") - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': Failed to calculate image stats from '{source_of_stats_image}' (result was None or empty).") - else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}, Map Type '{current_map_type}': No image data available (from variant or base POT) to calculate stats.") + except Exception as e: + logger.exception(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unhandled exception during processing loop for item: {e}") + processing_status = "Failed" + status_notes.append(f"Unhandled exception: {e}") - # Final status update based on whether variants were generated (and expected) - if generate_variants_for_this_map_type: - if processed_at_least_one_resolution_variant: - self._update_file_rule_status(context, current_map_key, 'Processed_With_Variants', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - details="Successfully processed with multiple resolution variants.") + finally: + # --- Update Context --- + logger.debug(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Updating context. Status: {processing_status}, Notes: {status_notes}") + details_entry = { + 'status': processing_status, + 'map_type': base_map_type, # Final filename-friendly type + 'processing_map_type': processing_map_type, # Final internal type + 'notes': " | ".join(status_notes), + 'saved_files_info': saved_files_details_list, + 'original_dimensions': original_dimensions, # (w, h) + } + if is_merged_task: + details_entry['merge_task_config_output_type'] = merge_task_config_output_type + details_entry['inputs_used_for_merge'] = inputs_used_for_merge + details_entry['source_bit_depths'] = source_bit_depth_info_for_save_util # Store the list used else: - logger.warning(f"Asset '{asset_name_for_log}', Map Key {current_map_key}, Proc. Tag {processing_instance_tag}: Variants were expected for map type '{current_map_type}', but none were generated (e.g., base POT too small for any variant tier).") - self._update_file_rule_status(context, current_map_key, 'Processed_No_Variants', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - details="Variants expected but none generated (e.g., base POT too small).") - else: # No variants were expected for this map type - self._update_file_rule_status(context, current_map_key, 'Processed_No_Variants', - map_type=filename_friendly_map_type, - processing_map_type=current_map_type, - source_file_rule_index=file_rule_idx, - processing_tag=processing_instance_tag, - details="Processed to base POT; variants not applicable for this map type.") + # Regular map specific details + details_entry['source_file'] = str(source_file_path_regular) if source_file_path_regular else "N/A" + details_entry['original_bit_depth'] = source_bit_depth_info_for_save_util[0] if source_bit_depth_info_for_save_util else None + details_entry['source_file_rule_index'] = item_key # Store original index + + context.processed_maps_details[item_key] = details_entry + logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Context updated for this item.") logger.info(f"Asset '{asset_name_for_log}': Finished individual map processing stage.") return context - def _find_source_file(self, base_path: Path, pattern: str, asset_name_for_log: str, processing_instance_tag: str) -> Optional[Path]: + def _get_suffixed_internal_map_type(self, context: AssetProcessingContext, current_file_rule: FileRule, initial_internal_map_type: str, respect_variant_map_types: List[str]) -> str: """ - Finds a single source file matching the pattern within the base_path. - Logs use processing_instance_tag for specific run tracing. + Determines the potentially suffixed internal map type (e.g., MAP_COL-1) + based on occurrences within the asset rule's file list. """ - if not pattern: - logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Empty file_path provided in FileRule.") - return None - - # If pattern is an absolute path, use it directly - potential_abs_path = Path(pattern) - if potential_abs_path.is_absolute() and potential_abs_path.exists(): - logger.debug(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: file_path '{pattern}' is absolute and exists. Using it directly.") - return potential_abs_path - elif potential_abs_path.is_absolute(): - logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: file_path '{pattern}' is absolute but does not exist.") - # Fall through to try resolving against base_path if it's just a name/relative pattern - - # Treat pattern as relative to base_path - # This could be an exact name or a glob pattern - try: - # First, check if pattern is an exact relative path - exact_match_path = base_path / pattern - if exact_match_path.exists() and exact_match_path.is_file(): - logger.debug(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Found exact match for '{pattern}' at '{exact_match_path}'.") - return exact_match_path - - # If not an exact match, try as a glob pattern (recursive) - matched_files_rglob = list(base_path.rglob(pattern)) - if matched_files_rglob: - if len(matched_files_rglob) > 1: - logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Multiple files ({len(matched_files_rglob)}) found for pattern '{pattern}' in '{base_path}' (recursive). Using first: {matched_files_rglob[0]}. Files: {matched_files_rglob}") - return matched_files_rglob[0] - - # Try non-recursive glob if rglob fails - matched_files_glob = list(base_path.glob(pattern)) - if matched_files_glob: - if len(matched_files_glob) > 1: - logger.warning(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Multiple files ({len(matched_files_glob)}) found for pattern '{pattern}' in '{base_path}' (non-recursive). Using first: {matched_files_glob[0]}. Files: {matched_files_glob}") - return matched_files_glob[0] - - logger.debug(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: No files found matching pattern '{pattern}' in '{base_path}' (exact, recursive, or non-recursive).") - return None - except Exception as e: - logger.error(f"Asset '{asset_name_for_log}', Proc. Tag {processing_instance_tag}: Error searching for file with pattern '{pattern}' in '{base_path}': {e}") - return None - - def _update_file_rule_status(self, context: AssetProcessingContext, map_key_index: int, status: str, **kwargs): # Renamed map_id_hex to map_key_index - """Helper to update processed_maps_details for a map, keyed by file_rule_idx.""" + final_internal_map_type = initial_internal_map_type # Default asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" - if map_key_index not in context.processed_maps_details: - context.processed_maps_details[map_key_index] = {} - - context.processed_maps_details[map_key_index]['status'] = status - for key, value in kwargs.items(): - # Ensure source_file_rule_id_hex is not added if it was somehow passed (it shouldn't be) - if key == 'source_file_rule_id_hex': - continue - context.processed_maps_details[map_key_index][key] = value - - if 'map_type' not in context.processed_maps_details[map_key_index] and 'map_type' in kwargs: - context.processed_maps_details[map_key_index]['map_type'] = kwargs['map_type'] - - # Add formatted resolution names - if 'original_dimensions' in kwargs and isinstance(kwargs['original_dimensions'], tuple) and len(kwargs['original_dimensions']) == 2: - orig_w, orig_h = kwargs['original_dimensions'] - context.processed_maps_details[map_key_index]['original_resolution_name'] = f"{orig_w}x{orig_h}" - - # Determine the correct dimensions to use for 'processed_resolution_name' - # This name refers to the base POT scaled image dimensions before variant generation. - dims_to_log_as_base_processed = None - if 'base_pot_dimensions' in kwargs and isinstance(kwargs['base_pot_dimensions'], tuple) and len(kwargs['base_pot_dimensions']) == 2: - # This key is used when status is 'Processed_With_Variants' - dims_to_log_as_base_processed = kwargs['base_pot_dimensions'] - elif 'processed_dimensions' in kwargs and isinstance(kwargs['processed_dimensions'], tuple) and len(kwargs['processed_dimensions']) == 2: - # This key is used when status is 'Processed_No_Variants' (and potentially others) - dims_to_log_as_base_processed = kwargs['processed_dimensions'] - - if dims_to_log_as_base_processed: - proc_w, proc_h = dims_to_log_as_base_processed - resolution_name_str = f"{proc_w}x{proc_h}" - context.processed_maps_details[map_key_index]['base_pot_resolution_name'] = resolution_name_str - # Ensure 'processed_resolution_name' is also set for OutputOrganizationStage compatibility - context.processed_maps_details[map_key_index]['processed_resolution_name'] = resolution_name_str - elif 'processed_dimensions' in kwargs or 'base_pot_dimensions' in kwargs: - details_for_warning = kwargs.get('processed_dimensions', kwargs.get('base_pot_dimensions')) - logger.warning(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: 'processed_dimensions' or 'base_pot_dimensions' key present but its value is not a valid 2-element tuple: {details_for_warning}") - - # If temp_processed_file was passed, ensure it's in the details - if 'temp_processed_file' in kwargs: - context.processed_maps_details[map_key_index]['temp_processed_file'] = kwargs['temp_processed_file'] - # Log all details being stored for clarity, including the newly added resolution names - log_details = context.processed_maps_details[map_key_index].copy() - # Avoid logging full image data if it accidentally gets into kwargs - if 'image_data' in log_details: del log_details['image_data'] - if 'base_pot_image_data' in log_details: del log_details['base_pot_image_data'] - logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Status updated to '{status}'. Details: {log_details}") \ No newline at end of file + base_map_type_match = re.match(r"(MAP_[A-Z]{3})", initial_internal_map_type) + if not base_map_type_match or not context.asset_rule or not context.asset_rule.files: + return final_internal_map_type # Cannot determine suffix without base type or asset rule files + + true_base_map_type = base_map_type_match.group(1) # This is "MAP_XXX" + + peers_of_same_base_type = [] + for fr_asset in context.asset_rule.files: + fr_asset_item_type = fr_asset.item_type_override or fr_asset.item_type or "UnknownMapType" + fr_asset_base_match = re.match(r"(MAP_[A-Z]{3})", fr_asset_item_type) + if fr_asset_base_match and fr_asset_base_match.group(1) == true_base_map_type: + peers_of_same_base_type.append(fr_asset) + + num_occurrences = len(peers_of_same_base_type) + current_instance_index = 0 # 1-based index + + try: + # Find the index based on the FileRule object itself + current_instance_index = peers_of_same_base_type.index(current_file_rule) + 1 + except ValueError: + # Fallback: try matching by file_path if object identity fails (less reliable) + try: + current_instance_index = [fr.file_path for fr in peers_of_same_base_type].index(current_file_rule.file_path) + 1 + logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Found peer index using file_path fallback.") + except (ValueError, AttributeError): # Catch AttributeError if file_path is None + logger.warning( + f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}' (Initial Type: '{initial_internal_map_type}', Base: '{true_base_map_type}'): " + f"Could not find its own instance in the list of {num_occurrences} peers from asset_rule.files using object identity or path. Suffixing may be incorrect." + ) + # Keep index 0, suffix logic below will handle it + + # Determine Suffix + map_type_for_respect_check = true_base_map_type.replace("MAP_", "") # e.g., "COL" + is_in_respect_list = map_type_for_respect_check in respect_variant_map_types + + suffix_to_append = "" + if num_occurrences > 1: + if current_instance_index > 0: + suffix_to_append = f"-{current_instance_index}" + else: + # If index is still 0 (not found), don't add suffix to avoid ambiguity + logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Index for multi-occurrence map type '{true_base_map_type}' (count: {num_occurrences}) not determined. Omitting numeric suffix.") + elif num_occurrences == 1 and is_in_respect_list: + suffix_to_append = "-1" # Add suffix even for single instance if in respect list + + if suffix_to_append: + final_internal_map_type = true_base_map_type + suffix_to_append + # else: final_internal_map_type remains the initial_internal_map_type if no suffix needed + + if final_internal_map_type != initial_internal_map_type: + logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Suffixed internal map type determined: '{initial_internal_map_type}' -> '{final_internal_map_type}'") + + return final_internal_map_type \ No newline at end of file diff --git a/processing/pipeline/stages/map_merging.py b/processing/pipeline/stages/map_merging.py index 5dacc73..696b05e 100644 --- a/processing/pipeline/stages/map_merging.py +++ b/processing/pipeline/stages/map_merging.py @@ -2,14 +2,12 @@ import logging from pathlib import Path from typing import Dict, Optional, List, Tuple -import numpy as np -import cv2 # For potential direct cv2 operations if ipu doesn't cover all merge needs from .base_stage import ProcessingStage from ..asset_context import AssetProcessingContext from rule_structure import FileRule from utils.path_utils import sanitize_filename -from ...utils import image_processing_utils as ipu + logger = logging.getLogger(__name__) @@ -34,48 +32,20 @@ class MapMergingStage(ProcessingStage): if context.status_flags.get('skip_asset'): logger.info(f"Skipping map merging for asset {asset_name_for_log} as skip_asset flag is set.") return context - if not hasattr(context, 'merged_maps_details'): context.merged_maps_details = {} + + if not hasattr(context, 'merged_image_tasks'): + context.merged_image_tasks = [] if not hasattr(context, 'processed_maps_details'): - logger.warning(f"Asset {asset_name_for_log}: 'processed_maps_details' not found in context. Cannot perform map merging.") + logger.warning(f"Asset {asset_name_for_log}: 'processed_maps_details' not found in context. Cannot generate merge tasks.") return context - if not context.files_to_process: # This list might not be relevant if merge rules are defined elsewhere or implicitly - logger.info(f"Asset {asset_name_for_log}: No files_to_process defined. This stage might rely on config or processed_maps_details directly for merge rules.") - # Depending on design, this might not be an error, so we don't return yet. - logger.info(f"Starting MapMergingStage for asset: {asset_name_for_log}") - # TODO: The logic for identifying merge rules and their inputs needs significant rework - # as FileRule no longer has 'id' or 'merge_settings' directly in the way this stage expects. - # Merge rules are likely defined in the main configuration (context.config_obj.map_merge_rules) - # and need to be matched against available maps in context.processed_maps_details. - - # Placeholder for the loop that would iterate over context.config_obj.map_merge_rules - # For now, this stage will effectively do nothing until that logic is implemented. - - # Example of how one might start to adapt: - # for configured_merge_rule in context.config_obj.map_merge_rules: - # output_map_type = configured_merge_rule.get('output_map_type') - # inputs_config = configured_merge_rule.get('inputs') # e.g. {"R": "NORMAL", "G": "ROUGHNESS"} - # # ... then find these input map_types in context.processed_maps_details ... - # # ... and perform the merge ... - # # This is a complex change beyond simple attribute renaming. - - # The following is the original loop structure, which will likely fail due to missing attributes on FileRule. - # Keeping it commented out to show what was there. - """ - for merge_rule in context.files_to_process: # This iteration logic is likely incorrect for merge rules - if not isinstance(merge_rule, FileRule) or merge_rule.item_type != "MAP_MERGE": - continue - - # FileRule does not have merge_settings or id.hex - # This entire block needs to be re-thought based on where merge rules are defined. - # Assuming merge_rule_id_hex would be a generated UUID for this operation. - merge_rule_id_hex = f"merge_op_{uuid.uuid4().hex[:8]}" - current_map_type = merge_rule.item_type_override or merge_rule.item_type + # The core merge rules are in context.config_obj.map_merge_rules + # Each rule in there defines an output_map_type and its inputs. logger.error(f"Asset {asset_name_for_log}, Potential Merge for {current_map_type}: Merge rule processing needs rework. FileRule lacks 'merge_settings' and 'id'. Skipping this rule.") context.merged_maps_details[merge_rule_id_hex] = { @@ -84,7 +54,7 @@ class MapMergingStage(ProcessingStage): 'reason': 'Merge rule processing logic in MapMergingStage needs refactor due to FileRule changes.' } continue - """ + # For now, let's assume no merge rules are processed until the logic is fixed. num_merge_rules_attempted = 0 @@ -115,24 +85,20 @@ class MapMergingStage(ProcessingStage): merge_op_id = f"merge_{sanitize_filename(output_map_type)}_{rule_idx}" logger.info(f"Asset {asset_name_for_log}: Processing configured merge rule for '{output_map_type}' (Op ID: {merge_op_id})") - loaded_input_maps: Dict[str, np.ndarray] = {} # Key: input_map_type (e.g. "NRM"), Value: image_data - input_map_paths: Dict[str, str] = {} # Key: input_map_type, Value: path_str - target_dims: Optional[Tuple[int, int]] = None - all_inputs_valid = True - - # Find and load input maps from processed_maps_details - # This assumes one processed map per map_type. If multiple variants exist, this needs refinement. + input_map_sources_list = [] + source_bit_depths_list = [] + primary_source_dimensions = None + + # Find required input maps from processed_maps_details required_input_map_types = set(inputs_map_type_to_channel.values()) - + for required_map_type in required_input_map_types: found_processed_map_details = None - # The key `p_key_idx` is the file_rule_idx from the IndividualMapProcessingStage - for p_key_idx, p_details in context.processed_maps_details.items(): # p_key_idx is an int + # Iterate through processed_maps_details to find the required map type + for p_key_idx, p_details in context.processed_maps_details.items(): processed_map_identifier = p_details.get('processing_map_type', p_details.get('map_type')) - - # Comprehensive list of valid statuses for an input map to be used in merging - valid_input_statuses = ['BasePOTSaved', 'Processed_With_Variants', 'Processed_No_Variants', 'Converted_To_Rough'] + # Check for a match, considering both "MAP_TYPE" and "TYPE" formats is_match = False if processed_map_identifier == required_map_type: is_match = True @@ -141,207 +107,56 @@ class MapMergingStage(ProcessingStage): elif not required_map_type.startswith("MAP_") and processed_map_identifier == f"MAP_{required_map_type}": is_match = True - if is_match and p_details.get('status') in valid_input_statuses: - found_processed_map_details = p_details - # The key `p_key_idx` (which is the FileRule index) is implicitly associated with these details. - break - - if not found_processed_map_details: - can_be_fully_defaulted = True - channels_requiring_this_map = [ - ch_key for ch_key, map_type_val in inputs_map_type_to_channel.items() - if map_type_val == required_map_type - ] + # Check if the found map is in a usable status and has a temporary file + valid_input_statuses = ['BasePOTSaved', 'Processed_With_Variants', 'Processed_No_Variants', 'Converted_To_Rough'] # Add other relevant statuses if needed + if is_match and p_details.get('status') in valid_input_statuses and p_details.get('temp_processed_file'): + # Also check if the temp file actually exists on disk + if Path(p_details.get('temp_processed_file')).exists(): + found_processed_map_details = p_details + break # Found a suitable input, move to the next required map type - if not channels_requiring_this_map: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Internal logic error. Required map_type '{required_map_type}' is not actually used by any output channel. Configuration: {inputs_map_type_to_channel}") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Internal error: required map_type '{required_map_type}' not in use."} - break - - for channel_char_needing_default in channels_requiring_this_map: - if default_values.get(channel_char_needing_default) is None: - can_be_fully_defaulted = False - break - - if can_be_fully_defaulted: - logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map_type '{required_map_type}' for output '{output_map_type}' not found or not in usable state. Will attempt to use default values for its channels: {channels_requiring_this_map}.") - else: - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map_type '{required_map_type}' for output '{output_map_type}' not found/unusable, AND not all its required channels ({channels_requiring_this_map}) have defaults. Failing merge op.") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Input '{required_map_type}' missing and defaults incomplete."} - break - if found_processed_map_details: - temp_file_path_str = found_processed_map_details.get('temp_processed_file') - if not temp_file_path_str: - # Log with p_key_idx if available, or just the map type if not (though it should be if found_processed_map_details is set) - log_key_info = f"(Associated Key Index: {p_key_idx})" if 'p_key_idx' in locals() and found_processed_map_details else "" # Use locals() to check if p_key_idx is defined in this scope - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: 'temp_processed_file' missing in details for found map_type '{required_map_type}' {log_key_info}.") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Temp file path missing for input '{required_map_type}'."} - break - - temp_file_path = Path(temp_file_path_str) - if not temp_file_path.exists(): - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Temp file {temp_file_path} for input map_type '{required_map_type}' does not exist.") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Temp file for input '{required_map_type}' missing."} - break + file_path = found_processed_map_details.get('temp_processed_file') + dimensions = found_processed_map_details.get('base_pot_dimensions') - try: - image_data = ipu.load_image(str(temp_file_path)) - if image_data is None: raise ValueError("Loaded image is None") - except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error loading image {temp_file_path} for input map_type '{required_map_type}': {e}") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Error loading input '{required_map_type}'."} - break - - loaded_input_maps[required_map_type] = image_data - input_map_paths[required_map_type] = str(temp_file_path) + # Attempt to get original_bit_depth, log warning if not found + original_bit_depth = found_processed_map_details.get('original_bit_depth') + if original_bit_depth is None: + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: 'original_bit_depth' not found in processed_maps_details for map type '{required_map_type}'. This value is pending IndividualMapProcessingStage refactoring and will be None or a default for now.") - current_dims = (image_data.shape[1], image_data.shape[0]) - if target_dims is None: - target_dims = current_dims - elif current_dims != target_dims: - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{required_map_type}' dims {current_dims} differ from target {target_dims}. Resizing.") - try: - image_data_resized = ipu.resize_image(image_data, target_dims[0], target_dims[1]) - if image_data_resized is None: raise ValueError("Resize returned None") - loaded_input_maps[required_map_type] = image_data_resized - except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to resize '{required_map_type}': {e}") - all_inputs_valid = False - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f"Failed to resize input '{required_map_type}'."} - break - - if not all_inputs_valid: - logger.warning(f"Asset {asset_name_for_log}: Skipping merge for Op ID {merge_op_id} ('{output_map_type}') due to invalid inputs.") - continue + input_map_sources_list.append({ + 'map_type': required_map_type, + 'file_path': file_path, + 'dimensions': dimensions, + 'original_bit_depth': original_bit_depth + }) + source_bit_depths_list.append(original_bit_depth) - if not loaded_input_maps and not any(default_values.get(ch) is not None for ch in inputs_map_type_to_channel.keys()): - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: No input maps loaded and no defaults available for any channel for '{output_map_type}'. Cannot proceed.") - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'No input maps loaded and no defaults available.'} - continue - - if target_dims is None: - default_res_key = context.config_obj.get("default_output_resolution_key_for_merge", "1K") - image_resolutions_cfg = getattr(context.config_obj, "image_resolutions", {}) - default_max_dim = image_resolutions_cfg.get(default_res_key) - - if default_max_dim: - target_dims = (default_max_dim, default_max_dim) - logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Target dimensions not set by inputs (all defaulted). Using configured default resolution '{default_res_key}': {target_dims}.") + # Set primary_source_dimensions from the first valid input found + if primary_source_dimensions is None and dimensions: + primary_source_dimensions = dimensions else: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Target dimensions could not be determined for '{output_map_type}' (all inputs defaulted and no default output resolution configured).") - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'Target dimensions undetermined for fully defaulted merge.'} - continue - - output_channel_keys = sorted(list(inputs_map_type_to_channel.keys())) - num_output_channels = len(output_channel_keys) - - if num_output_channels == 0: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: No output channels defined in 'inputs' for '{output_map_type}'.") - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'No output channels defined.'} - continue - - try: - output_dtype = np.uint8 - - if num_output_channels == 1: - merged_image = np.zeros((target_dims[1], target_dims[0]), dtype=output_dtype) - else: - merged_image = np.zeros((target_dims[1], target_dims[0], num_output_channels), dtype=output_dtype) - except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error creating empty merged image for '{output_map_type}': {e}") - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f'Error creating output canvas: {e}'} - continue - - merge_op_failed_detail = False - for i, out_channel_char in enumerate(output_channel_keys): - input_map_type_for_this_channel = inputs_map_type_to_channel[out_channel_char] - source_image = loaded_input_maps.get(input_map_type_for_this_channel) - - source_data_this_channel = None - if source_image is not None: - if source_image.dtype != np.uint8: - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{input_map_type_for_this_channel}' has dtype {source_image.dtype}, expected uint8. Attempting conversion.") - source_image = ipu.convert_to_uint8(source_image) - if source_image is None: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to convert input '{input_map_type_for_this_channel}' to uint8.") - merge_op_failed_detail = True; break + # If a required map is not found, log a warning but don't fail the task generation. + # The consuming stage will handle missing inputs and fallbacks. + logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map type '{required_map_type}' not found or not in a usable state in context.processed_maps_details. This input will be skipped for task generation.") - if source_image.ndim == 2: - source_data_this_channel = source_image - elif source_image.ndim == 3: - semantic_to_bgr_idx = {'R': 2, 'G': 1, 'B': 0, 'A': 3} - - idx_to_extract = semantic_to_bgr_idx.get(out_channel_char.upper()) - - if idx_to_extract is not None and idx_to_extract < source_image.shape[2]: - source_data_this_channel = source_image[:, :, idx_to_extract] - logger.debug(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: For output '{out_channel_char}', using source '{input_map_type_for_this_channel}' semantic '{out_channel_char}' (BGR(A) index {idx_to_extract}).") - else: - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Could not map output '{out_channel_char}' to a specific BGR(A) channel of '{input_map_type_for_this_channel}' (shape {source_image.shape}). Defaulting to its channel 0 (Blue).") - source_data_this_channel = source_image[:, :, 0] - else: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Source image '{input_map_type_for_this_channel}' has unexpected dimensions: {source_image.ndim} (shape {source_image.shape}).") - merge_op_failed_detail = True; break - - else: - default_val_for_channel = default_values.get(out_channel_char) - if default_val_for_channel is not None: - try: - scaled_default_val = int(float(default_val_for_channel) * 255) - source_data_this_channel = np.full((target_dims[1], target_dims[0]), scaled_default_val, dtype=np.uint8) - logger.info(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Using default value {default_val_for_channel} (scaled to {scaled_default_val}) for output channel '{out_channel_char}' as input map '{input_map_type_for_this_channel}' was missing.") - except ValueError: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Default value '{default_val_for_channel}' for channel '{out_channel_char}' is not a valid float. Cannot scale.") - merge_op_failed_detail = True; break - else: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Input map '{input_map_type_for_this_channel}' for output channel '{out_channel_char}' is missing and no default value provided.") - merge_op_failed_detail = True; break - - if source_data_this_channel is None: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Failed to get source data for output channel '{out_channel_char}'.") - merge_op_failed_detail = True; break - - try: - if merged_image.ndim == 2: - merged_image = source_data_this_channel - else: - merged_image[:, :, i] = source_data_this_channel - except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error assigning data to output channel '{out_channel_char}' (index {i}): {e}. Merged shape: {merged_image.shape}, Source data shape: {source_data_this_channel.shape}") - merge_op_failed_detail = True; break - - if merge_op_failed_detail: - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': 'Error during channel assignment.'} - continue - - output_format = 'png' - temp_merged_filename = f"merged_{sanitize_filename(output_map_type)}_{merge_op_id}.{output_format}" - temp_merged_path = context.engine_temp_dir / temp_merged_filename - - try: - save_success = ipu.save_image(str(temp_merged_path), merged_image) - if not save_success: raise ValueError("Save image returned false") - except Exception as e: - logger.error(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Error saving merged image {temp_merged_path}: {e}") - context.merged_maps_details[merge_op_id] = {'map_type': output_map_type, 'status': 'Failed', 'reason': f'Failed to save merged image: {e}'} - continue - - logger.info(f"Asset {asset_name_for_log}: Successfully merged and saved '{output_map_type}' (Op ID: {merge_op_id}) to {temp_merged_path}") - context.merged_maps_details[merge_op_id] = { - 'map_type': output_map_type, - 'temp_merged_file': str(temp_merged_path), - 'input_map_types_used': list(inputs_map_type_to_channel.values()), - 'input_map_files_used': input_map_paths, - 'merged_dimensions': target_dims, - 'status': 'Processed' + # Create the merged image task dictionary + merged_task = { + 'output_map_type': output_map_type, + 'input_map_sources': input_map_sources_list, + 'merge_rule_config': configured_merge_rule, + 'source_dimensions': primary_source_dimensions, # Can be None if no inputs were found + 'source_bit_depths': source_bit_depths_list } - logger.info(f"Finished MapMergingStage for asset: {asset_name_for_log}. Merged maps operations attempted: {num_merge_rules_attempted}, Succeeded: {len([d for d in context.merged_maps_details.values() if d.get('status') == 'Processed'])}") + # Append the task to the context + context.merged_image_tasks.append(merged_task) + logger.info(f"Asset {asset_name_for_log}: Generated merge task for '{output_map_type}' (Op ID: {merge_op_id}). Task details: {merged_task}") + + # Note: We no longer populate context.merged_maps_details with 'Processed' status here, + # as this stage only generates tasks, it doesn't perform the merge or save files. + # The merged_maps_details will be populated by the stage that consumes these tasks. + + logger.info(f"Finished MapMergingStage for asset: {asset_name_for_log}. Merge tasks generated: {len(context.merged_image_tasks)}") return context \ No newline at end of file diff --git a/processing/utils/image_processing_utils.py b/processing/utils/image_processing_utils.py index 81e3e08..e9181b8 100644 --- a/processing/utils/image_processing_utils.py +++ b/processing/utils/image_processing_utils.py @@ -163,6 +163,37 @@ def calculate_target_dimensions( # --- Image Statistics --- +def get_image_bit_depth(image_path_str: str) -> Optional[int]: + """ + Determines the bit depth of an image file. + """ + try: + # Use IMREAD_UNCHANGED to preserve original bit depth + img = cv2.imread(image_path_str, cv2.IMREAD_UNCHANGED) + if img is None: + # logger.error(f"Failed to read image for bit depth: {image_path_str}") # Use print for utils + print(f"Warning: Failed to read image for bit depth: {image_path_str}") + return None + + dtype_to_bit_depth = { + np.dtype('uint8'): 8, + np.dtype('uint16'): 16, + np.dtype('float32'): 32, # Typically for EXR etc. + np.dtype('int8'): 8, # Unlikely for images but good to have + np.dtype('int16'): 16, # Unlikely + # Add other dtypes if necessary + } + bit_depth = dtype_to_bit_depth.get(img.dtype) + if bit_depth is None: + # logger.warning(f"Unknown dtype {img.dtype} for image {image_path_str}, cannot determine bit depth.") # Use print for utils + print(f"Warning: Unknown dtype {img.dtype} for image {image_path_str}, cannot determine bit depth.") + pass # Return None + return bit_depth + except Exception as e: + # logger.error(f"Error getting bit depth for {image_path_str}: {e}") # Use print for utils + print(f"Error getting bit depth for {image_path_str}: {e}") + return None + def calculate_image_stats(image_data: np.ndarray) -> Optional[Dict]: """ Calculates min, max, mean for a given numpy image array. diff --git a/processing/utils/image_saving_utils.py b/processing/utils/image_saving_utils.py new file mode 100644 index 0000000..7a51d14 --- /dev/null +++ b/processing/utils/image_saving_utils.py @@ -0,0 +1,250 @@ +import logging +import cv2 +import numpy as np +from pathlib import Path +from typing import List, Dict, Any, Tuple, Optional + +# Potentially import ipu from ...utils import image_processing_utils as ipu +# Assuming ipu is available in the same utils directory or parent +try: + from . import image_processing_utils as ipu +except ImportError: + # Fallback for different import structures if needed, adjust based on actual project structure + # For this project structure, the relative import should work. + logging.warning("Could not import image_processing_utils using relative path. Attempting absolute import.") + try: + from processing.utils import image_processing_utils as ipu + except ImportError: + logging.error("Could not import image_processing_utils.") + ipu = None # Handle case where ipu is not available + +logger = logging.getLogger(__name__) + +def save_image_variants( + source_image_data: np.ndarray, + base_map_type: str, # Filename-friendly map type + source_bit_depth_info: List[Optional[int]], + image_resolutions: Dict[str, int], + file_type_defs: Dict[str, Dict[str, Any]], + output_format_8bit: str, + output_format_16bit_primary: str, + output_format_16bit_fallback: str, + png_compression_level: int, + jpg_quality: int, + output_filename_pattern_tokens: Dict[str, Any], # Must include 'output_base_directory': Path and 'asset_name': str + output_filename_pattern: str, + # Consider adding ipu or relevant parts of it if not importing globally +) -> List[Dict[str, Any]]: + """ + Centralizes image saving logic, generating and saving various resolution variants + according to configuration. + + Args: + source_image_data (np.ndarray): High-res image data (in memory, potentially transformed). + base_map_type (str): Final map type (e.g., "COL", "ROUGH", "NORMAL", "MAP_NRMRGH"). + This is the filename-friendly map type. + source_bit_depth_info (List[Optional[int]]): List of original source bit depth(s) + (e.g., [8], [16], [8, 16]). Can contain None. + image_resolutions (Dict[str, int]): Dictionary mapping resolution keys (e.g., "4K") + to max dimensions (e.g., 4096). + file_type_defs (Dict[str, Dict[str, Any]]): Dictionary defining properties for map types, + including 'bit_depth_rule'. + output_format_8bit (str): File extension for 8-bit output (e.g., "jpg", "png"). + output_format_16bit_primary (str): Primary file extension for 16-bit output (e.g., "png", "tif"). + output_format_16bit_fallback (str): Fallback file extension for 16-bit output. + png_compression_level (int): Compression level for PNG output (0-9). + jpg_quality (int): Quality level for JPG output (0-100). + output_filename_pattern_tokens (Dict[str, Any]): Dictionary of tokens for filename + pattern replacement. Must include + 'output_base_directory' (Path) and + 'asset_name' (str). + output_filename_pattern (str): Pattern string for generating output filenames + (e.g., "[assetname]_[maptype]_[resolution].[ext]"). + + Returns: + List[Dict[str, Any]]: A list of dictionaries, each containing details about a saved file. + Example: [{'path': str, 'resolution_key': str, 'format': str, + 'bit_depth': int, 'dimensions': (w,h)}, ...] + """ + if ipu is None: + logger.error("image_processing_utils is not available. Cannot save images.") + return [] + + saved_file_details = [] + source_h, source_w = source_image_data.shape[:2] + source_max_dim = max(source_h, source_w) + + # 1. Use provided configuration inputs (already available as function arguments) + logger.info(f"Saving variants for map type: {base_map_type}") + + # 2. Determine Target Bit Depth + target_bit_depth = 8 # Default + bit_depth_rule = file_type_defs.get(base_map_type, {}).get('bit_depth_rule', 'force_8bit') + if bit_depth_rule not in ['force_8bit', 'respect_inputs']: + logger.warning(f"Unknown bit_depth_rule '{bit_depth_rule}' for map type '{base_map_type}'. Defaulting to 'force_8bit'.") + bit_depth_rule = 'force_8bit' + + if bit_depth_rule == 'respect_inputs': + # Check if any source bit depth is > 8, ignoring None + if any(depth is not None and depth > 8 for depth in source_bit_depth_info): + target_bit_depth = 16 + else: + target_bit_depth = 8 + logger.info(f"Bit depth rule 'respect_inputs' applied. Source bit depths: {source_bit_depth_info}. Target bit depth: {target_bit_depth}") + else: # force_8bit + target_bit_depth = 8 + logger.info(f"Bit depth rule 'force_8bit' applied. Target bit depth: {target_bit_depth}") + + + # 3. Determine Output File Format(s) + if target_bit_depth == 8: + output_ext = output_format_8bit.lstrip('.').lower() + elif target_bit_depth == 16: + # Prioritize primary, fallback to fallback if primary is not supported/desired + # For now, just use primary. More complex logic might be needed later. + output_ext = output_format_16bit_primary.lstrip('.').lower() + # Basic fallback logic example (can be expanded) + if output_ext not in ['png', 'tif']: # Assuming common 16-bit formats + output_ext = output_format_16bit_fallback.lstrip('.').lower() + logger.warning(f"Primary 16-bit format '{output_format_16bit_primary}' might not be suitable. Using fallback '{output_format_16bit_fallback}'.") + else: + logger.error(f"Unsupported target bit depth: {target_bit_depth}. Defaulting to 8-bit format.") + output_ext = output_format_8bit.lstrip('.').lower() + + logger.info(f"Target bit depth: {target_bit_depth}, Output format: {output_ext}") + + # 4. Generate and Save Resolution Variants + # Sort resolutions by max dimension descending + sorted_resolutions = sorted(image_resolutions.items(), key=lambda item: item[1], reverse=True) + + for res_key, res_max_dim in sorted_resolutions: + logger.info(f"Processing resolution variant: {res_key} ({res_max_dim} max dim)") + + # Calculate target dimensions, ensuring no upscaling + if source_max_dim <= res_max_dim: + # If source is smaller or equal, use source dimensions + target_w_res, target_h_res = source_w, source_h + if source_max_dim < res_max_dim: + logger.info(f"Source image ({source_w}x{source_h}) is smaller than target resolution {res_key} ({res_max_dim}). Saving at source resolution.") + else: + # Downscale, maintaining aspect ratio + aspect_ratio = source_w / source_h + if source_w > source_h: + target_w_res = res_max_dim + target_h_res = int(res_max_dim / aspect_ratio) + else: + target_h_res = res_max_dim + target_w_res = int(res_max_dim * aspect_ratio) + logger.info(f"Resizing source image ({source_w}x{source_h}) to {target_w_res}x{target_h_res} for {res_key} variant.") + + + # Resize source_image_data + # Use INTER_AREA for downscaling, INTER_LINEAR or INTER_CUBIC for upscaling (though we avoid upscaling here) + interpolation_method = cv2.INTER_AREA # Good for downscaling + # If we were allowing upscaling, we might add logic like: + # if target_w_res > source_w or target_h_res > source_h: + # interpolation_method = cv2.INTER_LINEAR # Or INTER_CUBIC + + try: + variant_data = ipu.resize_image(source_image_data, (target_w_res, target_h_res), interpolation=interpolation_method) + logger.debug(f"Resized variant data shape: {variant_data.shape}") + except Exception as e: + logger.error(f"Error resizing image for {res_key} variant: {e}") + continue # Skip this variant if resizing fails + + # Filename Construction + current_tokens = output_filename_pattern_tokens.copy() + current_tokens['maptype'] = base_map_type + current_tokens['resolution'] = res_key + current_tokens['ext'] = output_ext + + try: + # Replace placeholders in the pattern + filename = output_filename_pattern + for token, value in current_tokens.items(): + # Ensure value is string for replacement, handle Path objects later + filename = filename.replace(f"[{token}]", str(value)) + + # Construct full output path + output_base_directory = current_tokens.get('output_base_directory') + if not isinstance(output_base_directory, Path): + logger.error(f"'output_base_directory' token is missing or not a Path object: {output_base_directory}. Cannot save file.") + continue # Skip this variant + + output_path = output_base_directory / filename + logger.info(f"Constructed output path: {output_path}") + + # Ensure parent directory exists + output_path.parent.mkdir(parents=True, exist_ok=True) + logger.debug(f"Ensured directory exists: {output_path.parent}") + + except Exception as e: + logger.error(f"Error constructing filepath for {res_key} variant: {e}") + continue # Skip this variant if path construction fails + + + # Prepare Save Parameters + save_params_cv2 = [] + if output_ext == 'jpg': + save_params_cv2.append(cv2.IMWRITE_JPEG_QUALITY) + save_params_cv2.append(jpg_quality) + logger.debug(f"Using JPG quality: {jpg_quality}") + elif output_ext == 'png': + save_params_cv2.append(cv2.IMWRITE_PNG_COMPRESSION) + save_params_cv2.append(png_compression_level) + logger.debug(f"Using PNG compression level: {png_compression_level}") + # Add other format specific parameters if needed (e.g., TIFF compression) + + + # Bit Depth Conversion (just before saving) + image_data_for_save = variant_data + try: + if target_bit_depth == 8: + image_data_for_save = ipu.convert_to_uint8(variant_data) + logger.debug("Converted variant data to uint8.") + elif target_bit_depth == 16: + # ipu.convert_to_uint16 might handle different input types (float, uint8) + # Assuming variant_data might be float after resizing, convert to uint16 + image_data_for_save = ipu.convert_to_uint16(variant_data) + logger.debug("Converted variant data to uint16.") + # Add other bit depth conversions if needed + except Exception as e: + logger.error(f"Error converting image data to target bit depth {target_bit_depth} for {res_key} variant: {e}") + continue # Skip this variant if conversion fails + + + # Saving + try: + # ipu.save_image is expected to handle the actual cv2.imwrite call + success = ipu.save_image(str(output_path), image_data_for_save, params=save_params_cv2) + if success: + logger.info(f"Successfully saved {res_key} variant to {output_path}") + # Collect details for the returned list + saved_file_details.append({ + 'path': str(output_path), + 'resolution_key': res_key, + 'format': output_ext, + 'bit_depth': target_bit_depth, + 'dimensions': (target_w_res, target_h_res) + }) + else: + logger.error(f"Failed to save {res_key} variant to {output_path}") + + except Exception as e: + logger.error(f"Error saving image for {res_key} variant to {output_path}: {e}") + # Continue to next variant even if one fails + + + # Discard in-memory variant after saving (Python's garbage collection handles this) + del variant_data + del image_data_for_save + + + # 5. Return List of Saved File Details + logger.info(f"Finished saving variants for map type: {base_map_type}. Saved {len(saved_file_details)} variants.") + return saved_file_details + +# Optional Helper Functions (can be added here if needed) +# def _determine_target_bit_depth(...): ... +# def _determine_output_format(...): ... +# def _construct_variant_filepath(...): ... \ No newline at end of file -- 2.47.2 From 06552216d5be45c09f4fb111141cdbe439cab608 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Mon, 12 May 2025 14:22:01 +0200 Subject: [PATCH 06/16] Logic Update - Perform MapType transforms before merging --- .../stages/individual_map_processing.py | 83 +++++++++++++------ 1 file changed, 59 insertions(+), 24 deletions(-) diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py index 21b6d47..a937834 100644 --- a/processing/pipeline/stages/individual_map_processing.py +++ b/processing/pipeline/stages/individual_map_processing.py @@ -62,6 +62,44 @@ class IndividualMapProcessingStage(ProcessingStage): It updates the AssetProcessingContext with detailed results. """ + def _apply_in_memory_transformations( + self, + image_data: np.ndarray, + processing_map_type: str, + invert_normal_green: bool, + file_type_definitions: Dict[str, Dict], + log_prefix: str # e.g., "Asset 'X', Key Y, Proc. Tag Z" + ) -> Tuple[np.ndarray, str, List[str]]: + """ + Applies in-memory transformations (Gloss-to-Rough, Normal Green Invert). + + Returns: + Tuple containing: + - Potentially transformed image data. + - Potentially updated processing_map_type (e.g., MAP_GLOSS -> MAP_ROUGH). + - List of strings describing applied transformations. + """ + transformation_notes = [] + current_image_data = image_data # Start with original data + updated_processing_map_type = processing_map_type # Start with original type + + # Gloss-to-Rough + if processing_map_type.startswith("MAP_GLOSS"): + logger.info(f"{log_prefix}: Applying Gloss-to-Rough conversion.") + current_image_data = ipu.invert_image_colors(current_image_data) + updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") + logger.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") + transformation_notes.append("Gloss-to-Rough applied") + + # Normal Green Invert + # Use internal 'MAP_NRM' type for check + if processing_map_type == "MAP_NRM" and invert_normal_green: + logger.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting).") + current_image_data = ipu.invert_normal_map_green_channel(current_image_data) + transformation_notes.append("Normal Green Inverted (Global)") + + return current_image_data, updated_processing_map_type, transformation_notes + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: """ Executes the individual map and merged task processing logic. @@ -222,6 +260,15 @@ class IndividualMapProcessingStage(ProcessingStage): status_notes.append("Could not determine source bit depth, defaulted to 8.") current_image_data = source_image_data.copy() + # Apply transformations for regular maps AFTER loading + log_prefix_regular = f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}" + current_image_data, processing_map_type, transform_notes = self._apply_in_memory_transformations( + current_image_data, processing_map_type, invert_normal_green, file_type_definitions, log_prefix_regular + ) + status_notes.extend(transform_notes) + # Update base_map_type AFTER potential transformation + base_map_type = get_filename_friendly_map_type(processing_map_type, file_type_definitions) + # --- B. Merged Image Task Processing --- elif isinstance(item_data, dict): @@ -309,6 +356,17 @@ class IndividualMapProcessingStage(ProcessingStage): if processing_status == "Failed": break # Exit outer loop if inner loop failed + # --- Apply Pre-Merge Transformations using Helper --- + if input_image_data is not None: # Only transform if we have data + log_prefix_merge_input = f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}, Input {required_map_type_from_rule}" + input_image_data, _, transform_notes = self._apply_in_memory_transformations( + input_image_data, required_map_type_from_rule, invert_normal_green, file_type_definitions, log_prefix_merge_input + ) + # We don't need the updated map type for the input key, just the transformed data + status_notes.extend(transform_notes) # Add notes to the main task's notes + + # --- End Pre-Merge Transformations --- + loaded_inputs_for_merge[channel_char] = input_image_data inputs_used_for_merge[required_map_type_from_rule] = input_source_desc @@ -423,31 +481,8 @@ class IndividualMapProcessingStage(ProcessingStage): logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Entering common processing path for '{base_map_type}' (Internal: '{processing_map_type}')") - # In-Memory Transformations - transformation_applied = False - # Gloss-to-Rough - # Use filename-friendly 'GLOSS' or internal 'MAP_GLOSS' - if base_map_type == "GLOSS" or processing_map_type.startswith("MAP_GLOSS"): - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying Gloss-to-Rough conversion.") - current_image_data = ipu.invert_image_colors(current_image_data) - # Update map types - new_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") - new_base_map_type = get_filename_friendly_map_type(new_processing_map_type, file_type_definitions) - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Map type updated: '{processing_map_type}' -> '{new_processing_map_type}', Filename type: '{base_map_type}' -> '{new_base_map_type}'") - processing_map_type = new_processing_map_type - base_map_type = new_base_map_type - status_notes.append("Gloss-to-Rough applied") - transformation_applied = True - - # Normal Green Invert - # Use filename-friendly 'NRM' or internal 'MAP_NRM' - if (base_map_type == "NRM" or processing_map_type == "MAP_NRM") and invert_normal_green: - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying Normal Map Green Channel Inversion (Global Setting).") - current_image_data = ipu.invert_normal_map_green_channel(current_image_data) - status_notes.append("Normal Green Inverted (Global)") - transformation_applied = True - # Optional Initial Scaling (In Memory) + # Transformations are now handled earlier by the helper function image_to_save = None scaling_applied = False h_pre_scale, w_pre_scale = current_image_data.shape[:2] -- 2.47.2 From ab4db1b8bd1c0512232142c91c0ea8134d1a37eb Mon Sep 17 00:00:00 2001 From: Rusfort Date: Mon, 12 May 2025 16:49:57 +0200 Subject: [PATCH 07/16] BugFixes --- configuration.py | 16 ++ .../stages/individual_map_processing.py | 59 ++++- processing/pipeline/stages/map_merging.py | 9 - .../pipeline/stages/output_organization.py | 243 ++++++++++-------- processing/utils/image_saving_utils.py | 114 ++++---- 5 files changed, 261 insertions(+), 180 deletions(-) diff --git a/configuration.py b/configuration.py index cbaec74..dec107d 100644 --- a/configuration.py +++ b/configuration.py @@ -379,6 +379,22 @@ class Configuration: """Gets the configured JPG quality level.""" return self._core_settings.get('JPG_QUALITY', 95) + @property + def invert_normal_green_globally(self) -> bool: + """Gets the global setting for inverting the green channel of normal maps.""" + # Default to False if the setting is missing in the core config + return self._core_settings.get('invert_normal_map_green_channel_globally', False) + + @property + def overwrite_existing(self) -> bool: + """Gets the setting for overwriting existing files from core settings.""" + return self._core_settings.get('overwrite_existing', False) + + @property + def png_compression_level(self) -> int: + """Gets the PNG compression level from core settings.""" + return self._core_settings.get('PNG_COMPRESSION', 6) # Default to 6 if not found + @property def resolution_threshold_for_jpg(self) -> int: """Gets the pixel dimension threshold for using JPG for 8-bit images.""" diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py index a937834..6386a33 100644 --- a/processing/pipeline/stages/individual_map_processing.py +++ b/processing/pipeline/stages/individual_map_processing.py @@ -86,10 +86,27 @@ class IndividualMapProcessingStage(ProcessingStage): # Gloss-to-Rough if processing_map_type.startswith("MAP_GLOSS"): logger.info(f"{log_prefix}: Applying Gloss-to-Rough conversion.") - current_image_data = ipu.invert_image_colors(current_image_data) - updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") - logger.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") - transformation_notes.append("Gloss-to-Rough applied") + inversion_succeeded = False + # Replicate inversion logic from GlossToRoughConversionStage + if np.issubdtype(current_image_data.dtype, np.floating): + current_image_data = 1.0 - current_image_data + current_image_data = np.clip(current_image_data, 0.0, 1.0) + logger.debug(f"{log_prefix}: Inverted float image data for Gloss->Rough.") + inversion_succeeded = True + elif np.issubdtype(current_image_data.dtype, np.integer): + max_val = np.iinfo(current_image_data.dtype).max + current_image_data = max_val - current_image_data + logger.debug(f"{log_prefix}: Inverted integer image data (max_val: {max_val}) for Gloss->Rough.") + inversion_succeeded = True + else: + logger.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS map. Cannot invert.") + transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") + + # Update type and notes based on success flag + if inversion_succeeded: + updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") + logger.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") + transformation_notes.append("Gloss-to-Rough applied") # Normal Green Invert # Use internal 'MAP_NRM' type for check @@ -119,10 +136,13 @@ class IndividualMapProcessingStage(ProcessingStage): respect_variant_map_types = getattr(config, "respect_variant_map_types", []) # Needed for suffixing logic initial_scaling_mode = getattr(config, "INITIAL_SCALING_MODE", "NONE") merge_dimension_mismatch_strategy = getattr(config, "MERGE_DIMENSION_MISMATCH_STRATEGY", "USE_LARGEST") - invert_normal_green = getattr(config.general_settings, "invert_normal_map_green_channel_globally", False) - output_base_dir = context.output_dir # Assuming output_dir is set in context + invert_normal_green = config.invert_normal_green_globally # Use the new property + output_base_dir = context.output_base_path # This is the FINAL base path asset_name = context.asset_rule.asset_name if context.asset_rule else "UnknownAsset" - output_filename_pattern_tokens = {'asset_name': asset_name, 'output_base_directory': str(output_base_dir)} + # For save_image_variants, the 'output_base_directory' should be the engine_temp_dir, + # as these are intermediate variant files before final organization. + temp_output_base_dir_for_variants = context.engine_temp_dir + output_filename_pattern_tokens = {'asset_name': asset_name, 'output_base_directory': temp_output_base_dir_for_variants} # --- Prepare Items to Process --- items_to_process: List[Union[Tuple[int, FileRule], Tuple[str, Dict]]] = [] @@ -541,12 +561,29 @@ class IndividualMapProcessingStage(ProcessingStage): "base_map_type": base_map_type, # Filename-friendly "source_bit_depth_info": source_bit_depth_info_for_save_util, "output_filename_pattern_tokens": output_filename_pattern_tokens, - "config_obj": config, # Pass the whole config object - "asset_name_for_log": asset_name_for_log, # Pass asset name for logging within save util - "processing_instance_tag": processing_instance_tag # Pass tag for logging within save util + # "config_obj": config, # Removed: save_image_variants doesn't expect this directly + # "asset_name_for_log": asset_name_for_log, # Removed: save_image_variants doesn't expect this + # "processing_instance_tag": processing_instance_tag # Removed: save_image_variants doesn't expect this } - saved_files_details_list = save_image_variants(**save_args) + # Pass only the expected arguments to save_image_variants + # We need to extract the required args from config and pass them individually + save_args_filtered = { + "source_image_data": image_to_save, + "base_map_type": base_map_type, + "source_bit_depth_info": source_bit_depth_info_for_save_util, + "image_resolutions": config.image_resolutions, + "file_type_defs": config.FILE_TYPE_DEFINITIONS, + "output_format_8bit": config.get_8bit_output_format(), + "output_format_16bit_primary": config.get_16bit_output_formats()[0], + "output_format_16bit_fallback": config.get_16bit_output_formats()[1], + "png_compression_level": config.png_compression_level, + "jpg_quality": config.jpg_quality, + "output_filename_pattern_tokens": output_filename_pattern_tokens, + "output_filename_pattern": config.output_filename_pattern, + } + + saved_files_details_list = save_image_variants(**save_args_filtered) if saved_files_details_list: logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unified Save Utility completed successfully. Saved {len(saved_files_details_list)} variants.") diff --git a/processing/pipeline/stages/map_merging.py b/processing/pipeline/stages/map_merging.py index 696b05e..1c8b180 100644 --- a/processing/pipeline/stages/map_merging.py +++ b/processing/pipeline/stages/map_merging.py @@ -47,15 +47,6 @@ class MapMergingStage(ProcessingStage): # The core merge rules are in context.config_obj.map_merge_rules # Each rule in there defines an output_map_type and its inputs. - logger.error(f"Asset {asset_name_for_log}, Potential Merge for {current_map_type}: Merge rule processing needs rework. FileRule lacks 'merge_settings' and 'id'. Skipping this rule.") - context.merged_maps_details[merge_rule_id_hex] = { - 'map_type': current_map_type, - 'status': 'Failed', - 'reason': 'Merge rule processing logic in MapMergingStage needs refactor due to FileRule changes.' - } - continue - - # For now, let's assume no merge rules are processed until the logic is fixed. num_merge_rules_attempted = 0 # If context.config_obj.map_merge_rules exists, iterate it here. diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index 69fe625..c612251 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -34,15 +34,7 @@ class OutputOrganizationStage(ProcessingStage): return context final_output_files: List[str] = [] - overwrite_existing = False - # Correctly access general_settings and overwrite_existing from config_obj - if hasattr(context.config_obj, 'general_settings'): - if isinstance(context.config_obj.general_settings, dict): - overwrite_existing = context.config_obj.general_settings.get('overwrite_existing', False) - elif hasattr(context.config_obj.general_settings, 'overwrite_existing'): # If general_settings is an object - overwrite_existing = getattr(context.config_obj.general_settings, 'overwrite_existing', False) - else: - logger.warning(f"Asset '{asset_name_for_log}': config_obj.general_settings not found, defaulting overwrite_existing to False.") + overwrite_existing = context.config_obj.overwrite_existing output_dir_pattern = getattr(context.config_obj, 'output_directory_pattern', "[supplier]/[assetname]") output_filename_pattern_config = getattr(context.config_obj, 'output_filename_pattern', "[assetname]_[maptype]_[resolution].[ext]") @@ -53,15 +45,104 @@ class OutputOrganizationStage(ProcessingStage): logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.processed_maps_details)} processed individual map entries.") for processed_map_key, details in context.processed_maps_details.items(): map_status = details.get('status') - base_map_type = details.get('map_type', 'unknown_map_type') # Original map type + base_map_type = details.get('map_type', 'unknown_map_type') # Final filename-friendly type - if map_status in ['Processed', 'Processed_No_Variants']: - if not details.get('temp_processed_file'): - logger.debug(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status '{map_status}') due to missing 'temp_processed_file'.") + # --- Handle maps processed by the Unified Save Utility --- + if map_status == 'Processed_Via_Save_Utility': + saved_files_info = details.get('saved_files_info') + if not saved_files_info or not isinstance(saved_files_info, list): + logger.warning(f"Asset '{asset_name_for_log}': Map key '{processed_map_key}' (status '{map_status}') has missing or invalid 'saved_files_info'. Skipping organization.") + details['status'] = 'Organization Failed (Missing saved_files_info)' + continue + + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(saved_files_info)} variants for map key '{processed_map_key}' (map type: {base_map_type}) from Save Utility.") + + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) + map_metadata_entry['map_type'] = base_map_type + map_metadata_entry.setdefault('variant_paths', {}) # Initialize if not present + + processed_any_variant_successfully = False + failed_any_variant = False + + for variant_index, variant_detail in enumerate(saved_files_info): + # Extract info from the save utility's output structure + temp_variant_path_str = variant_detail.get('path') # Key is 'path' + if not temp_variant_path_str: + logger.warning(f"Asset '{asset_name_for_log}': Variant {variant_index} for map '{processed_map_key}' is missing 'path' in saved_files_info. Skipping.") + # Optionally update variant_detail status if it's mutable and tracked, otherwise just skip + continue + + temp_variant_path = Path(temp_variant_path_str) + if not temp_variant_path.is_file(): + logger.warning(f"Asset '{asset_name_for_log}': Temporary variant file '{temp_variant_path}' for map '{processed_map_key}' not found. Skipping.") + continue + + variant_resolution_key = variant_detail.get('resolution_key', f"varRes{variant_index}") + variant_ext = variant_detail.get('format', temp_variant_path.suffix.lstrip('.')) # Use 'format' key + + token_data_variant = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": base_map_type, + "resolution": variant_resolution_key, + "ext": variant_ext, + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_variant_cleaned = {k: v for k, v in token_data_variant.items() if v is not None} + output_filename_variant = generate_path_from_pattern(output_filename_pattern_config, token_data_variant_cleaned) + + try: + relative_dir_path_str_variant = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_variant_cleaned + ) + final_variant_path = Path(context.output_base_path) / Path(relative_dir_path_str_variant) / Path(output_filename_variant) + final_variant_path.parent.mkdir(parents=True, exist_ok=True) + + if final_variant_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': Output variant file {final_variant_path} for map '{processed_map_key}' (res: {variant_resolution_key}) exists and overwrite is disabled. Skipping copy.") + # Optionally update variant_detail status if needed + else: + shutil.copy2(temp_variant_path, final_variant_path) + logger.info(f"Asset '{asset_name_for_log}': Copied variant {temp_variant_path} to {final_variant_path} for map '{processed_map_key}'.") + final_output_files.append(str(final_variant_path)) + # Optionally update variant_detail status if needed + + # Store relative path in metadata + relative_final_variant_path_str = str(Path(relative_dir_path_str_variant) / Path(output_filename_variant)) + map_metadata_entry['variant_paths'][variant_resolution_key] = relative_final_variant_path_str + processed_any_variant_successfully = True + + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy variant {temp_variant_path} for map key '{processed_map_key}' (res: {variant_resolution_key}). Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error - Variant)" + # Optionally update variant_detail status if needed + failed_any_variant = True + + # Update parent map detail status based on variant outcomes + if failed_any_variant: + details['status'] = 'Organization Failed (Save Utility Variants)' + elif processed_any_variant_successfully: + details['status'] = 'Organized (Save Utility Variants)' + else: # No variants were successfully copied (e.g., all skipped due to existing file or missing temp file) + details['status'] = 'Organization Skipped (No Save Utility Variants Copied/Needed)' + + # --- Handle older/other processing statuses (like single file processing) --- + elif map_status in ['Processed', 'Processed_No_Variants', 'Converted_To_Rough']: # Add other single-file statuses if needed + temp_file_path_str = details.get('temp_processed_file') + if not temp_file_path_str: + logger.warning(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status '{map_status}') due to missing 'temp_processed_file'.") details['status'] = 'Organization Skipped (Missing Temp File)' continue - - temp_file_path = Path(details['temp_processed_file']) + + temp_file_path = Path(temp_file_path_str) + if not temp_file_path.is_file(): + logger.warning(f"Asset '{asset_name_for_log}': Temporary file '{temp_file_path}' for map '{processed_map_key}' not found. Skipping.") + details['status'] = 'Organization Skipped (Temp File Not Found)' + continue + resolution_str = details.get('processed_resolution_name', details.get('original_resolution_name', 'resX')) token_data = { @@ -74,7 +155,7 @@ class OutputOrganizationStage(ProcessingStage): "sha5": getattr(context, 'sha5_value', None) } token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} - + output_filename = generate_path_from_pattern(output_filename_pattern_config, token_data_cleaned) try: @@ -87,18 +168,21 @@ class OutputOrganizationStage(ProcessingStage): if final_path.exists() and not overwrite_existing: logger.info(f"Asset '{asset_name_for_log}': Output file {final_path} for map '{processed_map_key}' exists and overwrite is disabled. Skipping copy.") + details['status'] = 'Organized (Exists, Skipped Copy)' else: shutil.copy2(temp_file_path, final_path) logger.info(f"Asset '{asset_name_for_log}': Copied {temp_file_path} to {final_path} for map '{processed_map_key}'.") final_output_files.append(str(final_path)) - + details['status'] = 'Organized' + details['final_output_path'] = str(final_path) - details['status'] = 'Organized' # Update asset_metadata for metadata.json map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) map_metadata_entry['map_type'] = base_map_type map_metadata_entry['path'] = str(Path(relative_dir_path_str) / Path(output_filename)) # Store relative path + if 'variant_paths' in map_metadata_entry: # Clean up variant paths if present from previous runs + del map_metadata_entry['variant_paths'] except Exception as e: logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} for map key '{processed_map_key}'. Error: {e}", exc_info=True) @@ -106,79 +190,36 @@ class OutputOrganizationStage(ProcessingStage): context.asset_metadata['status'] = "Failed (Output Organization Error)" details['status'] = 'Organization Failed' + # --- Handle legacy 'Processed_With_Variants' status (if still needed, otherwise remove) --- + # This block is kept for potential backward compatibility but might be redundant + # if 'Processed_Via_Save_Utility' is the new standard for variants. elif map_status == 'Processed_With_Variants': - variants = details.get('variants') - if not variants: # No variants list, or it's empty - logger.warning(f"Asset '{asset_name_for_log}': Map key '{processed_map_key}' (status '{map_status}') has no 'variants' list or it is empty. Attempting fallback to base file.") - if not details.get('temp_processed_file'): - logger.error(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (fallback) as 'temp_processed_file' is also missing.") - details['status'] = 'Organization Failed (No Variants, No Temp File)' - continue # Skip to next map key + variants = details.get('variants') # Expects old structure: list of dicts with 'temp_path' + if not variants: + logger.warning(f"Asset '{asset_name_for_log}': Map key '{processed_map_key}' (status '{map_status}') has no 'variants' list. Skipping.") + details['status'] = 'Organization Failed (Legacy Variants Missing)' + continue - # Fallback: Process the base temp_processed_file - temp_file_path = Path(details['temp_processed_file']) - resolution_str = details.get('processed_resolution_name', details.get('original_resolution_name', 'baseRes')) + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(variants)} legacy variants for map key '{processed_map_key}' (map type: {base_map_type}).") - token_data = { - "assetname": asset_name_for_log, - "supplier": context.effective_supplier or "DefaultSupplier", - "maptype": base_map_type, - "resolution": resolution_str, - "ext": temp_file_path.suffix.lstrip('.'), - "incrementingvalue": getattr(context, 'incrementing_value', None), - "sha5": getattr(context, 'sha5_value', None) - } - token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} - output_filename = generate_path_from_pattern(output_filename_pattern_config, token_data_cleaned) - - try: - relative_dir_path_str = generate_path_from_pattern( - pattern_string=output_dir_pattern, - token_data=token_data_cleaned - ) - final_path = Path(context.output_base_path) / Path(relative_dir_path_str) / Path(output_filename) - final_path.parent.mkdir(parents=True, exist_ok=True) - - if final_path.exists() and not overwrite_existing: - logger.info(f"Asset '{asset_name_for_log}': Output file {final_path} for map '{processed_map_key}' (fallback) exists and overwrite is disabled. Skipping copy.") - else: - shutil.copy2(temp_file_path, final_path) - logger.info(f"Asset '{asset_name_for_log}': Copied {temp_file_path} to {final_path} for map '{processed_map_key}' (fallback).") - final_output_files.append(str(final_path)) - - details['final_output_path'] = str(final_path) - details['status'] = 'Organized (Base File Fallback)' - - map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) - map_metadata_entry['map_type'] = base_map_type - map_metadata_entry['path'] = str(Path(relative_dir_path_str) / Path(output_filename)) - if 'variant_paths' in map_metadata_entry: # Clean up if it was somehow set - del map_metadata_entry['variant_paths'] - except Exception as e: - logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} (fallback) for map key '{processed_map_key}'. Error: {e}", exc_info=True) - context.status_flags['output_organization_error'] = True - context.asset_metadata['status'] = "Failed (Output Organization Error - Fallback)" - details['status'] = 'Organization Failed (Fallback)' - continue # Finished with this map key due to fallback - - # If we are here, 'variants' list exists and is not empty. Proceed with variant processing. - logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(variants)} variants for map key '{processed_map_key}' (map type: {base_map_type}).") - map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) map_metadata_entry['map_type'] = base_map_type - map_metadata_entry.setdefault('variant_paths', {}) # Initialize if not present + map_metadata_entry.setdefault('variant_paths', {}) processed_any_variant_successfully = False failed_any_variant = False for variant_index, variant_detail in enumerate(variants): - temp_variant_path_str = variant_detail.get('temp_path') + temp_variant_path_str = variant_detail.get('temp_path') # Uses 'temp_path' if not temp_variant_path_str: - logger.warning(f"Asset '{asset_name_for_log}': Variant {variant_index} for map '{processed_map_key}' is missing 'temp_path'. Skipping.") - variant_detail['status'] = 'Organization Skipped (Missing Temp Path)' + logger.warning(f"Asset '{asset_name_for_log}': Legacy Variant {variant_index} for map '{processed_map_key}' is missing 'temp_path'. Skipping.") continue - + temp_variant_path = Path(temp_variant_path_str) + if not temp_variant_path.is_file(): + logger.warning(f"Asset '{asset_name_for_log}': Legacy temporary variant file '{temp_variant_path}' for map '{processed_map_key}' not found. Skipping.") + continue + variant_resolution_key = variant_detail.get('resolution_key', f"varRes{variant_index}") variant_ext = temp_variant_path.suffix.lstrip('.') @@ -193,7 +234,7 @@ class OutputOrganizationStage(ProcessingStage): } token_data_variant_cleaned = {k: v for k, v in token_data_variant.items() if v is not None} output_filename_variant = generate_path_from_pattern(output_filename_pattern_config, token_data_variant_cleaned) - + try: relative_dir_path_str_variant = generate_path_from_pattern( pattern_string=output_dir_pattern, @@ -203,50 +244,32 @@ class OutputOrganizationStage(ProcessingStage): final_variant_path.parent.mkdir(parents=True, exist_ok=True) if final_variant_path.exists() and not overwrite_existing: - logger.info(f"Asset '{asset_name_for_log}': Output variant file {final_variant_path} for map '{processed_map_key}' (res: {variant_resolution_key}) exists and overwrite is disabled. Skipping copy.") - variant_detail['status'] = 'Organized (Exists, Skipped Copy)' + logger.info(f"Asset '{asset_name_for_log}': Output legacy variant file {final_variant_path} exists and overwrite is disabled. Skipping copy.") else: shutil.copy2(temp_variant_path, final_variant_path) - logger.info(f"Asset '{asset_name_for_log}': Copied variant {temp_variant_path} to {final_variant_path} for map '{processed_map_key}'.") + logger.info(f"Asset '{asset_name_for_log}': Copied legacy variant {temp_variant_path} to {final_variant_path}.") final_output_files.append(str(final_variant_path)) - variant_detail['status'] = 'Organized' - - variant_detail['final_output_path'] = str(final_variant_path) - # Store the Path object for metadata stage to make it relative later - variant_detail['final_output_path_for_metadata'] = final_variant_path + relative_final_variant_path_str = str(Path(relative_dir_path_str_variant) / Path(output_filename_variant)) map_metadata_entry['variant_paths'][variant_resolution_key] = relative_final_variant_path_str processed_any_variant_successfully = True except Exception as e: - logger.error(f"Asset '{asset_name_for_log}': Failed to copy variant {temp_variant_path} for map key '{processed_map_key}' (res: {variant_resolution_key}). Error: {e}", exc_info=True) + logger.error(f"Asset '{asset_name_for_log}': Failed to copy legacy variant {temp_variant_path}. Error: {e}", exc_info=True) context.status_flags['output_organization_error'] = True - context.asset_metadata['status'] = "Failed (Output Organization Error - Variant)" - variant_detail['status'] = 'Organization Failed' + context.asset_metadata['status'] = "Failed (Output Organization Error - Legacy Variant)" failed_any_variant = True - - # Update parent map detail status based on variant outcomes + if failed_any_variant: - details['status'] = 'Organization Failed (Variants)' + details['status'] = 'Organization Failed (Legacy Variants)' elif processed_any_variant_successfully: - # Check if all processable variants were organized - all_attempted_organized = True - for v_detail in variants: - if v_detail.get('temp_path') and not v_detail.get('status', '').startswith('Organized'): - all_attempted_organized = False - break - if all_attempted_organized: - details['status'] = 'Organized (All Attempted Variants)' - else: - details['status'] = 'Partially Organized (Variants)' - elif not any(v.get('temp_path') for v in variants): # No variants had temp_paths to begin with - details['status'] = 'Processed_With_Variants (No Valid Variants to Organize)' - else: # Variants list existed, items had temp_paths, but none were successfully organized (e.g., all skipped due to existing file and no overwrite) - details['status'] = 'Organization Skipped (No Variants Copied/Needed)' + details['status'] = 'Organized (Legacy Variants)' + else: + details['status'] = 'Organization Skipped (No Legacy Variants Copied/Needed)' - - else: # Other statuses like 'Skipped', 'Failed', 'Organization Failed' etc. - logger.debug(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status: '{map_status}') for organization as it's not 'Processed', 'Processed_No_Variants', or 'Processed_With_Variants'.") + # --- Handle other statuses (Skipped, Failed, etc.) --- + else: # Catches statuses not explicitly handled above + logger.debug(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status: '{map_status}') for organization as it's not a recognized final processed state or variant state.") continue else: logger.debug(f"Asset '{asset_name_for_log}': No processed individual maps to organize.") diff --git a/processing/utils/image_saving_utils.py b/processing/utils/image_saving_utils.py index 7a51d14..66591a8 100644 --- a/processing/utils/image_saving_utils.py +++ b/processing/utils/image_saving_utils.py @@ -75,7 +75,10 @@ def save_image_variants( source_max_dim = max(source_h, source_w) # 1. Use provided configuration inputs (already available as function arguments) - logger.info(f"Saving variants for map type: {base_map_type}") + logger.info(f"SaveImageVariants: Starting for map type: {base_map_type}. Source shape: {source_image_data.shape}, Source bit depths: {source_bit_depth_info}") + logger.debug(f"SaveImageVariants: Resolutions: {image_resolutions}, File Type Defs: {file_type_defs.keys()}, Output Formats: 8bit={output_format_8bit}, 16bit_pri={output_format_16bit_primary}, 16bit_fall={output_format_16bit_fallback}") + logger.debug(f"SaveImageVariants: PNG Comp: {png_compression_level}, JPG Qual: {jpg_quality}") + logger.debug(f"SaveImageVariants: Output Tokens: {output_filename_pattern_tokens}, Output Pattern: {output_filename_pattern}") # 2. Determine Target Bit Depth target_bit_depth = 8 # Default @@ -111,46 +114,54 @@ def save_image_variants( logger.error(f"Unsupported target bit depth: {target_bit_depth}. Defaulting to 8-bit format.") output_ext = output_format_8bit.lstrip('.').lower() - logger.info(f"Target bit depth: {target_bit_depth}, Output format: {output_ext}") + logger.info(f"SaveImageVariants: Determined target bit depth: {target_bit_depth}, Output format: {output_ext} for map type {base_map_type}") # 4. Generate and Save Resolution Variants # Sort resolutions by max dimension descending sorted_resolutions = sorted(image_resolutions.items(), key=lambda item: item[1], reverse=True) for res_key, res_max_dim in sorted_resolutions: - logger.info(f"Processing resolution variant: {res_key} ({res_max_dim} max dim)") + logger.info(f"SaveImageVariants: Processing variant {res_key} ({res_max_dim}px) for {base_map_type}") - # Calculate target dimensions, ensuring no upscaling - if source_max_dim <= res_max_dim: - # If source is smaller or equal, use source dimensions + # --- Prevent Upscaling --- + # Skip this resolution variant if its target dimension is larger than the source image's largest dimension. + if res_max_dim > source_max_dim: + logger.info(f"SaveImageVariants: Skipping variant {res_key} ({res_max_dim}px) for {base_map_type} because target resolution is larger than source ({source_max_dim}px).") + continue # Skip to the next resolution + + # Calculate target dimensions for valid variants (equal or smaller than source) + if source_max_dim == res_max_dim: + # Use source dimensions if target is equal target_w_res, target_h_res = source_w, source_h - if source_max_dim < res_max_dim: - logger.info(f"Source image ({source_w}x{source_h}) is smaller than target resolution {res_key} ({res_max_dim}). Saving at source resolution.") - else: + logger.info(f"SaveImageVariants: Using source resolution ({source_w}x{source_h}) for {res_key} variant of {base_map_type} as target matches source.") + else: # Downscale (source_max_dim > res_max_dim) # Downscale, maintaining aspect ratio aspect_ratio = source_w / source_h - if source_w > source_h: + if source_w >= source_h: # Use >= to handle square images correctly target_w_res = res_max_dim - target_h_res = int(res_max_dim / aspect_ratio) + target_h_res = max(1, int(res_max_dim / aspect_ratio)) # Ensure height is at least 1 else: target_h_res = res_max_dim - target_w_res = int(res_max_dim * aspect_ratio) - logger.info(f"Resizing source image ({source_w}x{source_h}) to {target_w_res}x{target_h_res} for {res_key} variant.") + target_w_res = max(1, int(res_max_dim * aspect_ratio)) # Ensure width is at least 1 + logger.info(f"SaveImageVariants: Calculated downscale for {base_map_type} {res_key}: from ({source_w}x{source_h}) to ({target_w_res}x{target_h_res})") - # Resize source_image_data - # Use INTER_AREA for downscaling, INTER_LINEAR or INTER_CUBIC for upscaling (though we avoid upscaling here) - interpolation_method = cv2.INTER_AREA # Good for downscaling - # If we were allowing upscaling, we might add logic like: - # if target_w_res > source_w or target_h_res > source_h: - # interpolation_method = cv2.INTER_LINEAR # Or INTER_CUBIC - - try: - variant_data = ipu.resize_image(source_image_data, (target_w_res, target_h_res), interpolation=interpolation_method) - logger.debug(f"Resized variant data shape: {variant_data.shape}") - except Exception as e: - logger.error(f"Error resizing image for {res_key} variant: {e}") - continue # Skip this variant if resizing fails + # Resize source_image_data (only if necessary) + if (target_w_res, target_h_res) == (source_w, source_h): + # No resize needed if dimensions match + variant_data = source_image_data.copy() # Copy to avoid modifying original if needed later + logger.debug(f"SaveImageVariants: No resize needed for {base_map_type} {res_key}, using copy of source data.") + else: + # Perform resize only if dimensions differ (i.e., downscaling) + interpolation_method = cv2.INTER_AREA # Good for downscaling + try: + variant_data = ipu.resize_image(source_image_data, target_w_res, target_h_res, interpolation=interpolation_method) + if variant_data is None: # Check if resize failed + raise ValueError("ipu.resize_image returned None") + logger.debug(f"SaveImageVariants: Resized variant data shape for {base_map_type} {res_key}: {variant_data.shape}") + except Exception as e: + logger.error(f"SaveImageVariants: Error resizing image for {base_map_type} {res_key} variant: {e}") + continue # Skip this variant if resizing fails # Filename Construction current_tokens = output_filename_pattern_tokens.copy() @@ -172,14 +183,14 @@ def save_image_variants( continue # Skip this variant output_path = output_base_directory / filename - logger.info(f"Constructed output path: {output_path}") + logger.info(f"SaveImageVariants: Constructed output path for {base_map_type} {res_key}: {output_path}") # Ensure parent directory exists output_path.parent.mkdir(parents=True, exist_ok=True) - logger.debug(f"Ensured directory exists: {output_path.parent}") + logger.debug(f"SaveImageVariants: Ensured directory exists for {base_map_type} {res_key}: {output_path.parent}") except Exception as e: - logger.error(f"Error constructing filepath for {res_key} variant: {e}") + logger.error(f"SaveImageVariants: Error constructing filepath for {base_map_type} {res_key} variant: {e}") continue # Skip this variant if path construction fails @@ -188,37 +199,40 @@ def save_image_variants( if output_ext == 'jpg': save_params_cv2.append(cv2.IMWRITE_JPEG_QUALITY) save_params_cv2.append(jpg_quality) - logger.debug(f"Using JPG quality: {jpg_quality}") + logger.debug(f"SaveImageVariants: Using JPG quality: {jpg_quality} for {base_map_type} {res_key}") elif output_ext == 'png': save_params_cv2.append(cv2.IMWRITE_PNG_COMPRESSION) save_params_cv2.append(png_compression_level) - logger.debug(f"Using PNG compression level: {png_compression_level}") + logger.debug(f"SaveImageVariants: Using PNG compression level: {png_compression_level} for {base_map_type} {res_key}") # Add other format specific parameters if needed (e.g., TIFF compression) - # Bit Depth Conversion (just before saving) - image_data_for_save = variant_data - try: - if target_bit_depth == 8: - image_data_for_save = ipu.convert_to_uint8(variant_data) - logger.debug("Converted variant data to uint8.") - elif target_bit_depth == 16: - # ipu.convert_to_uint16 might handle different input types (float, uint8) - # Assuming variant_data might be float after resizing, convert to uint16 - image_data_for_save = ipu.convert_to_uint16(variant_data) - logger.debug("Converted variant data to uint16.") - # Add other bit depth conversions if needed - except Exception as e: - logger.error(f"Error converting image data to target bit depth {target_bit_depth} for {res_key} variant: {e}") - continue # Skip this variant if conversion fails + # Bit Depth Conversion is handled by ipu.save_image via output_dtype_target + image_data_for_save = variant_data # Use the resized variant data directly + + # Determine the target dtype for ipu.save_image + output_dtype_for_save: Optional[np.dtype] = None + if target_bit_depth == 8: + output_dtype_for_save = np.uint8 + elif target_bit_depth == 16: + output_dtype_for_save = np.uint16 + # Add other target bit depths like float16/float32 if necessary + # elif target_bit_depth == 32: # Assuming float32 for EXR etc. + # output_dtype_for_save = np.float32 # Saving try: # ipu.save_image is expected to handle the actual cv2.imwrite call - success = ipu.save_image(str(output_path), image_data_for_save, params=save_params_cv2) + logger.debug(f"SaveImageVariants: Attempting to save {base_map_type} {res_key} to {output_path} with params {save_params_cv2}, target_dtype: {output_dtype_for_save}") + success = ipu.save_image( + str(output_path), + image_data_for_save, + output_dtype_target=output_dtype_for_save, # Pass the target dtype + params=save_params_cv2 + ) if success: - logger.info(f"Successfully saved {res_key} variant to {output_path}") + logger.info(f"SaveImageVariants: Successfully saved {base_map_type} {res_key} variant to {output_path}") # Collect details for the returned list saved_file_details.append({ 'path': str(output_path), @@ -228,10 +242,10 @@ def save_image_variants( 'dimensions': (target_w_res, target_h_res) }) else: - logger.error(f"Failed to save {res_key} variant to {output_path}") + logger.error(f"SaveImageVariants: Failed to save {base_map_type} {res_key} variant to {output_path} (ipu.save_image returned False)") except Exception as e: - logger.error(f"Error saving image for {res_key} variant to {output_path}: {e}") + logger.error(f"SaveImageVariants: Error during ipu.save_image for {base_map_type} {res_key} variant to {output_path}: {e}", exc_info=True) # Continue to next variant even if one fails -- 2.47.2 From 81d8404576dc773152d8f1160398f410283751fb Mon Sep 17 00:00:00 2001 From: Rusfort Date: Mon, 12 May 2025 22:46:49 +0200 Subject: [PATCH 08/16] yet another processing refactor :3 Mostly works --- ProjectNotes/PipelineRefactoringPlan.md | 194 ++--- config/app_settings.json | 2 +- processing/pipeline/asset_context.py | 87 ++- processing/pipeline/orchestrator.py | 409 +++++++++-- .../stages/individual_map_processing.py | 695 ------------------ processing/pipeline/stages/initial_scaling.py | 83 +++ processing/pipeline/stages/map_merging.py | 153 ---- .../pipeline/stages/merged_task_processor.py | 304 ++++++++ .../pipeline/stages/output_organization.py | 30 +- .../stages/prepare_processing_items.py | 92 +++ .../pipeline/stages/regular_map_processor.py | 257 +++++++ processing/pipeline/stages/save_variants.py | 88 +++ processing_engine.py | 39 +- utils/path_utils.py | 33 + 14 files changed, 1384 insertions(+), 1082 deletions(-) delete mode 100644 processing/pipeline/stages/individual_map_processing.py create mode 100644 processing/pipeline/stages/initial_scaling.py delete mode 100644 processing/pipeline/stages/map_merging.py create mode 100644 processing/pipeline/stages/merged_task_processor.py create mode 100644 processing/pipeline/stages/prepare_processing_items.py create mode 100644 processing/pipeline/stages/regular_map_processor.py create mode 100644 processing/pipeline/stages/save_variants.py diff --git a/ProjectNotes/PipelineRefactoringPlan.md b/ProjectNotes/PipelineRefactoringPlan.md index 18b8df8..f74fa35 100644 --- a/ProjectNotes/PipelineRefactoringPlan.md +++ b/ProjectNotes/PipelineRefactoringPlan.md @@ -1,154 +1,72 @@ -# Revised Refactoring Plan: Processing Pipeline +# Processing Pipeline Refactoring Plan -**Overall Goal:** To simplify the processing pipeline by refactoring the map merging process, consolidating map transformations (Gloss-to-Rough, Normal Green Invert), and creating a unified, configurable image saving utility. This plan aims to improve clarity, significantly reduce I/O by favoring in-memory operations, and make Power-of-Two (POT) scaling an optional, integrated step. +## 1. Problem Summary -**I. Map Merging Stage (`processing/pipeline/stages/map_merging.py`)** +The current processing pipeline, particularly the `IndividualMapProcessingStage`, exhibits maintainability challenges: -* **Objective:** Transform this stage from performing merges to generating tasks for merged images. -* **Changes to `MapMergingStage.execute()`:** - 1. Iterate through `context.config_obj.map_merge_rules`. - 2. Identify required input map types and find their corresponding source file paths (potentially original paths or outputs of prior essential stages if any). - 3. Create "merged image tasks" and add them to `context.merged_image_tasks`. - 4. Each task entry will contain: - * `output_map_type`: Target map type (e.g., "MAP_NRMRGH"). - * `input_map_sources`: Details of source map types and file paths. - * `merge_rule_config`: Complete merge rule configuration (including fallback values). - * `source_dimensions`: Dimensions for the high-resolution merged map basis. - * `source_bit_depths`: Information about the bit depth of original source maps (needed for "respect_inputs" rule in save utility). +* **High Complexity:** The stage handles too many responsibilities (loading, merging, transformations, scaling, saving). +* **Duplicated Logic:** Image transformations (Gloss-to-Rough, Normal Green Invert) are duplicated within the stage instead of relying solely on dedicated stages or being handled consistently. +* **Tight Coupling:** Heavy reliance on the large, mutable `AssetProcessingContext` object creates implicit dependencies and makes isolated testing difficult. -**II. Individual Map Processing Stage (`processing/pipeline/stages/individual_map_processing.py`)** +## 2. Refactoring Goals -* **Objective:** Adapt this stage to handle both individual raw maps and `merged_image_tasks`. It will perform necessary in-memory transformations (Gloss-to-Rough, Normal Green Invert) and prepare a single "high-resolution" source image (in memory) to be passed to the `UnifiedSaveUtility`. -* **Changes to `IndividualMapProcessingStage.execute()`:** - 1. **Input Handling Loop:** Iterate through `context.files_to_process` (regular maps) and `context.merged_image_tasks`. - 2. **Image Data Preparation:** - * **For regular maps:** Load the source image file into memory (`current_image_data`). Determine `base_map_type` from the `FileRule`. Determine source bit depth. - * **For `merged_image_tasks`:** - * Attempt to load input map files specified in `input_map_sources`. If a file is missing, log a warning and generate placeholder data using fallback values from `merge_rule_config`. Handle other load errors. - * Check dimensions of loaded/fallback data. Apply `MERGE_DIMENSION_MISMATCH_STRATEGY` (e.g., resize, log warning) or handle "ERROR_SKIP" strategy (log error, mark task failed, continue). - * Perform the merge operation in memory according to `merge_rule_config`. Result is `current_image_data`. `base_map_type` is the task's `output_map_type`. - 3. **In-Memory Transformations:** - * **Gloss-to-Rough Conversion:** - * If `base_map_type` starts with "MAP_GLOSS": - * Perform inversion on `current_image_data` (in memory). - * Update `base_map_type` to "MAP_ROUGH". - * Log the conversion. - * **Normal Map Green Channel Inversion:** - * If `base_map_type` is "NORMAL" *and* `context.config_obj.general_settings.invert_normal_map_green_channel_globally` is true: - * Perform green channel inversion on `current_image_data` (in memory). - * Log the inversion. - 4. **Optional Initial Scaling (POT or other):** - * Check `INITIAL_SCALING_MODE` from config. - * If `"POT_DOWNSCALE"`: Perform POT downscaling on `current_image_data` (in memory) -> `image_to_save`. - * If `"NONE"`: `image_to_save` = `current_image_data`. - * *(Note: `image_to_save` now reflects any prior transformations)*. - 5. **Color Management:** Apply necessary color management to `image_to_save`. - 6. **Pass to Save Utility:** Pass `image_to_save`, the (potentially updated) `base_map_type`, original source bit depth info (for "respect_inputs" rule), and other necessary details (like specific config values) to the `UnifiedSaveUtility`. - 7. **Remove Old Logic:** Remove old save logic, separate Gloss/Normal stage calls. - 8. **Context Update:** Update `context.processed_maps_details` with results from the `UnifiedSaveUtility`, including notes about any conversions/inversions performed or merge task failures. +* Improve code readability and understanding. +* Enhance maintainability by localizing changes and removing duplication. +* Increase testability through smaller, focused components with clear interfaces. +* Clarify data dependencies between pipeline stages. +* Adhere more closely to the Single Responsibility Principle (SRP). -**III. Unified Image Save Utility (New file: `processing/utils/image_saving_utils.py`)** +## 3. Proposed New Pipeline Stages -* **Objective:** Centralize all image saving logic (resolution variants, format, bit depth, compression). -* **Interface (e.g., `save_image_variants` function):** - * **Inputs:** - * `source_image_data (np.ndarray)`: High-res image data (in memory, potentially transformed). - * `base_map_type (str)`: Final map type (e.g., "COL", "ROUGH", "NORMAL", "MAP_NRMRGH"). - * `source_bit_depth_info (list)`: List of original source bit depth(s). - * Specific config values (e.g., `image_resolutions: dict`, `file_type_defs: dict`, `output_format_8bit: str`, etc.). - * `output_filename_pattern_tokens (dict)`. - * `output_base_directory (Path)`. - * **Core Functionality:** - 1. Use provided configuration inputs. - 2. Determine Target Bit Depth: - * Use `bit_depth_rule` for `base_map_type` from `file_type_defs`. - * If "force_8bit": target 8-bit. - * If "respect_inputs": If `any(depth > 8 for depth in source_bit_depth_info)`, target 16-bit, else 8-bit. - 3. Determine Output File Format(s) (based on target bit depth, config). - 4. Generate and Save Resolution Variants: - * Iterate through `image_resolutions`. - * Resize `source_image_data` (in memory) for each variant (no upscaling). - * Construct filename and path. - * Prepare save parameters. - * Convert variant data to target bit depth/color space just before saving. - * Save variant using `cv2.imwrite` or similar. - * Discard in-memory variant after saving. - 5. Return List of Saved File Details: `{'path': str, 'resolution_key': str, 'format': str, 'bit_depth': int, 'dimensions': (w,h)}`. - * **Memory Management:** Holds `source_image_data` + one variant in memory at a time. +Replace the existing `IndividualMapProcessingStage` with the following sequence of smaller, focused stages, executed by the `PipelineOrchestrator` for each processing item: -**IV. Configuration Changes (`config/app_settings.json`)** +1. **`PrepareProcessingItemsStage`:** + * **Responsibility:** Identifies and lists all items (`FileRule`, `MergeTaskDefinition`) to be processed from the main context. + * **Output:** Updates `context.processing_items`. -1. **Add/Confirm Settings:** - * `"INITIAL_SCALING_MODE": "POT_DOWNSCALE"` (Options: "POT_DOWNSCALE", "NONE"). - * `"MERGE_DIMENSION_MISMATCH_STRATEGY": "USE_LARGEST"` (Options: "USE_LARGEST", "USE_FIRST", "ERROR_SKIP"). - * Ensure `general_settings.invert_normal_map_green_channel_globally` exists (boolean). -2. **Review/Confirm Existing Settings:** - * Ensure `IMAGE_RESOLUTIONS`, `FILE_TYPE_DEFINITIONS` (`bit_depth_rule`), `MAP_MERGE_RULES` (`output_bit_depth`, fallback values), format settings, quality settings are comprehensive. -3. **Remove Obsolete Setting:** - * `RESPECT_VARIANT_MAP_TYPES`. +2. **`RegularMapProcessorStage`:** (Handles `FileRule` items) + * **Responsibility:** Loads source image, determines internal map type (with suffix), applies relevant transformations (Gloss-to-Rough, Normal Green Invert), determines original metadata. + * **Output:** `ProcessedRegularMapData` object containing transformed image data and metadata. -**V. Data Flow Diagram (Mermaid)** +3. **`MergedTaskProcessorStage`:** (Handles `MergeTaskDefinition` items) + * **Responsibility:** Loads input images, applies transformations to inputs, handles fallbacks/resizing, performs merge operation. + * **Output:** `ProcessedMergedMapData` object containing merged image data and metadata. + +4. **`InitialScalingStage`:** (Optional) + * **Responsibility:** Applies configured scaling (e.g., POT downscale) to the processed image data received from the previous stage. + * **Output:** Scaled image data. + +5. **`SaveVariantsStage`:** + * **Responsibility:** Takes the final processed (and potentially scaled) image data and orchestrates saving variants using the `save_image_variants` utility. + * **Output:** List of saved file details (`saved_files_details`). + +## 4. Proposed Data Flow + +* **Input/Output Objects:** Key stages (`RegularMapProcessor`, `MergedTaskProcessor`, `InitialScaling`, `SaveVariants`) will use specific Input and Output dataclasses for clearer interfaces. +* **Orchestrator Role:** The `PipelineOrchestrator` manages the overall flow. It calls stages, passes necessary data (extracting image data references and metadata from previous stage outputs to create inputs for the next), receives output objects, and integrates final results (like saved file details) back into the main `AssetProcessingContext`. +* **Image Data Handling:** Large image arrays (`np.ndarray`) are passed primarily via stage return values (Output objects) and used as inputs to subsequent stages, managed by the Orchestrator. They are not stored long-term in the main `AssetProcessingContext`. +* **Main Context:** The `AssetProcessingContext` remains for overall state (rules, paths, configuration access, final status tracking) and potentially for simpler stages with minimal side effects. + +## 5. Visualization (Conceptual) ```mermaid graph TD - A[Start Asset Processing] --> B[File Rules Filter]; - B --> STAGE_INDIVIDUAL_MAP_PROCESSING[Individual Map Processing Stage]; - - subgraph STAGE_INDIVIDUAL_MAP_PROCESSING [Individual Map Processing Stage] - direction LR - C1{Is it a regular map or merged task?} - C1 -- Regular Map --> C2[Load Source Image File into Memory (current_image_data)]; - C1 -- Merged Task (from Map Merging Stage) --> C3[Load Inputs (Handle Missing w/ Fallbacks) & Merge in Memory (Handle Dim Mismatch) (current_image_data)]; - - C2 --> C4[current_image_data]; - C3 --> C4; - - C4 --> C4_TRANSFORM{Transformations?}; - C4_TRANSFORM -- Gloss Map? --> C4a[Invert Data (in memory), Update base_map_type to ROUGH]; - C4_TRANSFORM -- Normal Map & Invert Config? --> C4b[Invert Green Channel (in memory)]; - C4_TRANSFORM -- No Transformation Needed --> C4_POST_TRANSFORM; - C4a --> C4_POST_TRANSFORM; - C4b --> C4_POST_TRANSFORM; - - C4_POST_TRANSFORM[current_image_data (potentially transformed)] --> C5{INITIAL_SCALING_MODE}; - C5 -- "POT_DOWNSCALE" --> C6[Perform POT Scale (in memory) --> image_to_save]; - C5 -- "NONE" --> C7[image_to_save = current_image_data]; - - C6 --> C8[Apply Color Management to image_to_save (in memory)]; - C7 --> C8; - - C8 --> UNIFIED_SAVE_UTILITY[Call Unified Save Utility with image_to_save, final base_map_type, source bit depth info, config]; + subgraph Proposed Pipeline Stages + Start --> Prep[PrepareProcessingItemsStage] + Prep --> ItemLoop{Loop per Item} + ItemLoop -- FileRule --> RegProc[RegularMapProcessorStage] + ItemLoop -- MergeTask --> MergeProc[MergedTaskProcessorStage] + RegProc --> Scale(InitialScalingStage) + MergeProc --> Scale + Scale --> Save[SaveVariantsStage] + Save --> UpdateContext[Update Main Context w/ Results] + UpdateContext --> ItemLoop end +``` - UNIFIED_SAVE_UTILITY --> H[Update context.processed_maps_details with list of saved files & notes]; - H --> STAGE_METADATA_SAVE[Metadata Finalization & Save Stage]; +## 6. Benefits - STAGE_MAP_MERGING[Map Merging Stage] --> N{Identify Merge Rules}; - N --> O[Create Merged Image Tasks (incl. inputs, config, source bit depths)]; - O --> STAGE_INDIVIDUAL_MAP_PROCESSING; %% Feed tasks - - A --> STAGE_OTHER_INITIAL[Other Initial Stages] - STAGE_OTHER_INITIAL --> STAGE_MAP_MERGING; - - STAGE_METADATA_SAVE --> Z[End Asset Processing]; - - subgraph UNIFIED_SAVE_UTILITY_DETAILS [Unified Save Utility (processing.utils.image_saving_utils)] - direction TB - INPUTS[Input: in-memory image_to_save, final base_map_type, source_bit_depth_info, config_params, tokens, out_base_dir] - INPUTS --> CONFIG_LOAD[1. Use Provided Config Params] - CONFIG_LOAD --> DETERMINE_BIT_DEPTH[2. Determine Target Bit Depth (using rule & source_bit_depth_info)] - DETERMINE_BIT_DEPTH --> DETERMINE_FORMAT[3. Determine Output Format] - DETERMINE_FORMAT --> LOOP_VARIANTS[4. For each Resolution:] - LOOP_VARIANTS --> RESIZE_VARIANT[4a. Resize image_to_save to Variant (in memory)] - RESIZE_VARIANT --> PREPARE_SAVE[4b. Prepare Filename & Save Params] - PREPARE_SAVE --> SAVE_IMAGE[4c. Convert & Save Variant to Disk] - SAVE_IMAGE --> LOOP_VARIANTS; - LOOP_VARIANTS --> OUTPUT_LIST[5. Return List of Saved File Details] - end - - style STAGE_INDIVIDUAL_MAP_PROCESSING fill:#f9f,stroke:#333,stroke-width:2px; - style STAGE_MAP_MERGING fill:#f9f,stroke:#333,stroke-width:2px; - style UNIFIED_SAVE_UTILITY fill:#ccf,stroke:#333,stroke-width:2px; - style UNIFIED_SAVE_UTILITY_DETAILS fill:#ccf,stroke:#333,stroke-width:1px,dashed; - style O fill:#lightgrey,stroke:#333,stroke-width:2px; - style C4_POST_TRANSFORM fill:#e6ffe6,stroke:#333,stroke-width:1px; \ No newline at end of file +* Improved Readability & Understanding. +* Enhanced Maintainability & Reduced Risk. +* Better Testability. +* Clearer Dependencies. \ No newline at end of file diff --git a/config/app_settings.json b/config/app_settings.json index efe09ac..086a044 100644 --- a/config/app_settings.json +++ b/config/app_settings.json @@ -268,7 +268,7 @@ "OUTPUT_FORMAT_8BIT": "png", "MAP_MERGE_RULES": [ { - "output_map_type": "NRMRGH", + "output_map_type": "MAP_NRMRGH", "inputs": { "R": "MAP_NRM", "G": "MAP_NRM", diff --git a/processing/pipeline/asset_context.py b/processing/pipeline/asset_context.py index 5b411d7..b195927 100644 --- a/processing/pipeline/asset_context.py +++ b/processing/pipeline/asset_context.py @@ -5,6 +5,82 @@ from typing import Dict, List, Optional from rule_structure import AssetRule, FileRule, SourceRule from configuration import Configuration +# Imports needed for new dataclasses +import numpy as np +from typing import Any, Tuple, Union + +# --- Stage Input/Output Dataclasses --- + +# Item types for PrepareProcessingItemsStage output +@dataclass +class MergeTaskDefinition: + """Represents a merge task identified by PrepareProcessingItemsStage.""" + task_data: Dict # The original task data from context.merged_image_tasks + task_key: str # e.g., "merged_task_0" + +# Output for RegularMapProcessorStage +@dataclass +class ProcessedRegularMapData: + processed_image_data: np.ndarray + final_internal_map_type: str + source_file_path: Path + original_bit_depth: Optional[int] + original_dimensions: Optional[Tuple[int, int]] # (width, height) + transformations_applied: List[str] + status: str = "Processed" + error_message: Optional[str] = None + +# Output for MergedTaskProcessorStage +@dataclass +class ProcessedMergedMapData: + merged_image_data: np.ndarray + output_map_type: str # Internal type + source_bit_depths: List[int] + final_dimensions: Optional[Tuple[int, int]] # (width, height) + transformations_applied_to_inputs: Dict[str, List[str]] # Map type -> list of transforms + status: str = "Processed" + error_message: Optional[str] = None + +# Input for InitialScalingStage +@dataclass +class InitialScalingInput: + image_data: np.ndarray + original_dimensions: Optional[Tuple[int, int]] # (width, height) + # Configuration needed + initial_scaling_mode: str + +# Output for InitialScalingStage +@dataclass +class InitialScalingOutput: + scaled_image_data: np.ndarray + scaling_applied: bool + final_dimensions: Tuple[int, int] # (width, height) + +# Input for SaveVariantsStage +@dataclass +class SaveVariantsInput: + image_data: np.ndarray # Final data (potentially scaled) + internal_map_type: str # Final internal type (e.g., MAP_ROUGH, MAP_COL-1) + source_bit_depth_info: List[int] + # Configuration needed + output_filename_pattern_tokens: Dict[str, Any] + image_resolutions: List[int] + file_type_defs: Dict[str, Dict] + output_format_8bit: str + output_format_16bit_primary: str + output_format_16bit_fallback: str + png_compression_level: int + jpg_quality: int + output_filename_pattern: str + +# Output for SaveVariantsStage +@dataclass +class SaveVariantsOutput: + saved_files_details: List[Dict] + status: str = "Processed" + error_message: Optional[str] = None + +# Add a field to AssetProcessingContext for the prepared items @dataclass class AssetProcessingContext: source_rule: SourceRule @@ -14,11 +90,16 @@ class AssetProcessingContext: output_base_path: Path effective_supplier: Optional[str] asset_metadata: Dict - processed_maps_details: Dict[str, Dict[str, Dict]] - merged_maps_details: Dict[str, Dict[str, Dict]] + processed_maps_details: Dict[str, Dict] # Will store final results per item_key + merged_maps_details: Dict[str, Dict] # This might become redundant? Keep for now. files_to_process: List[FileRule] loaded_data_cache: Dict config_obj: Configuration status_flags: Dict incrementing_value: Optional[str] - sha5_value: Optional[str] \ No newline at end of file + sha5_value: Optional[str] # Keep existing fields + # New field for prepared items + processing_items: Optional[List[Union[FileRule, MergeTaskDefinition]]] = None + # Temporary storage during pipeline execution (managed by orchestrator) + # Keys could be FileRule object hash/id or MergeTaskDefinition task_key + intermediate_results: Optional[Dict[Any, Union[ProcessedRegularMapData, ProcessedMergedMapData, InitialScalingOutput]]] = None \ No newline at end of file diff --git a/processing/pipeline/orchestrator.py b/processing/pipeline/orchestrator.py index 4d3cba7..6765506 100644 --- a/processing/pipeline/orchestrator.py +++ b/processing/pipeline/orchestrator.py @@ -1,126 +1,405 @@ -from typing import List, Dict, Optional -from pathlib import Path +# --- Imports --- +import logging import shutil import tempfile -import logging +from pathlib import Path +from typing import List, Dict, Optional, Any, Union # Added Any, Union + +import numpy as np # Added numpy from configuration import Configuration -from rule_structure import SourceRule, AssetRule -from .asset_context import AssetProcessingContext +from rule_structure import SourceRule, AssetRule, FileRule # Added FileRule + +# Import new context classes and stages +from .asset_context import ( + AssetProcessingContext, + MergeTaskDefinition, + ProcessedRegularMapData, + ProcessedMergedMapData, + InitialScalingInput, + InitialScalingOutput, + SaveVariantsInput, + SaveVariantsOutput, +) from .stages.base_stage import ProcessingStage +# Import the new stages we created +from .stages.prepare_processing_items import PrepareProcessingItemsStage +from .stages.regular_map_processor import RegularMapProcessorStage +from .stages.merged_task_processor import MergedTaskProcessorStage +from .stages.initial_scaling import InitialScalingStage +from .stages.save_variants import SaveVariantsStage log = logging.getLogger(__name__) +# --- PipelineOrchestrator Class --- + class PipelineOrchestrator: """ Orchestrates the processing of assets based on source rules and a series of processing stages. + Manages the overall flow, including the core item processing sequence. """ - def __init__(self, config_obj: Configuration, stages: List[ProcessingStage]): + def __init__(self, config_obj: Configuration, + pre_item_stages: List[ProcessingStage], + post_item_stages: List[ProcessingStage]): """ Initializes the PipelineOrchestrator. Args: config_obj: The main configuration object. - stages: A list of processing stages to be executed in order. + pre_item_stages: Stages to run before the core item processing loop. + post_item_stages: Stages to run after the core item processing loop. """ self.config_obj: Configuration = config_obj - self.stages: List[ProcessingStage] = stages + self.pre_item_stages: List[ProcessingStage] = pre_item_stages + self.post_item_stages: List[ProcessingStage] = post_item_stages + # Instantiate the core item processing stages internally + self._prepare_stage = PrepareProcessingItemsStage() + self._regular_processor_stage = RegularMapProcessorStage() + self._merged_processor_stage = MergedTaskProcessorStage() + self._scaling_stage = InitialScalingStage() + self._save_stage = SaveVariantsStage() + + def _execute_specific_stages( + self, context: AssetProcessingContext, + stages_to_run: List[ProcessingStage], + stage_group_name: str, + stop_on_skip: bool = True + ) -> AssetProcessingContext: + """Executes a specific list of stages.""" + asset_name = context.asset_rule.asset_name if context.asset_rule else "Unknown" + log.debug(f"Asset '{asset_name}': Executing {stage_group_name} stages...") + for stage in stages_to_run: + stage_name = stage.__class__.__name__ + log.debug(f"Asset '{asset_name}': Executing {stage_group_name} stage: {stage_name}") + try: + # Check if stage expects context directly or specific input + # For now, assume outer stages take context directly + # This might need refinement if outer stages also adopt Input/Output pattern + context = stage.execute(context) + except Exception as e: + log.error(f"Asset '{asset_name}': Error during outer stage '{stage_name}': {e}", exc_info=True) + context.status_flags["asset_failed"] = True + context.status_flags["asset_failed_stage"] = stage_name + context.status_flags["asset_failed_reason"] = str(e) + # Update overall metadata immediately on outer stage failure + context.asset_metadata["status"] = f"Failed: Error in stage {stage_name}" + context.asset_metadata["error_message"] = str(e) + break # Stop processing outer stages for this asset on error + + if stop_on_skip and context.status_flags.get("skip_asset"): + log.info(f"Asset '{asset_name}': Skipped by outer stage '{stage_name}'. Reason: {context.status_flags.get('skip_reason', 'N/A')}") + break # Skip remaining outer stages for this asset + return context def process_source_rule( self, source_rule: SourceRule, workspace_path: Path, output_base_path: Path, - overwrite: bool, # Not used in this initial implementation, but part of the signature + overwrite: bool, incrementing_value: Optional[str], - sha5_value: Optional[str] # Corrected from sha5_value to sha256_value as per typical usage, assuming typo + sha5_value: Optional[str] # Keep param name consistent for now ) -> Dict[str, List[str]]: """ - Processes a single source rule, iterating through its asset rules and applying all stages. - - Args: - source_rule: The source rule to process. - workspace_path: The base path of the workspace. - output_base_path: The base path for output files. - overwrite: Whether to overwrite existing files (not fully implemented yet). - incrementing_value: An optional incrementing value for versioning or naming. - sha5_value: An optional SHA5 hash value for the asset (assuming typo, likely sha256). - - Returns: - A dictionary summarizing the processing status of assets. + Processes a single source rule, applying pre-processing stages, + the core item processing loop (Prepare, Process, Scale, Save), + and post-processing stages. """ overall_status: Dict[str, List[str]] = { "processed": [], "skipped": [], "failed": [], } - engine_temp_dir_path: Optional[Path] = None # Initialize to None + engine_temp_dir_path: Optional[Path] = None try: - # Create a temporary directory for this processing run if needed by any stage - # This temp dir is for the entire source_rule processing, not per asset. - # Individual stages might create their own sub-temp dirs if necessary. + # --- Setup Temporary Directory --- temp_dir_path_str = tempfile.mkdtemp(prefix=self.config_obj.temp_dir_prefix) engine_temp_dir_path = Path(temp_dir_path_str) - log.debug(f"PipelineOrchestrator created temporary directory: {engine_temp_dir_path} using prefix '{self.config_obj.temp_dir_prefix}'") - + log.debug(f"PipelineOrchestrator created temporary directory: {engine_temp_dir_path}") + # --- Process Each Asset Rule --- for asset_rule in source_rule.assets: - log.debug(f"Orchestrator: Processing asset '{asset_rule.asset_name}'") + asset_name = asset_rule.asset_name + log.info(f"Orchestrator: Processing asset '{asset_name}'") + + # --- Initialize Asset Context --- context = AssetProcessingContext( source_rule=source_rule, asset_rule=asset_rule, - workspace_path=workspace_path, # This is the path to the source files (e.g. extracted archive) - engine_temp_dir=engine_temp_dir_path, # Pass the orchestrator's temp dir + workspace_path=workspace_path, + engine_temp_dir=engine_temp_dir_path, output_base_path=output_base_path, - effective_supplier=None, # Will be set by SupplierDeterminationStage - asset_metadata={}, # Will be populated by stages - processed_maps_details={}, # Will be populated by stages - merged_maps_details={}, # Will be populated by stages - files_to_process=[], # Will be populated by FileRuleFilterStage - loaded_data_cache={}, # For image loading cache within this asset's processing + effective_supplier=None, + asset_metadata={}, + processed_maps_details={}, # Final results per item + merged_maps_details={}, # Keep for potential backward compat or other uses? + files_to_process=[], # Populated by FileRuleFilterStage (assumed in outer_stages) + loaded_data_cache={}, config_obj=self.config_obj, - status_flags={"skip_asset": False, "asset_failed": False}, # Initialize common flags + status_flags={"skip_asset": False, "asset_failed": False}, incrementing_value=incrementing_value, - sha5_value=sha5_value + sha5_value=sha5_value, + processing_items=[], # Initialize new fields + intermediate_results={} ) - for stage_idx, stage in enumerate(self.stages): - log.debug(f"Asset '{asset_rule.asset_name}': Executing stage {stage_idx + 1}/{len(self.stages)}: {stage.__class__.__name__}") + # --- Execute Pre-Item-Processing Outer Stages --- + # (e.g., MetadataInit, SupplierDet, FileRuleFilter, GlossToRough, NormalInvert) + # Identify which outer stages run before the item loop + # This requires knowing the intended order. Assume all run before for now. + context = self._execute_specific_stages(context, self.pre_item_stages, "pre-item", stop_on_skip=True) + + # Check if asset should be skipped or failed after pre-processing + if context.status_flags.get("asset_failed"): + log.error(f"Asset '{asset_name}': Failed during pre-processing stage '{context.status_flags.get('asset_failed_stage', 'Unknown')}'. Skipping item processing.") + overall_status["failed"].append(f"{asset_name} (Failed in {context.status_flags.get('asset_failed_stage', 'Pre-Processing')})") + continue # Move to the next asset rule + + if context.status_flags.get("skip_asset"): + log.info(f"Asset '{asset_name}': Skipped during pre-processing. Skipping item processing.") + overall_status["skipped"].append(asset_name) + continue # Move to the next asset rule + + # --- Prepare Processing Items --- + log.debug(f"Asset '{asset_name}': Preparing processing items...") + try: + # Prepare stage modifies context directly + context = self._prepare_stage.execute(context) + except Exception as e: + log.error(f"Asset '{asset_name}': Error during PrepareProcessingItemsStage: {e}", exc_info=True) + context.status_flags["asset_failed"] = True + context.status_flags["asset_failed_stage"] = "PrepareProcessingItemsStage" + context.status_flags["asset_failed_reason"] = str(e) + overall_status["failed"].append(f"{asset_name} (Failed in Prepare Items)") + continue # Move to next asset + + if context.status_flags.get('prepare_items_failed'): + log.error(f"Asset '{asset_name}': Failed during item preparation. Reason: {context.status_flags.get('prepare_items_failed_reason', 'Unknown')}. Skipping item processing loop.") + overall_status["failed"].append(f"{asset_name} (Failed Prepare Items: {context.status_flags.get('prepare_items_failed_reason', 'Unknown')})") + continue # Move to next asset + + if not context.processing_items: + log.info(f"Asset '{asset_name}': No items to process after preparation stage.") + # Status will be determined at the end + + # --- Core Item Processing Loop --- + log.info("ORCHESTRATOR: Starting processing items loop for asset '%s'", asset_name) # Corrected indentation and message + log.info(f"Asset '{asset_name}': Starting core item processing loop for {len(context.processing_items)} items...") + asset_had_item_errors = False + for item_index, item in enumerate(context.processing_items): + item_key: Any = None # Key for storing results (FileRule object or task_key string) + item_log_prefix = f"Asset '{asset_name}', Item {item_index + 1}/{len(context.processing_items)}" + processed_data: Optional[Union[ProcessedRegularMapData, ProcessedMergedMapData]] = None + scaled_data_output: Optional[InitialScalingOutput] = None # Store output object + saved_data: Optional[SaveVariantsOutput] = None + item_status = "Failed" # Default item status + current_image_data: Optional[np.ndarray] = None # Track current image data ref + try: - context = stage.execute(context) + # 1. Process (Load/Merge + Transform) + if isinstance(item, FileRule): + item_key = item.file_path # Use file_path string as key + log.debug(f"{item_log_prefix}: Processing FileRule '{item.file_path}'...") + processed_data = self._regular_processor_stage.execute(context, item) + elif isinstance(item, MergeTaskDefinition): + item_key = item.task_key # Use task_key string as key + log.debug(f"{item_log_prefix}: Processing MergeTask '{item_key}'...") + processed_data = self._merged_processor_stage.execute(context, item) + else: + log.warning(f"{item_log_prefix}: Unknown item type '{type(item)}'. Skipping.") + item_key = f"unknown_item_{item_index}" + context.processed_maps_details[item_key] = {"status": "Skipped", "notes": f"Unknown item type {type(item)}"} + asset_had_item_errors = True + continue # Next item + + # Check for processing failure + if not processed_data or processed_data.status != "Processed": + error_msg = processed_data.error_message if processed_data else "Processor returned None" + log.error(f"{item_log_prefix}: Failed during processing stage. Error: {error_msg}") + context.processed_maps_details[item_key] = {"status": "Failed", "notes": f"Processing Error: {error_msg}", "stage": processed_data.__class__.__name__ if processed_data else "UnknownProcessor"} + asset_had_item_errors = True + continue # Next item + + # Store intermediate result & get current image data + context.intermediate_results[item_key] = processed_data + current_image_data = processed_data.processed_image_data if isinstance(processed_data, ProcessedRegularMapData) else processed_data.merged_image_data + current_dimensions = processed_data.original_dimensions if isinstance(processed_data, ProcessedRegularMapData) else processed_data.final_dimensions + + # 2. Scale (Optional) + scaling_mode = getattr(context.config_obj, "INITIAL_SCALING_MODE", "NONE") + if scaling_mode != "NONE" and current_image_data is not None and current_image_data.size > 0: + log.debug(f"{item_log_prefix}: Applying initial scaling (Mode: {scaling_mode})...") + scale_input = InitialScalingInput( + image_data=current_image_data, + original_dimensions=current_dimensions, # Pass original/merged dims + initial_scaling_mode=scaling_mode + ) + scaled_data_output = self._scaling_stage.execute(scale_input) + # Update intermediate result and current image data reference + context.intermediate_results[item_key] = scaled_data_output # Overwrite previous intermediate + current_image_data = scaled_data_output.scaled_image_data # Use scaled data for saving + log.debug(f"{item_log_prefix}: Scaling applied: {scaled_data_output.scaling_applied}. New Dims: {scaled_data_output.final_dimensions}") + else: + log.debug(f"{item_log_prefix}: Initial scaling skipped (Mode: NONE or empty image).") + # Create dummy output if scaling skipped, using current dims + final_dims = current_dimensions if current_dimensions else (current_image_data.shape[1], current_image_data.shape[0]) if current_image_data is not None else (0,0) + scaled_data_output = InitialScalingOutput(scaled_image_data=current_image_data, scaling_applied=False, final_dimensions=final_dims) + + + # 3. Save Variants + if current_image_data is None or current_image_data.size == 0: + log.warning(f"{item_log_prefix}: Skipping save stage because image data is empty.") + context.processed_maps_details[item_key] = {"status": "Skipped", "notes": "No image data to save", "stage": "SaveVariantsStage"} + # Don't mark as asset error, just skip this item's saving + continue # Next item + + log.debug(f"{item_log_prefix}: Saving variants...") + # Prepare input for save stage + internal_map_type = processed_data.final_internal_map_type if isinstance(processed_data, ProcessedRegularMapData) else processed_data.output_map_type + source_bit_depth = [processed_data.original_bit_depth] if isinstance(processed_data, ProcessedRegularMapData) and processed_data.original_bit_depth is not None else processed_data.source_bit_depths if isinstance(processed_data, ProcessedMergedMapData) else [8] # Default bit depth if unknown + + # Construct filename tokens (ensure temp dir is used) + output_filename_tokens = { + 'asset_name': asset_name, + 'output_base_directory': context.engine_temp_dir, # Save variants to temp dir + # Add other tokens from context/config as needed by the pattern + 'supplier': context.effective_supplier or 'UnknownSupplier', + } + + save_input = SaveVariantsInput( + image_data=current_image_data, # Use potentially scaled data + internal_map_type=internal_map_type, + source_bit_depth_info=source_bit_depth, + output_filename_pattern_tokens=output_filename_tokens, + # Pass config values needed by save stage + image_resolutions=context.config_obj.image_resolutions, + file_type_defs=getattr(context.config_obj, "FILE_TYPE_DEFINITIONS", {}), + output_format_8bit=context.config_obj.get_8bit_output_format(), + output_format_16bit_primary=context.config_obj.get_16bit_output_formats()[0], + output_format_16bit_fallback=context.config_obj.get_16bit_output_formats()[1], + png_compression_level=context.config_obj.png_compression_level, + jpg_quality=context.config_obj.jpg_quality, + output_filename_pattern=context.config_obj.output_filename_pattern, + ) + saved_data = self._save_stage.execute(save_input) + + # Check save status and finalize item result + if saved_data and saved_data.status.startswith("Processed"): + item_status = saved_data.status # e.g., "Processed" or "Processed (No Output)" + log.info(f"{item_log_prefix}: Item successfully processed and saved. Status: {item_status}") + # Populate final details for this item + final_details = { + "status": item_status, + "saved_files_info": saved_data.saved_files_details, # List of dicts from save util + "internal_map_type": internal_map_type, + "original_dimensions": processed_data.original_dimensions if isinstance(processed_data, ProcessedRegularMapData) else None, + "final_dimensions": scaled_data_output.final_dimensions if scaled_data_output else current_dimensions, + "transformations": processed_data.transformations_applied if isinstance(processed_data, ProcessedRegularMapData) else processed_data.transformations_applied_to_inputs, + # Add source file if regular map + "source_file": str(processed_data.source_file_path) if isinstance(processed_data, ProcessedRegularMapData) else None, + } + context.processed_maps_details[item_key] = final_details + else: + error_msg = saved_data.error_message if saved_data else "Save stage returned None" + log.error(f"{item_log_prefix}: Failed during save stage. Error: {error_msg}") + context.processed_maps_details[item_key] = {"status": "Failed", "notes": f"Save Error: {error_msg}", "stage": "SaveVariantsStage"} + asset_had_item_errors = True + item_status = "Failed" # Ensure item status reflects failure + except Exception as e: - log.error(f"Asset '{asset_rule.asset_name}': Error during stage '{stage.__class__.__name__}': {e}", exc_info=True) - context.status_flags["asset_failed"] = True - context.asset_metadata["status"] = f"Failed: Error in stage {stage.__class__.__name__}" - context.asset_metadata["error_message"] = str(e) - break # Stop processing stages for this asset on error + log.exception(f"{item_log_prefix}: Unhandled exception during item processing loop: {e}") + # Ensure details are recorded even on unhandled exception + if item_key is not None: + context.processed_maps_details[item_key] = {"status": "Failed", "notes": f"Unhandled Loop Error: {e}", "stage": "OrchestratorLoop"} + else: + log.error(f"Asset '{asset_name}': Unhandled exception in item loop before item key was set.") + asset_had_item_errors = True + item_status = "Failed" + # Optionally break loop or continue? Continue for now to process other items. - if context.status_flags.get("skip_asset"): - log.info(f"Asset '{asset_rule.asset_name}': Skipped by stage '{stage.__class__.__name__}'. Reason: {context.status_flags.get('skip_reason', 'N/A')}") - break # Skip remaining stages for this asset + log.info("ORCHESTRATOR: Finished processing items loop for asset '%s'", asset_name) + log.info(f"Asset '{asset_name}': Finished core item processing loop.") + + # --- Execute Post-Item-Processing Outer Stages --- + # (e.g., OutputOrganization, MetadataFinalizationSave) + # Identify which outer stages run after the item loop + # This needs better handling based on stage purpose. Assume none run after for now. + if not context.status_flags.get("asset_failed"): + log.info("ORCHESTRATOR: Executing post-item-processing outer stages for asset '%s'", asset_name) + context = self._execute_specific_stages(context, self.post_item_stages, "post-item", stop_on_skip=False) + + # --- Final Asset Status Determination --- + final_asset_status = "Unknown" + fail_reason = "" + if context.status_flags.get("asset_failed"): + final_asset_status = "Failed" + fail_reason = f"(Failed in {context.status_flags.get('asset_failed_stage', 'Unknown Stage')}: {context.status_flags.get('asset_failed_reason', 'Unknown Reason')})" + elif context.status_flags.get("skip_asset"): + final_asset_status = "Skipped" + fail_reason = f"(Skipped: {context.status_flags.get('skip_reason', 'Unknown Reason')})" + elif asset_had_item_errors: + final_asset_status = "Failed" + fail_reason = "(One or more items failed)" + elif not context.processing_items: + # No items prepared, no errors -> consider skipped or processed based on definition? + final_asset_status = "Skipped" # Or "Processed (No Items)" + fail_reason = "(No items to process)" + elif not context.processed_maps_details and context.processing_items: + # Items were prepared, but none resulted in processed_maps_details entry + final_asset_status = "Skipped" # Or Failed? + fail_reason = "(All processing items skipped or failed internally)" + elif context.processed_maps_details: + # Check if all items in processed_maps_details are actually processed successfully + all_processed_ok = all( + str(details.get("status", "")).startswith("Processed") + for details in context.processed_maps_details.values() + ) + some_processed_ok = any( + str(details.get("status", "")).startswith("Processed") + for details in context.processed_maps_details.values() + ) + + if all_processed_ok: + final_asset_status = "Processed" + elif some_processed_ok: + final_asset_status = "Partial" # Introduce a partial status? Or just Failed? + fail_reason = "(Some items failed)" + final_asset_status = "Failed" # Treat partial as Failed for overall status + else: # No items processed successfully + final_asset_status = "Failed" + fail_reason = "(All items failed)" + else: + # Should not happen if processing_items existed + final_asset_status = "Failed" + fail_reason = "(Unknown state after item processing)" + + + # Update overall status list + if final_asset_status == "Processed": + overall_status["processed"].append(asset_name) + elif final_asset_status == "Skipped": + overall_status["skipped"].append(f"{asset_name} {fail_reason}") + else: # Failed or Unknown + overall_status["failed"].append(f"{asset_name} {fail_reason}") + + log.info(f"Asset '{asset_name}' final status: {final_asset_status} {fail_reason}") + # Clean up intermediate results for the asset to save memory + context.intermediate_results = {} - # Refined status collection - if context.status_flags.get('skip_asset'): - overall_status["skipped"].append(asset_rule.asset_name) - elif context.status_flags.get('asset_failed') or str(context.asset_metadata.get('status', '')).startswith("Failed"): - overall_status["failed"].append(asset_rule.asset_name) - elif context.asset_metadata.get('status') == "Processed": - overall_status["processed"].append(asset_rule.asset_name) - else: # Default or unknown state - log.warning(f"Asset '{asset_rule.asset_name}': Unknown status after pipeline execution. Metadata status: '{context.asset_metadata.get('status')}'. Marking as failed.") - overall_status["failed"].append(f"{asset_rule.asset_name} (Unknown Status: {context.asset_metadata.get('status')})") - log.debug(f"Asset '{asset_rule.asset_name}' final status: {context.asset_metadata.get('status', 'N/A')}, Flags: {context.status_flags}") except Exception as e: - log.error(f"PipelineOrchestrator.process_source_rule failed: {e}", exc_info=True) - # Mark all remaining assets as failed if a top-level error occurs - processed_or_skipped_or_failed = set(overall_status["processed"] + overall_status["skipped"] + overall_status["failed"]) + log.error(f"PipelineOrchestrator.process_source_rule failed critically: {e}", exc_info=True) + # Mark all assets from this source rule that weren't finished as failed + processed_or_skipped_or_failed = set(overall_status["processed"]) | \ + set(name.split(" ")[0] for name in overall_status["skipped"]) | \ + set(name.split(" ")[0] for name in overall_status["failed"]) for asset_rule in source_rule.assets: if asset_rule.asset_name not in processed_or_skipped_or_failed: - overall_status["failed"].append(f"{asset_rule.asset_name} (Orchestrator Error)") + overall_status["failed"].append(f"{asset_rule.asset_name} (Orchestrator Error: {e})") finally: + # --- Cleanup Temporary Directory --- if engine_temp_dir_path and engine_temp_dir_path.exists(): try: log.debug(f"PipelineOrchestrator cleaning up temporary directory: {engine_temp_dir_path}") diff --git a/processing/pipeline/stages/individual_map_processing.py b/processing/pipeline/stages/individual_map_processing.py deleted file mode 100644 index 6386a33..0000000 --- a/processing/pipeline/stages/individual_map_processing.py +++ /dev/null @@ -1,695 +0,0 @@ -import uuid -import dataclasses -import re -import os -import logging -from pathlib import Path -from typing import Optional, Tuple, Dict, List, Any, Union - -import cv2 -import numpy as np - -from .base_stage import ProcessingStage -from ..asset_context import AssetProcessingContext -from rule_structure import FileRule -from utils.path_utils import sanitize_filename -from ...utils import image_processing_utils as ipu # Includes get_image_bit_depth implicitly now -from ...utils.image_saving_utils import save_image_variants # Added import - -logger = logging.getLogger(__name__) - -# Helper function to get filename-friendly map type (adapted from old logic) -def get_filename_friendly_map_type(internal_map_type: str, file_type_definitions: Optional[Dict[str, Dict]]) -> str: - """Derives a filename-friendly map type from the internal map type.""" - filename_friendly_map_type = internal_map_type # Fallback - if not file_type_definitions or not isinstance(file_type_definitions, dict) or not file_type_definitions: - logger.warning(f"Filename-friendly lookup: FILE_TYPE_DEFINITIONS not available or invalid. Falling back to internal type: {internal_map_type}") - return filename_friendly_map_type - - base_map_key_val = None - suffix_part = "" - sorted_known_base_keys = sorted(list(file_type_definitions.keys()), key=len, reverse=True) - - for known_key in sorted_known_base_keys: - if internal_map_type.startswith(known_key): - base_map_key_val = known_key - suffix_part = internal_map_type[len(known_key):] - break - - if base_map_key_val: - definition = file_type_definitions.get(base_map_key_val) - if definition and isinstance(definition, dict): - standard_type_alias = definition.get("standard_type") - if standard_type_alias and isinstance(standard_type_alias, str) and standard_type_alias.strip(): - filename_friendly_map_type = standard_type_alias.strip() + suffix_part - logger.debug(f"Filename-friendly lookup: Transformed '{internal_map_type}' -> '{filename_friendly_map_type}'") - else: - logger.warning(f"Filename-friendly lookup: Standard type alias for '{base_map_key_val}' is missing or invalid. Falling back.") - else: - logger.warning(f"Filename-friendly lookup: No valid definition for '{base_map_key_val}'. Falling back.") - else: - logger.warning(f"Filename-friendly lookup: Could not parse base key from '{internal_map_type}'. Falling back.") - - return filename_friendly_map_type - - -class IndividualMapProcessingStage(ProcessingStage): - """ - Processes individual texture maps and merged map tasks. - This stage loads source images (or merges inputs for tasks), performs - in-memory transformations (Gloss-to-Rough, Normal Green Invert, optional scaling), - and passes the result to the UnifiedSaveUtility for final output generation. - It updates the AssetProcessingContext with detailed results. - """ - - def _apply_in_memory_transformations( - self, - image_data: np.ndarray, - processing_map_type: str, - invert_normal_green: bool, - file_type_definitions: Dict[str, Dict], - log_prefix: str # e.g., "Asset 'X', Key Y, Proc. Tag Z" - ) -> Tuple[np.ndarray, str, List[str]]: - """ - Applies in-memory transformations (Gloss-to-Rough, Normal Green Invert). - - Returns: - Tuple containing: - - Potentially transformed image data. - - Potentially updated processing_map_type (e.g., MAP_GLOSS -> MAP_ROUGH). - - List of strings describing applied transformations. - """ - transformation_notes = [] - current_image_data = image_data # Start with original data - updated_processing_map_type = processing_map_type # Start with original type - - # Gloss-to-Rough - if processing_map_type.startswith("MAP_GLOSS"): - logger.info(f"{log_prefix}: Applying Gloss-to-Rough conversion.") - inversion_succeeded = False - # Replicate inversion logic from GlossToRoughConversionStage - if np.issubdtype(current_image_data.dtype, np.floating): - current_image_data = 1.0 - current_image_data - current_image_data = np.clip(current_image_data, 0.0, 1.0) - logger.debug(f"{log_prefix}: Inverted float image data for Gloss->Rough.") - inversion_succeeded = True - elif np.issubdtype(current_image_data.dtype, np.integer): - max_val = np.iinfo(current_image_data.dtype).max - current_image_data = max_val - current_image_data - logger.debug(f"{log_prefix}: Inverted integer image data (max_val: {max_val}) for Gloss->Rough.") - inversion_succeeded = True - else: - logger.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS map. Cannot invert.") - transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") - - # Update type and notes based on success flag - if inversion_succeeded: - updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") - logger.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") - transformation_notes.append("Gloss-to-Rough applied") - - # Normal Green Invert - # Use internal 'MAP_NRM' type for check - if processing_map_type == "MAP_NRM" and invert_normal_green: - logger.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting).") - current_image_data = ipu.invert_normal_map_green_channel(current_image_data) - transformation_notes.append("Normal Green Inverted (Global)") - - return current_image_data, updated_processing_map_type, transformation_notes - - def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: - """ - Executes the individual map and merged task processing logic. - """ - asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" - if context.status_flags.get('skip_asset', False): - logger.info(f"Asset '{asset_name_for_log}': Skipping individual map processing due to skip_asset flag.") - return context - - if not hasattr(context, 'processed_maps_details') or context.processed_maps_details is None: - context.processed_maps_details = {} - logger.debug(f"Asset '{asset_name_for_log}': Initialized processed_maps_details.") - - # --- Configuration Fetching --- - config = context.config_obj - file_type_definitions = getattr(config, "FILE_TYPE_DEFINITIONS", {}) - respect_variant_map_types = getattr(config, "respect_variant_map_types", []) # Needed for suffixing logic - initial_scaling_mode = getattr(config, "INITIAL_SCALING_MODE", "NONE") - merge_dimension_mismatch_strategy = getattr(config, "MERGE_DIMENSION_MISMATCH_STRATEGY", "USE_LARGEST") - invert_normal_green = config.invert_normal_green_globally # Use the new property - output_base_dir = context.output_base_path # This is the FINAL base path - asset_name = context.asset_rule.asset_name if context.asset_rule else "UnknownAsset" - # For save_image_variants, the 'output_base_directory' should be the engine_temp_dir, - # as these are intermediate variant files before final organization. - temp_output_base_dir_for_variants = context.engine_temp_dir - output_filename_pattern_tokens = {'asset_name': asset_name, 'output_base_directory': temp_output_base_dir_for_variants} - - # --- Prepare Items to Process --- - items_to_process: List[Union[Tuple[int, FileRule], Tuple[str, Dict]]] = [] - - # Add regular files - if context.files_to_process: - # Validate source path early for regular files - if not context.source_rule or not context.source_rule.input_path: - logger.error(f"Asset '{asset_name_for_log}': SourceRule or SourceRule.input_path is not set. Cannot process regular files.") - context.status_flags['individual_map_processing_failed'] = True - # Mark all file_rules as failed if source path is missing - for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): - map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" - ff_map_type = get_filename_friendly_map_type(map_type_for_fail, file_type_definitions) - context.processed_maps_details[fr_idx] = { - 'status': 'Failed', - 'map_type': ff_map_type, - 'processing_map_type': map_type_for_fail, - 'notes': "SourceRule.input_path missing", - 'saved_files_info': [] - } - # Don't add regular files if source path is bad - elif not context.workspace_path or not context.workspace_path.is_dir(): - logger.error(f"Asset '{asset_name_for_log}': Workspace path '{context.workspace_path}' is not a valid directory. Cannot process regular files.") - context.status_flags['individual_map_processing_failed'] = True - for fr_idx, file_rule_to_fail in enumerate(context.files_to_process): - map_type_for_fail = file_rule_to_fail.item_type_override or file_rule_to_fail.item_type or "UnknownMapType" - ff_map_type = get_filename_friendly_map_type(map_type_for_fail, file_type_definitions) - context.processed_maps_details[fr_idx] = { - 'status': 'Failed', - 'map_type': ff_map_type, - 'processing_map_type': map_type_for_fail, - 'notes': "Workspace path invalid", - 'saved_files_info': [] - } - # Don't add regular files if workspace path is bad - else: - for idx, file_rule in enumerate(context.files_to_process): - items_to_process.append((idx, file_rule)) - - # Add merged tasks - if hasattr(context, 'merged_image_tasks') and context.merged_image_tasks: - for task_idx, task_data in enumerate(context.merged_image_tasks): - task_key = f"merged_task_{task_idx}" - items_to_process.append((task_key, task_data)) - - if not items_to_process: - logger.info(f"Asset '{asset_name_for_log}': No regular files or merged tasks to process in this stage.") - return context - - # --- Unified Processing Loop --- - for item_key, item_data in items_to_process: - current_image_data: Optional[np.ndarray] = None - base_map_type: str = "Unknown" # Filename-friendly - processing_map_type: str = "Unknown" # Internal MAP_XXX type - source_bit_depth_info_for_save_util: List[int] = [] - is_merged_task: bool = False - status_notes: List[str] = [] - processing_status: str = "Started" - saved_files_details_list: List[Dict] = [] - original_dimensions: Optional[Tuple[int, int]] = None - source_file_path_regular: Optional[Path] = None # For regular maps - merge_task_config_output_type: Optional[str] = None # For merged tasks - inputs_used_for_merge: Optional[Dict[str, str]] = None # For merged tasks - processing_instance_tag = f"item_{item_key}_{uuid.uuid4().hex[:8]}" # Unique tag for logging this item - - try: - # --- A. Regular Map Processing --- - if isinstance(item_data, FileRule): - file_rule: FileRule = item_data - file_rule_idx: int = item_key # Key is the index for regular maps - is_merged_task = False - logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Processing Regular Map from FileRule: {file_rule.file_path}") - - if not file_rule.file_path: - logger.error(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: FileRule has an empty or None file_path. Skipping.") - processing_status = "Failed" - status_notes.append("FileRule has no file_path") - continue # To finally block - - # Determine internal map type (MAP_XXX) with suffixing - initial_internal_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" - processing_map_type = self._get_suffixed_internal_map_type(context, file_rule, initial_internal_map_type, respect_variant_map_types) - base_map_type = get_filename_friendly_map_type(processing_map_type, file_type_definitions) # Get filename friendly version - - # Skip types not meant for individual processing (e.g., composites handled elsewhere) - if not processing_map_type or not processing_map_type.startswith("MAP_") or processing_map_type == "MAP_GEN_COMPOSITE": - logger.debug(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Skipping, type '{processing_map_type}' (Filename: '{base_map_type}') not targeted for individual processing.") - processing_status = "Skipped" - status_notes.append(f"Type '{processing_map_type}' not processed individually.") - continue # To finally block - - # Find source file (relative to workspace_path) - source_base_path = context.workspace_path - # Use the file_rule.file_path directly as it should be relative now - potential_source_path = source_base_path / file_rule.file_path - if potential_source_path.is_file(): - source_file_path_regular = potential_source_path - logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Found source file: {source_file_path_regular}") - else: - # Attempt globbing as a fallback if direct path fails (optional, based on previous logic) - found_files = list(source_base_path.glob(file_rule.file_path)) - if len(found_files) == 1: - source_file_path_regular = found_files[0] - logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Found source file via glob: {source_file_path_regular}") - elif len(found_files) > 1: - logger.warning(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Multiple files found for pattern '{file_rule.file_path}' in '{source_base_path}'. Using first: {found_files[0]}") - source_file_path_regular = found_files[0] - else: - logger.error(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Source file not found using path/pattern '{file_rule.file_path}' in '{source_base_path}'.") - processing_status = "Failed" - status_notes.append("Source file not found") - continue # To finally block - - # Load image - source_image_data = ipu.load_image(str(source_file_path_regular)) - if source_image_data is None: - logger.error(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Failed to load image from '{source_file_path_regular}'.") - processing_status = "Failed" - status_notes.append("Image load failed") - continue # To finally block - - original_height, original_width = source_image_data.shape[:2] - original_dimensions = (original_width, original_height) - logger.debug(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Loaded image with dimensions {original_width}x{original_height}.") - - # Get original bit depth - try: - original_source_bit_depth = ipu.get_image_bit_depth(str(source_file_path_regular)) - source_bit_depth_info_for_save_util = [original_source_bit_depth] - logger.info(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Determined source bit depth: {original_source_bit_depth}") - except Exception as e: - logger.warning(f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}: Could not determine source bit depth for {source_file_path_regular}: {e}. Using default [8].") - source_bit_depth_info_for_save_util = [8] # Default fallback - status_notes.append("Could not determine source bit depth, defaulted to 8.") - - current_image_data = source_image_data.copy() - # Apply transformations for regular maps AFTER loading - log_prefix_regular = f"Asset '{asset_name_for_log}', Key {file_rule_idx}, Proc. Tag {processing_instance_tag}" - current_image_data, processing_map_type, transform_notes = self._apply_in_memory_transformations( - current_image_data, processing_map_type, invert_normal_green, file_type_definitions, log_prefix_regular - ) - status_notes.extend(transform_notes) - # Update base_map_type AFTER potential transformation - base_map_type = get_filename_friendly_map_type(processing_map_type, file_type_definitions) - - - # --- B. Merged Image Task Processing --- - elif isinstance(item_data, dict): - task: Dict = item_data - task_key: str = item_key # Key is the generated string for merged tasks - is_merged_task = True - merge_task_config_output_type = task.get('output_map_type', 'UnknownMergeOutput') - logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Processing Merged Task for output type: {merge_task_config_output_type}") - - processing_map_type = merge_task_config_output_type # Internal type is the output type from config - base_map_type = get_filename_friendly_map_type(processing_map_type, file_type_definitions) # Get filename friendly version - source_bit_depth_info_for_save_util = task.get('source_bit_depths', []) - merge_rule_config = task.get('merge_rule_config', {}) - input_map_sources = task.get('input_map_sources', {}) - target_dimensions = task.get('source_dimensions') # Expected dimensions (h, w) - - if not merge_rule_config or not input_map_sources or not target_dimensions: - logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Merge task data is incomplete (missing config, sources, or dimensions). Skipping.") - processing_status = "Failed" - status_notes.append("Incomplete merge task data") - continue # To finally block - - loaded_inputs_for_merge: Dict[str, np.ndarray] = {} - actual_input_dimensions: List[Tuple[int, int]] = [] # List of (h, w) - inputs_used_for_merge = {} # Track actual files/fallbacks used - - # Load/Prepare Inputs for Merge - merge_inputs_config = merge_rule_config.get('inputs', {}) - merge_defaults = merge_rule_config.get('defaults', {}) - - for channel_char, required_map_type_from_rule in merge_inputs_config.items(): - input_info = input_map_sources.get(required_map_type_from_rule) - input_image_data = None - input_source_desc = f"Fallback for {required_map_type_from_rule}" - - if input_info and input_info.get('file_path'): - # Paths in merged tasks should ideally be absolute or relative to a known base (e.g., workspace) - # Assuming they are resolvable as is for now. - input_file_path = Path(input_info['file_path']) - if input_file_path.is_file(): - try: - input_image_data = ipu.load_image(str(input_file_path)) - if input_image_data is not None: - logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Loaded input '{required_map_type_from_rule}' for channel '{channel_char}' from: {input_file_path}") - actual_input_dimensions.append(input_image_data.shape[:2]) # (h, w) - input_source_desc = str(input_file_path) - else: - logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Failed to load input '{required_map_type_from_rule}' from {input_file_path}. Attempting fallback.") - except Exception as e: - logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Error loading input '{required_map_type_from_rule}' from {input_file_path}: {e}. Attempting fallback.") - else: - logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Input file path for '{required_map_type_from_rule}' not found: {input_file_path}. Attempting fallback.") - else: - logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: No file path provided for required input '{required_map_type_from_rule}'. Attempting fallback.") - - # Fallback if load failed or no path - if input_image_data is None: - fallback_value = merge_defaults.get(channel_char) - if fallback_value is not None: - try: - # Determine shape and dtype for fallback - h, w = target_dimensions - # Infer channels needed based on typical usage or config (e.g., RGB default, single channel for masks) - # This might need refinement based on how defaults are structured. Assuming uint8 for now. - # If fallback_value is a single number, assume grayscale, else assume color based on length? - num_channels = 1 if isinstance(fallback_value, (int, float)) else len(fallback_value) if isinstance(fallback_value, (list, tuple)) else 3 # Default to 3? Risky. - dtype = np.uint8 # Default dtype, might need adjustment based on context - shape = (h, w) if num_channels == 1 else (h, w, num_channels) - - input_image_data = np.full(shape, fallback_value, dtype=dtype) - logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Using fallback value {fallback_value} for channel '{channel_char}' (Target Dims: {target_dimensions}).") - # Fallback uses target dimensions, don't add to actual_input_dimensions for mismatch check unless required - # actual_input_dimensions.append(target_dimensions) # Optional: Treat fallback as having target dims - status_notes.append(f"Used fallback for {required_map_type_from_rule}") - except Exception as e: - logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Error creating fallback for channel '{channel_char}': {e}. Cannot proceed with merge.") - processing_status = "Failed" - status_notes.append(f"Fallback creation failed for {required_map_type_from_rule}") - break # Break inner loop - else: - logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Missing input '{required_map_type_from_rule}' and no fallback default provided for channel '{channel_char}'. Cannot proceed.") - processing_status = "Failed" - status_notes.append(f"Missing input {required_map_type_from_rule} and no fallback") - break # Break inner loop - - if processing_status == "Failed": break # Exit outer loop if inner loop failed - - # --- Apply Pre-Merge Transformations using Helper --- - if input_image_data is not None: # Only transform if we have data - log_prefix_merge_input = f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}, Input {required_map_type_from_rule}" - input_image_data, _, transform_notes = self._apply_in_memory_transformations( - input_image_data, required_map_type_from_rule, invert_normal_green, file_type_definitions, log_prefix_merge_input - ) - # We don't need the updated map type for the input key, just the transformed data - status_notes.extend(transform_notes) # Add notes to the main task's notes - - # --- End Pre-Merge Transformations --- - - loaded_inputs_for_merge[channel_char] = input_image_data - inputs_used_for_merge[required_map_type_from_rule] = input_source_desc - - if processing_status == "Failed": continue # To finally block - - # Dimension Mismatch Handling - unique_dimensions = set(actual_input_dimensions) - target_merge_dims = target_dimensions # Default - if len(unique_dimensions) > 1: - logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Mismatched dimensions found among loaded inputs: {unique_dimensions}. Applying strategy: {merge_dimension_mismatch_strategy}") - status_notes.append(f"Mismatched input dimensions ({unique_dimensions}), applied {merge_dimension_mismatch_strategy}") - - if merge_dimension_mismatch_strategy == "ERROR_SKIP": - logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Dimension mismatch strategy is ERROR_SKIP. Failing task.") - processing_status = "Failed" - status_notes.append("Dimension mismatch (ERROR_SKIP)") - continue # To finally block - elif merge_dimension_mismatch_strategy == "USE_LARGEST": - max_h = max(h for h, w in unique_dimensions) - max_w = max(w for h, w in unique_dimensions) - target_merge_dims = (max_h, max_w) - elif merge_dimension_mismatch_strategy == "USE_FIRST": - target_merge_dims = actual_input_dimensions[0] if actual_input_dimensions else target_dimensions - else: # Default or unknown: Use largest - logger.warning(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Unknown dimension mismatch strategy '{merge_dimension_mismatch_strategy}'. Defaulting to USE_LARGEST.") - max_h = max(h for h, w in unique_dimensions) - max_w = max(w for h, w in unique_dimensions) - target_merge_dims = (max_h, max_w) - - logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Resizing inputs to target merge dimensions: {target_merge_dims}") - # Resize loaded inputs (not fallbacks unless they were added to actual_input_dimensions) - for channel_char, img_data in loaded_inputs_for_merge.items(): - # Only resize if it was a loaded input that contributed to the mismatch check - if img_data.shape[:2] in unique_dimensions and img_data.shape[:2] != target_merge_dims: - resized_img = ipu.resize_image(img_data, target_merge_dims[1], target_merge_dims[0]) # w, h - if resized_img is None: - logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Failed to resize input for channel '{channel_char}' to {target_merge_dims}. Failing task.") - processing_status = "Failed" - status_notes.append(f"Input resize failed for {channel_char}") - break - loaded_inputs_for_merge[channel_char] = resized_img - if processing_status == "Failed": continue # To finally block - - # Perform Merge (Example: Simple Channel Packing - Adapt as needed) - # This needs to be robust based on merge_rule_config structure - try: - merge_channels_order = merge_rule_config.get('channel_order', 'RGB') # e.g., 'RGB', 'BGR', 'R', 'RGBA' etc. - output_channels = len(merge_channels_order) - h, w = target_merge_dims # Use the potentially adjusted dimensions - - if output_channels == 1: - # Assume the first channel in order is the one to use - channel_char_to_use = merge_channels_order[0] - source_img = loaded_inputs_for_merge[channel_char_to_use] - # Ensure it's grayscale (take first channel if it's multi-channel) - if len(source_img.shape) == 3: - current_image_data = source_img[:, :, 0].copy() - else: - current_image_data = source_img.copy() - elif output_channels > 1: - # Assume uint8 dtype for merged output unless specified otherwise - merged_image = np.zeros((h, w, output_channels), dtype=np.uint8) - for i, channel_char in enumerate(merge_channels_order): - source_img = loaded_inputs_for_merge.get(channel_char) - if source_img is not None: - # Extract the correct channel (e.g., R from RGB, or use grayscale directly) - if len(source_img.shape) == 3: - # Assuming standard RGB/BGR order in source based on channel_char? Needs clear definition. - # Example: If source is RGB and channel_char is 'R', take channel 0. - # This mapping needs to be defined in merge_rule_config or conventions. - # Simple approach: take the first channel if source is color. - merged_image[:, :, i] = source_img[:, :, 0] - else: # Grayscale source - merged_image[:, :, i] = source_img - else: - # This case should have been caught by fallback logic earlier - logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Missing prepared input for channel '{channel_char}' during final merge assembly. This shouldn't happen.") - processing_status = "Failed" - status_notes.append(f"Internal error: Missing input '{channel_char}' at merge assembly") - break - if processing_status != "Failed": - current_image_data = merged_image - else: - logger.error(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Invalid channel_order '{merge_channels_order}' in merge config.") - processing_status = "Failed" - status_notes.append("Invalid merge channel_order") - - if processing_status != "Failed": - logger.info(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Successfully merged inputs into image with shape {current_image_data.shape}") - original_dimensions = (current_image_data.shape[1], current_image_data.shape[0]) # Set original dims after merge - - except Exception as e: - logger.exception(f"Asset '{asset_name_for_log}', Key {task_key}, Proc. Tag {processing_instance_tag}: Error during merge operation: {e}") - processing_status = "Failed" - status_notes.append(f"Merge operation failed: {e}") - continue # To finally block - - else: - logger.error(f"Asset '{asset_name_for_log}', Key {item_key}: Unknown item type in processing loop: {type(item_data)}. Skipping.") - processing_status = "Failed" - status_notes.append("Unknown item type in loop") - continue # To finally block - - # --- C. Common Processing Path --- - if current_image_data is None: - logger.error(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: current_image_data is None before common processing. Status: {processing_status}. Skipping common path.") - # Status should already be Failed or Skipped from A or B - if processing_status not in ["Failed", "Skipped"]: - processing_status = "Failed" - status_notes.append("Internal error: Image data missing before common processing") - continue # To finally block - - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Entering common processing path for '{base_map_type}' (Internal: '{processing_map_type}')") - - # Optional Initial Scaling (In Memory) - # Transformations are now handled earlier by the helper function - image_to_save = None - scaling_applied = False - h_pre_scale, w_pre_scale = current_image_data.shape[:2] - - if initial_scaling_mode == "POT_DOWNSCALE": - pot_w = ipu.get_nearest_power_of_two_downscale(w_pre_scale) - pot_h = ipu.get_nearest_power_of_two_downscale(h_pre_scale) - if (pot_w, pot_h) != (w_pre_scale, h_pre_scale): - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying Initial Scaling: POT Downscale from ({w_pre_scale},{h_pre_scale}) to ({pot_w},{pot_h}).") - # Use aspect ratio preserving POT logic if needed, or simple independent POT per dim? Plan implies simple POT. - # Let's use the more robust aspect-preserving POT downscale logic from ipu if available, otherwise simple resize. - # Simple resize for now based on calculated pot_w, pot_h: - resized_img = ipu.resize_image(current_image_data, pot_w, pot_h, interpolation=cv2.INTER_AREA) - if resized_img is not None: - image_to_save = resized_img - scaling_applied = True - status_notes.append(f"Initial POT Downscale applied ({pot_w}x{pot_h})") - else: - logger.warning(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: POT Downscale resize failed. Using original data for saving.") - image_to_save = current_image_data.copy() - status_notes.append("Initial POT Downscale failed, used original") - else: - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Initial Scaling: POT Downscale - Image already POT or smaller. No scaling needed.") - image_to_save = current_image_data.copy() - elif initial_scaling_mode == "NONE": - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Initial Scaling: Mode is NONE.") - image_to_save = current_image_data.copy() - else: - logger.warning(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unknown INITIAL_SCALING_MODE '{initial_scaling_mode}'. Defaulting to NONE.") - image_to_save = current_image_data.copy() - status_notes.append(f"Unknown initial scale mode '{initial_scaling_mode}', used original") - - if image_to_save is None: # Should not happen if logic above is correct - logger.error(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: image_to_save is None after scaling block. This indicates an error. Failing.") - processing_status = "Failed" - status_notes.append("Internal error: image_to_save is None post-scaling") - continue # To finally block - - # Color Management (Example: BGR to RGB if needed) - # This logic might need refinement based on actual requirements and ipu capabilities - # Assuming save_image_variants expects RGB by default if color conversion is needed. - # Let's assume save_image_variants handles color internally based on format/config for now. - # If specific BGR->RGB conversion is needed *before* saving based on map type: - # if base_map_type in ["COL", "DIFF", "ALB"] and len(image_to_save.shape) == 3 and image_to_save.shape[2] == 3: - # logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Applying BGR to RGB conversion before saving.") - # image_to_save = ipu.convert_bgr_to_rgb(image_to_save) - # status_notes.append("BGR->RGB applied") - - # Call Unified Save Utility - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Calling Unified Save Utility for map type '{base_map_type}' (Internal: '{processing_map_type}')") - - try: - # Prepare arguments for save_image_variants - save_args = { - "source_image_data": image_to_save, - "base_map_type": base_map_type, # Filename-friendly - "source_bit_depth_info": source_bit_depth_info_for_save_util, - "output_filename_pattern_tokens": output_filename_pattern_tokens, - # "config_obj": config, # Removed: save_image_variants doesn't expect this directly - # "asset_name_for_log": asset_name_for_log, # Removed: save_image_variants doesn't expect this - # "processing_instance_tag": processing_instance_tag # Removed: save_image_variants doesn't expect this - } - - # Pass only the expected arguments to save_image_variants - # We need to extract the required args from config and pass them individually - save_args_filtered = { - "source_image_data": image_to_save, - "base_map_type": base_map_type, - "source_bit_depth_info": source_bit_depth_info_for_save_util, - "image_resolutions": config.image_resolutions, - "file_type_defs": config.FILE_TYPE_DEFINITIONS, - "output_format_8bit": config.get_8bit_output_format(), - "output_format_16bit_primary": config.get_16bit_output_formats()[0], - "output_format_16bit_fallback": config.get_16bit_output_formats()[1], - "png_compression_level": config.png_compression_level, - "jpg_quality": config.jpg_quality, - "output_filename_pattern_tokens": output_filename_pattern_tokens, - "output_filename_pattern": config.output_filename_pattern, - } - - saved_files_details_list = save_image_variants(**save_args_filtered) - - if saved_files_details_list: - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unified Save Utility completed successfully. Saved {len(saved_files_details_list)} variants.") - processing_status = "Processed_Via_Save_Utility" - else: - logger.warning(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unified Save Utility returned no saved file details. Check utility logs.") - processing_status = "Processed_Save_Utility_No_Output" # Or potentially "Failed" depending on severity - status_notes.append("Save utility reported no files saved") - - except Exception as e: - logger.exception(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Error calling or executing save_image_variants: {e}") - processing_status = "Failed" - status_notes.append(f"Save utility call failed: {e}") - # saved_files_details_list remains empty - - except Exception as e: - logger.exception(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Unhandled exception during processing loop for item: {e}") - processing_status = "Failed" - status_notes.append(f"Unhandled exception: {e}") - - finally: - # --- Update Context --- - logger.debug(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Updating context. Status: {processing_status}, Notes: {status_notes}") - details_entry = { - 'status': processing_status, - 'map_type': base_map_type, # Final filename-friendly type - 'processing_map_type': processing_map_type, # Final internal type - 'notes': " | ".join(status_notes), - 'saved_files_info': saved_files_details_list, - 'original_dimensions': original_dimensions, # (w, h) - } - if is_merged_task: - details_entry['merge_task_config_output_type'] = merge_task_config_output_type - details_entry['inputs_used_for_merge'] = inputs_used_for_merge - details_entry['source_bit_depths'] = source_bit_depth_info_for_save_util # Store the list used - else: - # Regular map specific details - details_entry['source_file'] = str(source_file_path_regular) if source_file_path_regular else "N/A" - details_entry['original_bit_depth'] = source_bit_depth_info_for_save_util[0] if source_bit_depth_info_for_save_util else None - details_entry['source_file_rule_index'] = item_key # Store original index - - context.processed_maps_details[item_key] = details_entry - logger.info(f"Asset '{asset_name_for_log}', Key {item_key}, Proc. Tag {processing_instance_tag}: Context updated for this item.") - - logger.info(f"Asset '{asset_name_for_log}': Finished individual map processing stage.") - return context - - def _get_suffixed_internal_map_type(self, context: AssetProcessingContext, current_file_rule: FileRule, initial_internal_map_type: str, respect_variant_map_types: List[str]) -> str: - """ - Determines the potentially suffixed internal map type (e.g., MAP_COL-1) - based on occurrences within the asset rule's file list. - """ - final_internal_map_type = initial_internal_map_type # Default - asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" - - - base_map_type_match = re.match(r"(MAP_[A-Z]{3})", initial_internal_map_type) - if not base_map_type_match or not context.asset_rule or not context.asset_rule.files: - return final_internal_map_type # Cannot determine suffix without base type or asset rule files - - true_base_map_type = base_map_type_match.group(1) # This is "MAP_XXX" - - peers_of_same_base_type = [] - for fr_asset in context.asset_rule.files: - fr_asset_item_type = fr_asset.item_type_override or fr_asset.item_type or "UnknownMapType" - fr_asset_base_match = re.match(r"(MAP_[A-Z]{3})", fr_asset_item_type) - if fr_asset_base_match and fr_asset_base_match.group(1) == true_base_map_type: - peers_of_same_base_type.append(fr_asset) - - num_occurrences = len(peers_of_same_base_type) - current_instance_index = 0 # 1-based index - - try: - # Find the index based on the FileRule object itself - current_instance_index = peers_of_same_base_type.index(current_file_rule) + 1 - except ValueError: - # Fallback: try matching by file_path if object identity fails (less reliable) - try: - current_instance_index = [fr.file_path for fr in peers_of_same_base_type].index(current_file_rule.file_path) + 1 - logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Found peer index using file_path fallback.") - except (ValueError, AttributeError): # Catch AttributeError if file_path is None - logger.warning( - f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}' (Initial Type: '{initial_internal_map_type}', Base: '{true_base_map_type}'): " - f"Could not find its own instance in the list of {num_occurrences} peers from asset_rule.files using object identity or path. Suffixing may be incorrect." - ) - # Keep index 0, suffix logic below will handle it - - # Determine Suffix - map_type_for_respect_check = true_base_map_type.replace("MAP_", "") # e.g., "COL" - is_in_respect_list = map_type_for_respect_check in respect_variant_map_types - - suffix_to_append = "" - if num_occurrences > 1: - if current_instance_index > 0: - suffix_to_append = f"-{current_instance_index}" - else: - # If index is still 0 (not found), don't add suffix to avoid ambiguity - logger.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Index for multi-occurrence map type '{true_base_map_type}' (count: {num_occurrences}) not determined. Omitting numeric suffix.") - elif num_occurrences == 1 and is_in_respect_list: - suffix_to_append = "-1" # Add suffix even for single instance if in respect list - - if suffix_to_append: - final_internal_map_type = true_base_map_type + suffix_to_append - # else: final_internal_map_type remains the initial_internal_map_type if no suffix needed - - if final_internal_map_type != initial_internal_map_type: - logger.debug(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Suffixed internal map type determined: '{initial_internal_map_type}' -> '{final_internal_map_type}'") - - return final_internal_map_type \ No newline at end of file diff --git a/processing/pipeline/stages/initial_scaling.py b/processing/pipeline/stages/initial_scaling.py new file mode 100644 index 0000000..6fc27ab --- /dev/null +++ b/processing/pipeline/stages/initial_scaling.py @@ -0,0 +1,83 @@ +import logging +from typing import Tuple + +import cv2 # Assuming cv2 is available for interpolation flags +import numpy as np + +from .base_stage import ProcessingStage +# Import necessary context classes and utils +from ..asset_context import InitialScalingInput, InitialScalingOutput +from ...utils import image_processing_utils as ipu + +log = logging.getLogger(__name__) + +class InitialScalingStage(ProcessingStage): + """ + Applies initial scaling (e.g., Power-of-Two downscaling) to image data + if configured via the InitialScalingInput. + """ + + def execute(self, input_data: InitialScalingInput) -> InitialScalingOutput: + """ + Applies scaling based on input_data.initial_scaling_mode. + """ + log.debug(f"Initial Scaling Stage: Mode '{input_data.initial_scaling_mode}'.") + + image_to_scale = input_data.image_data + original_dims_wh = input_data.original_dimensions + scaling_mode = input_data.initial_scaling_mode + scaling_applied = False + final_image_data = image_to_scale # Default to original if no scaling happens + + if image_to_scale is None or image_to_scale.size == 0: + log.warning("Initial Scaling Stage: Input image data is None or empty. Skipping.") + # Return original (empty) data and indicate no scaling + return InitialScalingOutput( + scaled_image_data=np.array([]), + scaling_applied=False, + final_dimensions=(0, 0) + ) + + if original_dims_wh is None: + log.warning("Initial Scaling Stage: Original dimensions not provided. Using current image shape.") + h_pre_scale, w_pre_scale = image_to_scale.shape[:2] + original_dims_wh = (w_pre_scale, h_pre_scale) + else: + w_pre_scale, h_pre_scale = original_dims_wh + + + if scaling_mode == "POT_DOWNSCALE": + pot_w = ipu.get_nearest_power_of_two_downscale(w_pre_scale) + pot_h = ipu.get_nearest_power_of_two_downscale(h_pre_scale) + + if (pot_w, pot_h) != (w_pre_scale, h_pre_scale): + log.info(f"Initial Scaling: Applying POT Downscale from ({w_pre_scale},{h_pre_scale}) to ({pot_w},{pot_h}).") + # Use INTER_AREA for downscaling generally + resized_img = ipu.resize_image(image_to_scale, pot_w, pot_h, interpolation=cv2.INTER_AREA) + if resized_img is not None: + final_image_data = resized_img + scaling_applied = True + log.debug("Initial Scaling: POT Downscale applied successfully.") + else: + log.warning("Initial Scaling: POT Downscale resize failed. Using original data.") + # final_image_data remains image_to_scale + else: + log.info("Initial Scaling: POT Downscale - Image already POT or smaller. No scaling needed.") + # final_image_data remains image_to_scale + + elif scaling_mode == "NONE": + log.info("Initial Scaling: Mode is NONE. No scaling applied.") + # final_image_data remains image_to_scale + else: + log.warning(f"Initial Scaling: Unknown INITIAL_SCALING_MODE '{scaling_mode}'. Defaulting to NONE.") + # final_image_data remains image_to_scale + + # Determine final dimensions + final_h, final_w = final_image_data.shape[:2] + final_dims_wh = (final_w, final_h) + + return InitialScalingOutput( + scaled_image_data=final_image_data, + scaling_applied=scaling_applied, + final_dimensions=final_dims_wh + ) \ No newline at end of file diff --git a/processing/pipeline/stages/map_merging.py b/processing/pipeline/stages/map_merging.py deleted file mode 100644 index 1c8b180..0000000 --- a/processing/pipeline/stages/map_merging.py +++ /dev/null @@ -1,153 +0,0 @@ -import logging -from pathlib import Path -from typing import Dict, Optional, List, Tuple - - -from .base_stage import ProcessingStage -from ..asset_context import AssetProcessingContext -from rule_structure import FileRule -from utils.path_utils import sanitize_filename - - - -logger = logging.getLogger(__name__) - -class MapMergingStage(ProcessingStage): - """ - Merges individually processed maps based on MAP_MERGE rules. - This stage performs operations like channel packing. - """ - - def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: - """ - Executes the map merging logic. - - Args: - context: The asset processing context. - - Returns: - The updated asset processing context. - """ - asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" - if context.status_flags.get('skip_asset'): - logger.info(f"Skipping map merging for asset {asset_name_for_log} as skip_asset flag is set.") - return context - if not hasattr(context, 'merged_maps_details'): - context.merged_maps_details = {} - - if not hasattr(context, 'merged_image_tasks'): - context.merged_image_tasks = [] - - if not hasattr(context, 'processed_maps_details'): - logger.warning(f"Asset {asset_name_for_log}: 'processed_maps_details' not found in context. Cannot generate merge tasks.") - return context - - logger.info(f"Starting MapMergingStage for asset: {asset_name_for_log}") - - # The core merge rules are in context.config_obj.map_merge_rules - # Each rule in there defines an output_map_type and its inputs. - - # For now, let's assume no merge rules are processed until the logic is fixed. - num_merge_rules_attempted = 0 - # If context.config_obj.map_merge_rules exists, iterate it here. - # The original code iterated context.files_to_process looking for item_type "MAP_MERGE". - # This implies FileRule objects were being used to define merge operations, which is no longer the case - # if 'merge_settings' and 'id' were removed from FileRule. - - # The core merge rules are in context.config_obj.map_merge_rules - # Each rule in there defines an output_map_type and its inputs. - - config_merge_rules = context.config_obj.map_merge_rules - if not config_merge_rules: - logger.info(f"Asset {asset_name_for_log}: No map_merge_rules found in configuration. Skipping map merging.") - return context - - for rule_idx, configured_merge_rule in enumerate(config_merge_rules): - output_map_type = configured_merge_rule.get('output_map_type') - inputs_map_type_to_channel = configured_merge_rule.get('inputs') # e.g. {"R": "NRM", "G": "NRM", "B": "ROUGH"} - default_values = configured_merge_rule.get('defaults', {}) # e.g. {"R": 0.5, "G": 0.5, "B": 0.5} - # output_bit_depth_rule = configured_merge_rule.get('output_bit_depth', 'respect_inputs') # Not used yet - - if not output_map_type or not inputs_map_type_to_channel: - logger.warning(f"Asset {asset_name_for_log}: Invalid configured_merge_rule at index {rule_idx}. Missing 'output_map_type' or 'inputs'. Rule: {configured_merge_rule}") - continue - - num_merge_rules_attempted +=1 - merge_op_id = f"merge_{sanitize_filename(output_map_type)}_{rule_idx}" - logger.info(f"Asset {asset_name_for_log}: Processing configured merge rule for '{output_map_type}' (Op ID: {merge_op_id})") - - input_map_sources_list = [] - source_bit_depths_list = [] - primary_source_dimensions = None - - # Find required input maps from processed_maps_details - required_input_map_types = set(inputs_map_type_to_channel.values()) - - for required_map_type in required_input_map_types: - found_processed_map_details = None - # Iterate through processed_maps_details to find the required map type - for p_key_idx, p_details in context.processed_maps_details.items(): - processed_map_identifier = p_details.get('processing_map_type', p_details.get('map_type')) - - # Check for a match, considering both "MAP_TYPE" and "TYPE" formats - is_match = False - if processed_map_identifier == required_map_type: - is_match = True - elif required_map_type.startswith("MAP_") and processed_map_identifier == required_map_type.split("MAP_")[-1]: - is_match = True - elif not required_map_type.startswith("MAP_") and processed_map_identifier == f"MAP_{required_map_type}": - is_match = True - - # Check if the found map is in a usable status and has a temporary file - valid_input_statuses = ['BasePOTSaved', 'Processed_With_Variants', 'Processed_No_Variants', 'Converted_To_Rough'] # Add other relevant statuses if needed - if is_match and p_details.get('status') in valid_input_statuses and p_details.get('temp_processed_file'): - # Also check if the temp file actually exists on disk - if Path(p_details.get('temp_processed_file')).exists(): - found_processed_map_details = p_details - break # Found a suitable input, move to the next required map type - - if found_processed_map_details: - file_path = found_processed_map_details.get('temp_processed_file') - dimensions = found_processed_map_details.get('base_pot_dimensions') - - # Attempt to get original_bit_depth, log warning if not found - original_bit_depth = found_processed_map_details.get('original_bit_depth') - if original_bit_depth is None: - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: 'original_bit_depth' not found in processed_maps_details for map type '{required_map_type}'. This value is pending IndividualMapProcessingStage refactoring and will be None or a default for now.") - - input_map_sources_list.append({ - 'map_type': required_map_type, - 'file_path': file_path, - 'dimensions': dimensions, - 'original_bit_depth': original_bit_depth - }) - source_bit_depths_list.append(original_bit_depth) - - # Set primary_source_dimensions from the first valid input found - if primary_source_dimensions is None and dimensions: - primary_source_dimensions = dimensions - else: - # If a required map is not found, log a warning but don't fail the task generation. - # The consuming stage will handle missing inputs and fallbacks. - logger.warning(f"Asset {asset_name_for_log}, Merge Op ID {merge_op_id}: Required input map type '{required_map_type}' not found or not in a usable state in context.processed_maps_details. This input will be skipped for task generation.") - - - # Create the merged image task dictionary - merged_task = { - 'output_map_type': output_map_type, - 'input_map_sources': input_map_sources_list, - 'merge_rule_config': configured_merge_rule, - 'source_dimensions': primary_source_dimensions, # Can be None if no inputs were found - 'source_bit_depths': source_bit_depths_list - } - - # Append the task to the context - context.merged_image_tasks.append(merged_task) - logger.info(f"Asset {asset_name_for_log}: Generated merge task for '{output_map_type}' (Op ID: {merge_op_id}). Task details: {merged_task}") - - # Note: We no longer populate context.merged_maps_details with 'Processed' status here, - # as this stage only generates tasks, it doesn't perform the merge or save files. - # The merged_maps_details will be populated by the stage that consumes these tasks. - - logger.info(f"Finished MapMergingStage for asset: {asset_name_for_log}. Merge tasks generated: {len(context.merged_image_tasks)}") - return context \ No newline at end of file diff --git a/processing/pipeline/stages/merged_task_processor.py b/processing/pipeline/stages/merged_task_processor.py new file mode 100644 index 0000000..e9a8eea --- /dev/null +++ b/processing/pipeline/stages/merged_task_processor.py @@ -0,0 +1,304 @@ +import logging +import re +from pathlib import Path +from typing import List, Optional, Tuple, Dict, Any + +import cv2 +import numpy as np + +from .base_stage import ProcessingStage +# Import necessary context classes and utils +from ..asset_context import AssetProcessingContext, MergeTaskDefinition, ProcessedMergedMapData +from ...utils import image_processing_utils as ipu + +log = logging.getLogger(__name__) + +# Helper function (Duplicated from RegularMapProcessorStage - consider moving to utils) +def _apply_in_memory_transformations( + image_data: np.ndarray, + processing_map_type: str, # The internal type of the *input* map + invert_normal_green: bool, + file_type_definitions: Dict[str, Dict], + log_prefix: str +) -> Tuple[np.ndarray, str, List[str]]: + """ + Applies in-memory transformations (Gloss-to-Rough, Normal Green Invert). + Returns potentially transformed image data, potentially updated map type, and notes. + NOTE: This is applied to individual inputs *before* merging. + """ + transformation_notes = [] + current_image_data = image_data # Start with original data + updated_processing_map_type = processing_map_type # Start with original type + + # Gloss-to-Rough + base_map_type_match = re.match(r"(MAP_GLOSS)", processing_map_type) + if base_map_type_match: + log.info(f"{log_prefix}: Applying Gloss-to-Rough conversion to input.") + inversion_succeeded = False + if np.issubdtype(current_image_data.dtype, np.floating): + current_image_data = 1.0 - current_image_data + current_image_data = np.clip(current_image_data, 0.0, 1.0) + log.debug(f"{log_prefix}: Inverted float input data for Gloss->Rough.") + inversion_succeeded = True + elif np.issubdtype(current_image_data.dtype, np.integer): + max_val = np.iinfo(current_image_data.dtype).max + current_image_data = max_val - current_image_data + log.debug(f"{log_prefix}: Inverted integer input data (max_val: {max_val}) for Gloss->Rough.") + inversion_succeeded = True + else: + log.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS input map. Cannot invert.") + transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") + + if inversion_succeeded: + updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") + log.info(f"{log_prefix}: Input map type conceptually updated: '{processing_map_type}' -> '{updated_processing_map_type}'") + transformation_notes.append("Gloss-to-Rough applied to input") + + # Normal Green Invert + base_map_type_match_nrm = re.match(r"(MAP_NRM)", processing_map_type) + if base_map_type_match_nrm and invert_normal_green: + log.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting) to input.") + current_image_data = ipu.invert_normal_map_green_channel(current_image_data) + transformation_notes.append("Normal Green Inverted (Global) applied to input") + + # Return the transformed data, the *original* map type (as it identifies the input source), and notes + return current_image_data, processing_map_type, transformation_notes + + +class MergedTaskProcessorStage(ProcessingStage): + """ + Processes a single merge task defined in the configuration. + Loads inputs, applies transformations to inputs, handles fallbacks/resizing, + performs the merge, and returns the merged data. + """ + + def execute( + self, + context: AssetProcessingContext, + merge_task: MergeTaskDefinition # Specific item passed by orchestrator + ) -> ProcessedMergedMapData: + """ + Processes the given MergeTaskDefinition item. + """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + task_key = merge_task.task_key + task_data = merge_task.task_data + log_prefix = f"Asset '{asset_name_for_log}', Task '{task_key}'" + log.info(f"{log_prefix}: Processing Merge Task.") + + # Initialize output object with default failure state + result = ProcessedMergedMapData( + merged_image_data=np.array([]), # Placeholder + output_map_type=task_data.get('output_map_type', 'UnknownMergeOutput'), + source_bit_depths=[], + final_dimensions=None, + transformations_applied_to_inputs={}, + status="Failed", + error_message="Initialization error" + ) + + try: + # --- Configuration & Task Data --- + config = context.config_obj + file_type_definitions = getattr(config, "FILE_TYPE_DEFINITIONS", {}) + invert_normal_green = config.invert_normal_green_globally + merge_dimension_mismatch_strategy = getattr(config, "MERGE_DIMENSION_MISMATCH_STRATEGY", "USE_LARGEST") + workspace_path = context.workspace_path # Base for resolving relative input paths + + merge_rule_config = task_data.get('merge_rule_config', {}) + input_map_sources_from_task = task_data.get('input_map_sources', {}) # Info about where inputs come from + target_dimensions_hw = task_data.get('source_dimensions') # Expected dimensions (h, w) from previous stage + merge_inputs_config = merge_rule_config.get('inputs', {}) # e.g., {'R': 'MAP_AO', 'G': 'MAP_ROUGH', ...} + merge_defaults = merge_rule_config.get('defaults', {}) # e.g., {'R': 255, 'G': 255, ...} + merge_channels_order = merge_rule_config.get('channel_order', 'RGB') # e.g., 'RGB', 'RGBA' + + if not merge_rule_config or not input_map_sources_from_task or not target_dimensions_hw or not merge_inputs_config: + result.error_message = "Merge task data is incomplete (missing config, sources, dimensions, or input mapping)." + log.error(f"{log_prefix}: {result.error_message}") + return result + + loaded_inputs_for_merge: Dict[str, np.ndarray] = {} # Channel char -> image data + actual_input_dimensions: List[Tuple[int, int]] = [] # List of (h, w) for loaded files + input_source_bit_depths: Dict[str, int] = {} # Channel char -> bit depth + all_transform_notes: Dict[str, List[str]] = {} # Channel char -> list of transform notes + + # --- Load, Transform, and Prepare Inputs --- + log.debug(f"{log_prefix}: Loading and preparing inputs...") + for channel_char, required_map_type_from_rule in merge_inputs_config.items(): + input_info = input_map_sources_from_task.get(required_map_type_from_rule) + input_image_data: Optional[np.ndarray] = None + input_source_desc = f"Fallback for {required_map_type_from_rule}" + input_log_prefix = f"{log_prefix}, Input '{required_map_type_from_rule}' (Channel '{channel_char}')" + channel_transform_notes: List[str] = [] + + # 1. Attempt to load from file path + if input_info and input_info.get('file_path'): + # Paths in merged tasks should be relative to workspace_path + input_file_path_str = input_info['file_path'] + input_file_path = workspace_path / input_file_path_str + if input_file_path.is_file(): + try: + input_image_data = ipu.load_image(str(input_file_path)) + if input_image_data is not None: + log.info(f"{input_log_prefix}: Loaded from: {input_file_path}") + actual_input_dimensions.append(input_image_data.shape[:2]) # (h, w) + input_source_desc = str(input_file_path) + try: + input_source_bit_depths[channel_char] = ipu.get_image_bit_depth(str(input_file_path)) + except Exception: + log.warning(f"{input_log_prefix}: Could not get bit depth for {input_file_path}. Defaulting to 8.") + input_source_bit_depths[channel_char] = 8 + else: + log.warning(f"{input_log_prefix}: Failed to load image from {input_file_path}. Attempting fallback.") + except Exception as e: + log.warning(f"{input_log_prefix}: Error loading image from {input_file_path}: {e}. Attempting fallback.") + else: + log.warning(f"{input_log_prefix}: Input file path not found: {input_file_path}. Attempting fallback.") + else: + log.warning(f"{input_log_prefix}: No file path provided. Attempting fallback.") + + # 2. Apply Fallback if needed + if input_image_data is None: + fallback_value = merge_defaults.get(channel_char) + if fallback_value is not None: + try: + h, w = target_dimensions_hw + # Infer shape/dtype for fallback (simplified) + num_channels = 1 if isinstance(fallback_value, (int, float)) else len(fallback_value) if isinstance(fallback_value, (list, tuple)) else 1 # Default to 1 channel? Needs refinement. + dtype = np.uint8 # Default dtype + shape = (h, w) if num_channels == 1 else (h, w, num_channels) + + input_image_data = np.full(shape, fallback_value, dtype=dtype) + log.warning(f"{input_log_prefix}: Using fallback value {fallback_value} (Target Dims: {target_dimensions_hw}).") + input_source_desc = f"Fallback value {fallback_value}" + input_source_bit_depths[channel_char] = 8 # Assume 8-bit for fallbacks + channel_transform_notes.append(f"Used fallback value {fallback_value}") + except Exception as e: + result.error_message = f"Error creating fallback for channel '{channel_char}': {e}" + log.error(f"{log_prefix}: {result.error_message}") + return result # Critical failure + else: + result.error_message = f"Missing input '{required_map_type_from_rule}' and no fallback default provided for channel '{channel_char}'." + log.error(f"{log_prefix}: {result.error_message}") + return result # Critical failure + + # 3. Apply Transformations to the loaded/fallback input + if input_image_data is not None: + input_image_data, _, transform_notes = _apply_in_memory_transformations( + input_image_data.copy(), # Transform a copy + required_map_type_from_rule, # Use the type required by the rule + invert_normal_green, + file_type_definitions, + input_log_prefix + ) + channel_transform_notes.extend(transform_notes) + else: + # This case should be prevented by fallback logic, but as a safeguard: + result.error_message = f"Input data for channel '{channel_char}' is None after load/fallback attempt." + log.error(f"{log_prefix}: {result.error_message} This indicates an internal logic error.") + return result + + loaded_inputs_for_merge[channel_char] = input_image_data + all_transform_notes[channel_char] = channel_transform_notes + + result.transformations_applied_to_inputs = all_transform_notes # Store notes + + # --- Handle Dimension Mismatches (using transformed inputs) --- + log.debug(f"{log_prefix}: Handling dimension mismatches...") + unique_dimensions = set(actual_input_dimensions) + target_merge_dims_hw = target_dimensions_hw # Default + + if len(unique_dimensions) > 1: + log.warning(f"{log_prefix}: Mismatched dimensions found among loaded inputs: {unique_dimensions}. Applying strategy: {merge_dimension_mismatch_strategy}") + mismatch_note = f"Mismatched input dimensions ({unique_dimensions}), applied {merge_dimension_mismatch_strategy}" + # Add note to all relevant inputs? Or just a general note? Add general for now. + # result.status_notes.append(mismatch_note) # Need a place for general notes + + if merge_dimension_mismatch_strategy == "ERROR_SKIP": + result.error_message = "Dimension mismatch and strategy is ERROR_SKIP." + log.error(f"{log_prefix}: {result.error_message}") + return result + elif merge_dimension_mismatch_strategy == "USE_LARGEST": + max_h = max(h for h, w in unique_dimensions) + max_w = max(w for h, w in unique_dimensions) + target_merge_dims_hw = (max_h, max_w) + elif merge_dimension_mismatch_strategy == "USE_FIRST": + target_merge_dims_hw = actual_input_dimensions[0] if actual_input_dimensions else target_dimensions_hw + # Add other strategies or default to USE_LARGEST + + log.info(f"{log_prefix}: Resizing inputs to target merge dimensions: {target_merge_dims_hw}") + # Resize loaded inputs (not fallbacks unless they were treated as having target dims) + for channel_char, img_data in loaded_inputs_for_merge.items(): + # Only resize if it was a loaded input that contributed to the mismatch check + if img_data.shape[:2] in unique_dimensions and img_data.shape[:2] != target_merge_dims_hw: + resized_img = ipu.resize_image(img_data, target_merge_dims_hw[1], target_merge_dims_hw[0]) # w, h + if resized_img is None: + result.error_message = f"Failed to resize input for channel '{channel_char}' to {target_merge_dims_hw}." + log.error(f"{log_prefix}: {result.error_message}") + return result + loaded_inputs_for_merge[channel_char] = resized_img + log.debug(f"{log_prefix}: Resized input for channel '{channel_char}'.") + + # --- Perform Merge --- + log.debug(f"{log_prefix}: Performing merge operation for channels '{merge_channels_order}'.") + try: + output_channels = len(merge_channels_order) + h, w = target_merge_dims_hw # Use the potentially adjusted dimensions + + # Determine output dtype (e.g., based on inputs or config) - Assume uint8 for now + output_dtype = np.uint8 + + if output_channels == 1: + # Assume the first channel in order is the one to use + channel_char_to_use = merge_channels_order[0] + source_img = loaded_inputs_for_merge[channel_char_to_use] + # Ensure it's grayscale (take first channel if it's multi-channel) + if len(source_img.shape) == 3: + merged_image = source_img[:, :, 0].copy().astype(output_dtype) + else: + merged_image = source_img.copy().astype(output_dtype) + elif output_channels > 1: + merged_image = np.zeros((h, w, output_channels), dtype=output_dtype) + for i, channel_char in enumerate(merge_channels_order): + source_img = loaded_inputs_for_merge.get(channel_char) + if source_img is not None: + # Extract the correct channel (e.g., R from RGB, or use grayscale directly) + if len(source_img.shape) == 3: + # Simple approach: take the first channel if source is color. Needs refinement if specific channel mapping (R->R, G->G etc.) is needed. + merged_image[:, :, i] = source_img[:, :, 0] + else: # Grayscale source + merged_image[:, :, i] = source_img + else: + # This case should have been caught by fallback logic earlier + result.error_message = f"Internal error: Missing prepared input for channel '{channel_char}' during final merge assembly." + log.error(f"{log_prefix}: {result.error_message}") + return result + else: + result.error_message = f"Invalid channel_order '{merge_channels_order}' in merge config." + log.error(f"{log_prefix}: {result.error_message}") + return result + + result.merged_image_data = merged_image + result.final_dimensions = (merged_image.shape[1], merged_image.shape[0]) # w, h + result.source_bit_depths = list(input_source_bit_depths.values()) # Collect bit depths used + log.info(f"{log_prefix}: Successfully merged inputs into image with shape {result.merged_image_data.shape}") + + except Exception as e: + log.exception(f"{log_prefix}: Error during merge operation: {e}") + result.error_message = f"Merge operation failed: {e}" + return result + + # --- Success --- + result.status = "Processed" + result.error_message = None + log.info(f"{log_prefix}: Successfully processed merge task.") + + except Exception as e: + log.exception(f"{log_prefix}: Unhandled exception during processing: {e}") + result.status = "Failed" + result.error_message = f"Unhandled exception: {e}" + # Ensure image data is empty on failure + if result.merged_image_data is None or result.merged_image_data.size == 0: + result.merged_image_data = np.array([]) + + return result \ No newline at end of file diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index c612251..a17a8ba 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -5,10 +5,10 @@ from typing import List, Dict, Optional from .base_stage import ProcessingStage from ..asset_context import AssetProcessingContext -from utils.path_utils import generate_path_from_pattern, sanitize_filename +from utils.path_utils import generate_path_from_pattern, sanitize_filename, get_filename_friendly_map_type # Absolute import from rule_structure import FileRule # Assuming these are needed for type hints if not directly in context - +log = logging.getLogger(__name__) logger = logging.getLogger(__name__) class OutputOrganizationStage(ProcessingStage): @@ -17,6 +17,8 @@ class OutputOrganizationStage(ProcessingStage): """ def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + log.info("OUTPUT_ORG: Stage execution started for asset '%s'", context.asset_rule.asset_name) + log.info(f"OUTPUT_ORG: context.processed_maps_details at start: {context.processed_maps_details}") """ Copies temporary processed and merged files to their final output locations based on path patterns and updates AssetProcessingContext. @@ -45,18 +47,20 @@ class OutputOrganizationStage(ProcessingStage): logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.processed_maps_details)} processed individual map entries.") for processed_map_key, details in context.processed_maps_details.items(): map_status = details.get('status') - base_map_type = details.get('map_type', 'unknown_map_type') # Final filename-friendly type - - # --- Handle maps processed by the Unified Save Utility --- - if map_status == 'Processed_Via_Save_Utility': - saved_files_info = details.get('saved_files_info') - if not saved_files_info or not isinstance(saved_files_info, list): - logger.warning(f"Asset '{asset_name_for_log}': Map key '{processed_map_key}' (status '{map_status}') has missing or invalid 'saved_files_info'. Skipping organization.") - details['status'] = 'Organization Failed (Missing saved_files_info)' - continue - - logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(saved_files_info)} variants for map key '{processed_map_key}' (map type: {base_map_type}) from Save Utility.") + # Retrieve the internal map type first + internal_map_type = details.get('internal_map_type', 'unknown_map_type') + # Convert internal type to filename-friendly type using the helper + file_type_definitions = getattr(context.config_obj, "FILE_TYPE_DEFINITIONS", {}) + base_map_type = get_filename_friendly_map_type(internal_map_type, file_type_definitions) # Final filename-friendly type + # --- Handle maps processed by the SaveVariantsStage (identified by having saved_files_info) --- + saved_files_info = details.get('saved_files_info') # This is a list of dicts from SaveVariantsOutput + + # Check if 'saved_files_info' exists and is a non-empty list. + # This indicates the item was processed by SaveVariantsStage. + if saved_files_info and isinstance(saved_files_info, list) and len(saved_files_info) > 0: + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(saved_files_info)} variants for map key '{processed_map_key}' (map type: {base_map_type}) from SaveVariantsStage.") + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) map_metadata_entry['map_type'] = base_map_type map_metadata_entry.setdefault('variant_paths', {}) # Initialize if not present diff --git a/processing/pipeline/stages/prepare_processing_items.py b/processing/pipeline/stages/prepare_processing_items.py new file mode 100644 index 0000000..adfaf16 --- /dev/null +++ b/processing/pipeline/stages/prepare_processing_items.py @@ -0,0 +1,92 @@ +import logging +from typing import List, Union, Optional + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext, MergeTaskDefinition +from rule_structure import FileRule # Assuming FileRule is imported correctly + +log = logging.getLogger(__name__) + +class PrepareProcessingItemsStage(ProcessingStage): + """ + Identifies and prepares a unified list of items (FileRule, MergeTaskDefinition) + to be processed in subsequent stages. Performs initial validation. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Populates context.processing_items with FileRule and MergeTaskDefinition objects. + """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + log.info(f"Asset '{asset_name_for_log}': Preparing processing items...") + + if context.status_flags.get('skip_asset', False): + log.info(f"Asset '{asset_name_for_log}': Skipping item preparation due to skip_asset flag.") + context.processing_items = [] + return context + + items_to_process: List[Union[FileRule, MergeTaskDefinition]] = [] + preparation_failed = False + + # --- Add regular files --- + if context.files_to_process: + # Validate source path early for regular files + source_path_valid = True + if not context.source_rule or not context.source_rule.input_path: + log.error(f"Asset '{asset_name_for_log}': SourceRule or SourceRule.input_path is not set. Cannot process regular files.") + source_path_valid = False + preparation_failed = True # Mark as failed if source path is missing + context.status_flags['prepare_items_failed_reason'] = "SourceRule.input_path missing" + elif not context.workspace_path or not context.workspace_path.is_dir(): + log.error(f"Asset '{asset_name_for_log}': Workspace path '{context.workspace_path}' is not a valid directory. Cannot process regular files.") + source_path_valid = False + preparation_failed = True # Mark as failed if workspace path is bad + context.status_flags['prepare_items_failed_reason'] = "Workspace path invalid" + + if source_path_valid: + for file_rule in context.files_to_process: + # Basic validation for FileRule itself + if not file_rule.file_path: + log.warning(f"Asset '{asset_name_for_log}': Skipping FileRule with empty file_path.") + continue # Skip this specific rule, but don't fail the whole stage + items_to_process.append(file_rule) + log.debug(f"Asset '{asset_name_for_log}': Added {len(context.files_to_process)} potential FileRule items.") + else: + log.warning(f"Asset '{asset_name_for_log}': Skipping addition of all FileRule items due to invalid source/workspace path.") + + + # --- Add merged tasks --- + merged_tasks_attr_name = 'merged_image_tasks' # Check attribute name if different + if hasattr(context, merged_tasks_attr_name) and getattr(context, merged_tasks_attr_name): + merged_tasks_list = getattr(context, merged_tasks_attr_name) + if isinstance(merged_tasks_list, list): + for task_idx, task_data in enumerate(merged_tasks_list): + if isinstance(task_data, dict): + task_key = f"merged_task_{task_idx}" + # Basic validation for merge task data (can be expanded) + if not task_data.get('output_map_type') or not task_data.get('merge_rule_config'): + log.warning(f"Asset '{asset_name_for_log}', Task Index {task_idx}: Skipping merge task due to missing 'output_map_type' or 'merge_rule_config'.") + continue # Skip this specific task + items_to_process.append(MergeTaskDefinition(task_data=task_data, task_key=task_key)) + else: + log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in '{merged_tasks_attr_name}' is not a dictionary. Skipping.") + log.debug(f"Asset '{asset_name_for_log}': Added {len(merged_tasks_list)} potential MergeTaskDefinition items.") + else: + log.warning(f"Asset '{asset_name_for_log}': Attribute '{merged_tasks_attr_name}' is not a list. Skipping merge tasks.") + + + if not items_to_process: + log.info(f"Asset '{asset_name_for_log}': No valid items found to process after preparation.") + + context.processing_items = items_to_process + context.intermediate_results = {} # Initialize intermediate results storage + + if preparation_failed: + # Set a flag indicating failure during preparation, even if some items might have been added before failure + context.status_flags['prepare_items_failed'] = True + log.error(f"Asset '{asset_name_for_log}': Item preparation failed. Reason: {context.status_flags.get('prepare_items_failed_reason', 'Unknown')}") + # Optionally, clear items if failure means nothing should proceed + # context.processing_items = [] + + log.info(f"Asset '{asset_name_for_log}': Finished preparing items. Found {len(context.processing_items)} valid items.") + return context \ No newline at end of file diff --git a/processing/pipeline/stages/regular_map_processor.py b/processing/pipeline/stages/regular_map_processor.py new file mode 100644 index 0000000..2bb5b52 --- /dev/null +++ b/processing/pipeline/stages/regular_map_processor.py @@ -0,0 +1,257 @@ +import logging +import re +from pathlib import Path +from typing import List, Optional, Tuple, Dict + +import cv2 +import numpy as np + +from .base_stage import ProcessingStage # Assuming base_stage is in the same directory +from ..asset_context import AssetProcessingContext, ProcessedRegularMapData +from rule_structure import FileRule, AssetRule +from processing.utils import image_processing_utils as ipu # Absolute import +from utils.path_utils import get_filename_friendly_map_type # Absolute import + +log = logging.getLogger(__name__) + + +class RegularMapProcessorStage(ProcessingStage): + """ + Processes a single regular texture map defined by a FileRule. + Loads the image, determines map type, applies transformations, + and returns the processed data. + """ + + # --- Helper Methods (Adapted from IndividualMapProcessingStage) --- + + def _get_suffixed_internal_map_type( + self, + asset_rule: Optional[AssetRule], + current_file_rule: FileRule, + initial_internal_map_type: str, + respect_variant_map_types: List[str], + asset_name_for_log: str + ) -> str: + """ + Determines the potentially suffixed internal map type (e.g., MAP_COL-1). + """ + final_internal_map_type = initial_internal_map_type # Default + + base_map_type_match = re.match(r"(MAP_[A-Z]{3})", initial_internal_map_type) + if not base_map_type_match or not asset_rule or not asset_rule.files: + return final_internal_map_type # Cannot determine suffix without base type or asset rule files + + true_base_map_type = base_map_type_match.group(1) # This is "MAP_XXX" + + # Find all FileRules in the asset with the same base map type + peers_of_same_base_type = [] + for fr_asset in asset_rule.files: + fr_asset_item_type = fr_asset.item_type_override or fr_asset.item_type or "UnknownMapType" + fr_asset_base_match = re.match(r"(MAP_[A-Z]{3})", fr_asset_item_type) + if fr_asset_base_match and fr_asset_base_match.group(1) == true_base_map_type: + peers_of_same_base_type.append(fr_asset) + + num_occurrences = len(peers_of_same_base_type) + current_instance_index = 0 # 1-based index + + try: + # Find the index based on the FileRule object itself (requires object identity) + current_instance_index = peers_of_same_base_type.index(current_file_rule) + 1 + except ValueError: + # Fallback: try matching by file_path if object identity fails (less reliable) + try: + current_instance_index = [fr.file_path for fr in peers_of_same_base_type].index(current_file_rule.file_path) + 1 + log.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Found peer index using file_path fallback for suffixing.") + except (ValueError, AttributeError): # Catch AttributeError if file_path is None + log.warning( + f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}' (Initial Type: '{initial_internal_map_type}', Base: '{true_base_map_type}'): " + f"Could not find its own instance in the list of {num_occurrences} peers from asset_rule.files using object identity or path. Suffixing may be incorrect." + ) + # Keep index 0, suffix logic below will handle it + + # Determine Suffix + map_type_for_respect_check = true_base_map_type.replace("MAP_", "") # e.g., "COL" + is_in_respect_list = map_type_for_respect_check in respect_variant_map_types + + suffix_to_append = "" + if num_occurrences > 1: + if current_instance_index > 0: + suffix_to_append = f"-{current_instance_index}" + else: + # If index is still 0 (not found), don't add suffix to avoid ambiguity + log.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Index for multi-occurrence map type '{true_base_map_type}' (count: {num_occurrences}) not determined. Omitting numeric suffix.") + elif num_occurrences == 1 and is_in_respect_list: + suffix_to_append = "-1" # Add suffix even for single instance if in respect list + + if suffix_to_append: + final_internal_map_type = true_base_map_type + suffix_to_append + + if final_internal_map_type != initial_internal_map_type: + log.debug(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Suffixed internal map type determined: '{initial_internal_map_type}' -> '{final_internal_map_type}'") + + return final_internal_map_type + + def _apply_in_memory_transformations( + self, + image_data: np.ndarray, + processing_map_type: str, # The potentially suffixed internal type + invert_normal_green: bool, + file_type_definitions: Dict[str, Dict], + log_prefix: str + ) -> Tuple[np.ndarray, str, List[str]]: + """ + Applies in-memory transformations (Gloss-to-Rough, Normal Green Invert). + Returns potentially transformed image data, potentially updated map type, and notes. + """ + transformation_notes = [] + current_image_data = image_data # Start with original data + updated_processing_map_type = processing_map_type # Start with original type + + # Gloss-to-Rough + # Check if the base type is Gloss (before suffix) + base_map_type_match = re.match(r"(MAP_GLOSS)", processing_map_type) + if base_map_type_match: + log.info(f"{log_prefix}: Applying Gloss-to-Rough conversion.") + inversion_succeeded = False + if np.issubdtype(current_image_data.dtype, np.floating): + current_image_data = 1.0 - current_image_data + current_image_data = np.clip(current_image_data, 0.0, 1.0) + log.debug(f"{log_prefix}: Inverted float image data for Gloss->Rough.") + inversion_succeeded = True + elif np.issubdtype(current_image_data.dtype, np.integer): + max_val = np.iinfo(current_image_data.dtype).max + current_image_data = max_val - current_image_data + log.debug(f"{log_prefix}: Inverted integer image data (max_val: {max_val}) for Gloss->Rough.") + inversion_succeeded = True + else: + log.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS map. Cannot invert.") + transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") + + if inversion_succeeded: + # Update the type string itself (e.g., MAP_GLOSS-1 -> MAP_ROUGH-1) + updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") + log.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") + transformation_notes.append("Gloss-to-Rough applied") + + # Normal Green Invert + # Check if the base type is Normal (before suffix) + base_map_type_match_nrm = re.match(r"(MAP_NRM)", processing_map_type) + if base_map_type_match_nrm and invert_normal_green: + log.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting).") + current_image_data = ipu.invert_normal_map_green_channel(current_image_data) + transformation_notes.append("Normal Green Inverted (Global)") + + return current_image_data, updated_processing_map_type, transformation_notes + + # --- Execute Method --- + + def execute( + self, + context: AssetProcessingContext, + file_rule: FileRule # Specific item passed by orchestrator + ) -> ProcessedRegularMapData: + """ + Processes the given FileRule item. + """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + log_prefix = f"Asset '{asset_name_for_log}', File '{file_rule.file_path}'" + log.info(f"{log_prefix}: Processing Regular Map.") + + # Initialize output object with default failure state + result = ProcessedRegularMapData( + processed_image_data=np.array([]), # Placeholder + final_internal_map_type="Unknown", + source_file_path=Path(file_rule.file_path or "InvalidPath"), + original_bit_depth=None, + original_dimensions=None, + transformations_applied=[], + status="Failed", + error_message="Initialization error" + ) + + try: + # --- Configuration --- + config = context.config_obj + file_type_definitions = getattr(config, "FILE_TYPE_DEFINITIONS", {}) + respect_variant_map_types = getattr(config, "respect_variant_map_types", []) + invert_normal_green = config.invert_normal_green_globally + + # --- Determine Map Type (with suffix) --- + initial_internal_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" + if not initial_internal_map_type or initial_internal_map_type == "UnknownMapType": + result.error_message = "Map type (item_type) not defined in FileRule." + log.error(f"{log_prefix}: {result.error_message}") + return result # Early exit + + processing_map_type = self._get_suffixed_internal_map_type( + context.asset_rule, file_rule, initial_internal_map_type, respect_variant_map_types, asset_name_for_log + ) + result.final_internal_map_type = processing_map_type # Store initial suffixed type + + # --- Find and Load Source File --- + if not file_rule.file_path: # Should have been caught by Prepare stage, but double-check + result.error_message = "FileRule has empty file_path." + log.error(f"{log_prefix}: {result.error_message}") + return result + + source_base_path = context.workspace_path + potential_source_path = source_base_path / file_rule.file_path + source_file_path_found: Optional[Path] = None + + if potential_source_path.is_file(): + source_file_path_found = potential_source_path + log.info(f"{log_prefix}: Found source file: {source_file_path_found}") + else: + # Optional: Add globbing fallback if needed, similar to original stage + log.warning(f"{log_prefix}: Source file not found directly at '{potential_source_path}'. Add globbing if necessary.") + result.error_message = f"Source file not found at '{potential_source_path}'" + log.error(f"{log_prefix}: {result.error_message}") + return result + + result.source_file_path = source_file_path_found # Update result with found path + + # Load image + source_image_data = ipu.load_image(str(source_file_path_found)) + if source_image_data is None: + result.error_message = f"Failed to load image from '{source_file_path_found}'." + log.error(f"{log_prefix}: {result.error_message}") + return result + + original_height, original_width = source_image_data.shape[:2] + result.original_dimensions = (original_width, original_height) + log.debug(f"{log_prefix}: Loaded image {result.original_dimensions[0]}x{result.original_dimensions[1]}.") + + # Get original bit depth + try: + result.original_bit_depth = ipu.get_image_bit_depth(str(source_file_path_found)) + log.info(f"{log_prefix}: Determined source bit depth: {result.original_bit_depth}") + except Exception as e: + log.warning(f"{log_prefix}: Could not determine source bit depth for {source_file_path_found}: {e}. Setting to None.") + result.original_bit_depth = None # Indicate failure to determine + + # --- Apply Transformations --- + transformed_image_data, final_map_type, transform_notes = self._apply_in_memory_transformations( + source_image_data.copy(), # Pass a copy to avoid modifying original load + processing_map_type, + invert_normal_green, + file_type_definitions, + log_prefix + ) + result.processed_image_data = transformed_image_data + result.final_internal_map_type = final_map_type # Update if Gloss->Rough changed it + result.transformations_applied = transform_notes + + # --- Success --- + result.status = "Processed" + result.error_message = None + log.info(f"{log_prefix}: Successfully processed regular map. Final type: '{result.final_internal_map_type}'.") + + except Exception as e: + log.exception(f"{log_prefix}: Unhandled exception during processing: {e}") + result.status = "Failed" + result.error_message = f"Unhandled exception: {e}" + # Ensure image data is empty on failure if it wasn't set + if result.processed_image_data is None or result.processed_image_data.size == 0: + result.processed_image_data = np.array([]) + + return result \ No newline at end of file diff --git a/processing/pipeline/stages/save_variants.py b/processing/pipeline/stages/save_variants.py new file mode 100644 index 0000000..426fb15 --- /dev/null +++ b/processing/pipeline/stages/save_variants.py @@ -0,0 +1,88 @@ +import logging +from typing import List, Dict, Optional # Added Optional + +import numpy as np + +from .base_stage import ProcessingStage +# Import necessary context classes and utils +from ..asset_context import SaveVariantsInput, SaveVariantsOutput +from processing.utils import image_saving_utils as isu # Absolute import +from utils.path_utils import get_filename_friendly_map_type # Absolute import + +log = logging.getLogger(__name__) + + +class SaveVariantsStage(ProcessingStage): + """ + Takes final processed image data and configuration, calls the + save_image_variants utility, and returns the results. + """ + + def execute(self, input_data: SaveVariantsInput) -> SaveVariantsOutput: + """ + Calls isu.save_image_variants with data from input_data. + """ + internal_map_type = input_data.internal_map_type + log_prefix = f"Save Variants Stage (Type: {internal_map_type})" + log.info(f"{log_prefix}: Starting.") + + # Initialize output object with default failure state + result = SaveVariantsOutput( + saved_files_details=[], + status="Failed", + error_message="Initialization error" + ) + + if input_data.image_data is None or input_data.image_data.size == 0: + result.error_message = "Input image data is None or empty." + log.error(f"{log_prefix}: {result.error_message}") + return result + + try: + # --- Prepare arguments for save_image_variants --- + + # Get the filename-friendly base map type using the helper + # This assumes the save utility expects the friendly type. Adjust if needed. + base_map_type_friendly = get_filename_friendly_map_type( + internal_map_type, input_data.file_type_defs + ) + log.debug(f"{log_prefix}: Using filename-friendly base type '{base_map_type_friendly}' for saving.") + + save_args = { + "source_image_data": input_data.image_data, + "base_map_type": base_map_type_friendly, # Use the friendly type + "source_bit_depth_info": input_data.source_bit_depth_info, + "image_resolutions": input_data.image_resolutions, + "file_type_defs": input_data.file_type_defs, + "output_format_8bit": input_data.output_format_8bit, + "output_format_16bit_primary": input_data.output_format_16bit_primary, + "output_format_16bit_fallback": input_data.output_format_16bit_fallback, + "png_compression_level": input_data.png_compression_level, + "jpg_quality": input_data.jpg_quality, + "output_filename_pattern_tokens": input_data.output_filename_pattern_tokens, + "output_filename_pattern": input_data.output_filename_pattern, + } + + log.debug(f"{log_prefix}: Calling save_image_variants utility.") + saved_files_details: List[Dict] = isu.save_image_variants(**save_args) + + if saved_files_details: + log.info(f"{log_prefix}: Save utility completed successfully. Saved {len(saved_files_details)} variants.") + result.saved_files_details = saved_files_details + result.status = "Processed" + result.error_message = None + else: + # This might not be an error, maybe no variants were configured? + log.warning(f"{log_prefix}: Save utility returned no saved file details. This might be expected if no resolutions/formats matched.") + result.saved_files_details = [] + result.status = "Processed (No Output)" # Indicate processing happened but nothing saved + result.error_message = "Save utility reported no files saved (check configuration/resolutions)." + + + except Exception as e: + log.exception(f"{log_prefix}: Error calling or executing save_image_variants: {e}") + result.status = "Failed" + result.error_message = f"Save utility call failed: {e}" + result.saved_files_details = [] # Ensure empty list on error + + return result \ No newline at end of file diff --git a/processing_engine.py b/processing_engine.py index 968cb6c..441284d 100644 --- a/processing_engine.py +++ b/processing_engine.py @@ -7,7 +7,7 @@ import tempfile import logging from pathlib import Path from typing import List, Dict, Tuple, Optional, Set - +log = logging.getLogger(__name__) # Attempt to import image processing libraries try: import cv2 @@ -21,7 +21,6 @@ except ImportError as e: np = None - try: from configuration import Configuration, ConfigurationError from rule_structure import SourceRule, AssetRule, FileRule @@ -50,6 +49,7 @@ if not log.hasHandlers(): from processing.pipeline.orchestrator import PipelineOrchestrator # from processing.pipeline.asset_context import AssetProcessingContext # AssetProcessingContext is used by the orchestrator +# Import stages that will be passed to the orchestrator (outer stages) from processing.pipeline.stages.supplier_determination import SupplierDeterminationStage from processing.pipeline.stages.asset_skip_logic import AssetSkipLogicStage from processing.pipeline.stages.metadata_initialization import MetadataInitializationStage @@ -57,8 +57,8 @@ from processing.pipeline.stages.file_rule_filter import FileRuleFilterStage from processing.pipeline.stages.gloss_to_rough_conversion import GlossToRoughConversionStage from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage from processing.pipeline.stages.normal_map_green_channel import NormalMapGreenChannelStage -from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage -from processing.pipeline.stages.map_merging import MapMergingStage +# Removed: from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage +# Removed: from processing.pipeline.stages.map_merging import MapMergingStage from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage from processing.pipeline.stages.output_organization import OutputOrganizationStage @@ -94,22 +94,33 @@ class ProcessingEngine: self.loaded_data_cache: dict = {} # Cache for loaded/resized data within a single process call # --- Pipeline Orchestrator Setup --- - self.stages = [ + # Define pre-item and post-item processing stages + pre_item_stages = [ SupplierDeterminationStage(), AssetSkipLogicStage(), MetadataInitializationStage(), FileRuleFilterStage(), - GlossToRoughConversionStage(), - AlphaExtractionToMaskStage(), - NormalMapGreenChannelStage(), - IndividualMapProcessingStage(), - MapMergingStage(), - MetadataFinalizationAndSaveStage(), - OutputOrganizationStage(), + GlossToRoughConversionStage(), # Assumed to run on context.files_to_process if needed by old logic + AlphaExtractionToMaskStage(), # Same assumption as above + NormalMapGreenChannelStage(), # Same assumption as above + # Note: The new RegularMapProcessorStage and MergedTaskProcessorStage handle their own transformations + # on the specific items they process. These global transformation stages might need review + # if they were intended to operate on a broader scope or if their logic is now fully + # encapsulated in the new item-specific processor stages. For now, keeping them as pre-stages. ] + + post_item_stages = [ + OutputOrganizationStage(), # Must run after all items are saved to temp + MetadataFinalizationAndSaveStage(),# Must run after output organization to have final paths + ] + try: - self.pipeline_orchestrator = PipelineOrchestrator(config_obj=self.config_obj, stages=self.stages) - log.info("PipelineOrchestrator initialized successfully in ProcessingEngine.") + self.pipeline_orchestrator = PipelineOrchestrator( + config_obj=self.config_obj, + pre_item_stages=pre_item_stages, + post_item_stages=post_item_stages + ) + log.info("PipelineOrchestrator initialized successfully in ProcessingEngine with pre and post stages.") except Exception as e: log.error(f"Failed to initialize PipelineOrchestrator in ProcessingEngine: {e}", exc_info=True) self.pipeline_orchestrator = None # Ensure it's None if init fails diff --git a/utils/path_utils.py b/utils/path_utils.py index b67929f..5b86194 100644 --- a/utils/path_utils.py +++ b/utils/path_utils.py @@ -163,6 +163,39 @@ def sanitize_filename(name: str) -> str: if not name: name = "invalid_name" return name +def get_filename_friendly_map_type(internal_map_type: str, file_type_definitions: Optional[Dict[str, Dict]]) -> str: + """Derives a filename-friendly map type from the internal map type.""" + filename_friendly_map_type = internal_map_type # Fallback + if not file_type_definitions or not isinstance(file_type_definitions, dict) or not file_type_definitions: + logger.warning(f"Filename-friendly lookup: FILE_TYPE_DEFINITIONS not available or invalid. Falling back to internal type: {internal_map_type}") + return filename_friendly_map_type + + base_map_key_val = None + suffix_part = "" + # Sort keys by length descending to match longest prefix first (e.g., MAP_ROUGHNESS before MAP_ROUGH) + sorted_known_base_keys = sorted(list(file_type_definitions.keys()), key=len, reverse=True) + + for known_key in sorted_known_base_keys: + if internal_map_type.startswith(known_key): + base_map_key_val = known_key + suffix_part = internal_map_type[len(known_key):] + break + + if base_map_key_val: + definition = file_type_definitions.get(base_map_key_val) + if definition and isinstance(definition, dict): + standard_type_alias = definition.get("standard_type") + if standard_type_alias and isinstance(standard_type_alias, str) and standard_type_alias.strip(): + filename_friendly_map_type = standard_type_alias.strip() + suffix_part + logger.debug(f"Filename-friendly lookup: Transformed '{internal_map_type}' -> '{filename_friendly_map_type}'") + else: + logger.warning(f"Filename-friendly lookup: Standard type alias for '{base_map_key_val}' is missing or invalid. Falling back.") + else: + logger.warning(f"Filename-friendly lookup: No valid definition for '{base_map_key_val}'. Falling back.") + else: + logger.warning(f"Filename-friendly lookup: Could not parse base key from '{internal_map_type}'. Falling back.") + + return filename_friendly_map_type # --- Basic Unit Tests --- if __name__ == "__main__": print("Running basic tests for path_utils.generate_path_from_pattern...") -- 2.47.2 From 528d9be47f6315c39bd90c5c2596469ca41854f2 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Mon, 12 May 2025 23:03:26 +0200 Subject: [PATCH 09/16] Closer to feature parity - missing merge still --- processing/pipeline/asset_context.py | 1 + processing/pipeline/orchestrator.py | 27 ++++++++- .../pipeline/stages/output_organization.py | 57 ++----------------- .../stages/prepare_processing_items.py | 8 ++- processing/pipeline/stages/save_variants.py | 1 + processing/utils/image_saving_utils.py | 22 +++++-- 6 files changed, 54 insertions(+), 62 deletions(-) diff --git a/processing/pipeline/asset_context.py b/processing/pipeline/asset_context.py index b195927..f6363e5 100644 --- a/processing/pipeline/asset_context.py +++ b/processing/pipeline/asset_context.py @@ -72,6 +72,7 @@ class SaveVariantsInput: png_compression_level: int jpg_quality: int output_filename_pattern: str + resolution_threshold_for_jpg: Optional[int] # Added for JPG conversion # Output for SaveVariantsStage @dataclass diff --git a/processing/pipeline/orchestrator.py b/processing/pipeline/orchestrator.py index 6765506..5cdd88b 100644 --- a/processing/pipeline/orchestrator.py +++ b/processing/pipeline/orchestrator.py @@ -200,13 +200,27 @@ class PipelineOrchestrator: try: # 1. Process (Load/Merge + Transform) if isinstance(item, FileRule): + if item.item_type == 'EXTRA': + log.debug(f"{item_log_prefix}: Skipping image processing for EXTRA FileRule '{item.file_path}'.") + # Add a basic entry to processed_maps_details to acknowledge it was seen + context.processed_maps_details[item.file_path] = { + "status": "Skipped (EXTRA file)", + "internal_map_type": "EXTRA", + "source_file": str(item.file_path) + } + continue # Skip to the next item item_key = item.file_path # Use file_path string as key log.debug(f"{item_log_prefix}: Processing FileRule '{item.file_path}'...") processed_data = self._regular_processor_stage.execute(context, item) elif isinstance(item, MergeTaskDefinition): item_key = item.task_key # Use task_key string as key - log.debug(f"{item_log_prefix}: Processing MergeTask '{item_key}'...") + log.info(f"{item_log_prefix}: Executing MergedTaskProcessorStage for MergeTask '{item_key}'...") # Log call processed_data = self._merged_processor_stage.execute(context, item) + # Log status/error from merge processor + if processed_data: + log.info(f"{item_log_prefix}: MergedTaskProcessorStage result - Status: {processed_data.status}, Error: {processed_data.error_message}") + else: + log.warning(f"{item_log_prefix}: MergedTaskProcessorStage returned None for MergeTask '{item_key}'.") else: log.warning(f"{item_log_prefix}: Unknown item type '{type(item)}'. Skipping.") item_key = f"unknown_item_{item_index}" @@ -230,6 +244,8 @@ class PipelineOrchestrator: # 2. Scale (Optional) scaling_mode = getattr(context.config_obj, "INITIAL_SCALING_MODE", "NONE") if scaling_mode != "NONE" and current_image_data is not None and current_image_data.size > 0: + if isinstance(item, MergeTaskDefinition): # Log scaling call for merge tasks + log.info(f"{item_log_prefix}: Calling InitialScalingStage for MergeTask '{item_key}' (Mode: {scaling_mode})...") log.debug(f"{item_log_prefix}: Applying initial scaling (Mode: {scaling_mode})...") scale_input = InitialScalingInput( image_data=current_image_data, @@ -255,6 +271,8 @@ class PipelineOrchestrator: # Don't mark as asset error, just skip this item's saving continue # Next item + if isinstance(item, MergeTaskDefinition): # Log save call for merge tasks + log.info(f"{item_log_prefix}: Calling SaveVariantsStage for MergeTask '{item_key}'...") log.debug(f"{item_log_prefix}: Saving variants...") # Prepare input for save stage internal_map_type = processed_data.final_internal_map_type if isinstance(processed_data, ProcessedRegularMapData) else processed_data.output_map_type @@ -282,8 +300,12 @@ class PipelineOrchestrator: png_compression_level=context.config_obj.png_compression_level, jpg_quality=context.config_obj.jpg_quality, output_filename_pattern=context.config_obj.output_filename_pattern, + resolution_threshold_for_jpg=getattr(context.config_obj, "RESOLUTION_THRESHOLD_FOR_JPG", None) # Added ) saved_data = self._save_stage.execute(save_input) + # Log saved_data for merge tasks + if isinstance(item, MergeTaskDefinition): + log.info(f"{item_log_prefix}: SaveVariantsStage result for MergeTask '{item_key}' - Status: {saved_data.status if saved_data else 'N/A'}, Saved Files: {len(saved_data.saved_files_details) if saved_data else 0}") # Check save status and finalize item result if saved_data and saved_data.status.startswith("Processed"): @@ -300,6 +322,9 @@ class PipelineOrchestrator: # Add source file if regular map "source_file": str(processed_data.source_file_path) if isinstance(processed_data, ProcessedRegularMapData) else None, } + # Log final details addition for merge tasks + if isinstance(item, MergeTaskDefinition): + log.info(f"{item_log_prefix}: Adding final details to context.processed_maps_details for MergeTask '{item_key}'. Details: {final_details}") context.processed_maps_details[item_key] = final_details else: error_msg = saved_data.error_message if saved_data else "Save stage returned None" diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index a17a8ba..205fda0 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -278,59 +278,10 @@ class OutputOrganizationStage(ProcessingStage): else: logger.debug(f"Asset '{asset_name_for_log}': No processed individual maps to organize.") - # B. Organize Merged Maps - if context.merged_maps_details: - logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.merged_maps_details)} merged map(s).") - for merge_op_id, details in context.merged_maps_details.items(): # Use merge_op_id - if details.get('status') != 'Processed' or not details.get('temp_merged_file'): - logger.debug(f"Asset '{asset_name_for_log}': Skipping merge op id '{merge_op_id}' due to status '{details.get('status')}' or missing temp file.") - continue - - temp_file_path = Path(details['temp_merged_file']) - map_type = details.get('map_type', 'unknown_merged_map') # This is the output_map_type of the merge rule - # Merged maps might not have a simple 'resolution' token like individual maps. - # We'll use a placeholder or derive if possible. - resolution_str = details.get('merged_resolution_name', 'mergedRes') - - - token_data_merged = { - "assetname": asset_name_for_log, - "supplier": context.effective_supplier or "DefaultSupplier", - "maptype": map_type, - "resolution": resolution_str, - "ext": temp_file_path.suffix.lstrip('.'), - "incrementingvalue": getattr(context, 'incrementing_value', None), - "sha5": getattr(context, 'sha5_value', None) - } - token_data_merged_cleaned = {k: v for k, v in token_data_merged.items() if v is not None} - - output_filename_merged = generate_path_from_pattern(output_filename_pattern_config, token_data_merged_cleaned) - - try: - relative_dir_path_str_merged = generate_path_from_pattern( - pattern_string=output_dir_pattern, - token_data=token_data_merged_cleaned - ) - final_path_merged = Path(context.output_base_path) / Path(relative_dir_path_str_merged) / Path(output_filename_merged) - final_path_merged.parent.mkdir(parents=True, exist_ok=True) - - if final_path_merged.exists() and not overwrite_existing: - logger.info(f"Asset '{asset_name_for_log}': Output file {final_path_merged} exists and overwrite is disabled. Skipping copy for merged map.") - else: - shutil.copy2(temp_file_path, final_path_merged) - logger.info(f"Asset '{asset_name_for_log}': Copied merged map {temp_file_path} to {final_path_merged}") - final_output_files.append(str(final_path_merged)) - - context.merged_maps_details[merge_op_id]['final_output_path'] = str(final_path_merged) - context.merged_maps_details[merge_op_id]['status'] = 'Organized' - - except Exception as e: - logger.error(f"Asset '{asset_name_for_log}': Failed to copy merged map {temp_file_path} to destination for merge op id '{merge_op_id}'. Error: {e}", exc_info=True) - context.status_flags['output_organization_error'] = True - context.asset_metadata['status'] = "Failed (Output Organization Error)" - context.merged_maps_details[merge_op_id]['status'] = 'Organization Failed' - else: - logger.debug(f"Asset '{asset_name_for_log}': No merged maps to organize.") + # B. Organize Merged Maps (OBSOLETE BLOCK - Merged maps are handled by the main loop processing context.processed_maps_details) + # The log "No merged maps to organize" will no longer appear from here. + # If merged maps are not appearing, the issue is likely that they are not being added + # to context.processed_maps_details with 'saved_files_info' by the orchestrator/SaveVariantsStage. # C. Organize Extra Files (e.g., previews, text files) logger.debug(f"Asset '{asset_name_for_log}': Checking for EXTRA files to organize.") diff --git a/processing/pipeline/stages/prepare_processing_items.py b/processing/pipeline/stages/prepare_processing_items.py index adfaf16..cee6c2e 100644 --- a/processing/pipeline/stages/prepare_processing_items.py +++ b/processing/pipeline/stages/prepare_processing_items.py @@ -65,11 +65,13 @@ class PrepareProcessingItemsStage(ProcessingStage): task_key = f"merged_task_{task_idx}" # Basic validation for merge task data (can be expanded) if not task_data.get('output_map_type') or not task_data.get('merge_rule_config'): - log.warning(f"Asset '{asset_name_for_log}', Task Index {task_idx}: Skipping merge task due to missing 'output_map_type' or 'merge_rule_config'.") + log.warning(f"Asset '{asset_name_for_log}', Task Index {task_idx}: Skipping merge task due to missing 'output_map_type' or 'merge_rule_config'. Task data: {task_data}") continue # Skip this specific task - items_to_process.append(MergeTaskDefinition(task_data=task_data, task_key=task_key)) + merge_def = MergeTaskDefinition(task_data=task_data, task_key=task_key) + log.info(f"Asset '{asset_name_for_log}': Identified and adding Merge Task: Key='{merge_def.task_key}', OutputType='{task_data.get('output_map_type', 'N/A')}'") + items_to_process.append(merge_def) else: - log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in '{merged_tasks_attr_name}' is not a dictionary. Skipping.") + log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in '{merged_tasks_attr_name}' is not a dictionary. Skipping. Item: {task_data}") log.debug(f"Asset '{asset_name_for_log}': Added {len(merged_tasks_list)} potential MergeTaskDefinition items.") else: log.warning(f"Asset '{asset_name_for_log}': Attribute '{merged_tasks_attr_name}' is not a list. Skipping merge tasks.") diff --git a/processing/pipeline/stages/save_variants.py b/processing/pipeline/stages/save_variants.py index 426fb15..482b1cc 100644 --- a/processing/pipeline/stages/save_variants.py +++ b/processing/pipeline/stages/save_variants.py @@ -61,6 +61,7 @@ class SaveVariantsStage(ProcessingStage): "jpg_quality": input_data.jpg_quality, "output_filename_pattern_tokens": input_data.output_filename_pattern_tokens, "output_filename_pattern": input_data.output_filename_pattern, + "resolution_threshold_for_jpg": input_data.resolution_threshold_for_jpg, # Added } log.debug(f"{log_prefix}: Calling save_image_variants utility.") diff --git a/processing/utils/image_saving_utils.py b/processing/utils/image_saving_utils.py index 66591a8..01ec7a1 100644 --- a/processing/utils/image_saving_utils.py +++ b/processing/utils/image_saving_utils.py @@ -33,6 +33,7 @@ def save_image_variants( jpg_quality: int, output_filename_pattern_tokens: Dict[str, Any], # Must include 'output_base_directory': Path and 'asset_name': str output_filename_pattern: str, + resolution_threshold_for_jpg: Optional[int] = None, # Added # Consider adding ipu or relevant parts of it if not importing globally ) -> List[Dict[str, Any]]: """ @@ -113,8 +114,10 @@ def save_image_variants( else: logger.error(f"Unsupported target bit depth: {target_bit_depth}. Defaulting to 8-bit format.") output_ext = output_format_8bit.lstrip('.').lower() + + current_output_ext = output_ext # Store the initial extension based on bit depth - logger.info(f"SaveImageVariants: Determined target bit depth: {target_bit_depth}, Output format: {output_ext} for map type {base_map_type}") + logger.info(f"SaveImageVariants: Determined target bit depth: {target_bit_depth}, Initial output format: {current_output_ext} for map type {base_map_type}") # 4. Generate and Save Resolution Variants # Sort resolutions by max dimension descending @@ -167,7 +170,16 @@ def save_image_variants( current_tokens = output_filename_pattern_tokens.copy() current_tokens['maptype'] = base_map_type current_tokens['resolution'] = res_key - current_tokens['ext'] = output_ext + + # Determine final extension for this variant, considering JPG threshold + final_variant_ext = current_output_ext + if target_bit_depth == 8 and resolution_threshold_for_jpg is not None and \ + max(target_w_res, target_h_res) > resolution_threshold_for_jpg and \ + current_output_ext == 'png': # Only convert if current 8-bit is PNG + final_variant_ext = 'jpg' + logger.info(f"SaveImageVariants: Overriding 8-bit PNG to JPG for {base_map_type} {res_key} due to resolution {max(target_w_res, target_h_res)}px > threshold {resolution_threshold_for_jpg}px.") + + current_tokens['ext'] = final_variant_ext try: # Replace placeholders in the pattern @@ -196,11 +208,11 @@ def save_image_variants( # Prepare Save Parameters save_params_cv2 = [] - if output_ext == 'jpg': + if final_variant_ext == 'jpg': # Check against final_variant_ext save_params_cv2.append(cv2.IMWRITE_JPEG_QUALITY) save_params_cv2.append(jpg_quality) logger.debug(f"SaveImageVariants: Using JPG quality: {jpg_quality} for {base_map_type} {res_key}") - elif output_ext == 'png': + elif final_variant_ext == 'png': # Check against final_variant_ext save_params_cv2.append(cv2.IMWRITE_PNG_COMPRESSION) save_params_cv2.append(png_compression_level) logger.debug(f"SaveImageVariants: Using PNG compression level: {png_compression_level} for {base_map_type} {res_key}") @@ -237,7 +249,7 @@ def save_image_variants( saved_file_details.append({ 'path': str(output_path), 'resolution_key': res_key, - 'format': output_ext, + 'format': final_variant_ext, # Log the actual saved format 'bit_depth': target_bit_depth, 'dimensions': (target_w_res, target_h_res) }) -- 2.47.2 From c2ad299ce21ca463ae33743cc33a7fbf98c8db7a Mon Sep 17 00:00:00 2001 From: Rusfort Date: Mon, 12 May 2025 23:32:35 +0200 Subject: [PATCH 10/16] Various Attempted fixes --- configuration.py | 9 ++++++- processing/pipeline/orchestrator.py | 4 ++- .../stages/prepare_processing_items.py | 25 +++++++++++------ .../pipeline/stages/supplier_determination.py | 7 +++++ processing/utils/image_saving_utils.py | 27 ++++++++++++++++--- 5 files changed, 59 insertions(+), 13 deletions(-) diff --git a/configuration.py b/configuration.py index dec107d..5e3dd72 100644 --- a/configuration.py +++ b/configuration.py @@ -398,7 +398,14 @@ class Configuration: @property def resolution_threshold_for_jpg(self) -> int: """Gets the pixel dimension threshold for using JPG for 8-bit images.""" - return self._core_settings.get('RESOLUTION_THRESHOLD_FOR_JPG', 4096) + value = self._core_settings.get('RESOLUTION_THRESHOLD_FOR_JPG', 4096) + log.info(f"CONFIGURATION_DEBUG: resolution_threshold_for_jpg property returning: {value} (type: {type(value)})") + # Ensure it's an int, as downstream might expect it. + # The .get() default is an int, but if the JSON had null or a string, it might be different. + if not isinstance(value, int): + log.warning(f"CONFIGURATION_DEBUG: RESOLUTION_THRESHOLD_FOR_JPG was not an int, got {type(value)}. Defaulting to 4096.") + return 4096 + return value @property def respect_variant_map_types(self) -> list: diff --git a/processing/pipeline/orchestrator.py b/processing/pipeline/orchestrator.py index 5cdd88b..993dcc9 100644 --- a/processing/pipeline/orchestrator.py +++ b/processing/pipeline/orchestrator.py @@ -286,6 +286,8 @@ class PipelineOrchestrator: 'supplier': context.effective_supplier or 'UnknownSupplier', } + # Log the value being read for the threshold before creating the input object + log.info(f"ORCHESTRATOR_DEBUG: Reading RESOLUTION_THRESHOLD_FOR_JPG from config for SaveVariantsInput: {getattr(context.config_obj, 'RESOLUTION_THRESHOLD_FOR_JPG', None)}") save_input = SaveVariantsInput( image_data=current_image_data, # Use potentially scaled data internal_map_type=internal_map_type, @@ -300,7 +302,7 @@ class PipelineOrchestrator: png_compression_level=context.config_obj.png_compression_level, jpg_quality=context.config_obj.jpg_quality, output_filename_pattern=context.config_obj.output_filename_pattern, - resolution_threshold_for_jpg=getattr(context.config_obj, "RESOLUTION_THRESHOLD_FOR_JPG", None) # Added + resolution_threshold_for_jpg=getattr(context.config_obj, "resolution_threshold_for_jpg", None) # Corrected case ) saved_data = self._save_stage.execute(save_input) # Log saved_data for merge tasks diff --git a/processing/pipeline/stages/prepare_processing_items.py b/processing/pipeline/stages/prepare_processing_items.py index cee6c2e..16f068e 100644 --- a/processing/pipeline/stages/prepare_processing_items.py +++ b/processing/pipeline/stages/prepare_processing_items.py @@ -56,10 +56,15 @@ class PrepareProcessingItemsStage(ProcessingStage): # --- Add merged tasks --- - merged_tasks_attr_name = 'merged_image_tasks' # Check attribute name if different - if hasattr(context, merged_tasks_attr_name) and getattr(context, merged_tasks_attr_name): - merged_tasks_list = getattr(context, merged_tasks_attr_name) - if isinstance(merged_tasks_list, list): + # --- Add merged tasks from global configuration --- + # merged_image_tasks are expected to be loaded into context.config_obj + # by the Configuration class from app_settings.json. + + merged_tasks_list = getattr(context.config_obj, 'merged_image_tasks', None) + + if merged_tasks_list and isinstance(merged_tasks_list, list): + log.debug(f"Asset '{asset_name_for_log}': Found {len(merged_tasks_list)} merge tasks in global config.") + for task_idx, task_data in enumerate(merged_tasks_list): for task_idx, task_data in enumerate(merged_tasks_list): if isinstance(task_data, dict): task_key = f"merged_task_{task_idx}" @@ -71,10 +76,14 @@ class PrepareProcessingItemsStage(ProcessingStage): log.info(f"Asset '{asset_name_for_log}': Identified and adding Merge Task: Key='{merge_def.task_key}', OutputType='{task_data.get('output_map_type', 'N/A')}'") items_to_process.append(merge_def) else: - log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in '{merged_tasks_attr_name}' is not a dictionary. Skipping. Item: {task_data}") - log.debug(f"Asset '{asset_name_for_log}': Added {len(merged_tasks_list)} potential MergeTaskDefinition items.") - else: - log.warning(f"Asset '{asset_name_for_log}': Attribute '{merged_tasks_attr_name}' is not a list. Skipping merge tasks.") + log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in config_obj.merged_image_tasks is not a dictionary. Skipping. Item: {task_data}") + # The log for "Added X potential MergeTaskDefinition items" will be covered by the final log. + elif merged_tasks_list is None: + log.debug(f"Asset '{asset_name_for_log}': 'merged_image_tasks' not found in config_obj. No global merge tasks to add.") + elif not isinstance(merged_tasks_list, list): + log.warning(f"Asset '{asset_name_for_log}': 'merged_image_tasks' in config_obj is not a list. Skipping global merge tasks. Type: {type(merged_tasks_list)}") + else: # Empty list + log.debug(f"Asset '{asset_name_for_log}': 'merged_image_tasks' in config_obj is empty. No global merge tasks to add.") if not items_to_process: diff --git a/processing/pipeline/stages/supplier_determination.py b/processing/pipeline/stages/supplier_determination.py index 27a1a5e..15f5e5d 100644 --- a/processing/pipeline/stages/supplier_determination.py +++ b/processing/pipeline/stages/supplier_determination.py @@ -55,6 +55,13 @@ class SupplierDeterminationStage(ProcessingStage): # Optionally clear the error flag if previously set and now resolved. if 'supplier_error' in context.status_flags: del context.status_flags['supplier_error'] + + # merged_image_tasks are loaded from app_settings.json into Configuration object, + # not from supplier-specific presets. + # Ensure the attribute exists on context for PrepareProcessingItemsStage, + # which will get it from context.config_obj. + if not hasattr(context, 'merged_image_tasks'): + context.merged_image_tasks = [] return context \ No newline at end of file diff --git a/processing/utils/image_saving_utils.py b/processing/utils/image_saving_utils.py index 01ec7a1..9147fc5 100644 --- a/processing/utils/image_saving_utils.py +++ b/processing/utils/image_saving_utils.py @@ -80,6 +80,7 @@ def save_image_variants( logger.debug(f"SaveImageVariants: Resolutions: {image_resolutions}, File Type Defs: {file_type_defs.keys()}, Output Formats: 8bit={output_format_8bit}, 16bit_pri={output_format_16bit_primary}, 16bit_fall={output_format_16bit_fallback}") logger.debug(f"SaveImageVariants: PNG Comp: {png_compression_level}, JPG Qual: {jpg_quality}") logger.debug(f"SaveImageVariants: Output Tokens: {output_filename_pattern_tokens}, Output Pattern: {output_filename_pattern}") + logger.debug(f"SaveImageVariants: Received resolution_threshold_for_jpg: {resolution_threshold_for_jpg}") # Log received threshold # 2. Determine Target Bit Depth target_bit_depth = 8 # Default @@ -173,9 +174,29 @@ def save_image_variants( # Determine final extension for this variant, considering JPG threshold final_variant_ext = current_output_ext - if target_bit_depth == 8 and resolution_threshold_for_jpg is not None and \ - max(target_w_res, target_h_res) > resolution_threshold_for_jpg and \ - current_output_ext == 'png': # Only convert if current 8-bit is PNG + + # --- Start JPG Threshold Logging --- + logger.debug(f"SaveImageVariants: JPG Threshold Check for {base_map_type} {res_key}:") + logger.debug(f" - target_bit_depth: {target_bit_depth}") + logger.debug(f" - resolution_threshold_for_jpg: {resolution_threshold_for_jpg}") + logger.debug(f" - target_w_res: {target_w_res}, target_h_res: {target_h_res}") + logger.debug(f" - max(target_w_res, target_h_res): {max(target_w_res, target_h_res)}") + logger.debug(f" - current_output_ext: {current_output_ext}") + + cond_bit_depth = target_bit_depth == 8 + cond_threshold_not_none = resolution_threshold_for_jpg is not None + cond_res_exceeded = False + if cond_threshold_not_none: # Avoid comparison if threshold is None + cond_res_exceeded = max(target_w_res, target_h_res) > resolution_threshold_for_jpg + cond_is_png = current_output_ext == 'png' + + logger.debug(f" - Condition (target_bit_depth == 8): {cond_bit_depth}") + logger.debug(f" - Condition (resolution_threshold_for_jpg is not None): {cond_threshold_not_none}") + logger.debug(f" - Condition (max(res) > threshold): {cond_res_exceeded}") + logger.debug(f" - Condition (current_output_ext == 'png'): {cond_is_png}") + # --- End JPG Threshold Logging --- + + if cond_bit_depth and cond_threshold_not_none and cond_res_exceeded and cond_is_png: final_variant_ext = 'jpg' logger.info(f"SaveImageVariants: Overriding 8-bit PNG to JPG for {base_map_type} {res_key} due to resolution {max(target_w_res, target_h_res)}px > threshold {resolution_threshold_for_jpg}px.") -- 2.47.2 From b441174076194e88bd0cba2a741fa207a33317ec Mon Sep 17 00:00:00 2001 From: Rusfort Date: Tue, 13 May 2025 02:28:42 +0200 Subject: [PATCH 11/16] Processing Documentation Update --- .../05_Processing_Pipeline.md | 126 +++++++++++------- 1 file changed, 76 insertions(+), 50 deletions(-) diff --git a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md index ccff23a..95e81ac 100644 --- a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md +++ b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md @@ -1,69 +1,95 @@ # Developer Guide: Processing Pipeline -This document details the step-by-step technical process executed by the asset processing pipeline, which is initiated by the `ProcessingEngine` class (`processing_engine.py`) and orchestrated by the `PipelineOrchestrator` (`processing/pipeline/orchestrator.py`). +This document details the step-by-step technical process executed by the asset processing pipeline, which is initiated by the [`ProcessingEngine`](processing_engine.py:73) class (`processing_engine.py`) and orchestrated by the [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) (`processing/pipeline/orchestrator.py`). -The `ProcessingEngine.process()` method serves as the main entry point. It initializes a `PipelineOrchestrator` instance, providing it with the application's `Configuration` object and a predefined list of processing stages. The `PipelineOrchestrator.process_source_rule()` method then manages the execution of these stages for each asset defined in the input `SourceRule`. +The [`ProcessingEngine.process()`](processing_engine.py:131) method serves as the main entry point. It initializes a [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) instance, providing it with the application's [`Configuration`](configuration.py:68) object and predefined lists of pre-item and post-item processing stages. The [`PipelineOrchestrator.process_source_rule()`](processing/pipeline/orchestrator.py:95) method then manages the execution of these stages for each asset defined in the input [`SourceRule`](rule_structure.py:40). -A crucial component in this architecture is the `AssetProcessingContext` (`processing/pipeline/asset_context.py`). An instance of this dataclass is created for each `AssetRule` being processed. It acts as a stateful container, carrying all relevant data (source files, rules, configuration, intermediate results, metadata) and is passed sequentially through each stage. Each stage can read from and write to the context, allowing data to flow and be modified throughout the pipeline. +A crucial component in this architecture is the [`AssetProcessingContext`](processing/pipeline/asset_context.py:86) (`processing/pipeline/asset_context.py`). An instance of this dataclass is created for each [`AssetRule`](rule_structure.py:22) being processed. It acts as a stateful container, carrying all relevant data (source files, rules, configuration, intermediate results, metadata) and is passed sequentially through each stage. Each stage can read from and write to the context, allowing data to flow and be modified throughout the pipeline. -The pipeline stages are executed in the following order: +The pipeline execution for each asset follows this general flow: -1. **`SupplierDeterminationStage` (`processing/pipeline/stages/supplier_determination.py`)**: - * **Responsibility**: Determines the effective supplier for the asset based on the `SourceRule`'s `supplier_identifier`, `supplier_override`, and supplier definitions in the `Configuration`. - * **Context Interaction**: Updates `AssetProcessingContext.effective_supplier` and potentially `AssetProcessingContext.asset_metadata` with supplier information. +1. **Pre-Item Stages:** A sequence of stages executed once per asset before the core item processing loop. These stages typically perform initial setup, filtering, and asset-level transformations. +2. **Core Item Processing Loop:** The [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) iterates through a list of "processing items" (individual files or merge tasks) prepared by a dedicated stage. For each item, a sequence of core processing stages is executed. +3. **Post-Item Stages:** A sequence of stages executed once per asset after the core item processing loop is complete. These stages handle final tasks like organizing output files and saving metadata. -2. **`AssetSkipLogicStage` (`processing/pipeline/stages/asset_skip_logic.py`)**: - * **Responsibility**: Checks if the asset should be skipped, typically if the output already exists and overwriting is not forced. - * **Context Interaction**: Sets `AssetProcessingContext.status_flags['skip_asset']` to `True` if the asset should be skipped, halting further processing for this asset by the orchestrator. +## Pipeline Stages -3. **`MetadataInitializationStage` (`processing/pipeline/stages/metadata_initialization.py`)**: - * **Responsibility**: Initializes the `AssetProcessingContext.asset_metadata` dictionary with base information derived from the `AssetRule`, `SourceRule`, and `Configuration`. This includes asset name, type, and any common metadata. - * **Context Interaction**: Populates `AssetProcessingContext.asset_metadata`. +The stages are executed in the following order for each asset: -4. **`FileRuleFilterStage` (`processing/pipeline/stages/file_rule_filter.py`)**: - * **Responsibility**: Filters the `FileRule` objects from the `AssetRule` to determine which files should actually be processed. It respects `FILE_IGNORE` rules. - * **Context Interaction**: Populates `AssetProcessingContext.files_to_process` with the list of `FileRule` objects that passed the filter. +### Pre-Item Stages -5. **`GlossToRoughConversionStage` (`processing/pipeline/stages/gloss_to_rough_conversion.py`)**: - * **Responsibility**: Identifies gloss maps (based on `FileRule` properties and filename conventions) that are intended to be used as roughness maps. If found, it loads the image, inverts its colors, and saves a temporary inverted version. - * **Context Interaction**: Modifies `FileRule` objects in `AssetProcessingContext.files_to_process` (e.g., updates `file_path` to point to the temporary inverted map, sets flags indicating inversion). Updates `AssetProcessingContext.processed_maps_details` with information about the conversion. +These stages are executed sequentially once for each asset before the core item processing loop begins. -6. **`AlphaExtractionToMaskStage` (`processing/pipeline/stages/alpha_extraction_to_mask.py`)**: - * **Responsibility**: If a `FileRule` specifies alpha channel extraction (e.g., from a diffuse map to create an opacity mask), this stage loads the source image, extracts its alpha channel, and saves it as a new temporary grayscale map. - * **Context Interaction**: May add new `FileRule`-like entries or details to `AssetProcessingContext.processed_maps_details` representing the extracted mask. +1. **[`SupplierDeterminationStage`](processing/pipeline/stages/supplier_determination.py:6)** (`processing/pipeline/stages/supplier_determination.py`): + * **Responsibility**: Determines the effective supplier for the asset based on the [`SourceRule`](rule_structure.py:40)'s `supplier_override`, `supplier_identifier`, and validation against configured suppliers. + * **Context Interaction**: Sets `context.effective_supplier` and may set a `supplier_error` flag in `context.status_flags`. -7. **`NormalMapGreenChannelStage` (`processing/pipeline/stages/normal_map_green_channel.py`)**: - * **Responsibility**: Checks `FileRule`s for normal maps and, based on configuration (e.g., `invert_normal_map_green_channel` for a specific supplier), potentially inverts the green channel of the normal map image. - * **Context Interaction**: Modifies the image data for normal maps if inversion is needed, saving a new temporary version. Updates `AssetProcessingContext.processed_maps_details`. +2. **[`AssetSkipLogicStage`](processing/pipeline/stages/asset_skip_logic.py:5)** (`processing/pipeline/stages/asset_skip_logic.py`): + * **Responsibility**: Checks if the entire asset should be skipped based on conditions like a missing/invalid supplier, a "SKIP" status in asset metadata, or if the asset is already processed and overwrite is disabled. + * **Context Interaction**: Sets the `skip_asset` flag and `skip_reason` in `context.status_flags` if the asset should be skipped. -8. **`IndividualMapProcessingStage` (`processing/pipeline/stages/individual_map_processing.py`)**: - * **Responsibility**: Processes individual texture map files. This includes: - * Loading the source image. - * Applying Power-of-Two (POT) scaling. - * Generating multiple resolution variants based on configuration. - * Handling color space conversions (e.g., BGR to RGB). - * Calculating image statistics (min, max, mean, median). - * Determining and storing aspect ratio change information. - * Saving processed temporary map files. - * Applying name variant suffixing and using standard type aliases for filenames. - * **Context Interaction**: Heavily populates `AssetProcessingContext.processed_maps_details` with paths to temporary processed files, dimensions, and other metadata for each map and its variants. Updates `AssetProcessingContext.asset_metadata` with image stats and aspect ratio info. +3. **[`MetadataInitializationStage`](processing/pipeline/stages/metadata_initialization.py:81)** (`processing/pipeline/stages/metadata_initialization.py`): + * **Responsibility**: Initializes the `context.asset_metadata` dictionary with base information derived from the [`AssetRule`](rule_structure.py:22), [`SourceRule`](rule_structure.py:40), and [`Configuration`](configuration.py:68). This includes asset name, IDs, source/output paths, timestamps, and initial status. + * **Context Interaction**: Populates `context.asset_metadata` and initializes empty dictionaries for `processed_maps_details` and `merged_maps_details`. -9. **`MapMergingStage` (`processing/pipeline/stages/map_merging.py`)**: - * **Responsibility**: Performs channel packing and other merge operations based on `map_merge_rules` defined in the `Configuration`. - * **Context Interaction**: Reads source map details and temporary file paths from `AssetProcessingContext.processed_maps_details`. Saves new temporary merged maps and records their details in `AssetProcessingContext.merged_maps_details`. +4. **[`FileRuleFilterStage`](processing/pipeline/stages/file_rule_filter.py:10)** (`processing/pipeline/stages/file_rule_filter.py`): + * **Responsibility**: Filters the [`FileRule`](rule_structure.py:5) objects associated with the asset to determine which individual files should be considered for processing. It identifies and excludes files matching "FILE_IGNORE" rules. + * **Context Interaction**: Populates `context.files_to_process` with the list of [`FileRule`](rule_structure.py:5) objects that are not ignored. -10. **`MetadataFinalizationAndSaveStage` (`processing/pipeline/stages/metadata_finalization_save.py`)**: - * **Responsibility**: Collects all accumulated metadata from `AssetProcessingContext.asset_metadata`, `AssetProcessingContext.processed_maps_details`, and `AssetProcessingContext.merged_maps_details`. It structures this information and saves it as the `metadata.json` file in a temporary location within the engine's temporary directory. - * **Context Interaction**: Reads from various context fields and writes the `metadata.json` file. Stores the path to this temporary metadata file in the context (e.g., `AssetProcessingContext.asset_metadata['temp_metadata_path']`). +5. **[`GlossToRoughConversionStage`](processing/pipeline/stages/gloss_to_rough_conversion.py:15)** (`processing/pipeline/stages/gloss_to_rough_conversion.py`): + * **Responsibility**: Identifies processed maps that were originally glossiness maps. If found, it loads the temporary image data, inverts it, saves a new temporary roughness map, and updates the corresponding details in `context.processed_maps_details` and the relevant [`FileRule`](rule_structure.py:5) in `context.files_to_process`. + * **Context Interaction**: Reads from and updates `context.processed_maps_details` and `context.files_to_process`. -11. **`OutputOrganizationStage` (`processing/pipeline/stages/output_organization.py`)**: - * **Responsibility**: Determines final output paths for all processed maps, merged maps, the metadata file, and any other asset files (like models). It then copies these files from their temporary locations to the final structured output directory. - * **Context Interaction**: Reads temporary file paths from `AssetProcessingContext.processed_maps_details`, `AssetProcessingContext.merged_maps_details`, and the temporary metadata file path. Uses `Configuration` for output path patterns. Updates `AssetProcessingContext.asset_metadata` with final file paths and status. +6. **[`AlphaExtractionToMaskStage`](processing/pipeline/stages/alpha_extraction_to_mask.py:16)** (`processing/pipeline/stages/alpha_extraction_to_mask.py`): + * **Responsibility**: If no mask map is explicitly defined for the asset, this stage searches for a suitable source map (e.g., Albedo, Diffuse) with an alpha channel in `context.processed_maps_details`. If found, it extracts the alpha channel, saves it as a new temporary mask map, and adds a new [`FileRule`](rule_structure.py:5) and corresponding details to the context. + * **Context Interaction**: Reads from `context.processed_maps_details`, adds a new [`FileRule`](rule_structure.py:5) to `context.files_to_process`, and adds a new entry to `context.processed_maps_details`. -**External Steps (Not part of `PipelineOrchestrator`'s direct loop but integral to the overall process):** +7. **[`NormalMapGreenChannelStage`](processing/pipeline/stages/normal_map_green_channel.py:14)** (`processing/pipeline/stages/normal_map_green_channel.py`): + * **Responsibility**: Identifies processed normal maps in `context.processed_maps_details`. If the global `invert_normal_map_green_channel_globally` configuration is true, it loads the temporary image data, inverts the green channel, saves a new temporary modified normal map, and updates the corresponding details in `context.processed_maps_details`. + * **Context Interaction**: Reads from and updates `context.processed_maps_details`. -* **Workspace Preparation and Cleanup**: Handled by the code that invokes `ProcessingEngine.process()` (e.g., `main.ProcessingTask`, `monitor._process_archive_task`), typically using `utils.workspace_utils`. The engine itself creates a sub-temporary directory (`engine_temp_dir`) within the workspace provided to it by the orchestrator, which it cleans up. -* **Prediction and Rule Generation**: Also external, performed before `ProcessingEngine` is called. Generates the `SourceRule`. -* **Optional Blender Script Execution**: Triggered externally after successful processing. +### Core Item Processing Loop -This staged pipeline provides a modular and extensible architecture for asset processing, with clear separation of concerns for each step. The `AssetProcessingContext` ensures that data flows consistently between these stages.r \ No newline at end of file +The [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) iterates through the `context.processing_items` list (populated by the [`PrepareProcessingItemsStage`](processing/pipeline/stages/prepare_processing_items.py:10)). For each item (either a [`FileRule`](rule_structure.py:5) for a regular map or a [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) for a merged map), the following stages are executed sequentially: + +1. **[`PrepareProcessingItemsStage`](processing/pipeline/stages/prepare_processing_items.py:10)** (`processing/pipeline/stages/prepare_processing_items.py`): + * **Responsibility**: (Executed once before the loop) Creates the `context.processing_items` list by combining [`FileRule`](rule_structure.py:5)s from `context.files_to_process` and [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16)s derived from the global `merged_image_tasks` configuration. Initializes `context.intermediate_results`. + * **Context Interaction**: Populates `context.processing_items` and initializes `context.intermediate_results`. + +2. **[`RegularMapProcessorStage`](processing/pipeline/stages/regular_map_processor.py:18)** (`processing/pipeline/stages/regular_map_processor.py`): + * **Responsibility**: (Executed per [`FileRule`](rule_structure.py:5) item) Loads the image data for a single file, determines its potentially suffixed internal map type, applies in-memory transformations (Gloss-to-Rough, Normal Green Invert), and returns the processed image data and details in a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object. + * **Context Interaction**: Reads from the input [`FileRule`](rule_structure.py:5) and [`Configuration`](configuration.py:68). Returns a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object which is stored in `context.intermediate_results`. + +3. **[`MergedTaskProcessorStage`](processing/pipeline/stages/merged_task_processor.py:68)** (`processing/pipeline/stages/merged_task_processor.py`): + * **Responsibility**: (Executed per [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) item) Loads and prepares multiple input images based on the merge task definition (including fallbacks and in-memory transformations), handles dimension mismatches, performs the channel merging operation, and returns the merged image data and details in a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object. + * **Context Interaction**: Reads from the input [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16), `context.workspace_path`, and [`Configuration`](configuration.py:68). Returns a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object which is stored in `context.intermediate_results`. + +4. **[`InitialScalingStage`](processing/pipeline/stages/initial_scaling.py:14)** (`processing/pipeline/stages/initial_scaling.py`): + * **Responsibility**: (Executed per item) Applies initial scaling (e.g., Power-of-Two downscaling) to the image data from the previous processing stage based on the `initial_scaling_mode` configuration. + * **Context Interaction**: Takes a [`InitialScalingInput`](processing/pipeline/asset_context.py:46) (containing image data and config) and returns an [`InitialScalingOutput`](processing/pipeline/asset_context.py:54) object, which updates the item's entry in `context.intermediate_results`. + +5. **[`SaveVariantsStage`](processing/pipeline/stages/save_variants.py:15)** (`processing/pipeline/stages/save_variants.py`): + * **Responsibility**: (Executed per item) Takes the final processed image data (potentially scaled) and configuration, and calls a utility to save the image to temporary files in various resolutions and formats as defined by the configuration. + * **Context Interaction**: Takes a [`SaveVariantsInput`](processing/pipeline/asset_context.py:61) object. Returns a [`SaveVariantsOutput`](processing/pipeline/asset_context.py:79) object containing details about the saved temporary files. The orchestrator stores these details in `context.processed_maps_details` for the item. + +### Post-Item Stages + +These stages are executed sequentially once for each asset after the core item processing loop has finished for all items. + +1. **[`OutputOrganizationStage`](processing/pipeline/stages/output_organization.py:14)** (`processing/pipeline/stages/output_organization.py`): + * **Responsibility**: Determines the final output paths for all processed maps (including variants) and extra files based on configured patterns. It copies the temporary files generated by the core stages to these final destinations, creating directories as needed and respecting overwrite settings. + * **Context Interaction**: Reads from `context.processed_maps_details`, `context.files_to_process` (for 'EXTRA' files), `context.output_base_path`, and [`Configuration`](configuration.py:68). Updates entries in `context.processed_maps_details` with final paths and organization status. Populates `context.asset_metadata['final_output_files']`. + +2. **[`MetadataFinalizationAndSaveStage`](processing/pipeline/stages/metadata_finalization_save.py:14)** (`processing/pipeline/stages/metadata_finalization_save.py`): + * **Responsibility**: Finalizes the `context.asset_metadata` (setting end time, final status based on flags). It restructures the processed map details for inclusion, determines the save path for the metadata file based on configuration and patterns, serializes the metadata to JSON, and saves the `metadata.json` file to the final output location. + * **Context Interaction**: Reads from `context.asset_metadata`, `context.processed_maps_details`, `context.merged_maps_details`, `context.output_base_path`, and [`Configuration`](configuration.py:68). Writes the `metadata.json` file and updates `context.asset_metadata` with its final path and status. + +## External Steps + +Certain steps are integral to the overall asset processing workflow but are handled outside the [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36)'s direct execution loop: + +* **Workspace Preparation and Cleanup**: Handled by the code that invokes [`ProcessingEngine.process()`](processing_engine.py:131) (e.g., `main.ProcessingTask`, `monitor._process_archive_task`), typically involving extracting archives and setting up temporary directories. The engine itself manages a sub-temporary directory (`engine_temp_dir`) for intermediate processing files. +* **Prediction and Rule Generation**: Performed before the [`ProcessingEngine`](processing_engine.py:73) is called. This involves analyzing source files and generating the [`SourceRule`](rule_structure.py:40) object with its nested [`AssetRule`](rule_structure.py:22)s and [`FileRule`](rule_structure.py:5)s, often involving prediction logic (potentially using LLMs). +* **Optional Blender Script Execution**: Can be triggered externally after successful processing to perform tasks like material setup in Blender using the generated output files and metadata. + +This staged pipeline provides a modular and extensible architecture for asset processing, with clear separation of concerns for each step. The [`AssetProcessingContext`](processing/pipeline/asset_context.py:86) ensures that data flows consistently between these stages. \ No newline at end of file -- 2.47.2 From 0de4db182626e9c2578c47ff3add7094ac94ecb3 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Tue, 13 May 2025 02:52:07 +0200 Subject: [PATCH 12/16] Fixed inconcistencies - only processes MAP_ files now --- ProjectNotes/MAP_Prefix_Enforcement_Plan.md | 96 +++++++++++++++++++ .../stages/alpha_extraction_to_mask.py | 17 ++-- .../stages/gloss_to_rough_conversion.py | 16 ++-- .../pipeline/stages/merged_task_processor.py | 9 ++ .../stages/normal_map_green_channel.py | 4 +- .../pipeline/stages/regular_map_processor.py | 7 ++ 6 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 ProjectNotes/MAP_Prefix_Enforcement_Plan.md diff --git a/ProjectNotes/MAP_Prefix_Enforcement_Plan.md b/ProjectNotes/MAP_Prefix_Enforcement_Plan.md new file mode 100644 index 0000000..205918d --- /dev/null +++ b/ProjectNotes/MAP_Prefix_Enforcement_Plan.md @@ -0,0 +1,96 @@ +# Plan: Enforcing "MAP_" Prefix for Internal Processing and Standard Type for Output Naming + +**Date:** 2025-05-13 + +**I. Goal:** +The primary goal is to ensure that for all internal processing, the system *exclusively* uses `FileRule.item_type` values that start with the "MAP_" prefix (e.g., "MAP_COL", "MAP_NRM"). The "standard type" (e.g., "COL", "NRM") associated with these "MAP_" types (as defined in `config/app_settings.json`) should *only* be used during the file saving stages for output naming. Any `FileRule` whose `item_type` does not start with "MAP_" (and isn't a special type like "EXTRA" or "MODEL") should be skipped by the relevant map processing stages. + +**II. Current State Analysis Summary:** + +* **Output Naming:** The use of "standard type" for output filenames via the `get_filename_friendly_map_type` utility in `SaveVariantsStage` and `OutputOrganizationStage` is **correct** and already meets the requirement. +* **Internal "MAP_" Prefix Usage:** + * Some stages like `GlossToRoughConversionStage` correctly check for "MAP_" prefixes (e.g., `processing_map_type.startswith("MAP_GLOSS")`). + * Other stages like `RegularMapProcessorStage` and `MergedTaskProcessorStage` (and its helpers) implicitly expect "MAP_" prefixed types for their internal regex-based logic but lack explicit checks to skip items if the prefix is missing. + * Stages like `AlphaExtractionToMaskStage` and `NormalMapGreenChannelStage` currently use non-"MAP_" prefixed "standard types" (e.g., "NORMAL", "ALBEDO") when reading from `context.processed_maps_details` for their decision-making logic. + * The `PrepareProcessingItemsStage` adds `FileRule`s to the processing queue without filtering based on the "MAP_" prefix in `item_type`. +* **Data Consistency in `AssetProcessingContext`:** + * `FileRule.item_type` is the field that should hold the "MAP_" prefixed type from the initial rule generation. + * `context.processed_maps_details` entries can contain various map type representations: + * `map_type`: Often stores the "standard type" (e.g., "Roughness", "MASK", "NORMAL"). + * `processing_map_type` / `internal_map_type`: Generally seem to store the "MAP_" prefixed type. This needs to be consistent. +* **Configuration (`config/app_settings.json`):** + * `FILE_TYPE_DEFINITIONS` correctly use "MAP_" prefixed keys. + * `MAP_MERGE_RULES` need to be reviewed to ensure their `output_map_type` and input map types are "MAP_" prefixed. + +**III. Proposed Changes (Code Identification & Recommendations):** + +**A. Enforce "MAP_" Prefix for Processing Items (Skipping Logic):** +The core requirement is that processing stages should skip `FileRule` items if their `item_type` doesn't start with "MAP_". + +1. **`RegularMapProcessorStage` (`processing/pipeline/stages/regular_map_processor.py`):** + * **Identify:** In the `execute` method, `initial_internal_map_type` is derived from `file_rule.item_type_override` or `file_rule.item_type`. + * **Recommend:** Add an explicit check after determining `initial_internal_map_type`. If `initial_internal_map_type` does not start with `"MAP_"`, the stage should log a warning, set the `result.status` to "Skipped (Invalid Type)" or similar, and return `result` early, effectively skipping processing for this item. + +2. **`MergedTaskProcessorStage` (`processing/pipeline/stages/merged_task_processor.py`):** + * **Identify:** This stage processes `MergeTaskDefinition`s. The definitions for these tasks (input types, output type) come from `MAP_MERGE_RULES` in `config/app_settings.json`. The stage uses `required_map_type_from_rule` for its inputs. + * **Recommend:** + * **Configuration First:** Review all entries in `MAP_MERGE_RULES` in `config/app_settings.json`. + * Ensure the `output_map_type` for each rule (e.g., "MAP_NRMRGH") starts with "MAP_". + * Ensure all map type values within the `inputs` dictionary (e.g., `"R": "MAP_NRM"`) start with "MAP_". + * **Stage Logic:** In the `execute` method, when iterating through `merge_inputs_config.items()`, check if `required_map_type_from_rule` starts with `"MAP_"`. If not, log a warning and either: + * Skip loading/processing this specific input channel (potentially using its fallback if the overall merge can still proceed). + * Or, if a non-"MAP_" input is critical, fail the entire merge task for this asset. + * The helper `_apply_in_memory_transformations` already uses regex expecting "MAP_" prefixes; this will naturally fail or misbehave if inputs are not "MAP_" prefixed, reinforcing the need for the check above. + +**B. Standardize Map Type Fields and Usage in `context.processed_maps_details`:** +Ensure consistency in how "MAP_" prefixed types are stored and accessed within `context.processed_maps_details` for internal logic (not naming). + +1. **Recommendation:** Establish a single, consistent field name within `context.processed_maps_details` to store the definitive "MAP_" prefixed internal map type (e.g., `internal_map_type` or `processing_map_type`). All stages that perform logic based on the specific *kind* of map (e.g., transformations, source selection) should read from this standardized field. The `map_type` field can continue to store the "standard type" (e.g., "Roughness") primarily for informational/metadata purposes if needed, but not for core processing logic. + +2. **`AlphaExtractionToMaskStage` (`processing/pipeline/stages/alpha_extraction_to_mask.py`):** + * **Identify:** + * Checks for existing MASK map using `file_rule.map_type == "MASK"`. (Discrepancy: `FileRule` uses `item_type`). + * Searches for suitable source maps using `details.get('map_type') in self.SUITABLE_SOURCE_MAP_TYPES` where `SUITABLE_SOURCE_MAP_TYPES` are standard types like "ALBEDO". + * When adding new details, it sets `map_type: "MASK"` and the new `FileRule` gets `item_type="MAP_MASK"`. + * **Recommend:** + * Change the check for an existing MASK map to `file_rule.item_type == "MAP_MASK"`. + * Modify the source map search to use the standardized "MAP_" prefixed field from `details` (e.g., `details.get('internal_map_type')`) and update `SUITABLE_SOURCE_MAP_TYPES` to be "MAP_" prefixed (e.g., "MAP_COL", "MAP_ALBEDO"). + * When adding new details for the created MASK map to `context.processed_maps_details`, ensure the standardized "MAP_" prefixed field is set to "MAP_MASK", and `map_type` (if kept) is "MASK". + +3. **`NormalMapGreenChannelStage` (`processing/pipeline/stages/normal_map_green_channel.py`):** + * **Identify:** Checks `map_details.get('map_type') == "NORMAL"`. + * **Recommend:** Change this check to use the standardized "MAP_" prefixed field from `map_details` (e.g., `map_details.get('internal_map_type')`) and verify if it `startswith("MAP_NRM")`. + +4. **`GlossToRoughConversionStage` (`processing/pipeline/stages/gloss_to_rough_conversion.py`):** + * **Identify:** This stage already uses `processing_map_type.startswith("MAP_GLOSS")` and updates `processing_map_type` to "MAP_ROUGH" in `map_details`. It also updates the `FileRule.item_type` correctly. + * **Recommend:** This stage is largely consistent. Ensure the field it reads/writes (`processing_map_type`) aligns with the chosen standardized "MAP_" prefixed field for `processed_maps_details`. + +**C. Review Orchestration Logic (Conceptual):** +* When the orchestrator populates `context.processed_maps_details` after stages like `SaveVariantsStage`, ensure it stores the "MAP_" prefixed `internal_map_type` (from `SaveVariantsInput`) into the chosen standardized field in `processed_maps_details`. + +**IV. Testing Recommendations:** + +* Create test cases with `AssetRule`s containing `FileRule`s where `item_type` is intentionally set to a non-"MAP_" prefixed value (e.g., "COLOR_MAP", "TEXTURE_ROUGH"). Verify that `RegularMapProcessorStage` skips these. +* Modify `MAP_MERGE_RULES` in a test configuration: + * Set an `output_map_type` to a non-"MAP_" value. + * Set an input map type (e.g., for channel "R") to a non-"MAP_" value. + * Verify that `MergedTaskProcessorStage` correctly handles these (e.g., fails the task, skips the input, logs warnings). +* Test `AlphaExtractionToMaskStage`: + * With an existing `FileRule` having `item_type="MAP_MASK"` to ensure extraction is skipped. + * With source maps having "MAP_COL" (with alpha) as their `internal_map_type` in `processed_maps_details` to ensure they are correctly identified as sources. +* Test `NormalMapGreenChannelStage` with a normal map having "MAP_NRM" as its `internal_map_type` in `processed_maps_details` to ensure it's processed. +* Verify that output filenames continue to use the "standard type" (e.g., "COL", "ROUGH", "NRM") correctly. + +**V. Mermaid Diagram (Illustrative Flow for `FileRule` Processing):** + +```mermaid +graph TD + A[AssetRule with FileRules] --> B{FileRuleFilterStage}; + B -- files_to_process --> C{PrepareProcessingItemsStage}; + C -- processing_items (FileRule) --> D{PipelineOrchestrator}; + D -- FileRule --> E(RegularMapProcessorStage); + E --> F{Check FileRule.item_type}; + F -- Starts with "MAP_"? --> G[Process Map]; + F -- No --> H[Skip Map / Log Warning]; + G --> I[...subsequent stages...]; + H --> I; \ No newline at end of file diff --git a/processing/pipeline/stages/alpha_extraction_to_mask.py b/processing/pipeline/stages/alpha_extraction_to_mask.py index 8de310e..87aa3b6 100644 --- a/processing/pipeline/stages/alpha_extraction_to_mask.py +++ b/processing/pipeline/stages/alpha_extraction_to_mask.py @@ -18,7 +18,8 @@ class AlphaExtractionToMaskStage(ProcessingStage): Extracts an alpha channel from a suitable source map (e.g., Albedo, Diffuse) to generate a MASK map if one is not explicitly defined. """ - SUITABLE_SOURCE_MAP_TYPES = ["ALBEDO", "DIFFUSE", "BASE_COLOR"] # Map types likely to have alpha + # Use MAP_ prefixed types for internal logic checks + SUITABLE_SOURCE_MAP_TYPES = ["MAP_COL", "MAP_ALBEDO", "MAP_BASECOLOR"] # Map types likely to have alpha def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" @@ -38,7 +39,8 @@ class AlphaExtractionToMaskStage(ProcessingStage): # A. Check for Existing MASK Map for file_rule in context.files_to_process: # Assuming file_rule has 'map_type' and 'file_path' (instead of filename_pattern) - if hasattr(file_rule, 'map_type') and file_rule.map_type == "MASK": + # Check for existing MASK map using the correct item_type field and MAP_ prefix + if file_rule.item_type == "MAP_MASK": file_path_for_log = file_rule.file_path if hasattr(file_rule, 'file_path') else "Unknown file path" logger.info( f"Asset '{asset_name_for_log}': MASK map already defined by FileRule " @@ -51,8 +53,10 @@ class AlphaExtractionToMaskStage(ProcessingStage): source_file_rule_id_for_alpha: Optional[str] = None # This ID comes from processed_maps_details keys for file_rule_id, details in context.processed_maps_details.items(): + # Check for suitable source map using the standardized internal_map_type field + internal_map_type = details.get('internal_map_type') # Use the standardized field if details.get('status') == 'Processed' and \ - details.get('map_type') in self.SUITABLE_SOURCE_MAP_TYPES: + internal_map_type in self.SUITABLE_SOURCE_MAP_TYPES: try: temp_path = Path(details['temp_processed_file']) if not temp_path.exists(): @@ -153,15 +157,16 @@ class AlphaExtractionToMaskStage(ProcessingStage): context.processed_maps_details[new_mask_processed_map_key] = { - 'map_type': "MASK", + 'internal_map_type': "MAP_MASK", # Use the standardized MAP_ prefixed field + 'map_type': "MASK", # Keep standard type for metadata/naming consistency if needed 'source_file': str(source_image_path), 'temp_processed_file': str(mask_temp_path), 'original_dimensions': original_dims, 'processed_dimensions': (alpha_channel.shape[1], alpha_channel.shape[0]), 'status': 'Processed', 'notes': ( - f"Generated from alpha of {source_map_details_for_alpha['map_type']} " - f"(Source Detail ID: {source_file_rule_id_for_alpha})" # Changed from Source Rule ID + f"Generated from alpha of {source_map_details_for_alpha.get('internal_map_type', 'unknown type')} " # Use internal_map_type for notes + f"(Source Detail ID: {source_file_rule_id_for_alpha})" ), # 'file_rule_id': new_mask_file_rule_id_str # FileRule doesn't have an ID to link here directly } diff --git a/processing/pipeline/stages/gloss_to_rough_conversion.py b/processing/pipeline/stages/gloss_to_rough_conversion.py index 2de863c..9c2f948 100644 --- a/processing/pipeline/stages/gloss_to_rough_conversion.py +++ b/processing/pipeline/stages/gloss_to_rough_conversion.py @@ -51,7 +51,8 @@ class GlossToRoughConversionStage(ProcessingStage): # Iterate using the index (map_key_index) as the key, which is now standard. for map_key_index, map_details in context.processed_maps_details.items(): - processing_map_type = map_details.get('processing_map_type', '') + # Use the standardized internal_map_type field + internal_map_type = map_details.get('internal_map_type', '') map_status = map_details.get('status') original_temp_path_str = map_details.get('temp_processed_file') # source_file_rule_idx from details should align with map_key_index. @@ -70,11 +71,12 @@ class GlossToRoughConversionStage(ProcessingStage): processing_tag = f"mki_{map_key_index}_fallback_tag" - if not processing_map_type.startswith("MAP_GLOSS"): - # logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Type '{processing_map_type}' is not GLOSS. Skipping.") + # Check if the map is a GLOSS map using the standardized internal_map_type + if not internal_map_type.startswith("MAP_GLOSS"): + # logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Type '{internal_map_type}' is not GLOSS. Skipping.") continue - logger.info(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Identified potential GLOSS map (Type: {processing_map_type}).") + logger.info(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Identified potential GLOSS map (Type: {internal_map_type}).") if map_status not in successful_conversion_statuses: logger.warning( @@ -163,9 +165,9 @@ class GlossToRoughConversionStage(ProcessingStage): # Update context.processed_maps_details for this map_key_index map_details['temp_processed_file'] = str(new_temp_path) - map_details['original_map_type_before_conversion'] = processing_map_type - map_details['processing_map_type'] = "MAP_ROUGH" - map_details['map_type'] = "Roughness" + map_details['original_map_type_before_conversion'] = internal_map_type # Store the original internal type + map_details['internal_map_type'] = "MAP_ROUGH" # Use the standardized MAP_ prefixed field + map_details['map_type'] = "Roughness" # Keep standard type for metadata/naming consistency if needed map_details['status'] = "Converted_To_Rough" map_details['notes'] = map_details.get('notes', '') + "; Converted from GLOSS by GlossToRoughConversionStage" if 'base_pot_resolution_name' in map_details: diff --git a/processing/pipeline/stages/merged_task_processor.py b/processing/pipeline/stages/merged_task_processor.py index e9a8eea..13f9281 100644 --- a/processing/pipeline/stages/merged_task_processor.py +++ b/processing/pipeline/stages/merged_task_processor.py @@ -125,6 +125,15 @@ class MergedTaskProcessorStage(ProcessingStage): # --- Load, Transform, and Prepare Inputs --- log.debug(f"{log_prefix}: Loading and preparing inputs...") for channel_char, required_map_type_from_rule in merge_inputs_config.items(): + # Validate that the required input map type starts with "MAP_" + if not required_map_type_from_rule.startswith("MAP_"): + result.error_message = ( + f"Invalid input map type '{required_map_type_from_rule}' for channel '{channel_char}'. " + f"Input map types for merging must start with 'MAP_'." + ) + log.error(f"{log_prefix}: {result.error_message}") + return result # Fail the task if an input type is invalid + input_info = input_map_sources_from_task.get(required_map_type_from_rule) input_image_data: Optional[np.ndarray] = None input_source_desc = f"Fallback for {required_map_type_from_rule}" diff --git a/processing/pipeline/stages/normal_map_green_channel.py b/processing/pipeline/stages/normal_map_green_channel.py index 38d9034..636c1ec 100644 --- a/processing/pipeline/stages/normal_map_green_channel.py +++ b/processing/pipeline/stages/normal_map_green_channel.py @@ -38,7 +38,9 @@ class NormalMapGreenChannelStage(ProcessingStage): # Iterate through processed maps, as FileRule objects don't have IDs directly for map_id_hex, map_details in context.processed_maps_details.items(): - if map_details.get('map_type') == "NORMAL" and map_details.get('status') == 'Processed': + # Check if the map is a processed normal map using the standardized internal_map_type + internal_map_type = map_details.get('internal_map_type') + if internal_map_type and internal_map_type.startswith("MAP_NRM") and map_details.get('status') == 'Processed': # Check configuration for inversion # Assuming general_settings is an attribute of config_obj and might be a dict or an object diff --git a/processing/pipeline/stages/regular_map_processor.py b/processing/pipeline/stages/regular_map_processor.py index 2bb5b52..bb74321 100644 --- a/processing/pipeline/stages/regular_map_processor.py +++ b/processing/pipeline/stages/regular_map_processor.py @@ -183,6 +183,13 @@ class RegularMapProcessorStage(ProcessingStage): log.error(f"{log_prefix}: {result.error_message}") return result # Early exit + # Explicitly skip if the determined type doesn't start with "MAP_" + if not initial_internal_map_type.startswith("MAP_"): + result.status = "Skipped (Invalid Type)" + result.error_message = f"FileRule item_type '{initial_internal_map_type}' does not start with 'MAP_'. Skipping processing." + log.warning(f"{log_prefix}: {result.error_message}") + return result # Early exit + processing_map_type = self._get_suffixed_internal_map_type( context.asset_rule, file_rule, initial_internal_map_type, respect_variant_map_types, asset_name_for_log ) -- 2.47.2 From 35a7221f5702438260e4470a946efbd1037117d5 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Tue, 13 May 2025 03:07:00 +0200 Subject: [PATCH 13/16] Cleanup of inconsistencies --- .../05_Processing_Pipeline.md | 14 +-- .../pipeline/stages/merged_task_processor.py | 51 +---------- .../pipeline/stages/output_organization.py | 77 ---------------- .../pipeline/stages/regular_map_processor.py | 53 +---------- processing/utils/image_processing_utils.py | 88 ++++++++++++++++++- 5 files changed, 96 insertions(+), 187 deletions(-) diff --git a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md index 95e81ac..6642880 100644 --- a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md +++ b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md @@ -1,4 +1,4 @@ -# Developer Guide: Processing Pipeline +Cl# Developer Guide: Processing Pipeline This document details the step-by-step technical process executed by the asset processing pipeline, which is initiated by the [`ProcessingEngine`](processing_engine.py:73) class (`processing_engine.py`) and orchestrated by the [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) (`processing/pipeline/orchestrator.py`). @@ -57,12 +57,12 @@ The [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) iterates th * **Context Interaction**: Populates `context.processing_items` and initializes `context.intermediate_results`. 2. **[`RegularMapProcessorStage`](processing/pipeline/stages/regular_map_processor.py:18)** (`processing/pipeline/stages/regular_map_processor.py`): - * **Responsibility**: (Executed per [`FileRule`](rule_structure.py:5) item) Loads the image data for a single file, determines its potentially suffixed internal map type, applies in-memory transformations (Gloss-to-Rough, Normal Green Invert), and returns the processed image data and details in a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object. - * **Context Interaction**: Reads from the input [`FileRule`](rule_structure.py:5) and [`Configuration`](configuration.py:68). Returns a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object which is stored in `context.intermediate_results`. + * **Responsibility**: (Executed per [`FileRule`](rule_structure.py:5) item) Checks if the `FileRule.item_type` starts with "MAP_". If not, the item is skipped. Otherwise, it loads the image data for the file, determines its potentially suffixed internal map type (e.g., "MAP_COL-1"), applies in-memory transformations (Gloss-to-Rough, Normal Green Invert) using the shared utility function [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), and returns the processed image data and details in a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object. The `internal_map_type` in the output reflects any transformations (e.g., "MAP_GLOSS" becomes "MAP_ROUGH"). + * **Context Interaction**: Reads from the input [`FileRule`](rule_structure.py:5) (checking `item_type`) and [`Configuration`](configuration.py:68). Returns a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object which is stored in `context.intermediate_results`. 3. **[`MergedTaskProcessorStage`](processing/pipeline/stages/merged_task_processor.py:68)** (`processing/pipeline/stages/merged_task_processor.py`): - * **Responsibility**: (Executed per [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) item) Loads and prepares multiple input images based on the merge task definition (including fallbacks and in-memory transformations), handles dimension mismatches, performs the channel merging operation, and returns the merged image data and details in a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object. - * **Context Interaction**: Reads from the input [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16), `context.workspace_path`, and [`Configuration`](configuration.py:68). Returns a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object which is stored in `context.intermediate_results`. + * **Responsibility**: (Executed per [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) item) Validates that all input map types specified in the merge rule start with "MAP_". If not, the task is failed. Otherwise, it loads and prepares multiple input images based on the merge task definition (including fallbacks and in-memory transformations applied to inputs using [`apply_common_map_transformations`](processing/utils/image_processing_utils.py)), handles dimension mismatches, performs the channel merging operation, and returns the merged image data and details in a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object. The `output_map_type` of the merged map must also be "MAP_" prefixed in the configuration. + * **Context Interaction**: Reads from the input [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) (checking input map types), `context.workspace_path`, and [`Configuration`](configuration.py:68). Returns a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object which is stored in `context.intermediate_results`. 4. **[`InitialScalingStage`](processing/pipeline/stages/initial_scaling.py:14)** (`processing/pipeline/stages/initial_scaling.py`): * **Responsibility**: (Executed per item) Applies initial scaling (e.g., Power-of-Two downscaling) to the image data from the previous processing stage based on the `initial_scaling_mode` configuration. @@ -70,7 +70,7 @@ The [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) iterates th 5. **[`SaveVariantsStage`](processing/pipeline/stages/save_variants.py:15)** (`processing/pipeline/stages/save_variants.py`): * **Responsibility**: (Executed per item) Takes the final processed image data (potentially scaled) and configuration, and calls a utility to save the image to temporary files in various resolutions and formats as defined by the configuration. - * **Context Interaction**: Takes a [`SaveVariantsInput`](processing/pipeline/asset_context.py:61) object. Returns a [`SaveVariantsOutput`](processing/pipeline/asset_context.py:79) object containing details about the saved temporary files. The orchestrator stores these details in `context.processed_maps_details` for the item. + * **Context Interaction**: Takes a [`SaveVariantsInput`](processing/pipeline/asset_context.py:61) object (which includes the "MAP_" prefixed `internal_map_type`). It uses the `get_filename_friendly_map_type` utility to convert this to a "standard type" (e.g., "COL") for output naming. Returns a [`SaveVariantsOutput`](processing/pipeline/asset_context.py:79) object containing details about the saved temporary files. The orchestrator stores these details, including the original "MAP_" prefixed `internal_map_type`, in `context.processed_maps_details` for the item. ### Post-Item Stages @@ -78,7 +78,7 @@ These stages are executed sequentially once for each asset after the core item p 1. **[`OutputOrganizationStage`](processing/pipeline/stages/output_organization.py:14)** (`processing/pipeline/stages/output_organization.py`): * **Responsibility**: Determines the final output paths for all processed maps (including variants) and extra files based on configured patterns. It copies the temporary files generated by the core stages to these final destinations, creating directories as needed and respecting overwrite settings. - * **Context Interaction**: Reads from `context.processed_maps_details`, `context.files_to_process` (for 'EXTRA' files), `context.output_base_path`, and [`Configuration`](configuration.py:68). Updates entries in `context.processed_maps_details` with final paths and organization status. Populates `context.asset_metadata['final_output_files']`. + * **Context Interaction**: Reads from `context.processed_maps_details` (using the "MAP_" prefixed `internal_map_type` to get the "standard type" via `get_filename_friendly_map_type` for output naming), `context.files_to_process` (for 'EXTRA' files), `context.output_base_path`, and [`Configuration`](configuration.py:68). Updates entries in `context.processed_maps_details` with final paths and organization status. Populates `context.asset_metadata['final_output_files']`. (Note: Legacy code for `'Processed_With_Variants'` status has been removed from this stage). 2. **[`MetadataFinalizationAndSaveStage`](processing/pipeline/stages/metadata_finalization_save.py:14)** (`processing/pipeline/stages/metadata_finalization_save.py`): * **Responsibility**: Finalizes the `context.asset_metadata` (setting end time, final status based on flags). It restructures the processed map details for inclusion, determines the save path for the metadata file based on configuration and patterns, serializes the metadata to JSON, and saves the `metadata.json` file to the final output location. diff --git a/processing/pipeline/stages/merged_task_processor.py b/processing/pipeline/stages/merged_task_processor.py index 13f9281..1a610a0 100644 --- a/processing/pipeline/stages/merged_task_processor.py +++ b/processing/pipeline/stages/merged_task_processor.py @@ -14,55 +14,6 @@ from ...utils import image_processing_utils as ipu log = logging.getLogger(__name__) # Helper function (Duplicated from RegularMapProcessorStage - consider moving to utils) -def _apply_in_memory_transformations( - image_data: np.ndarray, - processing_map_type: str, # The internal type of the *input* map - invert_normal_green: bool, - file_type_definitions: Dict[str, Dict], - log_prefix: str -) -> Tuple[np.ndarray, str, List[str]]: - """ - Applies in-memory transformations (Gloss-to-Rough, Normal Green Invert). - Returns potentially transformed image data, potentially updated map type, and notes. - NOTE: This is applied to individual inputs *before* merging. - """ - transformation_notes = [] - current_image_data = image_data # Start with original data - updated_processing_map_type = processing_map_type # Start with original type - - # Gloss-to-Rough - base_map_type_match = re.match(r"(MAP_GLOSS)", processing_map_type) - if base_map_type_match: - log.info(f"{log_prefix}: Applying Gloss-to-Rough conversion to input.") - inversion_succeeded = False - if np.issubdtype(current_image_data.dtype, np.floating): - current_image_data = 1.0 - current_image_data - current_image_data = np.clip(current_image_data, 0.0, 1.0) - log.debug(f"{log_prefix}: Inverted float input data for Gloss->Rough.") - inversion_succeeded = True - elif np.issubdtype(current_image_data.dtype, np.integer): - max_val = np.iinfo(current_image_data.dtype).max - current_image_data = max_val - current_image_data - log.debug(f"{log_prefix}: Inverted integer input data (max_val: {max_val}) for Gloss->Rough.") - inversion_succeeded = True - else: - log.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS input map. Cannot invert.") - transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") - - if inversion_succeeded: - updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") - log.info(f"{log_prefix}: Input map type conceptually updated: '{processing_map_type}' -> '{updated_processing_map_type}'") - transformation_notes.append("Gloss-to-Rough applied to input") - - # Normal Green Invert - base_map_type_match_nrm = re.match(r"(MAP_NRM)", processing_map_type) - if base_map_type_match_nrm and invert_normal_green: - log.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting) to input.") - current_image_data = ipu.invert_normal_map_green_channel(current_image_data) - transformation_notes.append("Normal Green Inverted (Global) applied to input") - - # Return the transformed data, the *original* map type (as it identifies the input source), and notes - return current_image_data, processing_map_type, transformation_notes class MergedTaskProcessorStage(ProcessingStage): @@ -193,7 +144,7 @@ class MergedTaskProcessorStage(ProcessingStage): # 3. Apply Transformations to the loaded/fallback input if input_image_data is not None: - input_image_data, _, transform_notes = _apply_in_memory_transformations( + input_image_data, _, transform_notes = ipu.apply_common_map_transformations( input_image_data.copy(), # Transform a copy required_map_type_from_rule, # Use the type required by the rule invert_normal_green, diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index 205fda0..f032eae 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -194,83 +194,6 @@ class OutputOrganizationStage(ProcessingStage): context.asset_metadata['status'] = "Failed (Output Organization Error)" details['status'] = 'Organization Failed' - # --- Handle legacy 'Processed_With_Variants' status (if still needed, otherwise remove) --- - # This block is kept for potential backward compatibility but might be redundant - # if 'Processed_Via_Save_Utility' is the new standard for variants. - elif map_status == 'Processed_With_Variants': - variants = details.get('variants') # Expects old structure: list of dicts with 'temp_path' - if not variants: - logger.warning(f"Asset '{asset_name_for_log}': Map key '{processed_map_key}' (status '{map_status}') has no 'variants' list. Skipping.") - details['status'] = 'Organization Failed (Legacy Variants Missing)' - continue - - logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(variants)} legacy variants for map key '{processed_map_key}' (map type: {base_map_type}).") - - map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) - map_metadata_entry['map_type'] = base_map_type - map_metadata_entry.setdefault('variant_paths', {}) - - processed_any_variant_successfully = False - failed_any_variant = False - - for variant_index, variant_detail in enumerate(variants): - temp_variant_path_str = variant_detail.get('temp_path') # Uses 'temp_path' - if not temp_variant_path_str: - logger.warning(f"Asset '{asset_name_for_log}': Legacy Variant {variant_index} for map '{processed_map_key}' is missing 'temp_path'. Skipping.") - continue - - temp_variant_path = Path(temp_variant_path_str) - if not temp_variant_path.is_file(): - logger.warning(f"Asset '{asset_name_for_log}': Legacy temporary variant file '{temp_variant_path}' for map '{processed_map_key}' not found. Skipping.") - continue - - variant_resolution_key = variant_detail.get('resolution_key', f"varRes{variant_index}") - variant_ext = temp_variant_path.suffix.lstrip('.') - - token_data_variant = { - "assetname": asset_name_for_log, - "supplier": context.effective_supplier or "DefaultSupplier", - "maptype": base_map_type, - "resolution": variant_resolution_key, - "ext": variant_ext, - "incrementingvalue": getattr(context, 'incrementing_value', None), - "sha5": getattr(context, 'sha5_value', None) - } - token_data_variant_cleaned = {k: v for k, v in token_data_variant.items() if v is not None} - output_filename_variant = generate_path_from_pattern(output_filename_pattern_config, token_data_variant_cleaned) - - try: - relative_dir_path_str_variant = generate_path_from_pattern( - pattern_string=output_dir_pattern, - token_data=token_data_variant_cleaned - ) - final_variant_path = Path(context.output_base_path) / Path(relative_dir_path_str_variant) / Path(output_filename_variant) - final_variant_path.parent.mkdir(parents=True, exist_ok=True) - - if final_variant_path.exists() and not overwrite_existing: - logger.info(f"Asset '{asset_name_for_log}': Output legacy variant file {final_variant_path} exists and overwrite is disabled. Skipping copy.") - else: - shutil.copy2(temp_variant_path, final_variant_path) - logger.info(f"Asset '{asset_name_for_log}': Copied legacy variant {temp_variant_path} to {final_variant_path}.") - final_output_files.append(str(final_variant_path)) - - relative_final_variant_path_str = str(Path(relative_dir_path_str_variant) / Path(output_filename_variant)) - map_metadata_entry['variant_paths'][variant_resolution_key] = relative_final_variant_path_str - processed_any_variant_successfully = True - - except Exception as e: - logger.error(f"Asset '{asset_name_for_log}': Failed to copy legacy variant {temp_variant_path}. Error: {e}", exc_info=True) - context.status_flags['output_organization_error'] = True - context.asset_metadata['status'] = "Failed (Output Organization Error - Legacy Variant)" - failed_any_variant = True - - if failed_any_variant: - details['status'] = 'Organization Failed (Legacy Variants)' - elif processed_any_variant_successfully: - details['status'] = 'Organized (Legacy Variants)' - else: - details['status'] = 'Organization Skipped (No Legacy Variants Copied/Needed)' - # --- Handle other statuses (Skipped, Failed, etc.) --- else: # Catches statuses not explicitly handled above logger.debug(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status: '{map_status}') for organization as it's not a recognized final processed state or variant state.") diff --git a/processing/pipeline/stages/regular_map_processor.py b/processing/pipeline/stages/regular_map_processor.py index bb74321..964aaf8 100644 --- a/processing/pipeline/stages/regular_map_processor.py +++ b/processing/pipeline/stages/regular_map_processor.py @@ -91,57 +91,6 @@ class RegularMapProcessorStage(ProcessingStage): return final_internal_map_type - def _apply_in_memory_transformations( - self, - image_data: np.ndarray, - processing_map_type: str, # The potentially suffixed internal type - invert_normal_green: bool, - file_type_definitions: Dict[str, Dict], - log_prefix: str - ) -> Tuple[np.ndarray, str, List[str]]: - """ - Applies in-memory transformations (Gloss-to-Rough, Normal Green Invert). - Returns potentially transformed image data, potentially updated map type, and notes. - """ - transformation_notes = [] - current_image_data = image_data # Start with original data - updated_processing_map_type = processing_map_type # Start with original type - - # Gloss-to-Rough - # Check if the base type is Gloss (before suffix) - base_map_type_match = re.match(r"(MAP_GLOSS)", processing_map_type) - if base_map_type_match: - log.info(f"{log_prefix}: Applying Gloss-to-Rough conversion.") - inversion_succeeded = False - if np.issubdtype(current_image_data.dtype, np.floating): - current_image_data = 1.0 - current_image_data - current_image_data = np.clip(current_image_data, 0.0, 1.0) - log.debug(f"{log_prefix}: Inverted float image data for Gloss->Rough.") - inversion_succeeded = True - elif np.issubdtype(current_image_data.dtype, np.integer): - max_val = np.iinfo(current_image_data.dtype).max - current_image_data = max_val - current_image_data - log.debug(f"{log_prefix}: Inverted integer image data (max_val: {max_val}) for Gloss->Rough.") - inversion_succeeded = True - else: - log.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS map. Cannot invert.") - transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") - - if inversion_succeeded: - # Update the type string itself (e.g., MAP_GLOSS-1 -> MAP_ROUGH-1) - updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") - log.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") - transformation_notes.append("Gloss-to-Rough applied") - - # Normal Green Invert - # Check if the base type is Normal (before suffix) - base_map_type_match_nrm = re.match(r"(MAP_NRM)", processing_map_type) - if base_map_type_match_nrm and invert_normal_green: - log.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting).") - current_image_data = ipu.invert_normal_map_green_channel(current_image_data) - transformation_notes.append("Normal Green Inverted (Global)") - - return current_image_data, updated_processing_map_type, transformation_notes # --- Execute Method --- @@ -237,7 +186,7 @@ class RegularMapProcessorStage(ProcessingStage): result.original_bit_depth = None # Indicate failure to determine # --- Apply Transformations --- - transformed_image_data, final_map_type, transform_notes = self._apply_in_memory_transformations( + transformed_image_data, final_map_type, transform_notes = ipu.apply_common_map_transformations( source_image_data.copy(), # Pass a copy to avoid modifying original load processing_map_type, invert_normal_green, diff --git a/processing/utils/image_processing_utils.py b/processing/utils/image_processing_utils.py index e9181b8..70da34a 100644 --- a/processing/utils/image_processing_utils.py +++ b/processing/utils/image_processing_utils.py @@ -426,4 +426,90 @@ def save_image( return True except Exception: # as e: # print(f"Error saving image {path_obj}: {e}") # Optional: for debugging utils - return False \ No newline at end of file + return False + +# --- Common Map Transformations --- + +import re +import logging + +ipu_log = logging.getLogger(__name__) + +def apply_common_map_transformations( + image_data: np.ndarray, + processing_map_type: str, # The potentially suffixed internal type + invert_normal_green: bool, + file_type_definitions: Dict[str, Dict], + log_prefix: str +) -> Tuple[np.ndarray, str, List[str]]: + """ + Applies common in-memory transformations (Gloss-to-Rough, Normal Green Invert). + Returns potentially transformed image data, potentially updated map type, and notes. + """ + transformation_notes = [] + current_image_data = image_data # Start with original data + updated_processing_map_type = processing_map_type # Start with original type + + # Gloss-to-Rough + # Check if the base type is Gloss (before suffix) + base_map_type_match = re.match(r"(MAP_GLOSS)", processing_map_type) + if base_map_type_match: + ipu_log.info(f"{log_prefix}: Applying Gloss-to-Rough conversion.") + inversion_succeeded = False + if np.issubdtype(current_image_data.dtype, np.floating): + current_image_data = 1.0 - current_image_data + current_image_data = np.clip(current_image_data, 0.0, 1.0) + ipu_log.debug(f"{log_prefix}: Inverted float image data for Gloss->Rough.") + inversion_succeeded = True + elif np.issubdtype(current_image_data.dtype, np.integer): + max_val = np.iinfo(current_image_data.dtype).max + current_image_data = max_val - current_image_data + ipu_log.debug(f"{log_prefix}: Inverted integer image data (max_val: {max_val}) for Gloss->Rough.") + inversion_succeeded = True + else: + ipu_log.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS map. Cannot invert.") + transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") + + if inversion_succeeded: + # Update the type string itself (e.g., MAP_GLOSS-1 -> MAP_ROUGH-1) + updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") + ipu_log.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") + transformation_notes.append("Gloss-to-Rough applied") + + # Normal Green Invert + # Check if the base type is Normal (before suffix) + base_map_type_match_nrm = re.match(r"(MAP_NRM)", processing_map_type) + if base_map_type_match_nrm and invert_normal_green: + ipu_log.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting).") + current_image_data = invert_normal_map_green_channel(current_image_data) + transformation_notes.append("Normal Green Inverted (Global)") + + return current_image_data, updated_processing_map_type, transformation_notes + +# --- Normal Map Utilities --- + +def invert_normal_map_green_channel(normal_map: np.ndarray) -> np.ndarray: + """ + Inverts the green channel of a normal map. + Assumes the normal map is in RGB or RGBA format (channel order R, G, B, A). + """ + if normal_map is None or len(normal_map.shape) < 3 or normal_map.shape[2] < 3: + # Not a valid color image with at least 3 channels + return normal_map + + # Ensure data is mutable + inverted_map = normal_map.copy() + + # Invert the green channel (index 1) + # Handle different data types + if np.issubdtype(inverted_map.dtype, np.floating): + inverted_map[:, :, 1] = 1.0 - inverted_map[:, :, 1] + elif np.issubdtype(inverted_map.dtype, np.integer): + max_val = np.iinfo(inverted_map.dtype).max + inverted_map[:, :, 1] = max_val - inverted_map[:, :, 1] + else: + # Unsupported dtype, return original + print(f"Warning: Unsupported dtype {inverted_map.dtype} for normal map green channel inversion.") + return normal_map + + return inverted_map \ No newline at end of file -- 2.47.2 From f800bb25a9cb0c28e0adf2073b22274fb3682185 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Tue, 13 May 2025 04:01:38 +0200 Subject: [PATCH 14/16] channelpacking now works --- .../05_Processing_Pipeline.md | 22 +-- processing/pipeline/orchestrator.py | 4 +- .../pipeline/stages/merged_task_processor.py | 137 +++++++++++++----- .../stages/prepare_processing_items.py | 28 ++-- 4 files changed, 130 insertions(+), 61 deletions(-) diff --git a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md index 6642880..fc358c2 100644 --- a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md +++ b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md @@ -33,36 +33,36 @@ These stages are executed sequentially once for each asset before the core item * **Context Interaction**: Populates `context.asset_metadata` and initializes empty dictionaries for `processed_maps_details` and `merged_maps_details`. 4. **[`FileRuleFilterStage`](processing/pipeline/stages/file_rule_filter.py:10)** (`processing/pipeline/stages/file_rule_filter.py`): - * **Responsibility**: Filters the [`FileRule`](rule_structure.py:5) objects associated with the asset to determine which individual files should be considered for processing. It identifies and excludes files matching "FILE_IGNORE" rules. + * **Responsibility**: Filters the [`FileRule`](rule_structure.py:5) objects associated with the asset to determine which individual files should be considered for processing. It identifies and excludes files matching "FILE_IGNORE" rules based on their `item_type`. * **Context Interaction**: Populates `context.files_to_process` with the list of [`FileRule`](rule_structure.py:5) objects that are not ignored. 5. **[`GlossToRoughConversionStage`](processing/pipeline/stages/gloss_to_rough_conversion.py:15)** (`processing/pipeline/stages/gloss_to_rough_conversion.py`): - * **Responsibility**: Identifies processed maps that were originally glossiness maps. If found, it loads the temporary image data, inverts it, saves a new temporary roughness map, and updates the corresponding details in `context.processed_maps_details` and the relevant [`FileRule`](rule_structure.py:5) in `context.files_to_process`. - * **Context Interaction**: Reads from and updates `context.processed_maps_details` and `context.files_to_process`. + * **Responsibility**: Identifies processed maps in `context.processed_maps_details` whose `internal_map_type` starts with "MAP_GLOSS". If found, it loads the temporary image data, inverts it using the shared utility function [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), saves a new temporary roughness map ("MAP_ROUGH"), and updates the corresponding details in `context.processed_maps_details` (setting `internal_map_type` to "MAP_ROUGH") and the relevant [`FileRule`](rule_structure.py:5) in `context.files_to_process` (setting `item_type` to "MAP_ROUGH"). + * **Context Interaction**: Reads from and updates `context.processed_maps_details` (specifically `internal_map_type` and `temp_processed_file`) and `context.files_to_process` (specifically `item_type`). 6. **[`AlphaExtractionToMaskStage`](processing/pipeline/stages/alpha_extraction_to_mask.py:16)** (`processing/pipeline/stages/alpha_extraction_to_mask.py`): - * **Responsibility**: If no mask map is explicitly defined for the asset, this stage searches for a suitable source map (e.g., Albedo, Diffuse) with an alpha channel in `context.processed_maps_details`. If found, it extracts the alpha channel, saves it as a new temporary mask map, and adds a new [`FileRule`](rule_structure.py:5) and corresponding details to the context. - * **Context Interaction**: Reads from `context.processed_maps_details`, adds a new [`FileRule`](rule_structure.py:5) to `context.files_to_process`, and adds a new entry to `context.processed_maps_details`. + * **Responsibility**: If no mask map is explicitly defined for the asset (as a [`FileRule`](rule_structure.py:5) with `item_type="MAP_MASK"`), this stage searches `context.processed_maps_details` for a suitable source map (e.g., a "MAP_COL" with an alpha channel, based on its `internal_map_type`). If found, it extracts the alpha channel, saves it as a new temporary mask map, and adds a new [`FileRule`](rule_structure.py:5) (with `item_type="MAP_MASK"`) and corresponding details (with `internal_map_type="MAP_MASK"`) to the context. + * **Context Interaction**: Reads from `context.processed_maps_details`, adds a new [`FileRule`](rule_structure.py:5) to `context.files_to_process`, and adds a new entry to `context.processed_maps_details` (setting `internal_map_type`). 7. **[`NormalMapGreenChannelStage`](processing/pipeline/stages/normal_map_green_channel.py:14)** (`processing/pipeline/stages/normal_map_green_channel.py`): - * **Responsibility**: Identifies processed normal maps in `context.processed_maps_details`. If the global `invert_normal_map_green_channel_globally` configuration is true, it loads the temporary image data, inverts the green channel, saves a new temporary modified normal map, and updates the corresponding details in `context.processed_maps_details`. - * **Context Interaction**: Reads from and updates `context.processed_maps_details`. + * **Responsibility**: Identifies processed normal maps in `context.processed_maps_details` (those with an `internal_map_type` starting with "MAP_NRM"). If the global `invert_normal_map_green_channel_globally` configuration is true, it loads the temporary image data, inverts the green channel using the shared utility function [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), saves a new temporary modified normal map, and updates the `temp_processed_file` path in `context.processed_maps_details`. + * **Context Interaction**: Reads from and updates `context.processed_maps_details` (specifically `temp_processed_file` and `notes`). ### Core Item Processing Loop The [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) iterates through the `context.processing_items` list (populated by the [`PrepareProcessingItemsStage`](processing/pipeline/stages/prepare_processing_items.py:10)). For each item (either a [`FileRule`](rule_structure.py:5) for a regular map or a [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) for a merged map), the following stages are executed sequentially: 1. **[`PrepareProcessingItemsStage`](processing/pipeline/stages/prepare_processing_items.py:10)** (`processing/pipeline/stages/prepare_processing_items.py`): - * **Responsibility**: (Executed once before the loop) Creates the `context.processing_items` list by combining [`FileRule`](rule_structure.py:5)s from `context.files_to_process` and [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16)s derived from the global `merged_image_tasks` configuration. Initializes `context.intermediate_results`. - * **Context Interaction**: Populates `context.processing_items` and initializes `context.intermediate_results`. + * **Responsibility**: (Executed once before the loop) Creates the `context.processing_items` list by combining [`FileRule`](rule_structure.py:5)s from `context.files_to_process` and [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16)s derived from the global `map_merge_rules` configuration. It correctly accesses `map_merge_rules` from `context.config_obj` and validates each merge rule for the presence of `output_map_type` and a dictionary for `inputs`. Initializes `context.intermediate_results`. + * **Context Interaction**: Reads from `context.files_to_process` and `context.config_obj` (accessing `map_merge_rules`). Populates `context.processing_items` and initializes `context.intermediate_results`. 2. **[`RegularMapProcessorStage`](processing/pipeline/stages/regular_map_processor.py:18)** (`processing/pipeline/stages/regular_map_processor.py`): * **Responsibility**: (Executed per [`FileRule`](rule_structure.py:5) item) Checks if the `FileRule.item_type` starts with "MAP_". If not, the item is skipped. Otherwise, it loads the image data for the file, determines its potentially suffixed internal map type (e.g., "MAP_COL-1"), applies in-memory transformations (Gloss-to-Rough, Normal Green Invert) using the shared utility function [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), and returns the processed image data and details in a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object. The `internal_map_type` in the output reflects any transformations (e.g., "MAP_GLOSS" becomes "MAP_ROUGH"). * **Context Interaction**: Reads from the input [`FileRule`](rule_structure.py:5) (checking `item_type`) and [`Configuration`](configuration.py:68). Returns a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object which is stored in `context.intermediate_results`. 3. **[`MergedTaskProcessorStage`](processing/pipeline/stages/merged_task_processor.py:68)** (`processing/pipeline/stages/merged_task_processor.py`): - * **Responsibility**: (Executed per [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) item) Validates that all input map types specified in the merge rule start with "MAP_". If not, the task is failed. Otherwise, it loads and prepares multiple input images based on the merge task definition (including fallbacks and in-memory transformations applied to inputs using [`apply_common_map_transformations`](processing/utils/image_processing_utils.py)), handles dimension mismatches, performs the channel merging operation, and returns the merged image data and details in a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object. The `output_map_type` of the merged map must also be "MAP_" prefixed in the configuration. - * **Context Interaction**: Reads from the input [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) (checking input map types), `context.workspace_path`, and [`Configuration`](configuration.py:68). Returns a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object which is stored in `context.intermediate_results`. + * **Responsibility**: (Executed per [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) item) Validates that all input map types specified in the merge rule start with "MAP_". If not, the task is failed. It dynamically loads input images by looking up the required input map types (e.g., "MAP_NRM") in `context.processed_maps_details` and using the temporary file paths from their `saved_files_info`. It applies in-memory transformations to inputs using [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), handles dimension mismatches (with fallback creation if configured and `source_dimensions` are available), performs the channel merging operation, and returns the merged image data and details in a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object. The `output_map_type` of the merged map must also be "MAP_" prefixed in the configuration. + * **Context Interaction**: Reads from the input [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) (checking input map types), `context.workspace_path`, `context.processed_maps_details` (for input image data), and [`Configuration`](configuration.py:68). Returns a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object which is stored in `context.intermediate_results`. 4. **[`InitialScalingStage`](processing/pipeline/stages/initial_scaling.py:14)** (`processing/pipeline/stages/initial_scaling.py`): * **Responsibility**: (Executed per item) Applies initial scaling (e.g., Power-of-Two downscaling) to the image data from the previous processing stage based on the `initial_scaling_mode` configuration. diff --git a/processing/pipeline/orchestrator.py b/processing/pipeline/orchestrator.py index 993dcc9..6c8fe7a 100644 --- a/processing/pipeline/orchestrator.py +++ b/processing/pipeline/orchestrator.py @@ -165,10 +165,12 @@ class PipelineOrchestrator: # --- Prepare Processing Items --- log.debug(f"Asset '{asset_name}': Preparing processing items...") try: + log.info(f"ORCHESTRATOR_TRACE: Asset '{asset_name}': Attempting to call _prepare_stage.execute(). Current context.status_flags: {context.status_flags}") # Prepare stage modifies context directly context = self._prepare_stage.execute(context) + log.info(f"ORCHESTRATOR_TRACE: Asset '{asset_name}': Successfully RETURNED from _prepare_stage.execute(). context.processing_items count: {len(context.processing_items) if context.processing_items is not None else 'None'}. context.status_flags: {context.status_flags}") except Exception as e: - log.error(f"Asset '{asset_name}': Error during PrepareProcessingItemsStage: {e}", exc_info=True) + log.error(f"ORCHESTRATOR_TRACE: Asset '{asset_name}': EXCEPTION during _prepare_stage.execute(): {e}", exc_info=True) context.status_flags["asset_failed"] = True context.status_flags["asset_failed_stage"] = "PrepareProcessingItemsStage" context.status_flags["asset_failed_reason"] = str(e) diff --git a/processing/pipeline/stages/merged_task_processor.py b/processing/pipeline/stages/merged_task_processor.py index 1a610a0..46507b8 100644 --- a/processing/pipeline/stages/merged_task_processor.py +++ b/processing/pipeline/stages/merged_task_processor.py @@ -13,9 +13,6 @@ from ...utils import image_processing_utils as ipu log = logging.getLogger(__name__) -# Helper function (Duplicated from RegularMapProcessorStage - consider moving to utils) - - class MergedTaskProcessorStage(ProcessingStage): """ Processes a single merge task defined in the configuration. @@ -23,6 +20,42 @@ class MergedTaskProcessorStage(ProcessingStage): performs the merge, and returns the merged data. """ + def _find_input_map_details_in_context( + self, + required_map_type: str, + processed_map_details_context: Dict[str, Dict[str, Any]], + log_prefix_for_find: str + ) -> Optional[Dict[str, Any]]: + """ + Finds the details of a required input map from the context's processed_maps_details. + Prefers exact match for full types (e.g. MAP_TYPE-1), or base type / base type + "-1" for base types (e.g. MAP_TYPE). + Returns the details dictionary for the found map if it has saved_files_info. + """ + # Try exact match first (e.g., rule asks for "MAP_NRM-1" or "MAP_NRM" if that's how it was processed) + for item_key, details in processed_map_details_context.items(): + if details.get('internal_map_type') == required_map_type: + if details.get('saved_files_info') and isinstance(details['saved_files_info'], list) and len(details['saved_files_info']) > 0: + log.debug(f"{log_prefix_for_find}: Found exact match for '{required_map_type}' with key '{item_key}'.") + return details + log.warning(f"{log_prefix_for_find}: Found exact match for '{required_map_type}' (key '{item_key}') but no saved_files_info.") + return None # Found type but no usable files + + # If exact match not found, and required_map_type is a base type (e.g. "MAP_NRM") + # try to find the primary suffixed version "MAP_NRM-1" or the base type itself if it was processed without a suffix. + if not re.search(r'-\d+$', required_map_type): # if it's a base type like MAP_XXX + # Prefer "MAP_XXX-1" as the primary variant if suffixed types exist + primary_suffixed_type = f"{required_map_type}-1" + for item_key, details in processed_map_details_context.items(): + if details.get('internal_map_type') == primary_suffixed_type: + if details.get('saved_files_info') and isinstance(details['saved_files_info'], list) and len(details['saved_files_info']) > 0: + log.debug(f"{log_prefix_for_find}: Found primary suffixed match '{primary_suffixed_type}' for base '{required_map_type}' with key '{item_key}'.") + return details + log.warning(f"{log_prefix_for_find}: Found primary suffixed match '{primary_suffixed_type}' (key '{item_key}') but no saved_files_info.") + return None # Found type but no usable files + + log.debug(f"{log_prefix_for_find}: No suitable match found for '{required_map_type}' via exact or primary suffixed type search.") + return None + def execute( self, context: AssetProcessingContext, @@ -56,17 +89,23 @@ class MergedTaskProcessorStage(ProcessingStage): merge_dimension_mismatch_strategy = getattr(config, "MERGE_DIMENSION_MISMATCH_STRATEGY", "USE_LARGEST") workspace_path = context.workspace_path # Base for resolving relative input paths - merge_rule_config = task_data.get('merge_rule_config', {}) - input_map_sources_from_task = task_data.get('input_map_sources', {}) # Info about where inputs come from - target_dimensions_hw = task_data.get('source_dimensions') # Expected dimensions (h, w) from previous stage - merge_inputs_config = merge_rule_config.get('inputs', {}) # e.g., {'R': 'MAP_AO', 'G': 'MAP_ROUGH', ...} - merge_defaults = merge_rule_config.get('defaults', {}) # e.g., {'R': 255, 'G': 255, ...} - merge_channels_order = merge_rule_config.get('channel_order', 'RGB') # e.g., 'RGB', 'RGBA' + # input_map_sources_from_task is no longer used for paths. Paths are sourced from context.processed_maps_details. + target_dimensions_hw = task_data.get('source_dimensions') # Expected dimensions (h, w) for fallback creation, must be in config. + merge_inputs_config = task_data.get('inputs', {}) # e.g., {'R': 'MAP_AO', 'G': 'MAP_ROUGH', ...} + merge_defaults = task_data.get('defaults', {}) # e.g., {'R': 255, 'G': 255, ...} + merge_channels_order = task_data.get('channel_order', 'RGB') # e.g., 'RGB', 'RGBA' - if not merge_rule_config or not input_map_sources_from_task or not target_dimensions_hw or not merge_inputs_config: - result.error_message = "Merge task data is incomplete (missing config, sources, dimensions, or input mapping)." + # Target dimensions are crucial if fallbacks are needed. + # Merge inputs config is essential. + # Merge inputs config is essential. Check directly in task_data. + inputs_from_task_data = task_data.get('inputs') + if not isinstance(inputs_from_task_data, dict) or not inputs_from_task_data: + result.error_message = "Merge task data is incomplete (missing or invalid 'inputs' dictionary in task_data)." log.error(f"{log_prefix}: {result.error_message}") return result + if not target_dimensions_hw and any(merge_defaults.get(ch) is not None for ch in merge_inputs_config.keys()): + log.warning(f"{log_prefix}: Merge task has defaults defined, but 'source_dimensions' (target_dimensions_hw) is missing in task_data. Fallback image creation might fail if needed.") + # Not returning error yet, as fallbacks might not be triggered. loaded_inputs_for_merge: Dict[str, np.ndarray] = {} # Channel char -> image data actual_input_dimensions: List[Tuple[int, int]] = [] # List of (h, w) for loaded files @@ -85,46 +124,61 @@ class MergedTaskProcessorStage(ProcessingStage): log.error(f"{log_prefix}: {result.error_message}") return result # Fail the task if an input type is invalid - input_info = input_map_sources_from_task.get(required_map_type_from_rule) input_image_data: Optional[np.ndarray] = None input_source_desc = f"Fallback for {required_map_type_from_rule}" input_log_prefix = f"{log_prefix}, Input '{required_map_type_from_rule}' (Channel '{channel_char}')" channel_transform_notes: List[str] = [] - # 1. Attempt to load from file path - if input_info and input_info.get('file_path'): - # Paths in merged tasks should be relative to workspace_path - input_file_path_str = input_info['file_path'] - input_file_path = workspace_path / input_file_path_str - if input_file_path.is_file(): - try: - input_image_data = ipu.load_image(str(input_file_path)) - if input_image_data is not None: - log.info(f"{input_log_prefix}: Loaded from: {input_file_path}") - actual_input_dimensions.append(input_image_data.shape[:2]) # (h, w) - input_source_desc = str(input_file_path) - try: - input_source_bit_depths[channel_char] = ipu.get_image_bit_depth(str(input_file_path)) - except Exception: - log.warning(f"{input_log_prefix}: Could not get bit depth for {input_file_path}. Defaulting to 8.") - input_source_bit_depths[channel_char] = 8 - else: - log.warning(f"{input_log_prefix}: Failed to load image from {input_file_path}. Attempting fallback.") - except Exception as e: - log.warning(f"{input_log_prefix}: Error loading image from {input_file_path}: {e}. Attempting fallback.") + # 1. Attempt to load from context.processed_maps_details + found_input_map_details = self._find_input_map_details_in_context( + required_map_type_from_rule, context.processed_maps_details, input_log_prefix + ) + + if found_input_map_details: + # Assuming the first saved file is the primary one for merging. + # This might need refinement if specific variants (resolutions/formats) are required. + primary_saved_file_info = found_input_map_details['saved_files_info'][0] + input_file_path_str = primary_saved_file_info.get('path') + + if input_file_path_str: + input_file_path = Path(input_file_path_str) # Path is absolute from SaveVariantsStage + if input_file_path.is_file(): + try: + input_image_data = ipu.load_image(str(input_file_path)) + if input_image_data is not None: + log.info(f"{input_log_prefix}: Loaded from context: {input_file_path}") + actual_input_dimensions.append(input_image_data.shape[:2]) # (h, w) + input_source_desc = str(input_file_path) + # Bit depth from the saved variant info + input_source_bit_depths[channel_char] = primary_saved_file_info.get('bit_depth', 8) + else: + log.warning(f"{input_log_prefix}: Failed to load image from {input_file_path} (found in context). Attempting fallback.") + input_image_data = None # Ensure fallback is triggered + except Exception as e: + log.warning(f"{input_log_prefix}: Error loading image from {input_file_path} (found in context): {e}. Attempting fallback.") + input_image_data = None # Ensure fallback is triggered + else: + log.warning(f"{input_log_prefix}: Input file path '{input_file_path}' (from context) not found. Attempting fallback.") + input_image_data = None # Ensure fallback is triggered else: - log.warning(f"{input_log_prefix}: Input file path not found: {input_file_path}. Attempting fallback.") + log.warning(f"{input_log_prefix}: Found map type '{required_map_type_from_rule}' in context, but 'path' is missing in saved_files_info. Attempting fallback.") + input_image_data = None # Ensure fallback is triggered else: - log.warning(f"{input_log_prefix}: No file path provided. Attempting fallback.") + log.info(f"{input_log_prefix}: Input map type '{required_map_type_from_rule}' not found in context.processed_maps_details. Attempting fallback.") + input_image_data = None # Ensure fallback is triggered # 2. Apply Fallback if needed if input_image_data is None: fallback_value = merge_defaults.get(channel_char) if fallback_value is not None: try: + if not target_dimensions_hw: + result.error_message = f"Cannot create fallback for channel '{channel_char}': 'source_dimensions' (target_dimensions_hw) not defined in task_data." + log.error(f"{log_prefix}: {result.error_message}") + return result # Critical failure if dimensions for fallback are missing h, w = target_dimensions_hw # Infer shape/dtype for fallback (simplified) - num_channels = 1 if isinstance(fallback_value, (int, float)) else len(fallback_value) if isinstance(fallback_value, (list, tuple)) else 1 # Default to 1 channel? Needs refinement. + num_channels = 1 if isinstance(fallback_value, (int, float)) else len(fallback_value) if isinstance(fallback_value, (list, tuple)) else 1 dtype = np.uint8 # Default dtype shape = (h, w) if num_channels == 1 else (h, w, num_channels) @@ -199,9 +253,20 @@ class MergedTaskProcessorStage(ProcessingStage): loaded_inputs_for_merge[channel_char] = resized_img log.debug(f"{log_prefix}: Resized input for channel '{channel_char}'.") + # If target_merge_dims_hw is still None (no source_dimensions and no mismatch), use first loaded input's dimensions + if target_merge_dims_hw is None and actual_input_dimensions: + target_merge_dims_hw = actual_input_dimensions[0] + log.info(f"{log_prefix}: Using dimensions from first loaded input: {target_merge_dims_hw}") + # --- Perform Merge --- log.debug(f"{log_prefix}: Performing merge operation for channels '{merge_channels_order}'.") try: + # Final check for valid dimensions before unpacking + if not isinstance(target_merge_dims_hw, tuple) or len(target_merge_dims_hw) != 2: + result.error_message = "Could not determine valid target dimensions for merge operation." + log.error(f"{log_prefix}: {result.error_message} (target_merge_dims_hw: {target_merge_dims_hw})") + return result + output_channels = len(merge_channels_order) h, w = target_merge_dims_hw # Use the potentially adjusted dimensions diff --git a/processing/pipeline/stages/prepare_processing_items.py b/processing/pipeline/stages/prepare_processing_items.py index 16f068e..cdfc2ac 100644 --- a/processing/pipeline/stages/prepare_processing_items.py +++ b/processing/pipeline/stages/prepare_processing_items.py @@ -60,23 +60,24 @@ class PrepareProcessingItemsStage(ProcessingStage): # merged_image_tasks are expected to be loaded into context.config_obj # by the Configuration class from app_settings.json. - merged_tasks_list = getattr(context.config_obj, 'merged_image_tasks', None) + merged_tasks_list = getattr(context.config_obj, 'map_merge_rules', None) if merged_tasks_list and isinstance(merged_tasks_list, list): log.debug(f"Asset '{asset_name_for_log}': Found {len(merged_tasks_list)} merge tasks in global config.") for task_idx, task_data in enumerate(merged_tasks_list): - for task_idx, task_data in enumerate(merged_tasks_list): - if isinstance(task_data, dict): - task_key = f"merged_task_{task_idx}" - # Basic validation for merge task data (can be expanded) - if not task_data.get('output_map_type') or not task_data.get('merge_rule_config'): - log.warning(f"Asset '{asset_name_for_log}', Task Index {task_idx}: Skipping merge task due to missing 'output_map_type' or 'merge_rule_config'. Task data: {task_data}") - continue # Skip this specific task - merge_def = MergeTaskDefinition(task_data=task_data, task_key=task_key) - log.info(f"Asset '{asset_name_for_log}': Identified and adding Merge Task: Key='{merge_def.task_key}', OutputType='{task_data.get('output_map_type', 'N/A')}'") - items_to_process.append(merge_def) - else: - log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in config_obj.merged_image_tasks is not a dictionary. Skipping. Item: {task_data}") + if isinstance(task_data, dict): + task_key = f"merged_task_{task_idx}" + # Basic validation for merge task data: requires output_map_type and an inputs dictionary + if not task_data.get('output_map_type') or not isinstance(task_data.get('inputs'), dict): + log.warning(f"Asset '{asset_name_for_log}', Task Index {task_idx}: Skipping merge task due to missing 'output_map_type' or valid 'inputs' dictionary. Task data: {task_data}") + continue # Skip this specific task + log.debug(f"Asset '{asset_name_for_log}', Preparing Merge Task Index {task_idx}: Raw task_data: {task_data}") + merge_def = MergeTaskDefinition(task_data=task_data, task_key=task_key) + log.debug(f"Asset '{asset_name_for_log}': Created MergeTaskDefinition object: {merge_def}") + log.info(f"Asset '{asset_name_for_log}': Successfully CREATED MergeTaskDefinition: Key='{merge_def.task_key}', OutputType='{merge_def.task_data.get('output_map_type', 'N/A')}'") + items_to_process.append(merge_def) + else: + log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in config_obj.merged_image_tasks is not a dictionary. Skipping. Item: {task_data}") # The log for "Added X potential MergeTaskDefinition items" will be covered by the final log. elif merged_tasks_list is None: log.debug(f"Asset '{asset_name_for_log}': 'merged_image_tasks' not found in config_obj. No global merge tasks to add.") @@ -89,6 +90,7 @@ class PrepareProcessingItemsStage(ProcessingStage): if not items_to_process: log.info(f"Asset '{asset_name_for_log}': No valid items found to process after preparation.") + log.debug(f"Asset '{asset_name_for_log}': Final items_to_process before assigning to context: {items_to_process}") context.processing_items = items_to_process context.intermediate_results = {} # Initialize intermediate results storage -- 2.47.2 From 1cd81cb87ac539562da5b9f3795e620fec0ae890 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Tue, 13 May 2025 09:15:43 +0200 Subject: [PATCH 15/16] Metadata reformatting --- .../stages/metadata_finalization_save.py | 11 ++++++---- .../pipeline/stages/output_organization.py | 20 ++++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/processing/pipeline/stages/metadata_finalization_save.py b/processing/pipeline/stages/metadata_finalization_save.py index f2adb70..fc23833 100644 --- a/processing/pipeline/stages/metadata_finalization_save.py +++ b/processing/pipeline/stages/metadata_finalization_save.py @@ -41,7 +41,7 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): # Check Skip Flag if context.status_flags.get('skip_asset'): context.asset_metadata['status'] = "Skipped" - context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() + # context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() context.asset_metadata['notes'] = context.status_flags.get('skip_reason', 'Skipped early in pipeline') logger.info( f"Asset '{asset_name_for_log}': Marked as skipped. Reason: {context.asset_metadata['notes']}" @@ -51,7 +51,7 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): # However, if we are here, asset_metadata IS initialized. # A. Finalize Metadata - context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() + # context.asset_metadata['processing_end_time'] = datetime.datetime.now().isoformat() # Determine final status (if not already set to Skipped) if context.asset_metadata.get('status') != "Skipped": @@ -115,8 +115,8 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): restructured_processed_maps[map_key] = new_map_entry # Assign the restructured details. Note: 'processed_map_details' (singular 'map') is the key in asset_metadata. - context.asset_metadata['processed_map_details'] = restructured_processed_maps - context.asset_metadata['merged_map_details'] = getattr(context, 'merged_maps_details', {}) + # context.asset_metadata['processed_map_details'] = restructured_processed_maps + # context.asset_metadata['merged_map_details'] = getattr(context, 'merged_maps_details', {}) # (Optional) Add a list of all temporary files # context.asset_metadata['temporary_files'] = getattr(context, 'temporary_files', []) # Assuming this is populated elsewhere @@ -203,6 +203,9 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): return [make_serializable(i) for i in data] return data + # final_output_files is populated by OutputOrganizationStage and might be desired. + # The 'maps' structure is now the primary source for map file paths in metadata. + # context.asset_metadata.pop('final_output_files', None) serializable_metadata = make_serializable(context.asset_metadata) with open(metadata_save_path, 'w') as f: diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index f032eae..e299f3a 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -61,8 +61,10 @@ class OutputOrganizationStage(ProcessingStage): if saved_files_info and isinstance(saved_files_info, list) and len(saved_files_info) > 0: logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(saved_files_info)} variants for map key '{processed_map_key}' (map type: {base_map_type}) from SaveVariantsStage.") - map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) - map_metadata_entry['map_type'] = base_map_type + # Use base_map_type (e.g., "COL") as the key for the map entry + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(base_map_type, {}) + # map_type is now the key, so no need to store it inside the entry + # map_metadata_entry['map_type'] = base_map_type map_metadata_entry.setdefault('variant_paths', {}) # Initialize if not present processed_any_variant_successfully = False @@ -182,11 +184,15 @@ class OutputOrganizationStage(ProcessingStage): details['final_output_path'] = str(final_path) # Update asset_metadata for metadata.json - map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(processed_map_key, {}) - map_metadata_entry['map_type'] = base_map_type - map_metadata_entry['path'] = str(Path(relative_dir_path_str) / Path(output_filename)) # Store relative path - if 'variant_paths' in map_metadata_entry: # Clean up variant paths if present from previous runs - del map_metadata_entry['variant_paths'] + # Use base_map_type (e.g., "COL") as the key for the map entry + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(base_map_type, {}) + # map_type is now the key, so no need to store it inside the entry + # map_metadata_entry['map_type'] = base_map_type + # Store single path in variant_paths, keyed by its resolution string + map_metadata_entry.setdefault('variant_paths', {})[resolution_str] = str(Path(relative_dir_path_str) / Path(output_filename)) + # Remove old cleanup logic, as variant_paths is now the standard + # if 'variant_paths' in map_metadata_entry: + # del map_metadata_entry['variant_paths'] except Exception as e: logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} for map key '{processed_map_key}'. Error: {e}", exc_info=True) -- 2.47.2 From 6e7daf260ad482cbd584b4884c7c41334c982889 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Tue, 13 May 2025 09:21:38 +0200 Subject: [PATCH 16/16] Metadata reformat done --- .../02_Developer_Guide/05_Processing_Pipeline.md | 11 +++++++---- .../pipeline/stages/metadata_finalization_save.py | 5 ++--- processing/pipeline/stages/output_organization.py | 7 ++++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md index fc358c2..4dead92 100644 --- a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md +++ b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md @@ -30,7 +30,7 @@ These stages are executed sequentially once for each asset before the core item 3. **[`MetadataInitializationStage`](processing/pipeline/stages/metadata_initialization.py:81)** (`processing/pipeline/stages/metadata_initialization.py`): * **Responsibility**: Initializes the `context.asset_metadata` dictionary with base information derived from the [`AssetRule`](rule_structure.py:22), [`SourceRule`](rule_structure.py:40), and [`Configuration`](configuration.py:68). This includes asset name, IDs, source/output paths, timestamps, and initial status. - * **Context Interaction**: Populates `context.asset_metadata` and initializes empty dictionaries for `processed_maps_details` and `merged_maps_details`. + * **Context Interaction**: Populates `context.asset_metadata`. Initializes `context.processed_maps_details` and `context.merged_maps_details` as empty dictionaries (these are used internally by subsequent stages but are not directly part of the final `metadata.json` in their original form). 4. **[`FileRuleFilterStage`](processing/pipeline/stages/file_rule_filter.py:10)** (`processing/pipeline/stages/file_rule_filter.py`): * **Responsibility**: Filters the [`FileRule`](rule_structure.py:5) objects associated with the asset to determine which individual files should be considered for processing. It identifies and excludes files matching "FILE_IGNORE" rules based on their `item_type`. @@ -78,11 +78,14 @@ These stages are executed sequentially once for each asset after the core item p 1. **[`OutputOrganizationStage`](processing/pipeline/stages/output_organization.py:14)** (`processing/pipeline/stages/output_organization.py`): * **Responsibility**: Determines the final output paths for all processed maps (including variants) and extra files based on configured patterns. It copies the temporary files generated by the core stages to these final destinations, creating directories as needed and respecting overwrite settings. - * **Context Interaction**: Reads from `context.processed_maps_details` (using the "MAP_" prefixed `internal_map_type` to get the "standard type" via `get_filename_friendly_map_type` for output naming), `context.files_to_process` (for 'EXTRA' files), `context.output_base_path`, and [`Configuration`](configuration.py:68). Updates entries in `context.processed_maps_details` with final paths and organization status. Populates `context.asset_metadata['final_output_files']`. (Note: Legacy code for `'Processed_With_Variants'` status has been removed from this stage). + * **Context Interaction**: Reads from `context.processed_maps_details`, `context.files_to_process` (for 'EXTRA' files), `context.output_base_path`, and [`Configuration`](configuration.py:68). Updates entries in `context.processed_maps_details` with organization status. Populates `context.asset_metadata['maps']` with the final map structure: + * The `maps` object is a dictionary where keys are standard map types (e.g., "COL", "REFL"). + * Each entry contains a `variant_paths` dictionary, where keys are resolution strings (e.g., "8K", "4K") and values are the filenames of the map variants (relative to the asset's output directory). + It also populates `context.asset_metadata['final_output_files']` with a list of absolute paths to all generated files (this list itself is not saved in the final `metadata.json`). 2. **[`MetadataFinalizationAndSaveStage`](processing/pipeline/stages/metadata_finalization_save.py:14)** (`processing/pipeline/stages/metadata_finalization_save.py`): - * **Responsibility**: Finalizes the `context.asset_metadata` (setting end time, final status based on flags). It restructures the processed map details for inclusion, determines the save path for the metadata file based on configuration and patterns, serializes the metadata to JSON, and saves the `metadata.json` file to the final output location. - * **Context Interaction**: Reads from `context.asset_metadata`, `context.processed_maps_details`, `context.merged_maps_details`, `context.output_base_path`, and [`Configuration`](configuration.py:68). Writes the `metadata.json` file and updates `context.asset_metadata` with its final path and status. + * **Responsibility**: Finalizes the `context.asset_metadata` (setting final status based on flags). It determines the save path for the metadata file based on configuration and patterns, serializes the `context.asset_metadata` (which now contains the structured `maps` data from `OutputOrganizationStage`) to JSON, and saves the `metadata.json` file. + * **Context Interaction**: Reads from `context.asset_metadata` (including the `maps` structure), `context.output_base_path`, and [`Configuration`](configuration.py:68). Before saving, it explicitly removes the `final_output_files` key from `context.asset_metadata`. The `processing_end_time` is also no longer added. The `metadata.json` file is written, and `context.asset_metadata` is updated with its final path and status. The older `processed_maps_details` and `merged_maps_details` from the context are not directly included in the JSON. ## External Steps diff --git a/processing/pipeline/stages/metadata_finalization_save.py b/processing/pipeline/stages/metadata_finalization_save.py index fc23833..78373fc 100644 --- a/processing/pipeline/stages/metadata_finalization_save.py +++ b/processing/pipeline/stages/metadata_finalization_save.py @@ -203,9 +203,8 @@ class MetadataFinalizationAndSaveStage(ProcessingStage): return [make_serializable(i) for i in data] return data - # final_output_files is populated by OutputOrganizationStage and might be desired. - # The 'maps' structure is now the primary source for map file paths in metadata. - # context.asset_metadata.pop('final_output_files', None) + # final_output_files is populated by OutputOrganizationStage. Explicitly remove it as per user request. + context.asset_metadata.pop('final_output_files', None) serializable_metadata = make_serializable(context.asset_metadata) with open(metadata_save_path, 'w') as f: diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py index e299f3a..7a9d9d0 100644 --- a/processing/pipeline/stages/output_organization.py +++ b/processing/pipeline/stages/output_organization.py @@ -116,8 +116,8 @@ class OutputOrganizationStage(ProcessingStage): # Optionally update variant_detail status if needed # Store relative path in metadata - relative_final_variant_path_str = str(Path(relative_dir_path_str_variant) / Path(output_filename_variant)) - map_metadata_entry['variant_paths'][variant_resolution_key] = relative_final_variant_path_str + # Store only the filename, as it's relative to the metadata.json location + map_metadata_entry['variant_paths'][variant_resolution_key] = output_filename_variant processed_any_variant_successfully = True except Exception as e: @@ -189,7 +189,8 @@ class OutputOrganizationStage(ProcessingStage): # map_type is now the key, so no need to store it inside the entry # map_metadata_entry['map_type'] = base_map_type # Store single path in variant_paths, keyed by its resolution string - map_metadata_entry.setdefault('variant_paths', {})[resolution_str] = str(Path(relative_dir_path_str) / Path(output_filename)) + # Store only the filename, as it's relative to the metadata.json location + map_metadata_entry.setdefault('variant_paths', {})[resolution_str] = output_filename # Remove old cleanup logic, as variant_paths is now the standard # if 'variant_paths' in map_metadata_entry: # del map_metadata_entry['variant_paths'] -- 2.47.2