diff --git a/.vscode/settings.json b/.vscode/settings.json index c21d2a5..33d89d2 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,6 +8,6 @@ ".vscode": true, ".vs": true, ".lh": true, - "__pycache__": true, + "__pycache__": true } } \ No newline at end of file diff --git a/Documentation/00_Overview.md b/Documentation/00_Overview.md index 897442c..177ecf0 100644 --- a/Documentation/00_Overview.md +++ b/Documentation/00_Overview.md @@ -12,9 +12,9 @@ This documentation strictly excludes details on environment setup, dependency in ## Architecture and Codebase Summary -For developers interested in contributing, the tool's architecture centers on a **Core Processing Engine** (`processing_engine.py`) executing a pipeline based on a **Hierarchical Rule System** (`rule_structure.py`) and a **Configuration System** (`configuration.py` loading `config/app_settings.json` and `Presets/*.json`). The **Graphical User Interface** (`gui/`) has been significantly refactored: `MainWindow` (`main_window.py`) acts as a coordinator, delegating tasks to specialized widgets (`MainPanelWidget`, `PresetEditorWidget`, `LogConsoleWidget`) and background handlers (`RuleBasedPredictionHandler`, `LLMPredictionHandler`, `LLMInteractionHandler`, `AssetRestructureHandler`). The **Directory Monitor** (`monitor.py`) now processes archives asynchronously using a thread pool and utility functions (`utils/prediction_utils.py`, `utils/workspace_utils.py`). The **Command-Line Interface** entry point (`main.py`) primarily launches the GUI, with core CLI functionality currently non-operational. Optional **Blender Integration** (`blenderscripts/`) remains. A new `utils/` directory houses shared helper functions. +For developers interested in contributing, the tool's architecture centers on a **Core Processing Engine** (`processing_engine.py`) which initializes and runs a **Pipeline Orchestrator** (`processing/pipeline/orchestrator.py::PipelineOrchestrator`). 
This orchestrator executes a defined sequence of **Processing Stages** (located in `processing/pipeline/stages/`) based on a **Hierarchical Rule System** (`rule_structure.py`) and a **Configuration System** (`configuration.py` loading `config/app_settings.json` and `Presets/*.json`). The **Graphical User Interface** (`gui/`) has been significantly refactored: `MainWindow` (`main_window.py`) acts as a coordinator, delegating tasks to specialized widgets (`MainPanelWidget`, `PresetEditorWidget`, `LogConsoleWidget`) and background handlers (`RuleBasedPredictionHandler`, `LLMPredictionHandler`, `LLMInteractionHandler`, `AssetRestructureHandler`). The **Directory Monitor** (`monitor.py`) now processes archives asynchronously using a thread pool and utility functions (`utils/prediction_utils.py`, `utils/workspace_utils.py`). The **Command-Line Interface** entry point (`main.py`) primarily launches the GUI, with core CLI functionality currently non-operational. Optional **Blender Integration** (`blenderscripts/`) remains. A new `utils/` directory houses shared helper functions. -The codebase reflects this structure. The `gui/` directory contains the refactored UI components, `utils/` holds shared utilities, `Presets/` contains JSON presets, and `blenderscripts/` holds Blender scripts. Core logic resides in `processing_engine.py`, `configuration.py`, `rule_structure.py`, `monitor.py`, and `main.py`. The processing pipeline, executed by `processing_engine.py`, relies entirely on the input `SourceRule` and static configuration for steps like map processing, channel merging, and metadata generation. +The codebase reflects this structure. The `gui/` directory contains the refactored UI components, `utils/` holds shared utilities, `processing/pipeline/` contains the orchestrator and individual processing stages, `Presets/` contains JSON presets, and `blenderscripts/` holds Blender scripts. 
Core logic resides in `processing_engine.py`, `processing/pipeline/orchestrator.py`, `configuration.py`, `rule_structure.py`, `monitor.py`, and `main.py`. The processing pipeline, initiated by `processing_engine.py` and executed by the `PipelineOrchestrator`, relies entirely on the input `SourceRule` and static configuration. Each stage in the pipeline operates on an `AssetProcessingContext` object (`processing/pipeline/asset_context.py`) to perform specific tasks like map processing, channel merging, and metadata generation. ## Table of Contents diff --git a/Documentation/02_Developer_Guide/01_Architecture.md b/Documentation/02_Developer_Guide/01_Architecture.md index 4ff04e9..24d888d 100644 --- a/Documentation/02_Developer_Guide/01_Architecture.md +++ b/Documentation/02_Developer_Guide/01_Architecture.md @@ -6,17 +6,19 @@ This document provides a high-level overview of the Asset Processor Tool's archi The Asset Processor Tool is designed to process 3D asset source files into a standardized library format. Its high-level architecture consists of: -1. **Core Processing Engine (`processing_engine.py`):** The primary component responsible for executing the asset processing pipeline for a single input asset based on a provided `SourceRule` object and static configuration. The previous `asset_processor.py` has been removed. -2. **Prediction System:** Responsible for analyzing input files and generating the initial `SourceRule` hierarchy with predicted values. This system utilizes a base handler (`gui/base_prediction_handler.py::BasePredictionHandler`) with specific implementations: +1. **Core Processing Initiation (`processing_engine.py`):** The `ProcessingEngine` class acts as the entry point for an asset processing task. It initializes and runs a `PipelineOrchestrator`. +2. **Pipeline Orchestration (`processing/pipeline/orchestrator.py`):** The `PipelineOrchestrator` manages a sequence of discrete processing stages. 
It creates an `AssetProcessingContext` for each asset and passes this context through each stage. +3. **Processing Stages (`processing/pipeline/stages/`):** Individual modules, each responsible for a specific task in the pipeline (e.g., filtering files, processing maps, merging channels, organizing output). They operate on the `AssetProcessingContext`. +4. **Prediction System:** Responsible for analyzing input files and generating the initial `SourceRule` hierarchy with predicted values. This system utilizes a base handler (`gui/base_prediction_handler.py::BasePredictionHandler`) with specific implementations: * **Rule-Based Predictor (`gui/prediction_handler.py::RuleBasedPredictionHandler`):** Uses predefined rules from presets to classify files and determine initial processing parameters. * **LLM Predictor (`gui/llm_prediction_handler.py::LLMPredictionHandler`):** An experimental alternative that uses a Large Language Model (LLM) to interpret file contents and context to predict processing parameters. -3. **Configuration System (`Configuration`):** Handles loading core settings (including centralized type definitions and LLM-specific configuration) and merging them with supplier-specific rules defined in JSON presets and the persistent `config/suppliers.json` file. -4. **Multiple Interfaces:** Provides different ways to interact with the tool: +5. **Configuration System (`Configuration`):** Handles loading core settings (including centralized type definitions and LLM-specific configuration) and merging them with supplier-specific rules defined in JSON presets and the persistent `config/suppliers.json` file. +6. **Multiple Interfaces:** Provides different ways to interact with the tool: * Graphical User Interface (GUI) * Command-Line Interface (CLI) - *Note: The primary CLI execution logic (`run_cli` in `main.py`) is currently non-functional/commented out post-refactoring.* * Directory Monitor for automated processing. 
-The GUI acts as the primary source of truth for processing rules, coordinating the generation and management of the `SourceRule` hierarchy before sending it to the processing engine. It accumulates prediction results from multiple input sources before updating the view. The Monitor interface can also generate `SourceRule` objects (using `utils/prediction_utils.py`) to bypass the GUI for automated workflows. -5. **Optional Integration:** Includes scripts (`blenderscripts/`) for integrating with Blender. Logic for executing these scripts was intended to be centralized in `utils/blender_utils.py`, but this utility has not yet been implemented. +The GUI acts as the primary source of truth for processing rules, coordinating the generation and management of the `SourceRule` hierarchy before sending it to the `ProcessingEngine`. It accumulates prediction results from multiple input sources before updating the view. The Monitor interface can also generate `SourceRule` objects (using `utils/prediction_utils.py`) to bypass the GUI for automated workflows. +7. **Optional Integration:** Includes scripts (`blenderscripts/`) for integrating with Blender. Logic for executing these scripts was intended to be centralized in `utils/blender_utils.py`, but this utility has not yet been implemented. ## Hierarchical Rule System @@ -26,14 +28,14 @@ A key addition to the architecture is the **Hierarchical Rule System**, which pr * **AssetRule:** Represents rules applied to a specific asset within a source (a source can contain multiple assets). * **FileRule:** Represents rules applied to individual files within an asset. -This hierarchy allows for fine-grained control over processing parameters. The GUI's prediction logic generates this hierarchy with initial predicted values for overridable fields based on presets and file analysis. 
The processing engine then operates *solely* on the explicit values provided in this `SourceRule` object and static configuration, without internal prediction or fallback logic. +This hierarchy allows for fine-grained control over processing parameters. The GUI's prediction logic generates this hierarchy with initial predicted values for overridable fields based on presets and file analysis. The `ProcessingEngine` (via the `PipelineOrchestrator` and its stages) then operates *solely* on the explicit values provided in this `SourceRule` object and static configuration, without internal prediction or fallback logic. ## Core Components * `config/app_settings.json`: Defines core, global settings, constants, and centralized definitions for allowed asset and file types (`ASSET_TYPE_DEFINITIONS`, `FILE_TYPE_DEFINITIONS`), including metadata like colors and descriptions. This replaces the old `config.py` file. * `config/suppliers.json`: A persistent JSON file storing known supplier names for GUI auto-completion. * `Presets/*.json`: Supplier-specific JSON files defining rules for file interpretation and initial prediction. -* `configuration.py` (`Configuration` class): Loads `config/app_settings.json` settings and merges them with a selected preset, pre-compiling regex patterns for efficiency. This static configuration is used by the processing engine. +* `configuration.py` (`Configuration` class): Loads `config/app_settings.json` settings and merges them with a selected preset, pre-compiling regex patterns for efficiency. This static configuration is used by the processing pipeline. * `rule_structure.py`: Defines the `SourceRule`, `AssetRule`, and `FileRule` dataclasses used to represent the hierarchical processing rules. * `gui/`: Directory containing modules for the Graphical User Interface (GUI), built with PySide6. The `MainWindow` (`main_window.py`) acts as a coordinator, orchestrating interactions between various components. 
Key GUI components include: * `main_panel_widget.py::MainPanelWidget`: Contains the primary controls for loading sources, selecting presets, viewing/editing rules, and initiating processing. @@ -47,7 +49,10 @@ This hierarchy allows for fine-grained control over processing parameters. The G * `prediction_handler.py::RuleBasedPredictionHandler`: Generates the initial `SourceRule` hierarchy based on presets and file analysis. Inherits from `BasePredictionHandler`. * `llm_prediction_handler.py::LLMPredictionHandler`: Experimental predictor using an LLM. Inherits from `BasePredictionHandler`. * `llm_interaction_handler.py::LLMInteractionHandler`: Manages communication with the LLM service for the LLM predictor. -* `processing_engine.py` (`ProcessingEngine` class): The core component that executes the processing pipeline for a single `SourceRule` object using the static `Configuration`. A new instance is created per task for state isolation. +* `processing_engine.py` (`ProcessingEngine` class): The entry-point class that initializes and runs the `PipelineOrchestrator` for a given `SourceRule` and `Configuration`. +* `processing/pipeline/orchestrator.py` (`PipelineOrchestrator` class): Manages the sequence of processing stages, creating and passing an `AssetProcessingContext` through them. +* `processing/pipeline/asset_context.py` (`AssetProcessingContext` class): A dataclass holding all data and state for the processing of a single asset, passed between stages. +* `processing/pipeline/stages/`: Directory containing individual processing stage modules, each handling a specific part of the pipeline (e.g., `IndividualMapProcessingStage`, `MapMergingStage`). * `main.py`: The main entry point for the application. Primarily launches the GUI. Contains commented-out/non-functional CLI logic (`run_cli`). * `monitor.py`: Implements the directory monitoring feature using `watchdog`. 
It now processes archives asynchronously using a `ThreadPoolExecutor`, leveraging `utils.prediction_utils.py` for rule generation and `utils.workspace_utils.py` for workspace management before invoking the `ProcessingEngine`. * `blenderscripts/`: Contains Python scripts designed to be executed *within* Blender for post-processing tasks. @@ -56,19 +61,21 @@ This hierarchy allows for fine-grained control over processing parameters. The G * `prediction_utils.py`: Contains functions like `generate_source_rule_from_archive` used by the monitor for rule-based prediction. * `blender_utils.py`: (Intended location for Blender script execution logic, currently not implemented). -## Processing Pipeline (Simplified) +## Processing Pipeline (Simplified Overview) -The primary processing engine (`processing_engine.py`) executes a series of steps for each asset based on the provided `SourceRule` object and static configuration: +The asset processing pipeline, initiated by `processing_engine.py` and managed by `PipelineOrchestrator`, executes a series of stages for each asset defined in the `SourceRule`. An `AssetProcessingContext` object carries data between stages. The typical sequence is: -1. Extraction of input to a temporary workspace (using `utils.workspace_utils.py`). -2. Classification of files (map, model, extra, ignored, unrecognised) based *only* on the provided `SourceRule` object (classification/prediction happens *before* the engine is called). -3. Determination of base metadata (asset name, category, archetype). -4. Skip check if output exists and overwrite is not forced. -5. Processing of maps (resize, format/bit depth conversion, inversion, stats calculation). -6. Merging of channels based on rules. -7. Generation of `metadata.json` file. -8. Organization of processed files into the final output structure. -9. Cleanup of the temporary workspace. -10. (Optional) Execution of Blender scripts (currently triggered directly, intended to use `utils.blender_utils.py`). 
+1. **Supplier Determination**: Identify the effective supplier. +2. **Asset Skip Logic**: Check if the asset should be skipped. +3. **Metadata Initialization**: Set up initial asset metadata. +4. **File Rule Filtering**: Determine which files to process. +5. **Pre-Map Processing**: + * Gloss-to-Roughness Conversion. + * Alpha Channel Extraction. + * Normal Map Green Channel Inversion. +6. **Individual Map Processing**: Handle individual maps (scaling, variants, stats, naming). +7. **Map Merging**: Combine channels from different maps. +8. **Metadata Finalization & Save**: Generate and save `metadata.json` (temporarily). +9. **Output Organization**: Copy all processed files to final output locations. -This architecture allows for a modular design, separating configuration, rule generation/management (GUI, Monitor utilities), and core processing execution. The `SourceRule` object serves as a clear data contract between the rule generation layer and the processing engine. Parallel processing (in Monitor) and background threads (in GUI) are utilized for efficiency and responsiveness. \ No newline at end of file +External steps like workspace preparation/cleanup and optional Blender script execution bracket this core pipeline. This architecture allows for a modular design, separating configuration, rule generation/management, and core processing execution. \ No newline at end of file diff --git a/Documentation/02_Developer_Guide/03_Key_Components.md b/Documentation/02_Developer_Guide/03_Key_Components.md index 11fb683..95f1c17 100644 --- a/Documentation/02_Developer_Guide/03_Key_Components.md +++ b/Documentation/02_Developer_Guide/03_Key_Components.md @@ -2,17 +2,65 @@ This document describes the major classes and modules that form the core of the Asset Processor Tool. 
-## `ProcessingEngine` (`processing_engine.py`) +## Core Processing Architecture -The `ProcessingEngine` class is the new core component responsible for executing the asset processing pipeline for a *single* input asset. Unlike the older `AssetProcessor`, this engine operates *solely* based on a complete `SourceRule` object provided to its `process()` method and the static `Configuration` object passed during initialization. It contains no internal prediction, classification, or fallback logic. Its key responsibilities include: +The asset processing pipeline has been refactored into a staged architecture, managed by an orchestrator. -* Setting up and cleaning up a temporary workspace for processing (potentially using `utils.workspace_utils`). -* Extracting or copying input files to the workspace. -* Processing files based on the explicit rules and predicted values contained within the input `SourceRule`. -* Processing texture maps (resizing, format/bit depth conversion, inversion, stats calculation) using parameters from the `SourceRule` or static `Configuration`. -* Merging channels based on rules defined in the static `Configuration` and parameters from the `SourceRule`. -* Generating the `metadata.json` file containing details about the processed asset, incorporating information from the `SourceRule`. -* Organizing the final output files into the structured library directory. +### `ProcessingEngine` (`processing_engine.py`) + +The `ProcessingEngine` class serves as the primary entry point for initiating an asset processing task. Its main responsibilities are: + +* Initializing a `PipelineOrchestrator` instance. +* Providing the `PipelineOrchestrator` with the global `Configuration` object and a predefined list of processing stages. +* Invoking the orchestrator's `process_source_rule()` method with the input `SourceRule`, workspace path, output path, and other processing parameters. 
+* Managing a top-level temporary directory for the engine's operations if needed, though individual stages might also use sub-temporary directories via the `AssetProcessingContext`. + +It no longer contains the detailed logic for each processing step (like map manipulation, merging, etc.) directly. Instead, it delegates these tasks to the orchestrator and its stages. + +### `PipelineOrchestrator` (`processing/pipeline/orchestrator.py`) + +The `PipelineOrchestrator` class is responsible for managing the execution of the asset processing pipeline. Its key functions include: + +* Receiving a `SourceRule` object, `Configuration`, and a list of `ProcessingStage` objects. +* For each `AssetRule` within the `SourceRule`: + * Creating an `AssetProcessingContext` instance. + * Sequentially executing each registered `ProcessingStage`, passing the `AssetProcessingContext` to each stage. + * Handling exceptions that occur within stages and managing the overall status of asset processing (processed, skipped, failed). +* Managing a temporary directory for the duration of a `SourceRule` processing, which is made available to stages via the `AssetProcessingContext`. + +### `AssetProcessingContext` (`processing/pipeline/asset_context.py`) + +The `AssetProcessingContext` is a dataclass that acts as a stateful container for all data related to the processing of a single `AssetRule`. An instance of this context is created by the `PipelineOrchestrator` for each asset and is passed through each processing stage. Key information it holds includes: + +* The input `SourceRule` and the current `AssetRule`. +* Paths: `workspace_path`, `engine_temp_dir`, `output_base_path`. +* The `Configuration` object. +* `effective_supplier`: Determined by an early stage. +* `asset_metadata`: A dictionary to accumulate metadata about the asset. +* `processed_maps_details`: Stores details about individually processed maps (paths, dimensions, etc.). 
+* `merged_maps_details`: Stores details about merged maps. +* `files_to_process`: A list of `FileRule` objects to be processed for the current asset. +* `loaded_data_cache`: For caching loaded image data within an asset's processing. +* `status_flags`: For signaling conditions like `skip_asset` or `asset_failed`. +* `incrementing_value`, `sha5_value`: Optional values for path generation. + +Each stage reads from and writes to this context, allowing data and state to flow through the pipeline. + +### `Processing Stages` (`processing/pipeline/stages/`) + +The actual processing logic is broken down into a series of discrete stages, each inheriting from `ProcessingStage` (`processing/pipeline/stages/base_stage.py`). Each stage implements an `execute(context: AssetProcessingContext)` method. Key stages include (in typical execution order): + +* **`SupplierDeterminationStage`**: Determines the effective supplier. +* **`AssetSkipLogicStage`**: Checks if the asset processing should be skipped. +* **`MetadataInitializationStage`**: Initializes basic asset metadata. +* **`FileRuleFilterStage`**: Filters `FileRule`s to decide which files to process. +* **`GlossToRoughConversionStage`**: Handles gloss-to-roughness map inversion. +* **`AlphaExtractionToMaskStage`**: Extracts alpha channels to create masks. +* **`NormalMapGreenChannelStage`**: Inverts normal map green channels if required. +* **`IndividualMapProcessingStage`**: Processes individual maps (POT scaling, resolution variants, color conversion, stats, aspect ratio, filename conventions). +* **`MapMergingStage`**: Merges map channels based on rules. +* **`MetadataFinalizationAndSaveStage`**: Collects all metadata and saves `metadata.json` to a temporary location. +* **`OutputOrganizationStage`**: Copies all processed files and metadata to the final output directory structure. 
## `Rule Structure` (`rule_structure.py`) @@ -22,19 +70,19 @@ This module defines the data structures used to represent the hierarchical proce * `AssetRule`: A dataclass representing rules applied at the asset level. It contains nested `FileRule` objects. * `FileRule`: A dataclass representing rules applied at the file level. -These classes hold specific rule parameters (e.g., `supplier_identifier`, `asset_type`, `asset_type_override`, `item_type`, `item_type_override`, `target_asset_name_override`). Attributes like `asset_type` and `item_type_override` now use string types, which are validated against centralized lists in `config/app_settings.json`. These structures support serialization (Pickle, JSON) to allow them to be passed between different parts of the application, including across process boundaries. +These classes hold specific rule parameters (e.g., `supplier_identifier`, `asset_type`, `asset_type_override`, `item_type`, `item_type_override`, `target_asset_name_override`, `resolution_override`, `channel_merge_instructions`). Attributes like `asset_type` and `item_type_override` now use string types, which are validated against centralized lists in `config/app_settings.json`. These structures support serialization (Pickle, JSON) to allow them to be passed between different parts of the application, including across process boundaries. The `PipelineOrchestrator` and its stages heavily rely on the information within these rule objects, passed via the `AssetProcessingContext`. ## `Configuration` (`configuration.py`) The `Configuration` class manages the tool's settings. It is responsible for: -* Loading the core default settings defined in `config/app_settings.json`. +* Loading the core default settings defined in `config/app_settings.json` (e.g., `FILE_TYPE_DEFINITIONS`, `ASSET_TYPE_DEFINITIONS`, `image_resolutions`, `map_merge_rules`, `output_filename_pattern`). * Loading the supplier-specific rules from a selected preset JSON file (`Presets/*.json`). 
* Merging the core settings and preset rules into a single, unified configuration object. * Validating the loaded configuration to ensure required settings are present. -* Pre-compiling regular expression patterns defined in the preset for efficient file classification by the `PredictionHandler`. +* Pre-compiling regular expression patterns defined in the preset for efficient file classification by the prediction handlers. -An instance of the `Configuration` class is typically created once per application run (or per processing batch) and passed to the `ProcessingEngine`. +An instance of the `Configuration` class is typically created once per application run (or per processing batch) and passed to the `ProcessingEngine`, which then makes it available to the `PipelineOrchestrator` and subsequently to each stage via the `AssetProcessingContext`. ## GUI Components (`gui/`) @@ -191,10 +239,10 @@ The `monitor.py` script implements the directory monitoring feature. It has been * Loads the necessary `Configuration`. * Calls `utils.prediction_utils.generate_source_rule_from_archive` to get the `SourceRule`. * Calls `utils.workspace_utils.prepare_processing_workspace` to set up the workspace. - * Instantiates and runs the `ProcessingEngine`. + * Instantiates and runs the `ProcessingEngine` (which in turn uses the `PipelineOrchestrator`). * Handles moving the source archive to 'processed' or 'error' directories. * Cleans up the workspace. ## Summary -These key components, along with the refactored GUI structure and new utility modules, work together to provide the tool's functionality. The architecture emphasizes separation of concerns (configuration, rule generation, processing, UI), utilizes background processing for responsiveness (GUI prediction, Monitor tasks), and relies on the `SourceRule` object as the central data structure passed between different stages of the workflow. 
\ No newline at end of file +These key components, along with the refactored GUI structure and new utility modules, work together to provide the tool's functionality. The architecture emphasizes separation of concerns (configuration, rule generation, processing, UI), utilizes background processing for responsiveness (GUI prediction, Monitor tasks), and relies on the `SourceRule` object as the central data structure passed between different stages of the workflow. The processing core is now a staged pipeline managed by the `PipelineOrchestrator`, enhancing modularity and maintainability. \ No newline at end of file diff --git a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md index 14d801c..4dead92 100644 --- a/Documentation/02_Developer_Guide/05_Processing_Pipeline.md +++ b/Documentation/02_Developer_Guide/05_Processing_Pipeline.md @@ -1,72 +1,98 @@ -# Developer Guide: Processing Pipeline +# Developer Guide: Processing Pipeline -This document details the step-by-step technical process executed by the `ProcessingEngine` class (`processing_engine.py`) when processing a single asset. A new instance of `ProcessingEngine` is created for each processing task to ensure state isolation. +This document details the step-by-step technical process executed by the asset processing pipeline, which is initiated by the [`ProcessingEngine`](processing_engine.py:73) class (`processing_engine.py`) and orchestrated by the [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) (`processing/pipeline/orchestrator.py`). -The `ProcessingEngine.process()` method orchestrates the following pipeline based *solely* on the provided `SourceRule` object and the static `Configuration` object passed during engine initialization. It contains no internal prediction, classification, or fallback logic. All necessary overrides and static configuration values are accessed directly from these inputs. 
+The [`ProcessingEngine.process()`](processing_engine.py:131) method serves as the main entry point. It initializes a [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) instance, providing it with the application's [`Configuration`](configuration.py:68) object and predefined lists of pre-item and post-item processing stages. The [`PipelineOrchestrator.process_source_rule()`](processing/pipeline/orchestrator.py:95) method then manages the execution of these stages for each asset defined in the input [`SourceRule`](rule_structure.py:40). -The pipeline steps are: +A crucial component in this architecture is the [`AssetProcessingContext`](processing/pipeline/asset_context.py:86) (`processing/pipeline/asset_context.py`). An instance of this dataclass is created for each [`AssetRule`](rule_structure.py:22) being processed. It acts as a stateful container, carrying all relevant data (source files, rules, configuration, intermediate results, metadata) and is passed sequentially through each stage. Each stage can read from and write to the context, allowing data to flow and be modified throughout the pipeline. -1. **Workspace Preparation (External)**: - * Before the `ProcessingEngine` is invoked, the calling code (e.g., `main.ProcessingTask`, `monitor._process_archive_task`) is responsible for setting up a temporary workspace. - * This typically involves using `utils.workspace_utils.prepare_processing_workspace`, which creates a temporary directory and extracts the input source (archive or folder) into it. - * The path to this prepared workspace is passed to the `ProcessingEngine` during initialization. +The pipeline execution for each asset follows this general flow: -2. **Prediction and Rule Generation (External)**: - * Also handled before the `ProcessingEngine` is invoked. 
- * Either the `RuleBasedPredictionHandler`, `LLMPredictionHandler` (triggered by the GUI), or `utils.prediction_utils.generate_source_rule_from_archive` (used by the Monitor) analyzes the input files and generates a `SourceRule` object. - * This `SourceRule` contains predicted classifications and initial overrides. - * If using the GUI, the user can modify these rules. - * The final `SourceRule` object is the primary input to the `ProcessingEngine.process()` method. +1. **Pre-Item Stages:** A sequence of stages executed once per asset before the core item processing loop. These stages typically perform initial setup, filtering, and asset-level transformations. +2. **Core Item Processing Loop:** The [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) iterates through a list of "processing items" (individual files or merge tasks) prepared by a dedicated stage. For each item, a sequence of core processing stages is executed. +3. **Post-Item Stages:** A sequence of stages executed once per asset after the core item processing loop is complete. These stages handle final tasks like organizing output files and saving metadata. -3. **File Inventory (`_inventory_and_classify_files`)**: - * Scans the contents of the *already prepared* temporary workspace. - * This step primarily inventories the files present. The *classification* (determining `item_type`, etc.) is taken directly from the input `SourceRule`. The `item_type` for each file (within the `FileRule` objects of the `SourceRule`) is expected to be a key from `Configuration.FILE_TYPE_DEFINITIONS`. - * Stores the file paths and their associated rules from the `SourceRule` in `self.classified_files`. +## Pipeline Stages -4. **Base Metadata Determination (`_determine_base_metadata`, `_determine_single_asset_metadata`)**: - * Determines the base asset name, category, and archetype using the explicit values provided in the input `SourceRule` and the static `Configuration`. 
Overrides (like `supplier_identifier`, `asset_type`, `asset_name_override`) are taken directly from the `SourceRule`. The `asset_type` (within the `AssetRule` object of the `SourceRule`) is expected to be a key from `Configuration.ASSET_TYPE_DEFINITIONS`. +The stages are executed in the following order for each asset: -5. **Skip Check**: - * If the `overwrite` flag is `False`, checks if the final output directory already exists and contains `metadata.json`. - * If so, processing for this asset is skipped. +### Pre-Item Stages -6. **Map Processing (`_process_maps`)**: - * Iterates through files classified as maps in the `SourceRule`. - * Loads images (`cv2.imread`). - * **Glossiness-to-Roughness Inversion**: - * The system identifies a map as a gloss map if its input filename contains "MAP_GLOSS" (case-insensitive) and is intended to become a roughness map (e.g., its `item_type` or `item_type_override` in the `SourceRule` effectively designates it as roughness). - * If these conditions are met, its colors are inverted. - * After inversion, the map is treated as a "MAP_ROUGH" type for subsequent processing steps. - * The fact that a map was derived from a gloss source and inverted is recorded in the output `metadata.json` for that map type using the `derived_from_gloss_filename: true` flag. This replaces the previous reliance on an internal `is_gloss_source` flag within the `FileRule` structure. - * Resizes images based on `Configuration`. - * Determines output bit depth and format based on `Configuration` and `SourceRule`. - * Converts data types and saves images (`cv2.imwrite`). -* The output filename uses the `standard_type` alias (e.g., `COL`, `NRM`) retrieved from the `Configuration.FILE_TYPE_DEFINITIONS` based on the file's effective `item_type`. - * Calculates image statistics. - * Stores processed map details. +These stages are executed sequentially once for each asset before the core item processing loop begins. -7. 
**Map Merging (`_merge_maps_from_source`)**: - * Iterates through `MAP_MERGE_RULES` in `Configuration`. - * Identifies required source maps by checking the `item_type_override` within the `SourceRule` (specifically in the `FileRule` for each file). Both `item_type` and `item_type_override` are expected to be keys from `Configuration.FILE_TYPE_DEFINITIONS`. Files with a base `item_type` of `"FILE_IGNORE"` are explicitly excluded from consideration. - * Loads source channels, handling missing inputs with defaults from `Configuration` or `SourceRule`. - * Merges channels (`cv2.merge`). - * Determines output format/bit depth and saves the merged map. - * Stores merged map details. +1. **[`SupplierDeterminationStage`](processing/pipeline/stages/supplier_determination.py:6)** (`processing/pipeline/stages/supplier_determination.py`): + * **Responsibility**: Determines the effective supplier for the asset based on the [`SourceRule`](rule_structure.py:40)'s `supplier_override`, `supplier_identifier`, and validation against configured suppliers. + * **Context Interaction**: Sets `context.effective_supplier` and may set a `supplier_error` flag in `context.status_flags`. -8. **Metadata File Generation (`_generate_metadata_file`)**: - * Collects asset metadata, processed/merged map details, ignored files list, etc., primarily from the `SourceRule` and internal processing results. - * Writes data to `metadata.json` in the temporary workspace. +2. **[`AssetSkipLogicStage`](processing/pipeline/stages/asset_skip_logic.py:5)** (`processing/pipeline/stages/asset_skip_logic.py`): + * **Responsibility**: Checks if the entire asset should be skipped based on conditions like a missing/invalid supplier, a "SKIP" status in asset metadata, or if the asset is already processed and overwrite is disabled. + * **Context Interaction**: Sets the `skip_asset` flag and `skip_reason` in `context.status_flags` if the asset should be skipped. -9. 
**Output Organization (`_organize_output_files`)**: -* Determines the final output directory using the global `OUTPUT_DIRECTORY_PATTERN` and the final filename using the global `OUTPUT_FILENAME_PATTERN` (both from the `Configuration` object). The `utils.path_utils` module combines these with the base output directory and asset-specific data (like asset name, map type, resolution, etc.) to construct the full path for each file. - * Creates the final structured output directory (`///`), using the supplier name from the `SourceRule`. - * Moves processed maps, merged maps, models, metadata, and other classified files from the temporary workspace to the final output directory. +3. **[`MetadataInitializationStage`](processing/pipeline/stages/metadata_initialization.py:81)** (`processing/pipeline/stages/metadata_initialization.py`): + * **Responsibility**: Initializes the `context.asset_metadata` dictionary with base information derived from the [`AssetRule`](rule_structure.py:22), [`SourceRule`](rule_structure.py:40), and [`Configuration`](configuration.py:68). This includes asset name, IDs, source/output paths, timestamps, and initial status. + * **Context Interaction**: Populates `context.asset_metadata`. Initializes `context.processed_maps_details` and `context.merged_maps_details` as empty dictionaries (these are used internally by subsequent stages but are not directly part of the final `metadata.json` in their original form). -10. **Workspace Cleanup (External)**: - * After the `ProcessingEngine.process()` method completes (successfully or with errors), the *calling code* is responsible for cleaning up the temporary workspace directory created in Step 1. This is often done in a `finally` block where `utils.workspace_utils.prepare_processing_workspace` was called. +4. 
**[`FileRuleFilterStage`](processing/pipeline/stages/file_rule_filter.py:10)** (`processing/pipeline/stages/file_rule_filter.py`): + * **Responsibility**: Filters the [`FileRule`](rule_structure.py:5) objects associated with the asset to determine which individual files should be considered for processing. It identifies and excludes files matching "FILE_IGNORE" rules based on their `item_type`. + * **Context Interaction**: Populates `context.files_to_process` with the list of [`FileRule`](rule_structure.py:5) objects that are not ignored. -11. **(Optional) Blender Script Execution (External)**: - * If triggered (e.g., via CLI arguments or GUI controls), the orchestrating code (e.g., `main.ProcessingTask`) executes the corresponding Blender scripts (`blenderscripts/*.py`) using `subprocess.run` *after* the `ProcessingEngine.process()` call completes successfully. - * *Note: Centralized logic for this was intended for `utils/blender_utils.py`, but this utility has not yet been implemented.* See `Developer Guide: Blender Integration Internals` for more details. +5. **[`GlossToRoughConversionStage`](processing/pipeline/stages/gloss_to_rough_conversion.py:15)** (`processing/pipeline/stages/gloss_to_rough_conversion.py`): + * **Responsibility**: Identifies processed maps in `context.processed_maps_details` whose `internal_map_type` starts with "MAP_GLOSS". If found, it loads the temporary image data, inverts it using the shared utility function [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), saves a new temporary roughness map ("MAP_ROUGH"), and updates the corresponding details in `context.processed_maps_details` (setting `internal_map_type` to "MAP_ROUGH") and the relevant [`FileRule`](rule_structure.py:5) in `context.files_to_process` (setting `item_type` to "MAP_ROUGH"). 
+ * **Context Interaction**: Reads from and updates `context.processed_maps_details` (specifically `internal_map_type` and `temp_processed_file`) and `context.files_to_process` (specifically `item_type`). -This pipeline, executed by the `ProcessingEngine`, provides a clear and explicit processing flow based on the complete rule set provided by the GUI or other interfaces. \ No newline at end of file +6. **[`AlphaExtractionToMaskStage`](processing/pipeline/stages/alpha_extraction_to_mask.py:16)** (`processing/pipeline/stages/alpha_extraction_to_mask.py`): + * **Responsibility**: If no mask map is explicitly defined for the asset (as a [`FileRule`](rule_structure.py:5) with `item_type="MAP_MASK"`), this stage searches `context.processed_maps_details` for a suitable source map (e.g., a "MAP_COL" with an alpha channel, based on its `internal_map_type`). If found, it extracts the alpha channel, saves it as a new temporary mask map, and adds a new [`FileRule`](rule_structure.py:5) (with `item_type="MAP_MASK"`) and corresponding details (with `internal_map_type="MAP_MASK"`) to the context. + * **Context Interaction**: Reads from `context.processed_maps_details`, adds a new [`FileRule`](rule_structure.py:5) to `context.files_to_process`, and adds a new entry to `context.processed_maps_details` (setting `internal_map_type`). + +7. **[`NormalMapGreenChannelStage`](processing/pipeline/stages/normal_map_green_channel.py:14)** (`processing/pipeline/stages/normal_map_green_channel.py`): + * **Responsibility**: Identifies processed normal maps in `context.processed_maps_details` (those with an `internal_map_type` starting with "MAP_NRM"). 
If the global `invert_normal_map_green_channel_globally` configuration is true, it loads the temporary image data, inverts the green channel using the shared utility function [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), saves a new temporary modified normal map, and updates the `temp_processed_file` path in `context.processed_maps_details`. + * **Context Interaction**: Reads from and updates `context.processed_maps_details` (specifically `temp_processed_file` and `notes`). + +### Core Item Processing Loop + +The [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36) iterates through the `context.processing_items` list (populated by the [`PrepareProcessingItemsStage`](processing/pipeline/stages/prepare_processing_items.py:10)). For each item (either a [`FileRule`](rule_structure.py:5) for a regular map or a [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) for a merged map), the stages listed below that apply to the item's type are executed sequentially; the first stage listed runs only once, before the loop starts: + +1. **[`PrepareProcessingItemsStage`](processing/pipeline/stages/prepare_processing_items.py:10)** (`processing/pipeline/stages/prepare_processing_items.py`): + * **Responsibility**: (Executed once before the loop) Creates the `context.processing_items` list by combining [`FileRule`](rule_structure.py:5)s from `context.files_to_process` and [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16)s derived from the global `map_merge_rules` configuration. It correctly accesses `map_merge_rules` from `context.config_obj` and validates each merge rule for the presence of `output_map_type` and a dictionary for `inputs`. Initializes `context.intermediate_results`. + * **Context Interaction**: Reads from `context.files_to_process` and `context.config_obj` (accessing `map_merge_rules`). Populates `context.processing_items` and initializes `context.intermediate_results`. + +2.
**[`RegularMapProcessorStage`](processing/pipeline/stages/regular_map_processor.py:18)** (`processing/pipeline/stages/regular_map_processor.py`): + * **Responsibility**: (Executed per [`FileRule`](rule_structure.py:5) item) Checks if the `FileRule.item_type` starts with "MAP_". If not, the item is skipped. Otherwise, it loads the image data for the file, determines its potentially suffixed internal map type (e.g., "MAP_COL-1"), applies in-memory transformations (Gloss-to-Rough, Normal Green Invert) using the shared utility function [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), and returns the processed image data and details in a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object. The `internal_map_type` in the output reflects any transformations (e.g., "MAP_GLOSS" becomes "MAP_ROUGH"). + * **Context Interaction**: Reads from the input [`FileRule`](rule_structure.py:5) (checking `item_type`) and [`Configuration`](configuration.py:68). Returns a [`ProcessedRegularMapData`](processing/pipeline/asset_context.py:23) object which is stored in `context.intermediate_results`. + +3. **[`MergedTaskProcessorStage`](processing/pipeline/stages/merged_task_processor.py:68)** (`processing/pipeline/stages/merged_task_processor.py`): + * **Responsibility**: (Executed per [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) item) Validates that all input map types specified in the merge rule start with "MAP_". If not, the task is failed. It dynamically loads input images by looking up the required input map types (e.g., "MAP_NRM") in `context.processed_maps_details` and using the temporary file paths from their `saved_files_info`. 
It applies in-memory transformations to inputs using [`apply_common_map_transformations`](processing/utils/image_processing_utils.py), handles dimension mismatches (with fallback creation if configured and `source_dimensions` are available), performs the channel merging operation, and returns the merged image data and details in a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object. The `output_map_type` of the merged map must also be "MAP_" prefixed in the configuration. + * **Context Interaction**: Reads from the input [`MergeTaskDefinition`](processing/pipeline/asset_context.py:16) (checking input map types), `context.workspace_path`, `context.processed_maps_details` (for input image data), and [`Configuration`](configuration.py:68). Returns a [`ProcessedMergedMapData`](processing/pipeline/asset_context.py:35) object which is stored in `context.intermediate_results`. + +4. **[`InitialScalingStage`](processing/pipeline/stages/initial_scaling.py:14)** (`processing/pipeline/stages/initial_scaling.py`): + * **Responsibility**: (Executed per item) Applies initial scaling (e.g., Power-of-Two downscaling) to the image data from the previous processing stage based on the `initial_scaling_mode` configuration. + * **Context Interaction**: Takes an [`InitialScalingInput`](processing/pipeline/asset_context.py:46) (containing image data and config) and returns an [`InitialScalingOutput`](processing/pipeline/asset_context.py:54) object, which updates the item's entry in `context.intermediate_results`. + +5. **[`SaveVariantsStage`](processing/pipeline/stages/save_variants.py:15)** (`processing/pipeline/stages/save_variants.py`): + * **Responsibility**: (Executed per item) Takes the final processed image data (potentially scaled) and configuration, and calls a utility to save the image to temporary files in various resolutions and formats as defined by the configuration.
+ * **Context Interaction**: Takes a [`SaveVariantsInput`](processing/pipeline/asset_context.py:61) object (which includes the "MAP_" prefixed `internal_map_type`). It uses the `get_filename_friendly_map_type` utility to convert this to a "standard type" (e.g., "COL") for output naming. Returns a [`SaveVariantsOutput`](processing/pipeline/asset_context.py:79) object containing details about the saved temporary files. The orchestrator stores these details, including the original "MAP_" prefixed `internal_map_type`, in `context.processed_maps_details` for the item. + +### Post-Item Stages + +These stages are executed sequentially once for each asset after the core item processing loop has finished for all items. + +1. **[`OutputOrganizationStage`](processing/pipeline/stages/output_organization.py:14)** (`processing/pipeline/stages/output_organization.py`): + * **Responsibility**: Determines the final output paths for all processed maps (including variants) and extra files based on configured patterns. It copies the temporary files generated by the core stages to these final destinations, creating directories as needed and respecting overwrite settings. + * **Context Interaction**: Reads from `context.processed_maps_details`, `context.files_to_process` (for 'EXTRA' files), `context.output_base_path`, and [`Configuration`](configuration.py:68). Updates entries in `context.processed_maps_details` with organization status. Populates `context.asset_metadata['maps']` with the final map structure: + * The `maps` object is a dictionary where keys are standard map types (e.g., "COL", "REFL"). + * Each entry contains a `variant_paths` dictionary, where keys are resolution strings (e.g., "8K", "4K") and values are the filenames of the map variants (relative to the asset's output directory). + It also populates `context.asset_metadata['final_output_files']` with a list of absolute paths to all generated files (this list itself is not saved in the final `metadata.json`). + +2. 
**[`MetadataFinalizationAndSaveStage`](processing/pipeline/stages/metadata_finalization_save.py:14)** (`processing/pipeline/stages/metadata_finalization_save.py`): + * **Responsibility**: Finalizes the `context.asset_metadata` (setting final status based on flags). It determines the save path for the metadata file based on configuration and patterns, serializes the `context.asset_metadata` (which now contains the structured `maps` data from `OutputOrganizationStage`) to JSON, and saves the `metadata.json` file. + * **Context Interaction**: Reads from `context.asset_metadata` (including the `maps` structure), `context.output_base_path`, and [`Configuration`](configuration.py:68). Before saving, it explicitly removes the `final_output_files` key from `context.asset_metadata`. The `processing_end_time` is also no longer added. The `metadata.json` file is written, and `context.asset_metadata` is updated with its final path and status. The older `processed_maps_details` and `merged_maps_details` from the context are not directly included in the JSON. + +## External Steps + +Certain steps are integral to the overall asset processing workflow but are handled outside the [`PipelineOrchestrator`](processing/pipeline/orchestrator.py:36)'s direct execution loop: + +* **Workspace Preparation and Cleanup**: Handled by the code that invokes [`ProcessingEngine.process()`](processing_engine.py:131) (e.g., `main.ProcessingTask`, `monitor._process_archive_task`), typically involving extracting archives and setting up temporary directories. The engine itself manages a sub-temporary directory (`engine_temp_dir`) for intermediate processing files. +* **Prediction and Rule Generation**: Performed before the [`ProcessingEngine`](processing_engine.py:73) is called. 
This involves analyzing source files and generating the [`SourceRule`](rule_structure.py:40) object with its nested [`AssetRule`](rule_structure.py:22)s and [`FileRule`](rule_structure.py:5)s, often involving prediction logic (potentially using LLMs). +* **Optional Blender Script Execution**: Can be triggered externally after successful processing to perform tasks like material setup in Blender using the generated output files and metadata. + +This staged pipeline provides a modular and extensible architecture for asset processing, with clear separation of concerns for each step. The [`AssetProcessingContext`](processing/pipeline/asset_context.py:86) ensures that data flows consistently between these stages. \ No newline at end of file diff --git a/Presets/Dinesen.json b/Presets/Dinesen.json index 654eec1..f91cf36 100644 --- a/Presets/Dinesen.json +++ b/Presets/Dinesen.json @@ -56,7 +56,7 @@ ] }, { - "target_type": "MAP_ROUGH", + "target_type": "MAP_GLOSS", "keywords": [ "GLOSS" ] diff --git a/Presets/Poliigon.json b/Presets/Poliigon.json index 58560fd..efa3076 100644 --- a/Presets/Poliigon.json +++ b/Presets/Poliigon.json @@ -54,7 +54,7 @@ ] }, { - "target_type": "MAP_ROUGH", + "target_type": "MAP_GLOSS", "keywords": [ "GLOSS" ], diff --git a/ProjectNotes/MAP_Prefix_Enforcement_Plan.md b/ProjectNotes/MAP_Prefix_Enforcement_Plan.md new file mode 100644 index 0000000..205918d --- /dev/null +++ b/ProjectNotes/MAP_Prefix_Enforcement_Plan.md @@ -0,0 +1,96 @@ +# Plan: Enforcing "MAP_" Prefix for Internal Processing and Standard Type for Output Naming + +**Date:** 2025-05-13 + +**I. Goal:** +The primary goal is to ensure that for all internal processing, the system *exclusively* uses `FileRule.item_type` values that start with the "MAP_" prefix (e.g., "MAP_COL", "MAP_NRM"). The "standard type" (e.g., "COL", "NRM") associated with these "MAP_" types (as defined in `config/app_settings.json`) should *only* be used during the file saving stages for output naming. 
Any `FileRule` whose `item_type` does not start with "MAP_" (and isn't a special type like "EXTRA" or "MODEL") should be skipped by the relevant map processing stages. + +**II. Current State Analysis Summary:** + +* **Output Naming:** The use of "standard type" for output filenames via the `get_filename_friendly_map_type` utility in `SaveVariantsStage` and `OutputOrganizationStage` is **correct** and already meets the requirement. +* **Internal "MAP_" Prefix Usage:** + * Some stages like `GlossToRoughConversionStage` correctly check for "MAP_" prefixes (e.g., `processing_map_type.startswith("MAP_GLOSS")`). + * Other stages like `RegularMapProcessorStage` and `MergedTaskProcessorStage` (and its helpers) implicitly expect "MAP_" prefixed types for their internal regex-based logic but lack explicit checks to skip items if the prefix is missing. + * Stages like `AlphaExtractionToMaskStage` and `NormalMapGreenChannelStage` currently use non-"MAP_" prefixed "standard types" (e.g., "NORMAL", "ALBEDO") when reading from `context.processed_maps_details` for their decision-making logic. + * The `PrepareProcessingItemsStage` adds `FileRule`s to the processing queue without filtering based on the "MAP_" prefix in `item_type`. +* **Data Consistency in `AssetProcessingContext`:** + * `FileRule.item_type` is the field that should hold the "MAP_" prefixed type from the initial rule generation. + * `context.processed_maps_details` entries can contain various map type representations: + * `map_type`: Often stores the "standard type" (e.g., "Roughness", "MASK", "NORMAL"). + * `processing_map_type` / `internal_map_type`: Generally seem to store the "MAP_" prefixed type. This needs to be consistent. +* **Configuration (`config/app_settings.json`):** + * `FILE_TYPE_DEFINITIONS` correctly use "MAP_" prefixed keys. + * `MAP_MERGE_RULES` need to be reviewed to ensure their `output_map_type` and input map types are "MAP_" prefixed. + +**III. 
Proposed Changes (Code Identification & Recommendations):** + +**A. Enforce "MAP_" Prefix for Processing Items (Skipping Logic):** +The core requirement is that processing stages should skip `FileRule` items if their `item_type` doesn't start with "MAP_". + +1. **`RegularMapProcessorStage` (`processing/pipeline/stages/regular_map_processor.py`):** + * **Identify:** In the `execute` method, `initial_internal_map_type` is derived from `file_rule.item_type_override` or `file_rule.item_type`. + * **Recommend:** Add an explicit check after determining `initial_internal_map_type`. If `initial_internal_map_type` does not start with `"MAP_"`, the stage should log a warning, set the `result.status` to "Skipped (Invalid Type)" or similar, and return `result` early, effectively skipping processing for this item. + +2. **`MergedTaskProcessorStage` (`processing/pipeline/stages/merged_task_processor.py`):** + * **Identify:** This stage processes `MergeTaskDefinition`s. The definitions for these tasks (input types, output type) come from `MAP_MERGE_RULES` in `config/app_settings.json`. The stage uses `required_map_type_from_rule` for its inputs. + * **Recommend:** + * **Configuration First:** Review all entries in `MAP_MERGE_RULES` in `config/app_settings.json`. + * Ensure the `output_map_type` for each rule (e.g., "MAP_NRMRGH") starts with "MAP_". + * Ensure all map type values within the `inputs` dictionary (e.g., `"R": "MAP_NRM"`) start with "MAP_". + * **Stage Logic:** In the `execute` method, when iterating through `merge_inputs_config.items()`, check if `required_map_type_from_rule` starts with `"MAP_"`. If not, log a warning and either: + * Skip loading/processing this specific input channel (potentially using its fallback if the overall merge can still proceed). + * Or, if a non-"MAP_" input is critical, fail the entire merge task for this asset. 
+ * The helper `_apply_in_memory_transformations` already uses regex expecting "MAP_" prefixes; this will naturally fail or misbehave if inputs are not "MAP_" prefixed, reinforcing the need for the check above. + +**B. Standardize Map Type Fields and Usage in `context.processed_maps_details`:** +Ensure consistency in how "MAP_" prefixed types are stored and accessed within `context.processed_maps_details` for internal logic (not naming). + +1. **Recommendation:** Establish a single, consistent field name within `context.processed_maps_details` to store the definitive "MAP_" prefixed internal map type (e.g., `internal_map_type` or `processing_map_type`). All stages that perform logic based on the specific *kind* of map (e.g., transformations, source selection) should read from this standardized field. The `map_type` field can continue to store the "standard type" (e.g., "Roughness") primarily for informational/metadata purposes if needed, but not for core processing logic. + +2. **`AlphaExtractionToMaskStage` (`processing/pipeline/stages/alpha_extraction_to_mask.py`):** + * **Identify:** + * Checks for existing MASK map using `file_rule.map_type == "MASK"`. (Discrepancy: `FileRule` uses `item_type`). + * Searches for suitable source maps using `details.get('map_type') in self.SUITABLE_SOURCE_MAP_TYPES` where `SUITABLE_SOURCE_MAP_TYPES` are standard types like "ALBEDO". + * When adding new details, it sets `map_type: "MASK"` and the new `FileRule` gets `item_type="MAP_MASK"`. + * **Recommend:** + * Change the check for an existing MASK map to `file_rule.item_type == "MAP_MASK"`. + * Modify the source map search to use the standardized "MAP_" prefixed field from `details` (e.g., `details.get('internal_map_type')`) and update `SUITABLE_SOURCE_MAP_TYPES` to be "MAP_" prefixed (e.g., "MAP_COL", "MAP_ALBEDO"). 
+ * When adding new details for the created MASK map to `context.processed_maps_details`, ensure the standardized "MAP_" prefixed field is set to "MAP_MASK", and `map_type` (if kept) is "MASK". + +3. **`NormalMapGreenChannelStage` (`processing/pipeline/stages/normal_map_green_channel.py`):** + * **Identify:** Checks `map_details.get('map_type') == "NORMAL"`. + * **Recommend:** Change this check to use the standardized "MAP_" prefixed field from `map_details` (e.g., `map_details.get('internal_map_type')`) and verify if it `startswith("MAP_NRM")`. + +4. **`GlossToRoughConversionStage` (`processing/pipeline/stages/gloss_to_rough_conversion.py`):** + * **Identify:** This stage already uses `processing_map_type.startswith("MAP_GLOSS")` and updates `processing_map_type` to "MAP_ROUGH" in `map_details`. It also updates the `FileRule.item_type` correctly. + * **Recommend:** This stage is largely consistent. Ensure the field it reads/writes (`processing_map_type`) aligns with the chosen standardized "MAP_" prefixed field for `processed_maps_details`. + +**C. Review Orchestration Logic (Conceptual):** +* When the orchestrator populates `context.processed_maps_details` after stages like `SaveVariantsStage`, ensure it stores the "MAP_" prefixed `internal_map_type` (from `SaveVariantsInput`) into the chosen standardized field in `processed_maps_details`. + +**IV. Testing Recommendations:** + +* Create test cases with `AssetRule`s containing `FileRule`s where `item_type` is intentionally set to a non-"MAP_" prefixed value (e.g., "COLOR_MAP", "TEXTURE_ROUGH"). Verify that `RegularMapProcessorStage` skips these. +* Modify `MAP_MERGE_RULES` in a test configuration: + * Set an `output_map_type` to a non-"MAP_" value. + * Set an input map type (e.g., for channel "R") to a non-"MAP_" value. + * Verify that `MergedTaskProcessorStage` correctly handles these (e.g., fails the task, skips the input, logs warnings). 
+* Test `AlphaExtractionToMaskStage`: + * With an existing `FileRule` having `item_type="MAP_MASK"` to ensure extraction is skipped. + * With source maps having "MAP_COL" (with alpha) as their `internal_map_type` in `processed_maps_details` to ensure they are correctly identified as sources. +* Test `NormalMapGreenChannelStage` with a normal map having "MAP_NRM" as its `internal_map_type` in `processed_maps_details` to ensure it's processed. +* Verify that output filenames continue to use the "standard type" (e.g., "COL", "ROUGH", "NRM") correctly. + +**V. Mermaid Diagram (Illustrative Flow for `FileRule` Processing):** + +```mermaid +graph TD + A[AssetRule with FileRules] --> B{FileRuleFilterStage}; + B -- files_to_process --> C{PrepareProcessingItemsStage}; + C -- processing_items (FileRule) --> D{PipelineOrchestrator}; + D -- FileRule --> E(RegularMapProcessorStage); + E --> F{Check FileRule.item_type}; + F -- Starts with "MAP_"? --> G[Process Map]; + F -- No --> H[Skip Map / Log Warning]; + G --> I[...subsequent stages...]; + H --> I; \ No newline at end of file diff --git a/ProjectNotes/PipelineRefactoringPlan.md b/ProjectNotes/PipelineRefactoringPlan.md new file mode 100644 index 0000000..f74fa35 --- /dev/null +++ b/ProjectNotes/PipelineRefactoringPlan.md @@ -0,0 +1,72 @@ +# Processing Pipeline Refactoring Plan + +## 1. Problem Summary + +The current processing pipeline, particularly the `IndividualMapProcessingStage`, exhibits maintainability challenges: + +* **High Complexity:** The stage handles too many responsibilities (loading, merging, transformations, scaling, saving). +* **Duplicated Logic:** Image transformations (Gloss-to-Rough, Normal Green Invert) are duplicated within the stage instead of relying solely on dedicated stages or being handled consistently. +* **Tight Coupling:** Heavy reliance on the large, mutable `AssetProcessingContext` object creates implicit dependencies and makes isolated testing difficult. + +## 2. 
Refactoring Goals + +* Improve code readability and understanding. +* Enhance maintainability by localizing changes and removing duplication. +* Increase testability through smaller, focused components with clear interfaces. +* Clarify data dependencies between pipeline stages. +* Adhere more closely to the Single Responsibility Principle (SRP). + +## 3. Proposed New Pipeline Stages + +Replace the existing `IndividualMapProcessingStage` with the following sequence of smaller, focused stages, executed by the `PipelineOrchestrator` for each processing item: + +1. **`PrepareProcessingItemsStage`:** + * **Responsibility:** Identifies and lists all items (`FileRule`, `MergeTaskDefinition`) to be processed from the main context. + * **Output:** Updates `context.processing_items`. + +2. **`RegularMapProcessorStage`:** (Handles `FileRule` items) + * **Responsibility:** Loads source image, determines internal map type (with suffix), applies relevant transformations (Gloss-to-Rough, Normal Green Invert), determines original metadata. + * **Output:** `ProcessedRegularMapData` object containing transformed image data and metadata. + +3. **`MergedTaskProcessorStage`:** (Handles `MergeTaskDefinition` items) + * **Responsibility:** Loads input images, applies transformations to inputs, handles fallbacks/resizing, performs merge operation. + * **Output:** `ProcessedMergedMapData` object containing merged image data and metadata. + +4. **`InitialScalingStage`:** (Optional) + * **Responsibility:** Applies configured scaling (e.g., POT downscale) to the processed image data received from the previous stage. + * **Output:** Scaled image data. + +5. **`SaveVariantsStage`:** + * **Responsibility:** Takes the final processed (and potentially scaled) image data and orchestrates saving variants using the `save_image_variants` utility. + * **Output:** List of saved file details (`saved_files_details`). + +## 4. 
Proposed Data Flow + +* **Input/Output Objects:** Key stages (`RegularMapProcessor`, `MergedTaskProcessor`, `InitialScaling`, `SaveVariants`) will use specific Input and Output dataclasses for clearer interfaces. +* **Orchestrator Role:** The `PipelineOrchestrator` manages the overall flow. It calls stages, passes necessary data (extracting image data references and metadata from previous stage outputs to create inputs for the next), receives output objects, and integrates final results (like saved file details) back into the main `AssetProcessingContext`. +* **Image Data Handling:** Large image arrays (`np.ndarray`) are passed primarily via stage return values (Output objects) and used as inputs to subsequent stages, managed by the Orchestrator. They are not stored long-term in the main `AssetProcessingContext`. +* **Main Context:** The `AssetProcessingContext` remains for overall state (rules, paths, configuration access, final status tracking) and potentially for simpler stages with minimal side effects. + +## 5. Visualization (Conceptual) + +```mermaid +graph TD + subgraph Proposed Pipeline Stages + Start --> Prep[PrepareProcessingItemsStage] + Prep --> ItemLoop{Loop per Item} + ItemLoop -- FileRule --> RegProc[RegularMapProcessorStage] + ItemLoop -- MergeTask --> MergeProc[MergedTaskProcessorStage] + RegProc --> Scale(InitialScalingStage) + MergeProc --> Scale + Scale --> Save[SaveVariantsStage] + Save --> UpdateContext[Update Main Context w/ Results] + UpdateContext --> ItemLoop + end +``` + +## 6. Benefits + +* Improved Readability & Understanding. +* Enhanced Maintainability & Reduced Risk. +* Better Testability. +* Clearer Dependencies. 
\ No newline at end of file diff --git a/config/app_settings.json b/config/app_settings.json index ac3a894..086a044 100644 --- a/config/app_settings.json +++ b/config/app_settings.json @@ -246,7 +246,7 @@ ], "EXTRA_FILES_SUBDIR": "Extra", "OUTPUT_BASE_DIR": "../Asset_Processor_Output_Tests", - "OUTPUT_DIRECTORY_PATTERN": "[supplier]/[sha5]_[assetname]", + "OUTPUT_DIRECTORY_PATTERN": "[supplier]_[assetname]", "OUTPUT_FILENAME_PATTERN": "[assetname]_[maptype]_[resolution].[ext]", "METADATA_FILENAME": "metadata.json", "DEFAULT_NODEGROUP_BLEND_PATH": "G:/02 Content/10-19 Content/19 Catalogs/19.01 Blender Asset Catalogue/_CustomLibraries/Nodes-Linked/PBRSET-Nodes-Testing.blend", @@ -259,7 +259,8 @@ "8K": 8192, "4K": 4096, "2K": 2048, - "1K": 1024 + "1K": 1024, + "PREVIEW": 128 }, "ASPECT_RATIO_DECIMALS": 2, "OUTPUT_FORMAT_16BIT_PRIMARY": "exr", @@ -267,11 +268,11 @@ "OUTPUT_FORMAT_8BIT": "png", "MAP_MERGE_RULES": [ { - "output_map_type": "NRMRGH", + "output_map_type": "MAP_NRMRGH", "inputs": { - "R": "NRM", - "G": "NRM", - "B": "ROUGH" + "R": "MAP_NRM", + "G": "MAP_NRM", + "B": "MAP_ROUGH" }, "defaults": { "R": 0.5, @@ -283,5 +284,10 @@ ], "CALCULATE_STATS_RESOLUTION": "1K", "DEFAULT_ASSET_CATEGORY": "Surface", - "TEMP_DIR_PREFIX": "_PROCESS_ASSET_" + "TEMP_DIR_PREFIX": "_PROCESS_ASSET_", + "INITIAL_SCALING_MODE": "POT_DOWNSCALE", + "MERGE_DIMENSION_MISMATCH_STRATEGY": "USE_LARGEST", + "general_settings": { + "invert_normal_map_green_channel_globally": false + } } \ No newline at end of file diff --git a/configuration.py b/configuration.py index cbaec74..5e3dd72 100644 --- a/configuration.py +++ b/configuration.py @@ -379,10 +379,33 @@ class Configuration: """Gets the configured JPG quality level.""" return self._core_settings.get('JPG_QUALITY', 95) + @property + def invert_normal_green_globally(self) -> bool: + """Gets the global setting for inverting the green channel of normal maps.""" + # Default to False if the setting is missing in the core config + return 
self._core_settings.get('invert_normal_map_green_channel_globally', False) + + @property + def overwrite_existing(self) -> bool: + """Gets the setting for overwriting existing files from core settings.""" + return self._core_settings.get('overwrite_existing', False) + + @property + def png_compression_level(self) -> int: + """Gets the PNG compression level from core settings.""" + return self._core_settings.get('PNG_COMPRESSION', 6) # Default to 6 if not found + @property def resolution_threshold_for_jpg(self) -> int: """Gets the pixel dimension threshold for using JPG for 8-bit images.""" - return self._core_settings.get('RESOLUTION_THRESHOLD_FOR_JPG', 4096) + value = self._core_settings.get('RESOLUTION_THRESHOLD_FOR_JPG', 4096) + log.info(f"CONFIGURATION_DEBUG: resolution_threshold_for_jpg property returning: {value} (type: {type(value)})") + # Ensure it's an int, as downstream might expect it. + # The .get() default is an int, but if the JSON had null or a string, it might be different. + if not isinstance(value, int): + log.warning(f"CONFIGURATION_DEBUG: RESOLUTION_THRESHOLD_FOR_JPG was not an int, got {type(value)}. 
Defaulting to 4096.") + return 4096 + return value @property def respect_variant_map_types(self) -> list: diff --git a/main.py b/main.py index acc0713..1cc864a 100644 --- a/main.py +++ b/main.py @@ -21,15 +21,43 @@ from PySide6.QtCore import Qt from PySide6.QtWidgets import QApplication # --- Backend Imports --- +# Add current directory to sys.path for direct execution +import sys +import os +sys.path.append(os.path.dirname(__file__)) +print(f"DEBUG: sys.path after append: {sys.path}") + try: + print("DEBUG: Attempting to import Configuration...") from configuration import Configuration, ConfigurationError + print("DEBUG: Successfully imported Configuration.") + + print("DEBUG: Attempting to import ProcessingEngine...") from processing_engine import ProcessingEngine + print("DEBUG: Successfully imported ProcessingEngine.") + + print("DEBUG: Attempting to import SourceRule...") from rule_structure import SourceRule + print("DEBUG: Successfully imported SourceRule.") + + print("DEBUG: Attempting to import MainWindow...") from gui.main_window import MainWindow + print("DEBUG: Successfully imported MainWindow.") + + print("DEBUG: Attempting to import prepare_processing_workspace...") from utils.workspace_utils import prepare_processing_workspace + print("DEBUG: Successfully imported prepare_processing_workspace.") + except ImportError as e: script_dir = Path(__file__).parent.resolve() + print(f"ERROR: Cannot import Configuration or rule_structure classes.") + print(f"Ensure configuration.py and rule_structure.py are in the same directory or Python path.") print(f"ERROR: Failed to import necessary classes: {e}") + print(f"DEBUG: Exception type: {type(e)}") + print(f"DEBUG: Exception args: {e.args}") + import traceback + print("DEBUG: Full traceback of the ImportError:") + traceback.print_exc() print(f"Ensure 'configuration.py' and 'asset_processor.py' exist in the directory:") print(f" {script_dir}") print("Or that the directory is included in your PYTHONPATH.") diff 
--git a/processing/pipeline/asset_context.py b/processing/pipeline/asset_context.py new file mode 100644 index 0000000..f6363e5 --- /dev/null +++ b/processing/pipeline/asset_context.py @@ -0,0 +1,106 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional + +from rule_structure import AssetRule, FileRule, SourceRule +from configuration import Configuration + +# Imports needed for new dataclasses +import numpy as np +from typing import Any, Tuple, Union + +# --- Stage Input/Output Dataclasses --- + +# Item types for PrepareProcessingItemsStage output +@dataclass +class MergeTaskDefinition: + """Represents a merge task identified by PrepareProcessingItemsStage.""" + task_data: Dict # The original task data from context.merged_image_tasks + task_key: str # e.g., "merged_task_0" + +# Output for RegularMapProcessorStage +@dataclass +class ProcessedRegularMapData: + processed_image_data: np.ndarray + final_internal_map_type: str + source_file_path: Path + original_bit_depth: Optional[int] + original_dimensions: Optional[Tuple[int, int]] # (width, height) + transformations_applied: List[str] + status: str = "Processed" + error_message: Optional[str] = None + +# Output for MergedTaskProcessorStage +@dataclass +class ProcessedMergedMapData: + merged_image_data: np.ndarray + output_map_type: str # Internal type + source_bit_depths: List[int] + final_dimensions: Optional[Tuple[int, int]] # (width, height) + transformations_applied_to_inputs: Dict[str, List[str]] # Map type -> list of transforms + status: str = "Processed" + error_message: Optional[str] = None + +# Input for InitialScalingStage +@dataclass +class InitialScalingInput: + image_data: np.ndarray + original_dimensions: Optional[Tuple[int, int]] # (width, height) + # Configuration needed + initial_scaling_mode: str + +# Output for InitialScalingStage +@dataclass +class InitialScalingOutput: + scaled_image_data: np.ndarray + scaling_applied: bool + final_dimensions: 
Tuple[int, int] # (width, height) + +# Input for SaveVariantsStage +@dataclass +class SaveVariantsInput: + image_data: np.ndarray # Final data (potentially scaled) + internal_map_type: str # Final internal type (e.g., MAP_ROUGH, MAP_COL-1) + source_bit_depth_info: List[int] + # Configuration needed + output_filename_pattern_tokens: Dict[str, Any] + image_resolutions: List[int] + file_type_defs: Dict[str, Dict] + output_format_8bit: str + output_format_16bit_primary: str + output_format_16bit_fallback: str + png_compression_level: int + jpg_quality: int + output_filename_pattern: str + resolution_threshold_for_jpg: Optional[int] # Added for JPG conversion + +# Output for SaveVariantsStage +@dataclass +class SaveVariantsOutput: + saved_files_details: List[Dict] + status: str = "Processed" + error_message: Optional[str] = None + +# Add a field to AssetProcessingContext for the prepared items +@dataclass +class AssetProcessingContext: + source_rule: SourceRule + asset_rule: AssetRule + workspace_path: Path + engine_temp_dir: Path + output_base_path: Path + effective_supplier: Optional[str] + asset_metadata: Dict + processed_maps_details: Dict[str, Dict] # Will store final results per item_key + merged_maps_details: Dict[str, Dict] # This might become redundant? Keep for now. 
+ files_to_process: List[FileRule] + loaded_data_cache: Dict + config_obj: Configuration + status_flags: Dict + incrementing_value: Optional[str] + sha5_value: Optional[str] # Keep existing fields + # New field for prepared items + processing_items: Optional[List[Union[FileRule, MergeTaskDefinition]]] = None + # Temporary storage during pipeline execution (managed by orchestrator) + # Keys could be FileRule object hash/id or MergeTaskDefinition task_key + intermediate_results: Optional[Dict[Any, Union[ProcessedRegularMapData, ProcessedMergedMapData, InitialScalingOutput]]] = None \ No newline at end of file diff --git a/processing/pipeline/orchestrator.py b/processing/pipeline/orchestrator.py new file mode 100644 index 0000000..6c8fe7a --- /dev/null +++ b/processing/pipeline/orchestrator.py @@ -0,0 +1,439 @@ +# --- Imports --- +import logging +import shutil +import tempfile +from pathlib import Path +from typing import List, Dict, Optional, Any, Union # Added Any, Union + +import numpy as np # Added numpy + +from configuration import Configuration +from rule_structure import SourceRule, AssetRule, FileRule # Added FileRule + +# Import new context classes and stages +from .asset_context import ( + AssetProcessingContext, + MergeTaskDefinition, + ProcessedRegularMapData, + ProcessedMergedMapData, + InitialScalingInput, + InitialScalingOutput, + SaveVariantsInput, + SaveVariantsOutput, +) +from .stages.base_stage import ProcessingStage +# Import the new stages we created +from .stages.prepare_processing_items import PrepareProcessingItemsStage +from .stages.regular_map_processor import RegularMapProcessorStage +from .stages.merged_task_processor import MergedTaskProcessorStage +from .stages.initial_scaling import InitialScalingStage +from .stages.save_variants import SaveVariantsStage + +log = logging.getLogger(__name__) + +# --- PipelineOrchestrator Class --- + +class PipelineOrchestrator: + """ + Orchestrates the processing of assets based on source rules and a 
series of processing stages. + Manages the overall flow, including the core item processing sequence. + """ + + def __init__(self, config_obj: Configuration, + pre_item_stages: List[ProcessingStage], + post_item_stages: List[ProcessingStage]): + """ + Initializes the PipelineOrchestrator. + + Args: + config_obj: The main configuration object. + pre_item_stages: Stages to run before the core item processing loop. + post_item_stages: Stages to run after the core item processing loop. + """ + self.config_obj: Configuration = config_obj + self.pre_item_stages: List[ProcessingStage] = pre_item_stages + self.post_item_stages: List[ProcessingStage] = post_item_stages + # Instantiate the core item processing stages internally + self._prepare_stage = PrepareProcessingItemsStage() + self._regular_processor_stage = RegularMapProcessorStage() + self._merged_processor_stage = MergedTaskProcessorStage() + self._scaling_stage = InitialScalingStage() + self._save_stage = SaveVariantsStage() + + def _execute_specific_stages( + self, context: AssetProcessingContext, + stages_to_run: List[ProcessingStage], + stage_group_name: str, + stop_on_skip: bool = True + ) -> AssetProcessingContext: + """Executes a specific list of stages.""" + asset_name = context.asset_rule.asset_name if context.asset_rule else "Unknown" + log.debug(f"Asset '{asset_name}': Executing {stage_group_name} stages...") + for stage in stages_to_run: + stage_name = stage.__class__.__name__ + log.debug(f"Asset '{asset_name}': Executing {stage_group_name} stage: {stage_name}") + try: + # Check if stage expects context directly or specific input + # For now, assume outer stages take context directly + # This might need refinement if outer stages also adopt Input/Output pattern + context = stage.execute(context) + except Exception as e: + log.error(f"Asset '{asset_name}': Error during outer stage '{stage_name}': {e}", exc_info=True) + context.status_flags["asset_failed"] = True + 
context.status_flags["asset_failed_stage"] = stage_name + context.status_flags["asset_failed_reason"] = str(e) + # Update overall metadata immediately on outer stage failure + context.asset_metadata["status"] = f"Failed: Error in stage {stage_name}" + context.asset_metadata["error_message"] = str(e) + break # Stop processing outer stages for this asset on error + + if stop_on_skip and context.status_flags.get("skip_asset"): + log.info(f"Asset '{asset_name}': Skipped by outer stage '{stage_name}'. Reason: {context.status_flags.get('skip_reason', 'N/A')}") + break # Skip remaining outer stages for this asset + return context + + def process_source_rule( + self, + source_rule: SourceRule, + workspace_path: Path, + output_base_path: Path, + overwrite: bool, + incrementing_value: Optional[str], + sha5_value: Optional[str] # Keep param name consistent for now + ) -> Dict[str, List[str]]: + """ + Processes a single source rule, applying pre-processing stages, + the core item processing loop (Prepare, Process, Scale, Save), + and post-processing stages. 
+ """ + overall_status: Dict[str, List[str]] = { + "processed": [], + "skipped": [], + "failed": [], + } + engine_temp_dir_path: Optional[Path] = None + + try: + # --- Setup Temporary Directory --- + temp_dir_path_str = tempfile.mkdtemp(prefix=self.config_obj.temp_dir_prefix) + engine_temp_dir_path = Path(temp_dir_path_str) + log.debug(f"PipelineOrchestrator created temporary directory: {engine_temp_dir_path}") + + # --- Process Each Asset Rule --- + for asset_rule in source_rule.assets: + asset_name = asset_rule.asset_name + log.info(f"Orchestrator: Processing asset '{asset_name}'") + + # --- Initialize Asset Context --- + context = AssetProcessingContext( + source_rule=source_rule, + asset_rule=asset_rule, + workspace_path=workspace_path, + engine_temp_dir=engine_temp_dir_path, + output_base_path=output_base_path, + effective_supplier=None, + asset_metadata={}, + processed_maps_details={}, # Final results per item + merged_maps_details={}, # Keep for potential backward compat or other uses? + files_to_process=[], # Populated by FileRuleFilterStage (assumed in outer_stages) + loaded_data_cache={}, + config_obj=self.config_obj, + status_flags={"skip_asset": False, "asset_failed": False}, + incrementing_value=incrementing_value, + sha5_value=sha5_value, + processing_items=[], # Initialize new fields + intermediate_results={} + ) + + # --- Execute Pre-Item-Processing Outer Stages --- + # (e.g., MetadataInit, SupplierDet, FileRuleFilter, GlossToRough, NormalInvert) + # Identify which outer stages run before the item loop + # This requires knowing the intended order. Assume all run before for now. + context = self._execute_specific_stages(context, self.pre_item_stages, "pre-item", stop_on_skip=True) + + # Check if asset should be skipped or failed after pre-processing + if context.status_flags.get("asset_failed"): + log.error(f"Asset '{asset_name}': Failed during pre-processing stage '{context.status_flags.get('asset_failed_stage', 'Unknown')}'. 
Skipping item processing.") + overall_status["failed"].append(f"{asset_name} (Failed in {context.status_flags.get('asset_failed_stage', 'Pre-Processing')})") + continue # Move to the next asset rule + + if context.status_flags.get("skip_asset"): + log.info(f"Asset '{asset_name}': Skipped during pre-processing. Skipping item processing.") + overall_status["skipped"].append(asset_name) + continue # Move to the next asset rule + + # --- Prepare Processing Items --- + log.debug(f"Asset '{asset_name}': Preparing processing items...") + try: + log.info(f"ORCHESTRATOR_TRACE: Asset '{asset_name}': Attempting to call _prepare_stage.execute(). Current context.status_flags: {context.status_flags}") + # Prepare stage modifies context directly + context = self._prepare_stage.execute(context) + log.info(f"ORCHESTRATOR_TRACE: Asset '{asset_name}': Successfully RETURNED from _prepare_stage.execute(). context.processing_items count: {len(context.processing_items) if context.processing_items is not None else 'None'}. context.status_flags: {context.status_flags}") + except Exception as e: + log.error(f"ORCHESTRATOR_TRACE: Asset '{asset_name}': EXCEPTION during _prepare_stage.execute(): {e}", exc_info=True) + context.status_flags["asset_failed"] = True + context.status_flags["asset_failed_stage"] = "PrepareProcessingItemsStage" + context.status_flags["asset_failed_reason"] = str(e) + overall_status["failed"].append(f"{asset_name} (Failed in Prepare Items)") + continue # Move to next asset + + if context.status_flags.get('prepare_items_failed'): + log.error(f"Asset '{asset_name}': Failed during item preparation. Reason: {context.status_flags.get('prepare_items_failed_reason', 'Unknown')}. 
Skipping item processing loop.") + overall_status["failed"].append(f"{asset_name} (Failed Prepare Items: {context.status_flags.get('prepare_items_failed_reason', 'Unknown')})") + continue # Move to next asset + + if not context.processing_items: + log.info(f"Asset '{asset_name}': No items to process after preparation stage.") + # Status will be determined at the end + + # --- Core Item Processing Loop --- + log.info("ORCHESTRATOR: Starting processing items loop for asset '%s'", asset_name) # Corrected indentation and message + log.info(f"Asset '{asset_name}': Starting core item processing loop for {len(context.processing_items)} items...") + asset_had_item_errors = False + for item_index, item in enumerate(context.processing_items): + item_key: Any = None # Key for storing results (FileRule object or task_key string) + item_log_prefix = f"Asset '{asset_name}', Item {item_index + 1}/{len(context.processing_items)}" + processed_data: Optional[Union[ProcessedRegularMapData, ProcessedMergedMapData]] = None + scaled_data_output: Optional[InitialScalingOutput] = None # Store output object + saved_data: Optional[SaveVariantsOutput] = None + item_status = "Failed" # Default item status + current_image_data: Optional[np.ndarray] = None # Track current image data ref + + try: + # 1. 
Process (Load/Merge + Transform) + if isinstance(item, FileRule): + if item.item_type == 'EXTRA': + log.debug(f"{item_log_prefix}: Skipping image processing for EXTRA FileRule '{item.file_path}'.") + # Add a basic entry to processed_maps_details to acknowledge it was seen + context.processed_maps_details[item.file_path] = { + "status": "Skipped (EXTRA file)", + "internal_map_type": "EXTRA", + "source_file": str(item.file_path) + } + continue # Skip to the next item + item_key = item.file_path # Use file_path string as key + log.debug(f"{item_log_prefix}: Processing FileRule '{item.file_path}'...") + processed_data = self._regular_processor_stage.execute(context, item) + elif isinstance(item, MergeTaskDefinition): + item_key = item.task_key # Use task_key string as key + log.info(f"{item_log_prefix}: Executing MergedTaskProcessorStage for MergeTask '{item_key}'...") # Log call + processed_data = self._merged_processor_stage.execute(context, item) + # Log status/error from merge processor + if processed_data: + log.info(f"{item_log_prefix}: MergedTaskProcessorStage result - Status: {processed_data.status}, Error: {processed_data.error_message}") + else: + log.warning(f"{item_log_prefix}: MergedTaskProcessorStage returned None for MergeTask '{item_key}'.") + else: + log.warning(f"{item_log_prefix}: Unknown item type '{type(item)}'. Skipping.") + item_key = f"unknown_item_{item_index}" + context.processed_maps_details[item_key] = {"status": "Skipped", "notes": f"Unknown item type {type(item)}"} + asset_had_item_errors = True + continue # Next item + + # Check for processing failure + if not processed_data or processed_data.status != "Processed": + error_msg = processed_data.error_message if processed_data else "Processor returned None" + log.error(f"{item_log_prefix}: Failed during processing stage. 
Error: {error_msg}") + context.processed_maps_details[item_key] = {"status": "Failed", "notes": f"Processing Error: {error_msg}", "stage": processed_data.__class__.__name__ if processed_data else "UnknownProcessor"} + asset_had_item_errors = True + continue # Next item + + # Store intermediate result & get current image data + context.intermediate_results[item_key] = processed_data + current_image_data = processed_data.processed_image_data if isinstance(processed_data, ProcessedRegularMapData) else processed_data.merged_image_data + current_dimensions = processed_data.original_dimensions if isinstance(processed_data, ProcessedRegularMapData) else processed_data.final_dimensions + + # 2. Scale (Optional) + scaling_mode = getattr(context.config_obj, "INITIAL_SCALING_MODE", "NONE") + if scaling_mode != "NONE" and current_image_data is not None and current_image_data.size > 0: + if isinstance(item, MergeTaskDefinition): # Log scaling call for merge tasks + log.info(f"{item_log_prefix}: Calling InitialScalingStage for MergeTask '{item_key}' (Mode: {scaling_mode})...") + log.debug(f"{item_log_prefix}: Applying initial scaling (Mode: {scaling_mode})...") + scale_input = InitialScalingInput( + image_data=current_image_data, + original_dimensions=current_dimensions, # Pass original/merged dims + initial_scaling_mode=scaling_mode + ) + scaled_data_output = self._scaling_stage.execute(scale_input) + # Update intermediate result and current image data reference + context.intermediate_results[item_key] = scaled_data_output # Overwrite previous intermediate + current_image_data = scaled_data_output.scaled_image_data # Use scaled data for saving + log.debug(f"{item_log_prefix}: Scaling applied: {scaled_data_output.scaling_applied}. 
New Dims: {scaled_data_output.final_dimensions}") + else: + log.debug(f"{item_log_prefix}: Initial scaling skipped (Mode: NONE or empty image).") + # Create dummy output if scaling skipped, using current dims + final_dims = current_dimensions if current_dimensions else (current_image_data.shape[1], current_image_data.shape[0]) if current_image_data is not None else (0,0) + scaled_data_output = InitialScalingOutput(scaled_image_data=current_image_data, scaling_applied=False, final_dimensions=final_dims) + + + # 3. Save Variants + if current_image_data is None or current_image_data.size == 0: + log.warning(f"{item_log_prefix}: Skipping save stage because image data is empty.") + context.processed_maps_details[item_key] = {"status": "Skipped", "notes": "No image data to save", "stage": "SaveVariantsStage"} + # Don't mark as asset error, just skip this item's saving + continue # Next item + + if isinstance(item, MergeTaskDefinition): # Log save call for merge tasks + log.info(f"{item_log_prefix}: Calling SaveVariantsStage for MergeTask '{item_key}'...") + log.debug(f"{item_log_prefix}: Saving variants...") + # Prepare input for save stage + internal_map_type = processed_data.final_internal_map_type if isinstance(processed_data, ProcessedRegularMapData) else processed_data.output_map_type + source_bit_depth = [processed_data.original_bit_depth] if isinstance(processed_data, ProcessedRegularMapData) and processed_data.original_bit_depth is not None else processed_data.source_bit_depths if isinstance(processed_data, ProcessedMergedMapData) else [8] # Default bit depth if unknown + + # Construct filename tokens (ensure temp dir is used) + output_filename_tokens = { + 'asset_name': asset_name, + 'output_base_directory': context.engine_temp_dir, # Save variants to temp dir + # Add other tokens from context/config as needed by the pattern + 'supplier': context.effective_supplier or 'UnknownSupplier', + } + + # Log the value being read for the threshold before creating the 
input object + log.info(f"ORCHESTRATOR_DEBUG: Reading RESOLUTION_THRESHOLD_FOR_JPG from config for SaveVariantsInput: {getattr(context.config_obj, 'RESOLUTION_THRESHOLD_FOR_JPG', None)}") + save_input = SaveVariantsInput( + image_data=current_image_data, # Use potentially scaled data + internal_map_type=internal_map_type, + source_bit_depth_info=source_bit_depth, + output_filename_pattern_tokens=output_filename_tokens, + # Pass config values needed by save stage + image_resolutions=context.config_obj.image_resolutions, + file_type_defs=getattr(context.config_obj, "FILE_TYPE_DEFINITIONS", {}), + output_format_8bit=context.config_obj.get_8bit_output_format(), + output_format_16bit_primary=context.config_obj.get_16bit_output_formats()[0], + output_format_16bit_fallback=context.config_obj.get_16bit_output_formats()[1], + png_compression_level=context.config_obj.png_compression_level, + jpg_quality=context.config_obj.jpg_quality, + output_filename_pattern=context.config_obj.output_filename_pattern, + resolution_threshold_for_jpg=getattr(context.config_obj, "resolution_threshold_for_jpg", None) # Corrected case + ) + saved_data = self._save_stage.execute(save_input) + # Log saved_data for merge tasks + if isinstance(item, MergeTaskDefinition): + log.info(f"{item_log_prefix}: SaveVariantsStage result for MergeTask '{item_key}' - Status: {saved_data.status if saved_data else 'N/A'}, Saved Files: {len(saved_data.saved_files_details) if saved_data else 0}") + + # Check save status and finalize item result + if saved_data and saved_data.status.startswith("Processed"): + item_status = saved_data.status # e.g., "Processed" or "Processed (No Output)" + log.info(f"{item_log_prefix}: Item successfully processed and saved. 
Status: {item_status}") + # Populate final details for this item + final_details = { + "status": item_status, + "saved_files_info": saved_data.saved_files_details, # List of dicts from save util + "internal_map_type": internal_map_type, + "original_dimensions": processed_data.original_dimensions if isinstance(processed_data, ProcessedRegularMapData) else None, + "final_dimensions": scaled_data_output.final_dimensions if scaled_data_output else current_dimensions, + "transformations": processed_data.transformations_applied if isinstance(processed_data, ProcessedRegularMapData) else processed_data.transformations_applied_to_inputs, + # Add source file if regular map + "source_file": str(processed_data.source_file_path) if isinstance(processed_data, ProcessedRegularMapData) else None, + } + # Log final details addition for merge tasks + if isinstance(item, MergeTaskDefinition): + log.info(f"{item_log_prefix}: Adding final details to context.processed_maps_details for MergeTask '{item_key}'. Details: {final_details}") + context.processed_maps_details[item_key] = final_details + else: + error_msg = saved_data.error_message if saved_data else "Save stage returned None" + log.error(f"{item_log_prefix}: Failed during save stage. 
Error: {error_msg}") + context.processed_maps_details[item_key] = {"status": "Failed", "notes": f"Save Error: {error_msg}", "stage": "SaveVariantsStage"} + asset_had_item_errors = True + item_status = "Failed" # Ensure item status reflects failure + + except Exception as e: + log.exception(f"{item_log_prefix}: Unhandled exception during item processing loop: {e}") + # Ensure details are recorded even on unhandled exception + if item_key is not None: + context.processed_maps_details[item_key] = {"status": "Failed", "notes": f"Unhandled Loop Error: {e}", "stage": "OrchestratorLoop"} + else: + log.error(f"Asset '{asset_name}': Unhandled exception in item loop before item key was set.") + asset_had_item_errors = True + item_status = "Failed" + # Optionally break loop or continue? Continue for now to process other items. + + log.info("ORCHESTRATOR: Finished processing items loop for asset '%s'", asset_name) + log.info(f"Asset '{asset_name}': Finished core item processing loop.") + + # --- Execute Post-Item-Processing Outer Stages --- + # (e.g., OutputOrganization, MetadataFinalizationSave) + # Identify which outer stages run after the item loop + # This needs better handling based on stage purpose. Assume none run after for now. 
+ if not context.status_flags.get("asset_failed"): + log.info("ORCHESTRATOR: Executing post-item-processing outer stages for asset '%s'", asset_name) + context = self._execute_specific_stages(context, self.post_item_stages, "post-item", stop_on_skip=False) + + # --- Final Asset Status Determination --- + final_asset_status = "Unknown" + fail_reason = "" + if context.status_flags.get("asset_failed"): + final_asset_status = "Failed" + fail_reason = f"(Failed in {context.status_flags.get('asset_failed_stage', 'Unknown Stage')}: {context.status_flags.get('asset_failed_reason', 'Unknown Reason')})" + elif context.status_flags.get("skip_asset"): + final_asset_status = "Skipped" + fail_reason = f"(Skipped: {context.status_flags.get('skip_reason', 'Unknown Reason')})" + elif asset_had_item_errors: + final_asset_status = "Failed" + fail_reason = "(One or more items failed)" + elif not context.processing_items: + # No items prepared, no errors -> consider skipped or processed based on definition? + final_asset_status = "Skipped" # Or "Processed (No Items)" + fail_reason = "(No items to process)" + elif not context.processed_maps_details and context.processing_items: + # Items were prepared, but none resulted in processed_maps_details entry + final_asset_status = "Skipped" # Or Failed? + fail_reason = "(All processing items skipped or failed internally)" + elif context.processed_maps_details: + # Check if all items in processed_maps_details are actually processed successfully + all_processed_ok = all( + str(details.get("status", "")).startswith("Processed") + for details in context.processed_maps_details.values() + ) + some_processed_ok = any( + str(details.get("status", "")).startswith("Processed") + for details in context.processed_maps_details.values() + ) + + if all_processed_ok: + final_asset_status = "Processed" + elif some_processed_ok: + final_asset_status = "Partial" # Introduce a partial status? Or just Failed? 
+ fail_reason = "(Some items failed)" + final_asset_status = "Failed" # Treat partial as Failed for overall status + else: # No items processed successfully + final_asset_status = "Failed" + fail_reason = "(All items failed)" + else: + # Should not happen if processing_items existed + final_asset_status = "Failed" + fail_reason = "(Unknown state after item processing)" + + + # Update overall status list + if final_asset_status == "Processed": + overall_status["processed"].append(asset_name) + elif final_asset_status == "Skipped": + overall_status["skipped"].append(f"{asset_name} {fail_reason}") + else: # Failed or Unknown + overall_status["failed"].append(f"{asset_name} {fail_reason}") + + log.info(f"Asset '{asset_name}' final status: {final_asset_status} {fail_reason}") + # Clean up intermediate results for the asset to save memory + context.intermediate_results = {} + + + except Exception as e: + log.error(f"PipelineOrchestrator.process_source_rule failed critically: {e}", exc_info=True) + # Mark all assets from this source rule that weren't finished as failed + processed_or_skipped_or_failed = set(overall_status["processed"]) | \ + set(name.split(" ")[0] for name in overall_status["skipped"]) | \ + set(name.split(" ")[0] for name in overall_status["failed"]) + for asset_rule in source_rule.assets: + if asset_rule.asset_name not in processed_or_skipped_or_failed: + overall_status["failed"].append(f"{asset_rule.asset_name} (Orchestrator Error: {e})") + finally: + # --- Cleanup Temporary Directory --- + if engine_temp_dir_path and engine_temp_dir_path.exists(): + try: + log.debug(f"PipelineOrchestrator cleaning up temporary directory: {engine_temp_dir_path}") + shutil.rmtree(engine_temp_dir_path, ignore_errors=True) + except Exception as e: + log.error(f"Error cleaning up orchestrator temporary directory {engine_temp_dir_path}: {e}", exc_info=True) + + return overall_status \ No newline at end of file diff --git 
a/processing/pipeline/stages/alpha_extraction_to_mask.py b/processing/pipeline/stages/alpha_extraction_to_mask.py new file mode 100644 index 0000000..87aa3b6 --- /dev/null +++ b/processing/pipeline/stages/alpha_extraction_to_mask.py @@ -0,0 +1,179 @@ +import logging +import uuid +from pathlib import Path +from typing import List, Optional, Dict + +import numpy as np + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from ...utils import image_processing_utils as ipu +from rule_structure import FileRule +from utils.path_utils import sanitize_filename + +logger = logging.getLogger(__name__) + +class AlphaExtractionToMaskStage(ProcessingStage): + """ + Extracts an alpha channel from a suitable source map (e.g., Albedo, Diffuse) + to generate a MASK map if one is not explicitly defined. + """ + # Use MAP_ prefixed types for internal logic checks + SUITABLE_SOURCE_MAP_TYPES = ["MAP_COL", "MAP_ALBEDO", "MAP_BASECOLOR"] # Map types likely to have alpha + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + logger.debug(f"Asset '{asset_name_for_log}': Running AlphaExtractionToMaskStage.") + + if context.status_flags.get('skip_asset'): + logger.debug(f"Asset '{asset_name_for_log}': Skipping due to 'skip_asset' flag.") + return context + + if not context.files_to_process or not context.processed_maps_details: + logger.debug( + f"Asset '{asset_name_for_log}': Skipping alpha extraction - " + f"no files to process or no processed map details." + ) + return context + + # A. 
Check for Existing MASK Map + for file_rule in context.files_to_process: + # Assuming file_rule has 'map_type' and 'file_path' (instead of filename_pattern) + # Check for existing MASK map using the correct item_type field and MAP_ prefix + if file_rule.item_type == "MAP_MASK": + file_path_for_log = file_rule.file_path if hasattr(file_rule, 'file_path') else "Unknown file path" + logger.info( + f"Asset '{asset_name_for_log}': MASK map already defined by FileRule " + f"for '{file_path_for_log}'. Skipping alpha extraction." + ) + return context + + # B. Find Suitable Source Map with Alpha + source_map_details_for_alpha: Optional[Dict] = None + source_file_rule_id_for_alpha: Optional[str] = None # This ID comes from processed_maps_details keys + + for file_rule_id, details in context.processed_maps_details.items(): + # Check for suitable source map using the standardized internal_map_type field + internal_map_type = details.get('internal_map_type') # Use the standardized field + if details.get('status') == 'Processed' and \ + internal_map_type in self.SUITABLE_SOURCE_MAP_TYPES: + try: + temp_path = Path(details['temp_processed_file']) + if not temp_path.exists(): + logger.warning( + f"Asset '{asset_name_for_log}': Temp file {temp_path} for map " + f"{details['map_type']} (ID: {file_rule_id}) does not exist. Cannot check for alpha." 
+ ) + continue + + image_data = ipu.load_image(temp_path) + + if image_data is not None and image_data.ndim == 3 and image_data.shape[2] == 4: + source_map_details_for_alpha = details + source_file_rule_id_for_alpha = file_rule_id + logger.info( + f"Asset '{asset_name_for_log}': Found potential source for alpha extraction: " + f"{temp_path} (MapType: {details['map_type']})" + ) + break + except Exception as e: + logger.warning( + f"Asset '{asset_name_for_log}': Error checking alpha for {details.get('temp_processed_file', 'N/A')}: {e}" + ) + continue + + + if source_map_details_for_alpha is None or source_file_rule_id_for_alpha is None: + logger.info( + f"Asset '{asset_name_for_log}': No suitable source map with alpha channel found " + f"for MASK extraction." + ) + return context + + # C. Extract Alpha Channel + source_image_path = Path(source_map_details_for_alpha['temp_processed_file']) + full_image_data = ipu.load_image(source_image_path) # Reload to ensure we have the original RGBA + + if full_image_data is None or not (full_image_data.ndim == 3 and full_image_data.shape[2] == 4): + logger.error( + f"Asset '{asset_name_for_log}': Failed to reload or verify alpha channel from " + f"{source_image_path} for MASK extraction." + ) + return context + + alpha_channel: np.ndarray = full_image_data[:, :, 3] # Extract alpha (0-255) + + # D. Save New Temporary MASK Map + if alpha_channel.ndim == 2: # Expected + pass + elif alpha_channel.ndim == 3 and alpha_channel.shape[2] == 1: # (H, W, 1) + alpha_channel = alpha_channel.squeeze(axis=2) + else: + logger.error( + f"Asset '{asset_name_for_log}': Extracted alpha channel has unexpected dimensions: " + f"{alpha_channel.shape}. Cannot save." 
+ ) + return context + + mask_temp_filename = ( + f"mask_from_alpha_{sanitize_filename(source_map_details_for_alpha['map_type'])}" + f"_{source_file_rule_id_for_alpha}{source_image_path.suffix}" + ) + mask_temp_path = context.engine_temp_dir / mask_temp_filename + + save_success = ipu.save_image(mask_temp_path, alpha_channel) + + if not save_success: + logger.error( + f"Asset '{asset_name_for_log}': Failed to save extracted alpha mask to {mask_temp_path}." + ) + return context + + logger.info( + f"Asset '{asset_name_for_log}': Extracted alpha and saved as new MASK map: {mask_temp_path}" + ) + + # E. Create New FileRule for the MASK and Update Context + # FileRule does not have id, active, transform_settings, source_map_ids_for_generation + # It has file_path, item_type, item_type_override, etc. + new_mask_file_rule = FileRule( + file_path=mask_temp_path.name, # Use file_path + item_type="MAP_MASK", # This should be the item_type for a mask + map_type="MASK" # Explicitly set map_type if FileRule has it, or handle via item_type + # Other FileRule fields like item_type_override can be set if needed + ) + # If FileRule needs a unique identifier, it should be handled differently, + # perhaps by generating one and storing it in common_metadata or a separate mapping. + # For now, we create a simple FileRule. + + context.files_to_process.append(new_mask_file_rule) + + # For processed_maps_details, we need a unique key. Using a new UUID. 
+ new_mask_processed_map_key = uuid.uuid4().hex + + original_dims = source_map_details_for_alpha.get('original_dimensions') + if original_dims is None and full_image_data is not None: # Fallback if not in details + original_dims = (full_image_data.shape[1], full_image_data.shape[0]) + + + context.processed_maps_details[new_mask_processed_map_key] = { + 'internal_map_type': "MAP_MASK", # Use the standardized MAP_ prefixed field + 'map_type': "MASK", # Keep standard type for metadata/naming consistency if needed + 'source_file': str(source_image_path), + 'temp_processed_file': str(mask_temp_path), + 'original_dimensions': original_dims, + 'processed_dimensions': (alpha_channel.shape[1], alpha_channel.shape[0]), + 'status': 'Processed', + 'notes': ( + f"Generated from alpha of {source_map_details_for_alpha.get('internal_map_type', 'unknown type')} " # Use internal_map_type for notes + f"(Source Detail ID: {source_file_rule_id_for_alpha})" + ), + # 'file_rule_id': new_mask_file_rule_id_str # FileRule doesn't have an ID to link here directly + } + + logger.info( + f"Asset '{asset_name_for_log}': Added new FileRule for generated MASK " + f"and updated processed_maps_details with key '{new_mask_processed_map_key}'." + ) + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/asset_skip_logic.py b/processing/pipeline/stages/asset_skip_logic.py new file mode 100644 index 0000000..0c176fe --- /dev/null +++ b/processing/pipeline/stages/asset_skip_logic.py @@ -0,0 +1,55 @@ +import logging +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext + +class AssetSkipLogicStage(ProcessingStage): + """ + Processing stage to determine if an asset should be skipped based on various conditions. + """ + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the asset skip logic. + + Args: + context: The asset processing context. + + Returns: + The updated asset processing context. 
+ """ + context.status_flags['skip_asset'] = False # Initialize/reset skip flag + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + + # 1. Check for Supplier Error + # Assuming 'supplier_error' might be set by a previous stage (e.g., SupplierDeterminationStage) + # or if effective_supplier is None after attempts to determine it. + if context.effective_supplier is None or context.status_flags.get('supplier_error', False): + logging.info(f"Asset '{asset_name_for_log}': Skipping due to missing or invalid supplier.") + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Invalid or missing supplier" + return context + + # 2. Check process_status in asset_rule.common_metadata + process_status = context.asset_rule.common_metadata.get('process_status') + + if process_status == "SKIP": + logging.info(f"Asset '{asset_name_for_log}': Skipping as per common_metadata.process_status 'SKIP'.") + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Process status set to SKIP in common_metadata" + return context + + # Assuming context.config_obj.general_settings.overwrite_existing is a valid path. + # This might need adjustment if 'general_settings' or 'overwrite_existing' is not found. + # For now, we'll assume it's correct based on the original code's intent. + if process_status == "PROCESSED" and \ + hasattr(context.config_obj, 'general_settings') and \ + not getattr(context.config_obj.general_settings, 'overwrite_existing', True): # Default to True (allow overwrite) if not found + logging.info( + f"Asset '{asset_name_for_log}': Skipping as it's already 'PROCESSED' (from common_metadata) " + f"and overwrite is disabled." + ) + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Already processed (common_metadata), overwrite disabled" + return context + + # If none of the above conditions are met, skip_asset remains False. 
+ return context \ No newline at end of file diff --git a/processing/pipeline/stages/base_stage.py b/processing/pipeline/stages/base_stage.py new file mode 100644 index 0000000..321a0d4 --- /dev/null +++ b/processing/pipeline/stages/base_stage.py @@ -0,0 +1,22 @@ +from abc import ABC, abstractmethod + +from ..asset_context import AssetProcessingContext + + +class ProcessingStage(ABC): + """ + Abstract base class for a stage in the asset processing pipeline. + """ + + @abstractmethod + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the processing logic of this stage. + + Args: + context: The current asset processing context. + + Returns: + The updated asset processing context. + """ + pass \ No newline at end of file diff --git a/processing/pipeline/stages/file_rule_filter.py b/processing/pipeline/stages/file_rule_filter.py new file mode 100644 index 0000000..b6785c8 --- /dev/null +++ b/processing/pipeline/stages/file_rule_filter.py @@ -0,0 +1,90 @@ +import logging +import fnmatch +from typing import List, Set + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from rule_structure import FileRule + + +class FileRuleFilterStage(ProcessingStage): + """ + Determines which FileRules associated with an AssetRule should be processed. + Populates context.files_to_process, respecting FILE_IGNORE rules. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the file rule filtering logic. + + Args: + context: The AssetProcessingContext for the current asset. + + Returns: + The modified AssetProcessingContext. 
+ """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + if context.status_flags.get('skip_asset'): + logging.debug(f"Asset '{asset_name_for_log}': Skipping FileRuleFilterStage due to 'skip_asset' flag.") + return context + + context.files_to_process: List[FileRule] = [] + ignore_patterns: Set[str] = set() + + # Step 1: Collect all FILE_IGNORE patterns + if context.asset_rule and context.asset_rule.files: + for file_rule in context.asset_rule.files: + if file_rule.item_type == "FILE_IGNORE": # Removed 'and file_rule.active' + if hasattr(file_rule, 'file_path') and file_rule.file_path: + ignore_patterns.add(file_rule.file_path) + logging.debug( + f"Asset '{asset_name_for_log}': Registering ignore pattern: '{file_rule.file_path}'" + ) + else: + logging.warning(f"Asset '{asset_name_for_log}': FILE_IGNORE rule found without a file_path. Skipping this ignore rule.") + else: + logging.debug(f"Asset '{asset_name_for_log}': No file rules (context.asset_rule.files) to process or asset_rule is None.") + # Still need to return context even if there are no rules + logging.info(f"Asset '{asset_name_for_log}': 0 file rules queued for processing after filtering.") + return context + + + # Step 2: Filter and add processable FileRules + for file_rule in context.asset_rule.files: # Iterate over .files + # Removed 'if not file_rule.active:' check + + if file_rule.item_type == "FILE_IGNORE": + # Already processed, skip. + continue + + is_ignored = False + # Ensure file_rule.file_path exists before using it with fnmatch + current_file_path = file_rule.file_path if hasattr(file_rule, 'file_path') else None + if not current_file_path: + logging.warning(f"Asset '{asset_name_for_log}': FileRule found without a file_path. Skipping this rule for ignore matching.") + # Decide if this rule should be added or skipped if it has no path + # For now, let's assume it might be an error and not add it if it can't be matched. 
+ # If it should be added by default, this logic needs adjustment. + continue + + + for ignore_pat in ignore_patterns: + if fnmatch.fnmatch(current_file_path, ignore_pat): + is_ignored = True + logging.debug( + f"Asset '{asset_name_for_log}': Skipping file rule for '{current_file_path}' " + f"due to matching ignore pattern '{ignore_pat}'." + ) + break + + if not is_ignored: + context.files_to_process.append(file_rule) + logging.debug( + f"Asset '{asset_name_for_log}': Adding file rule for '{current_file_path}' " + f"(type: {file_rule.item_type}) to processing queue." + ) + + logging.info( + f"Asset '{asset_name_for_log}': {len(context.files_to_process)} file rules queued for processing after filtering." + ) + return context \ No newline at end of file diff --git a/processing/pipeline/stages/gloss_to_rough_conversion.py b/processing/pipeline/stages/gloss_to_rough_conversion.py new file mode 100644 index 0000000..9c2f948 --- /dev/null +++ b/processing/pipeline/stages/gloss_to_rough_conversion.py @@ -0,0 +1,195 @@ +import logging +from pathlib import Path +import numpy as np +from typing import List +import dataclasses + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from rule_structure import FileRule +from ...utils import image_processing_utils as ipu +from utils.path_utils import sanitize_filename + +logger = logging.getLogger(__name__) + +class GlossToRoughConversionStage(ProcessingStage): + """ + Processing stage to convert glossiness maps to roughness maps. + Iterates through FileRules, identifies GLOSS maps, loads their + corresponding temporary processed images, inverts them, and saves + them as new temporary ROUGHNESS maps. Updates the FileRule and + context.processed_maps_details accordingly. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Executes the gloss to roughness conversion logic. 
+ + Args: + context: The AssetProcessingContext containing asset and processing details. + + Returns: + The updated AssetProcessingContext. + """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + if context.status_flags.get('skip_asset'): + logger.debug(f"Asset '{asset_name_for_log}': Skipping GlossToRoughConversionStage due to skip_asset flag.") + return context + + if not context.processed_maps_details: # files_to_process might be empty if only gloss maps existed and all are converted + logger.debug( + f"Asset '{asset_name_for_log}': processed_maps_details is empty in GlossToRoughConversionStage. Skipping." + ) + return context + + # Start with a copy of the current file rules. We will modify this list. + new_files_to_process: List[FileRule] = list(context.files_to_process) if context.files_to_process else [] + processed_a_gloss_map = False + successful_conversion_statuses = ['BasePOTSaved', 'Processed_With_Variants', 'Processed_No_Variants'] + + logger.info(f"Asset '{asset_name_for_log}': Starting Gloss to Roughness Conversion Stage. Examining {len(context.processed_maps_details)} processed map entries.") + + # Iterate using the index (map_key_index) as the key, which is now standard. + for map_key_index, map_details in context.processed_maps_details.items(): + # Use the standardized internal_map_type field + internal_map_type = map_details.get('internal_map_type', '') + map_status = map_details.get('status') + original_temp_path_str = map_details.get('temp_processed_file') + # source_file_rule_idx from details should align with map_key_index. + # We primarily use map_key_index for accessing FileRule from context.files_to_process. 
+ source_file_rule_idx_from_details = map_details.get('source_file_rule_index') + processing_tag = map_details.get('processing_tag') + + if map_key_index != source_file_rule_idx_from_details: + logger.warning( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Mismatch between map key index and 'source_file_rule_index' ({source_file_rule_idx_from_details}) in details. " + f"Using map_key_index ({map_key_index}) for FileRule lookup. This might indicate a data consistency issue from previous stage." + ) + + if not processing_tag: + logger.warning(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: 'processing_tag' is missing in map_details. Using a fallback for temp filename. This is unexpected.") + processing_tag = f"mki_{map_key_index}_fallback_tag" + + + # Check if the map is a GLOSS map using the standardized internal_map_type + if not internal_map_type.startswith("MAP_GLOSS"): + # logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Type '{internal_map_type}' is not GLOSS. Skipping.") + continue + + logger.info(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Identified potential GLOSS map (Type: {internal_map_type}).") + + if map_status not in successful_conversion_statuses: + logger.warning( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): Status '{map_status}' is not one of {successful_conversion_statuses}. " + f"Skipping conversion for this map." + ) + continue + + if not original_temp_path_str: + logger.warning( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): 'temp_processed_file' missing in details. " + f"Skipping conversion." 
+ ) + continue + + original_temp_path = Path(original_temp_path_str) + if not original_temp_path.exists(): + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): Temporary file {original_temp_path_str} " + f"does not exist. Skipping conversion." + ) + continue + + # Use map_key_index directly to access the FileRule + # Ensure map_key_index is a valid index for context.files_to_process + if not isinstance(map_key_index, int) or map_key_index < 0 or map_key_index >= len(context.files_to_process): + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}) (GLOSS): Invalid map_key_index ({map_key_index}) for accessing files_to_process (len: {len(context.files_to_process)}). " + f"Skipping conversion." + ) + continue + + original_file_rule = context.files_to_process[map_key_index] + source_file_path_for_log = original_file_rule.file_path if hasattr(original_file_rule, 'file_path') else "Unknown source path" + logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Processing GLOSS map from '{original_temp_path_str}' (Original FileRule path: '{source_file_path_for_log}') for conversion.") + + image_data = ipu.load_image(str(original_temp_path)) + if image_data is None: + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Failed to load image data from {original_temp_path_str}. " + f"Skipping conversion." 
+ ) + continue + + # Perform Inversion + inverted_image_data: np.ndarray + if np.issubdtype(image_data.dtype, np.floating): + inverted_image_data = 1.0 - image_data + inverted_image_data = np.clip(inverted_image_data, 0.0, 1.0) + logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Inverted float image data.") + elif np.issubdtype(image_data.dtype, np.integer): + max_val = np.iinfo(image_data.dtype).max + inverted_image_data = max_val - image_data + logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Inverted integer image data (max_val: {max_val}).") + else: + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Unsupported image data type {image_data.dtype} " + f"for GLOSS map. Cannot invert. Skipping conversion." + ) + continue + + # Save New Temporary (Roughness) Map + new_temp_filename = f"rough_from_gloss_{processing_tag}{original_temp_path.suffix}" + new_temp_path = context.engine_temp_dir / new_temp_filename + + save_success = ipu.save_image(str(new_temp_path), inverted_image_data) + + if save_success: + logger.info( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Converted GLOSS map {original_temp_path_str} " + f"to ROUGHNESS map {new_temp_path}." + ) + + update_dict = {'item_type': "MAP_ROUGH", 'item_type_override': "MAP_ROUGH"} + + modified_file_rule: Optional[FileRule] = None + if hasattr(original_file_rule, 'model_copy') and callable(original_file_rule.model_copy): # Pydantic + modified_file_rule = original_file_rule.model_copy(update=update_dict) + elif dataclasses.is_dataclass(original_file_rule): # Dataclass + modified_file_rule = dataclasses.replace(original_file_rule, **update_dict) + else: + logger.error(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Original FileRule is neither Pydantic nor dataclass. Cannot modify. 
Skipping update for this rule.") + continue + + new_files_to_process[map_key_index] = modified_file_rule # Replace using map_key_index + + # Update context.processed_maps_details for this map_key_index + map_details['temp_processed_file'] = str(new_temp_path) + map_details['original_map_type_before_conversion'] = internal_map_type # Store the original internal type + map_details['internal_map_type'] = "MAP_ROUGH" # Use the standardized MAP_ prefixed field + map_details['map_type'] = "Roughness" # Keep standard type for metadata/naming consistency if needed + map_details['status'] = "Converted_To_Rough" + map_details['notes'] = map_details.get('notes', '') + "; Converted from GLOSS by GlossToRoughConversionStage" + if 'base_pot_resolution_name' in map_details: + map_details['processed_resolution_name'] = map_details['base_pot_resolution_name'] + + processed_a_gloss_map = True + else: + logger.error( + f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Failed to save inverted ROUGHNESS map to {new_temp_path}. " + f"Original GLOSS FileRule remains." + ) + + context.files_to_process = new_files_to_process + + if processed_a_gloss_map: + logger.info( + f"Asset '{asset_name_for_log}': Gloss to Roughness conversion stage finished. Processed one or more maps and updated file list and map details." + ) + else: + logger.info( + f"Asset '{asset_name_for_log}': No gloss maps were converted in GlossToRoughConversionStage. " + f"File list for next stage contains original non-gloss maps and any gloss maps that failed or were ineligible for conversion." 
+ ) + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/initial_scaling.py b/processing/pipeline/stages/initial_scaling.py new file mode 100644 index 0000000..6fc27ab --- /dev/null +++ b/processing/pipeline/stages/initial_scaling.py @@ -0,0 +1,83 @@ +import logging +from typing import Tuple + +import cv2 # Assuming cv2 is available for interpolation flags +import numpy as np + +from .base_stage import ProcessingStage +# Import necessary context classes and utils +from ..asset_context import InitialScalingInput, InitialScalingOutput +from ...utils import image_processing_utils as ipu + +log = logging.getLogger(__name__) + +class InitialScalingStage(ProcessingStage): + """ + Applies initial scaling (e.g., Power-of-Two downscaling) to image data + if configured via the InitialScalingInput. + """ + + def execute(self, input_data: InitialScalingInput) -> InitialScalingOutput: + """ + Applies scaling based on input_data.initial_scaling_mode. + """ + log.debug(f"Initial Scaling Stage: Mode '{input_data.initial_scaling_mode}'.") + + image_to_scale = input_data.image_data + original_dims_wh = input_data.original_dimensions + scaling_mode = input_data.initial_scaling_mode + scaling_applied = False + final_image_data = image_to_scale # Default to original if no scaling happens + + if image_to_scale is None or image_to_scale.size == 0: + log.warning("Initial Scaling Stage: Input image data is None or empty. Skipping.") + # Return original (empty) data and indicate no scaling + return InitialScalingOutput( + scaled_image_data=np.array([]), + scaling_applied=False, + final_dimensions=(0, 0) + ) + + if original_dims_wh is None: + log.warning("Initial Scaling Stage: Original dimensions not provided. 
Using current image shape.") + h_pre_scale, w_pre_scale = image_to_scale.shape[:2] + original_dims_wh = (w_pre_scale, h_pre_scale) + else: + w_pre_scale, h_pre_scale = original_dims_wh + + + if scaling_mode == "POT_DOWNSCALE": + pot_w = ipu.get_nearest_power_of_two_downscale(w_pre_scale) + pot_h = ipu.get_nearest_power_of_two_downscale(h_pre_scale) + + if (pot_w, pot_h) != (w_pre_scale, h_pre_scale): + log.info(f"Initial Scaling: Applying POT Downscale from ({w_pre_scale},{h_pre_scale}) to ({pot_w},{pot_h}).") + # Use INTER_AREA for downscaling generally + resized_img = ipu.resize_image(image_to_scale, pot_w, pot_h, interpolation=cv2.INTER_AREA) + if resized_img is not None: + final_image_data = resized_img + scaling_applied = True + log.debug("Initial Scaling: POT Downscale applied successfully.") + else: + log.warning("Initial Scaling: POT Downscale resize failed. Using original data.") + # final_image_data remains image_to_scale + else: + log.info("Initial Scaling: POT Downscale - Image already POT or smaller. No scaling needed.") + # final_image_data remains image_to_scale + + elif scaling_mode == "NONE": + log.info("Initial Scaling: Mode is NONE. No scaling applied.") + # final_image_data remains image_to_scale + else: + log.warning(f"Initial Scaling: Unknown INITIAL_SCALING_MODE '{scaling_mode}'. 
Defaulting to NONE.") + # final_image_data remains image_to_scale + + # Determine final dimensions + final_h, final_w = final_image_data.shape[:2] + final_dims_wh = (final_w, final_h) + + return InitialScalingOutput( + scaled_image_data=final_image_data, + scaling_applied=scaling_applied, + final_dimensions=final_dims_wh + ) \ No newline at end of file diff --git a/processing/pipeline/stages/merged_task_processor.py b/processing/pipeline/stages/merged_task_processor.py new file mode 100644 index 0000000..46507b8 --- /dev/null +++ b/processing/pipeline/stages/merged_task_processor.py @@ -0,0 +1,329 @@ +import logging +import re +from pathlib import Path +from typing import List, Optional, Tuple, Dict, Any + +import cv2 +import numpy as np + +from .base_stage import ProcessingStage +# Import necessary context classes and utils +from ..asset_context import AssetProcessingContext, MergeTaskDefinition, ProcessedMergedMapData +from ...utils import image_processing_utils as ipu + +log = logging.getLogger(__name__) + +class MergedTaskProcessorStage(ProcessingStage): + """ + Processes a single merge task defined in the configuration. + Loads inputs, applies transformations to inputs, handles fallbacks/resizing, + performs the merge, and returns the merged data. + """ + + def _find_input_map_details_in_context( + self, + required_map_type: str, + processed_map_details_context: Dict[str, Dict[str, Any]], + log_prefix_for_find: str + ) -> Optional[Dict[str, Any]]: + """ + Finds the details of a required input map from the context's processed_maps_details. + Prefers exact match for full types (e.g. MAP_TYPE-1), or base type / base type + "-1" for base types (e.g. MAP_TYPE). + Returns the details dictionary for the found map if it has saved_files_info. 
def execute(
    self,
    context: AssetProcessingContext,
    merge_task: MergeTaskDefinition  # Specific item passed by orchestrator
) -> ProcessedMergedMapData:
    """
    Processes the given MergeTaskDefinition item.

    For every channel listed in ``task_data['inputs']`` the required
    ``MAP_*`` input is looked up in ``context.processed_maps_details`` and
    loaded from its first saved file. If that fails, a constant image is
    built from ``task_data['defaults']`` at ``task_data['source_dimensions']``.
    Every input is passed through ``ipu.apply_common_map_transformations``,
    brought to one common size, and packed into a ``uint8`` image following
    ``task_data['channel_order']``.

    Args:
        context: Processing context of the current asset. Reads
            ``asset_rule``, ``config_obj`` and ``processed_maps_details``.
        merge_task: The merge task; ``task_key`` and ``task_data`` are used.

    Returns:
        A ProcessedMergedMapData. ``status`` is "Processed" on success;
        otherwise it stays "Failed" and ``error_message`` names the cause.
        This method reports failures through the result and does not raise.
    """
    asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset"
    task_key = merge_task.task_key
    task_data = merge_task.task_data
    log_prefix = f"Asset '{asset_name_for_log}', Task '{task_key}'"
    log.info(f"{log_prefix}: Processing Merge Task.")

    # Start from a failure state; it is only flipped to "Processed" at the very end.
    result = ProcessedMergedMapData(
        merged_image_data=np.array([]),  # Placeholder
        output_map_type=task_data.get('output_map_type', 'UnknownMergeOutput'),
        source_bit_depths=[],
        final_dimensions=None,
        transformations_applied_to_inputs={},
        status="Failed",
        error_message="Initialization error"
    )

    def _fail(message: str, log_suffix: str = "") -> ProcessedMergedMapData:
        """Record *message* on the result, log it as an error and return the result."""
        result.error_message = message
        log.error(f"{log_prefix}: {message}{log_suffix}")
        return result

    try:
        # --- Configuration & Task Data ---
        config = context.config_obj
        file_type_definitions = getattr(config, "FILE_TYPE_DEFINITIONS", {})
        invert_normal_green = config.invert_normal_green_globally
        merge_dimension_mismatch_strategy = getattr(config, "MERGE_DIMENSION_MISMATCH_STRATEGY", "USE_LARGEST")

        # e.g. {'R': 'MAP_AO', 'G': 'MAP_ROUGH', ...}; mandatory.
        merge_inputs_config = task_data.get('inputs')
        if not isinstance(merge_inputs_config, dict) or not merge_inputs_config:
            return _fail("Merge task data is incomplete (missing or invalid 'inputs' dictionary in task_data).")

        merge_defaults = task_data.get('defaults') or {}  # e.g. {'R': 255, 'G': 255, ...}
        merge_channels_order = task_data.get('channel_order', 'RGB')  # e.g. 'RGB', 'RGBA'

        # Expected (h, w), used for fallback creation and as the default merge size.
        # Task data loaded from JSON yields a list; normalise it so the tuple
        # comparisons and the final validity check below behave the same.
        target_dimensions_hw = task_data.get('source_dimensions')
        if isinstance(target_dimensions_hw, list):
            target_dimensions_hw = tuple(target_dimensions_hw)

        if not target_dimensions_hw and any(merge_defaults.get(ch) is not None for ch in merge_inputs_config):
            log.warning(f"{log_prefix}: Merge task has defaults defined, but 'source_dimensions' (target_dimensions_hw) is missing in task_data. Fallback image creation might fail if needed.")
            # Not an error yet: the fallbacks might never be needed.

        loaded_inputs_for_merge: Dict[str, np.ndarray] = {}  # Channel char -> image data
        actual_input_dimensions: List[Tuple[int, int]] = []  # (h, w) of every file loaded from disk
        input_source_bit_depths: Dict[str, int] = {}  # Channel char -> bit depth
        all_transform_notes: Dict[str, List[str]] = {}  # Channel char -> list of transform notes

        # --- Load, Transform, and Prepare Inputs ---
        log.debug(f"{log_prefix}: Loading and preparing inputs...")
        for channel_char, required_map_type_from_rule in merge_inputs_config.items():
            # Merge inputs must reference internal map types ("MAP_*").
            if not isinstance(required_map_type_from_rule, str) or not required_map_type_from_rule.startswith("MAP_"):
                return _fail(
                    f"Invalid input map type '{required_map_type_from_rule}' for channel '{channel_char}'. "
                    f"Input map types for merging must start with 'MAP_'."
                )

            input_image_data: Optional[np.ndarray] = None
            input_log_prefix = f"{log_prefix}, Input '{required_map_type_from_rule}' (Channel '{channel_char}')"
            channel_transform_notes: List[str] = []

            # 1. Attempt to load from context.processed_maps_details
            found_input_map_details = self._find_input_map_details_in_context(
                required_map_type_from_rule, context.processed_maps_details, input_log_prefix
            )

            if not found_input_map_details:
                log.info(f"{input_log_prefix}: Input map type '{required_map_type_from_rule}' not found in context.processed_maps_details. Attempting fallback.")
            else:
                # The first saved file is treated as the primary variant for merging.
                primary_saved_file_info = found_input_map_details['saved_files_info'][0]
                input_file_path_str = primary_saved_file_info.get('path')

                if not input_file_path_str:
                    log.warning(f"{input_log_prefix}: Found map type '{required_map_type_from_rule}' in context, but 'path' is missing in saved_files_info. Attempting fallback.")
                else:
                    input_file_path = Path(input_file_path_str)
                    if not input_file_path.is_file():
                        log.warning(f"{input_log_prefix}: Input file path '{input_file_path}' (from context) not found. Attempting fallback.")
                    else:
                        try:
                            input_image_data = ipu.load_image(str(input_file_path))
                            if input_image_data is not None:
                                log.info(f"{input_log_prefix}: Loaded from context: {input_file_path}")
                                actual_input_dimensions.append(input_image_data.shape[:2])  # (h, w)
                                # Bit depth comes from the saved variant info.
                                input_source_bit_depths[channel_char] = primary_saved_file_info.get('bit_depth', 8)
                            else:
                                log.warning(f"{input_log_prefix}: Failed to load image from {input_file_path} (found in context). Attempting fallback.")
                        except Exception as e:
                            log.warning(f"{input_log_prefix}: Error loading image from {input_file_path} (found in context): {e}. Attempting fallback.")
                            input_image_data = None  # Ensure fallback is triggered

            # 2. Apply Fallback if needed
            if input_image_data is None:
                fallback_value = merge_defaults.get(channel_char)
                if fallback_value is None:
                    return _fail(f"Missing input '{required_map_type_from_rule}' and no fallback default provided for channel '{channel_char}'.")
                try:
                    if not target_dimensions_hw:
                        return _fail(f"Cannot create fallback for channel '{channel_char}': 'source_dimensions' (target_dimensions_hw) not defined in task_data.")
                    h, w = target_dimensions_hw
                    # A sequence default produces a multi-channel constant image; a scalar a grayscale one.
                    num_channels = len(fallback_value) if isinstance(fallback_value, (list, tuple)) else 1
                    shape = (h, w) if num_channels == 1 else (h, w, num_channels)

                    input_image_data = np.full(shape, fallback_value, dtype=np.uint8)
                    log.warning(f"{input_log_prefix}: Using fallback value {fallback_value} (Target Dims: {target_dimensions_hw}).")
                    input_source_bit_depths[channel_char] = 8  # Assume 8-bit for fallbacks
                    channel_transform_notes.append(f"Used fallback value {fallback_value}")
                except Exception as e:
                    return _fail(f"Error creating fallback for channel '{channel_char}': {e}")

            # 3. Apply Transformations to the loaded/fallback input (on a copy)
            input_image_data, _, transform_notes = ipu.apply_common_map_transformations(
                input_image_data.copy(),
                required_map_type_from_rule,
                invert_normal_green,
                file_type_definitions,
                input_log_prefix
            )
            channel_transform_notes.extend(transform_notes)

            loaded_inputs_for_merge[channel_char] = input_image_data
            all_transform_notes[channel_char] = channel_transform_notes

        result.transformations_applied_to_inputs = all_transform_notes

        # --- Determine the common merge dimensions ---
        log.debug(f"{log_prefix}: Handling dimension mismatches...")
        unique_dimensions = set(actual_input_dimensions)
        target_merge_dims_hw = target_dimensions_hw  # Default

        if len(unique_dimensions) > 1:
            log.warning(f"{log_prefix}: Mismatched dimensions found among loaded inputs: {unique_dimensions}. Applying strategy: {merge_dimension_mismatch_strategy}")

            if merge_dimension_mismatch_strategy == "ERROR_SKIP":
                return _fail("Dimension mismatch and strategy is ERROR_SKIP.")
            if merge_dimension_mismatch_strategy == "USE_FIRST":
                target_merge_dims_hw = actual_input_dimensions[0]
            elif merge_dimension_mismatch_strategy == "USE_LARGEST" or target_merge_dims_hw is None:
                # An unrecognised strategy keeps 'source_dimensions' when it is
                # available and otherwise defaults to USE_LARGEST instead of
                # leaving the target undefined.
                target_merge_dims_hw = (
                    max(h for h, _ in unique_dimensions),
                    max(w for _, w in unique_dimensions),
                )
        elif target_merge_dims_hw is None and actual_input_dimensions:
            # No source_dimensions and no mismatch: adopt the loaded inputs' size.
            target_merge_dims_hw = actual_input_dimensions[0]
            log.info(f"{log_prefix}: Using dimensions from first loaded input: {target_merge_dims_hw}")

        if not isinstance(target_merge_dims_hw, tuple) or len(target_merge_dims_hw) != 2:
            return _fail(
                "Could not determine valid target dimensions for merge operation.",
                f" (target_merge_dims_hw: {target_merge_dims_hw})"
            )

        # --- Bring every input (loaded or fallback) to the merge dimensions ---
        # Fallbacks are created at 'source_dimensions' and loaded files keep their
        # own size, so any input may still differ from the target at this point.
        channels_to_resize = [
            ch for ch, img in loaded_inputs_for_merge.items()
            if tuple(img.shape[:2]) != target_merge_dims_hw
        ]
        if channels_to_resize:
            log.info(f"{log_prefix}: Resizing inputs to target merge dimensions: {target_merge_dims_hw}")
        for channel_char in channels_to_resize:
            resized_img = ipu.resize_image(
                loaded_inputs_for_merge[channel_char],
                target_merge_dims_hw[1],  # w
                target_merge_dims_hw[0],  # h
            )
            if resized_img is None:
                return _fail(f"Failed to resize input for channel '{channel_char}' to {target_merge_dims_hw}.")
            loaded_inputs_for_merge[channel_char] = resized_img
            log.debug(f"{log_prefix}: Resized input for channel '{channel_char}'.")

        # --- Perform Merge ---
        log.debug(f"{log_prefix}: Performing merge operation for channels '{merge_channels_order}'.")
        try:
            output_channels = len(merge_channels_order)
            if output_channels < 1:
                return _fail(f"Invalid channel_order '{merge_channels_order}' in merge config.")

            h, w = target_merge_dims_hw
            output_dtype = np.uint8  # Output depth is fixed to 8-bit for now

            for channel_char in merge_channels_order:
                if loaded_inputs_for_merge.get(channel_char) is None:
                    return _fail(f"Internal error: Missing prepared input for channel '{channel_char}' during final merge assembly.")

            if output_channels == 1:
                source_img = loaded_inputs_for_merge[merge_channels_order[0]]
                # Colour sources contribute their first channel only.
                single_plane = source_img[:, :, 0] if source_img.ndim == 3 else source_img
                merged_image = single_plane.copy().astype(output_dtype)
            else:
                merged_image = np.zeros((h, w, output_channels), dtype=output_dtype)
                for i, channel_char in enumerate(merge_channels_order):
                    source_img = loaded_inputs_for_merge[channel_char]
                    # Simple approach: colour sources contribute their first channel;
                    # no per-channel mapping (R->R, G->G, ...) is performed.
                    merged_image[:, :, i] = source_img[:, :, 0] if source_img.ndim == 3 else source_img

            result.merged_image_data = merged_image
            result.final_dimensions = (merged_image.shape[1], merged_image.shape[0])  # w, h
            result.source_bit_depths = list(input_source_bit_depths.values())
            log.info(f"{log_prefix}: Successfully merged inputs into image with shape {result.merged_image_data.shape}")

        except Exception as e:
            log.exception(f"{log_prefix}: Error during merge operation: {e}")
            result.error_message = f"Merge operation failed: {e}"
            return result

        # --- Success ---
        result.status = "Processed"
        result.error_message = None
        log.info(f"{log_prefix}: Successfully processed merge task.")

    except Exception as e:
        log.exception(f"{log_prefix}: Unhandled exception during processing: {e}")
        result.status = "Failed"
        result.error_message = f"Unhandled exception: {e}"
        # A failed result must never carry partially built image data.
        result.merged_image_data = np.array([])

    return result
class MetadataFinalizationAndSaveStage(ProcessingStage):
    """
    This stage finalizes the asset_metadata (final status, skip notes) and
    saves it as a JSON file below the configured output directory.
    """

    # Status flags that mark the asset as "Failed" when any of them is truthy.
    _ERROR_FLAGS = (
        'file_processing_error', 'merge_error', 'critical_error',
        'individual_map_processing_failed', 'metadata_save_error',
    )

    def execute(self, context: AssetProcessingContext) -> AssetProcessingContext:
        """
        Finalizes metadata, determines output path, and saves the metadata JSON file.

        Args:
            context: The AssetProcessingContext for the current asset.

        Returns:
            The same context. On success ``asset_metadata['metadata_file_path']``
            holds the written file's path; on a save failure the status becomes
            "Failed (Metadata Save Error)" and ``status_flags['metadata_save_error']``
            is set. Nothing is written when ``asset_metadata`` is missing or empty.
        """
        asset_name_for_log = "Unknown Asset"
        asset_rule = getattr(context, 'asset_rule', None)
        if asset_rule and hasattr(asset_rule, 'asset_name'):
            asset_name_for_log = asset_rule.asset_name

        if not getattr(context, 'asset_metadata', None):
            if context.status_flags.get('skip_asset'):
                logger.info(
                    f"Asset '{asset_name_for_log}': "
                    f"Skipped before metadata initialization. No metadata file will be saved."
                )
            else:
                logger.warning(
                    f"Asset '{asset_name_for_log}': "
                    f"asset_metadata not initialized. Skipping metadata finalization and save."
                )
            return context

        # A. Finalize Metadata
        self._finalize_status(context, asset_name_for_log)

        # B. Determine Metadata Output Path
        metadata_save_path = self._resolve_metadata_path(context, asset_name_for_log)

        # C. Save Metadata File
        try:
            metadata_save_path.parent.mkdir(parents=True, exist_ok=True)

            # final_output_files is populated by OutputOrganizationStage and is
            # deliberately kept out of the saved metadata.
            context.asset_metadata.pop('final_output_files', None)
            serializable_metadata = self._make_serializable(
                context.asset_metadata, metadata_save_path.parent, asset_name_for_log
            )

            with open(metadata_save_path, 'w', encoding='utf-8') as f:
                json.dump(serializable_metadata, f, indent=4)
            logger.info(f"Asset '{asset_name_for_log}': Metadata saved to {metadata_save_path}")
            context.asset_metadata['metadata_file_path'] = str(metadata_save_path)
        except Exception as e:
            # Best-effort boundary: a failed save is reported through the context
            # rather than aborting the pipeline; the traceback goes to the log.
            logger.exception(f"Asset '{asset_name_for_log}': Failed to save metadata to {metadata_save_path}. Error: {e}")
            context.asset_metadata['status'] = "Failed (Metadata Save Error)"
            context.status_flags['metadata_save_error'] = True

        return context

    def _finalize_status(self, context: AssetProcessingContext, asset_name_for_log: str) -> None:
        """Set the final 'status' (and skip 'notes') on context.asset_metadata."""
        metadata = context.asset_metadata
        flags = context.status_flags

        if flags.get('skip_asset'):
            # Metadata is still saved for skipped assets once it has been initialized.
            metadata['status'] = "Skipped"
            metadata['notes'] = flags.get('skip_reason', 'Skipped early in pipeline')
            logger.info(
                f"Asset '{asset_name_for_log}': Marked as skipped. Reason: {metadata['notes']}"
            )

        # A status of "Skipped" (set here or by an earlier stage) is never overwritten.
        if metadata.get('status') != "Skipped":
            has_errors = any(flags.get(error_flag) for error_flag in self._ERROR_FLAGS)
            metadata['status'] = "Failed" if has_errors else "Processed"

    def _resolve_metadata_path(self, context: AssetProcessingContext, asset_name_for_log: str) -> Path:
        """Return output_base_path / <generated directory> / <asset>_<metadata filename>."""
        source_rule_identifier_for_path = "unknown_source"
        source_rule = getattr(context, 'source_rule', None)
        if source_rule:
            if getattr(source_rule, 'supplier_identifier', None):
                source_rule_identifier_for_path = source_rule.supplier_identifier
            elif getattr(source_rule, 'input_path', None):
                # Without an identifier, the stem of the input path stands in for it.
                source_rule_identifier_for_path = Path(source_rule.input_path).stem
            else:
                source_rule_identifier_for_path = "unknown_source_details"

        # File name and directory pattern come from config_obj, not from asset_rule.
        metadata_filename_from_config = getattr(context.config_obj, 'metadata_filename', "metadata.json")
        safe_asset_name = sanitize_filename(asset_name_for_log)
        final_metadata_filename = f"{safe_asset_name}_{metadata_filename_from_config}"
        output_path_pattern_from_config = getattr(
            context.config_obj, 'output_directory_pattern', "[supplier]/[assetname]"
        )

        # Prefer sha5_value when set on the context; otherwise fall back to sha256_value.
        sha_value = getattr(context, 'sha5_value', None)
        if sha_value is None:
            sha_value = getattr(context, 'sha256_value', None)

        token_data = {
            "assetname": asset_name_for_log,
            "supplier": context.effective_supplier or source_rule_identifier_for_path,
            "sourcerulename": source_rule_identifier_for_path,
            "incrementingvalue": getattr(context, 'incrementing_value', None),
            "sha5": sha_value,
            "maptype": "metadata",
            "filename": final_metadata_filename,
        }
        # Tokens without a value are dropped before they reach the pattern generator.
        token_data_cleaned = {k: v for k, v in token_data.items() if v is not None}

        # The pattern is expected to resolve to a directory relative to the output base.
        relative_dir_path_str = generate_path_from_pattern(
            pattern_string=output_path_pattern_from_config,
            token_data=token_data_cleaned
        )
        return Path(context.output_base_path) / Path(relative_dir_path_str) / Path(final_metadata_filename)

    @staticmethod
    def _make_serializable(data: Any, metadata_dir: Path, asset_name_for_log: str) -> Any:
        """
        Recursively convert *data* into JSON-serializable values.

        Absolute paths are stored relative to *metadata_dir* when possible,
        datetimes become ISO strings, and tuples/sets become lists so that
        values nested inside them are converted as well. Anything else is
        returned unchanged.
        """
        convert = MetadataFinalizationAndSaveStage._make_serializable

        if isinstance(data, Path):
            if not data.is_absolute():
                # Already relative: stored as-is.
                return str(data)
            try:
                return str(data.relative_to(metadata_dir))
            except ValueError:
                # relative_to() raises when the path is not located under
                # metadata_dir (e.g. an unrelated output directory).
                logger.warning(
                    f"Asset '{asset_name_for_log}': Could not make path {data} "
                    f"relative to {metadata_dir}. Storing as absolute."
                )
                return str(data)
        if isinstance(data, datetime.datetime):
            return data.isoformat()
        if isinstance(data, dict):
            return {k: convert(v, metadata_dir, asset_name_for_log) for k, v in data.items()}
        if isinstance(data, (list, tuple, set, frozenset)):
            return [convert(item, metadata_dir, asset_name_for_log) for item in data]
        return data
+# It should be: from ..asset_context import AssetProcessingContext +# Correcting this based on typical Python package structure and the location of base_stage.py + +# Re-evaluating import based on common structure: +# If base_stage.py is in processing/pipeline/stages/ +# and asset_context.py is in processing/pipeline/ +# then the import for AssetProcessingContext from metadata_initialization.py (in stages) would be: +# from ..asset_context import AssetProcessingContext + +# Let's assume the following structure for clarity: +# processing/ +# L-- pipeline/ +# L-- __init__.py +# L-- asset_context.py +# L-- base_stage.py (Mistake here, base_stage is in stages, so it's ..base_stage) +# L-- stages/ +# L-- __init__.py +# L-- metadata_initialization.py +# L-- base_stage.py (Corrected: base_stage.py is here) + +# Corrected imports based on the plan and typical structure: +# base_stage.py is in processing/pipeline/stages/ +# asset_context.py is in processing/pipeline/ + +# from ..base_stage import ProcessingStage # This would mean base_stage is one level up from stages (i.e. in pipeline) +# The plan says: from ..base_stage import ProcessingStage +# This implies that metadata_initialization.py is in a subdirectory of where base_stage.py is. +# However, the file path for metadata_initialization.py is processing/pipeline/stages/metadata_initialization.py +# And base_stage.py is listed as processing/pipeline/stages/base_stage.py in the open tabs. 
class MetadataInitializationStage(ProcessingStage):
    """
    Initializes metadata structures within the AssetProcessingContext.
    This stage sets up asset_metadata, processed_maps_details, and
    merged_maps_details.
    """

    def execute(self, context: AssetProcessingContext) -> AssetProcessingContext:
        """
        Executes the metadata initialization logic.

        Args:
            context: The AssetProcessingContext for the current asset.

        Returns:
            The modified AssetProcessingContext. When the 'skip_asset' status
            flag is set the context is returned untouched.
        """
        asset_rule = context.asset_rule
        asset_label = asset_rule.asset_name if asset_rule else 'Unknown'

        if context.status_flags.get('skip_asset', False):
            logger.debug(f"Asset '{asset_label}': Skipping metadata initialization as 'skip_asset' is True.")
            return context

        logger.debug(f"Asset '{asset_label}': Initializing metadata.")

        metadata = {}
        context.asset_metadata = metadata
        context.processed_maps_details = {}
        context.merged_maps_details = {}

        # --- Asset rule derived fields ---
        if asset_rule:
            # A missing or None common_metadata is treated as "no metadata provided".
            common_metadata = getattr(asset_rule, 'common_metadata', None) or {}

            metadata['asset_name'] = asset_rule.asset_name

            # Prefer 'id', then 'asset_id'; an explicit None 'id' must not hide 'asset_id'.
            asset_id_val = common_metadata.get('id')
            if asset_id_val is None:
                asset_id_val = common_metadata.get('asset_id')
            if asset_id_val is None:
                logger.warning(f"Asset '{asset_rule.asset_name}': No 'id' or 'asset_id' found in common_metadata. Using asset_name as asset_id.")
                asset_id_val = asset_rule.asset_name
            metadata['asset_id'] = str(asset_id_val)

            metadata['source_path'] = str(common_metadata.get('source_path', 'N/A'))
            metadata['output_path_pattern'] = common_metadata.get('output_path_pattern', 'N/A')
            # Copies, so later edits of the metadata do not write back into the rule.
            metadata['tags'] = list(common_metadata.get('tags') or [])
            metadata['custom_fields'] = dict(common_metadata.get('custom_fields') or {})
        else:
            logger.warning("AssetRule is not set in context during metadata initialization.")
            metadata['asset_name'] = "Unknown Asset"
            metadata['asset_id'] = "N/A"
            metadata['source_path'] = "N/A"
            metadata['output_path_pattern'] = "N/A"
            metadata['tags'] = []
            metadata['custom_fields'] = {}

        # --- Source rule derived fields ---
        source_rule = context.source_rule
        if source_rule:
            # 'input_path' stands in for a name; the id is read from
            # high_level_sorting_parameters with a placeholder default.
            source_rule_name_val = source_rule.input_path if source_rule.input_path else "Unknown Source Rule Path"
            sorting_parameters = getattr(source_rule, 'high_level_sorting_parameters', None) or {}
            source_rule_id_val = sorting_parameters.get('id', "N/A_SR_ID")
            logger.debug(f"SourceRule: using input_path '{source_rule_name_val}' as name, and '{source_rule_id_val}' as id.")
            metadata['source_rule_name'] = source_rule_name_val
            metadata['source_rule_id'] = str(source_rule_id_val)
        else:
            logger.warning("SourceRule is not set in context during metadata initialization.")
            metadata['source_rule_name'] = "Unknown Source Rule"
            metadata['source_rule_id'] = "N/A"

        metadata['effective_supplier'] = context.effective_supplier
        metadata['processing_start_time'] = datetime.datetime.now().isoformat()
        metadata['status'] = "Pending"

        # --- Application version ---
        # general_settings is accepted either as an object with an 'app_version'
        # attribute or as a dict with an 'app_version' key.
        general_settings = getattr(context.config_obj, 'general_settings', None) if context.config_obj else None
        if isinstance(general_settings, dict) and 'app_version' in general_settings:
            metadata['version'] = general_settings['app_version']
        elif general_settings is not None and hasattr(general_settings, 'app_version'):
            metadata['version'] = general_settings.app_version
        else:
            logger.warning("App version not found in config_obj.general_settings. Setting version to 'N/A'.")
            metadata['version'] = "N/A"

        incrementing_value = getattr(context, 'incrementing_value', None)
        if incrementing_value is not None:
            metadata['incrementing_value'] = incrementing_value

        # 'sha5_value' wins when present; 'sha256_value' is the fallback attribute.
        if getattr(context, 'sha5_value', None) is not None:
            metadata['sha5_value'] = context.sha5_value
        elif getattr(context, 'sha256_value', None) is not None:
            logger.debug("sha5_value not found, using sha256_value if available for metadata.")
            metadata['sha256_value'] = context.sha256_value

        logger.info(f"Asset '{metadata.get('asset_name', 'Unknown')}': Metadata initialized.")
        return context
+ """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + if context.status_flags.get('skip_asset'): + logger.debug(f"Asset '{asset_name_for_log}': Skipping NormalMapGreenChannelStage due to skip_asset flag.") + return context + + if not context.processed_maps_details: # Check processed_maps_details primarily + logger.debug( + f"Asset '{asset_name_for_log}': No processed_maps_details in NormalMapGreenChannelStage. Skipping." + ) + return context + + processed_a_normal_map = False + + # Iterate through processed maps, as FileRule objects don't have IDs directly + for map_id_hex, map_details in context.processed_maps_details.items(): + # Check if the map is a processed normal map using the standardized internal_map_type + internal_map_type = map_details.get('internal_map_type') + if internal_map_type and internal_map_type.startswith("MAP_NRM") and map_details.get('status') == 'Processed': + + # Check configuration for inversion + # Assuming general_settings is an attribute of config_obj and might be a dict or an object + should_invert = False + if hasattr(context.config_obj, 'general_settings'): + if isinstance(context.config_obj.general_settings, dict): + should_invert = context.config_obj.general_settings.get('invert_normal_map_green_channel_globally', False) + elif hasattr(context.config_obj.general_settings, 'invert_normal_map_green_channel_globally'): + should_invert = getattr(context.config_obj.general_settings, 'invert_normal_map_green_channel_globally', False) + + original_temp_path_str = map_details.get('temp_processed_file') + if not original_temp_path_str: + logger.warning(f"Asset '{asset_name_for_log}': Normal map (ID: {map_id_hex}) missing 'temp_processed_file' in details. 
Skipping.") + continue + + original_temp_path = Path(original_temp_path_str) + original_filename_for_log = original_temp_path.name + + if not should_invert: + logger.debug( + f"Asset '{asset_name_for_log}': Normal map green channel inversion not enabled. " + f"Skipping for {original_filename_for_log} (ID: {map_id_hex})." + ) + continue + + if not original_temp_path.exists(): + logger.error( + f"Asset '{asset_name_for_log}': Temporary file {original_temp_path} for normal map " + f"{original_filename_for_log} (ID: {map_id_hex}) does not exist. Cannot invert green channel." + ) + continue + + image_data = ipu.load_image(original_temp_path) + + if image_data is None: + logger.error( + f"Asset '{asset_name_for_log}': Failed to load image from {original_temp_path} " + f"for normal map {original_filename_for_log} (ID: {map_id_hex})." + ) + continue + + if image_data.ndim != 3 or image_data.shape[2] < 2: # Must have at least R, G channels + logger.error( + f"Asset '{asset_name_for_log}': Image {original_temp_path} for normal map " + f"{original_filename_for_log} (ID: {map_id_hex}) is not a valid RGB/normal map " + f"(ndim={image_data.ndim}, channels={image_data.shape[2] if image_data.ndim == 3 else 'N/A'}) " + f"for green channel inversion." + ) + continue + + # Perform Green Channel Inversion + modified_image_data = image_data.copy() + try: + if np.issubdtype(modified_image_data.dtype, np.floating): + modified_image_data[:, :, 1] = 1.0 - modified_image_data[:, :, 1] + elif np.issubdtype(modified_image_data.dtype, np.integer): + max_val = np.iinfo(modified_image_data.dtype).max + modified_image_data[:, :, 1] = max_val - modified_image_data[:, :, 1] + else: + logger.error( + f"Asset '{asset_name_for_log}': Unsupported image data type " + f"{modified_image_data.dtype} for normal map {original_temp_path}. Cannot invert green channel." 
+ ) + continue + except IndexError: + logger.error( + f"Asset '{asset_name_for_log}': Image {original_temp_path} for normal map " + f"{original_filename_for_log} (ID: {map_id_hex}) does not have a green channel (index 1) " + f"or has unexpected dimensions ({modified_image_data.shape}). Cannot invert." + ) + continue + + # Save New Temporary (Modified Normal) Map + # Sanitize map_details.get('map_type') in case it's missing, though it should be 'NORMAL' here + map_type_for_filename = sanitize_filename(map_details.get('map_type', 'NORMAL')) + new_temp_filename = f"normal_g_inv_{map_type_for_filename}_{map_id_hex}{original_temp_path.suffix}" + new_temp_path = context.engine_temp_dir / new_temp_filename + + save_success = ipu.save_image(new_temp_path, modified_image_data) + + if save_success: + logger.info( + f"Asset '{asset_name_for_log}': Inverted green channel for NORMAL map " + f"{original_filename_for_log}, saved to {new_temp_path.name}." + ) + # Update processed_maps_details for this map_id_hex + context.processed_maps_details[map_id_hex]['temp_processed_file'] = str(new_temp_path) + current_notes = context.processed_maps_details[map_id_hex].get('notes', '') + context.processed_maps_details[map_id_hex]['notes'] = \ + f"{current_notes}; Green channel inverted by NormalMapGreenChannelStage".strip('; ') + + processed_a_normal_map = True + else: + logger.error( + f"Asset '{asset_name_for_log}': Failed to save inverted normal map to {new_temp_path} " + f"for original {original_filename_for_log}." + ) + # No need to explicitly manage new_files_to_process list in this loop, + # as we are modifying the temp_processed_file path within processed_maps_details. + # The existing FileRule objects in context.files_to_process (if any) would + # be linked to these details by a previous stage (e.g. IndividualMapProcessing) + # if that stage populates a 'file_rule_id' in map_details. 
+ + # context.files_to_process remains unchanged by this stage directly, + # as we modify the data pointed to by processed_maps_details. + + if processed_a_normal_map: + logger.info(f"Asset '{asset_name_for_log}': NormalMapGreenChannelStage processed relevant normal maps.") + else: + logger.debug(f"Asset '{asset_name_for_log}': No normal maps found or processed in NormalMapGreenChannelStage.") + + return context \ No newline at end of file diff --git a/processing/pipeline/stages/output_organization.py b/processing/pipeline/stages/output_organization.py new file mode 100644 index 0000000..7a9d9d0 --- /dev/null +++ b/processing/pipeline/stages/output_organization.py @@ -0,0 +1,293 @@ +import logging +import shutil +from pathlib import Path +from typing import List, Dict, Optional + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext +from utils.path_utils import generate_path_from_pattern, sanitize_filename, get_filename_friendly_map_type # Absolute import +from rule_structure import FileRule # Assuming these are needed for type hints if not directly in context + +log = logging.getLogger(__name__) +logger = logging.getLogger(__name__) + +class OutputOrganizationStage(ProcessingStage): + """ + Organizes output files by copying temporary processed files to their final destinations. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + log.info("OUTPUT_ORG: Stage execution started for asset '%s'", context.asset_rule.asset_name) + log.info(f"OUTPUT_ORG: context.processed_maps_details at start: {context.processed_maps_details}") + """ + Copies temporary processed and merged files to their final output locations + based on path patterns and updates AssetProcessingContext. 
+ """ + asset_name_for_log = context.asset_rule.asset_name if hasattr(context, 'asset_rule') and context.asset_rule else "Unknown Asset" + logger.debug(f"Asset '{asset_name_for_log}': Starting output organization stage.") + + if context.status_flags.get('skip_asset'): + logger.info(f"Asset '{asset_name_for_log}': Output organization skipped as 'skip_asset' is True.") + return context + + current_status = context.asset_metadata.get('status', '') + if current_status.startswith("Failed") or current_status == "Skipped": + logger.info(f"Asset '{asset_name_for_log}': Output organization skipped due to prior status: {current_status}.") + return context + + final_output_files: List[str] = [] + overwrite_existing = context.config_obj.overwrite_existing + + output_dir_pattern = getattr(context.config_obj, 'output_directory_pattern', "[supplier]/[assetname]") + output_filename_pattern_config = getattr(context.config_obj, 'output_filename_pattern', "[assetname]_[maptype]_[resolution].[ext]") + + + # A. Organize Processed Individual Maps + if context.processed_maps_details: + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(context.processed_maps_details)} processed individual map entries.") + for processed_map_key, details in context.processed_maps_details.items(): + map_status = details.get('status') + # Retrieve the internal map type first + internal_map_type = details.get('internal_map_type', 'unknown_map_type') + # Convert internal type to filename-friendly type using the helper + file_type_definitions = getattr(context.config_obj, "FILE_TYPE_DEFINITIONS", {}) + base_map_type = get_filename_friendly_map_type(internal_map_type, file_type_definitions) # Final filename-friendly type + + # --- Handle maps processed by the SaveVariantsStage (identified by having saved_files_info) --- + saved_files_info = details.get('saved_files_info') # This is a list of dicts from SaveVariantsOutput + + # Check if 'saved_files_info' exists and is a non-empty list. 
+ # This indicates the item was processed by SaveVariantsStage. + if saved_files_info and isinstance(saved_files_info, list) and len(saved_files_info) > 0: + logger.debug(f"Asset '{asset_name_for_log}': Organizing {len(saved_files_info)} variants for map key '{processed_map_key}' (map type: {base_map_type}) from SaveVariantsStage.") + + # Use base_map_type (e.g., "COL") as the key for the map entry + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(base_map_type, {}) + # map_type is now the key, so no need to store it inside the entry + # map_metadata_entry['map_type'] = base_map_type + map_metadata_entry.setdefault('variant_paths', {}) # Initialize if not present + + processed_any_variant_successfully = False + failed_any_variant = False + + for variant_index, variant_detail in enumerate(saved_files_info): + # Extract info from the save utility's output structure + temp_variant_path_str = variant_detail.get('path') # Key is 'path' + if not temp_variant_path_str: + logger.warning(f"Asset '{asset_name_for_log}': Variant {variant_index} for map '{processed_map_key}' is missing 'path' in saved_files_info. Skipping.") + # Optionally update variant_detail status if it's mutable and tracked, otherwise just skip + continue + + temp_variant_path = Path(temp_variant_path_str) + if not temp_variant_path.is_file(): + logger.warning(f"Asset '{asset_name_for_log}': Temporary variant file '{temp_variant_path}' for map '{processed_map_key}' not found. 
Skipping.") + continue + + variant_resolution_key = variant_detail.get('resolution_key', f"varRes{variant_index}") + variant_ext = variant_detail.get('format', temp_variant_path.suffix.lstrip('.')) # Use 'format' key + + token_data_variant = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": base_map_type, + "resolution": variant_resolution_key, + "ext": variant_ext, + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_variant_cleaned = {k: v for k, v in token_data_variant.items() if v is not None} + output_filename_variant = generate_path_from_pattern(output_filename_pattern_config, token_data_variant_cleaned) + + try: + relative_dir_path_str_variant = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_variant_cleaned + ) + final_variant_path = Path(context.output_base_path) / Path(relative_dir_path_str_variant) / Path(output_filename_variant) + final_variant_path.parent.mkdir(parents=True, exist_ok=True) + + if final_variant_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': Output variant file {final_variant_path} for map '{processed_map_key}' (res: {variant_resolution_key}) exists and overwrite is disabled. 
Skipping copy.") + # Optionally update variant_detail status if needed + else: + shutil.copy2(temp_variant_path, final_variant_path) + logger.info(f"Asset '{asset_name_for_log}': Copied variant {temp_variant_path} to {final_variant_path} for map '{processed_map_key}'.") + final_output_files.append(str(final_variant_path)) + # Optionally update variant_detail status if needed + + # Store relative path in metadata + # Store only the filename, as it's relative to the metadata.json location + map_metadata_entry['variant_paths'][variant_resolution_key] = output_filename_variant + processed_any_variant_successfully = True + + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy variant {temp_variant_path} for map key '{processed_map_key}' (res: {variant_resolution_key}). Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error - Variant)" + # Optionally update variant_detail status if needed + failed_any_variant = True + + # Update parent map detail status based on variant outcomes + if failed_any_variant: + details['status'] = 'Organization Failed (Save Utility Variants)' + elif processed_any_variant_successfully: + details['status'] = 'Organized (Save Utility Variants)' + else: # No variants were successfully copied (e.g., all skipped due to existing file or missing temp file) + details['status'] = 'Organization Skipped (No Save Utility Variants Copied/Needed)' + + # --- Handle older/other processing statuses (like single file processing) --- + elif map_status in ['Processed', 'Processed_No_Variants', 'Converted_To_Rough']: # Add other single-file statuses if needed + temp_file_path_str = details.get('temp_processed_file') + if not temp_file_path_str: + logger.warning(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status '{map_status}') due to missing 'temp_processed_file'.") + details['status'] = 'Organization 
Skipped (Missing Temp File)' + continue + + temp_file_path = Path(temp_file_path_str) + if not temp_file_path.is_file(): + logger.warning(f"Asset '{asset_name_for_log}': Temporary file '{temp_file_path}' for map '{processed_map_key}' not found. Skipping.") + details['status'] = 'Organization Skipped (Temp File Not Found)' + continue + + resolution_str = details.get('processed_resolution_name', details.get('original_resolution_name', 'resX')) + + token_data = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + "maptype": base_map_type, + "resolution": resolution_str, + "ext": temp_file_path.suffix.lstrip('.'), + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + token_data_cleaned = {k: v for k, v in token_data.items() if v is not None} + + output_filename = generate_path_from_pattern(output_filename_pattern_config, token_data_cleaned) + + try: + relative_dir_path_str = generate_path_from_pattern( + pattern_string=output_dir_pattern, + token_data=token_data_cleaned + ) + final_path = Path(context.output_base_path) / Path(relative_dir_path_str) / Path(output_filename) + final_path.parent.mkdir(parents=True, exist_ok=True) + + if final_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': Output file {final_path} for map '{processed_map_key}' exists and overwrite is disabled. 
Skipping copy.") + details['status'] = 'Organized (Exists, Skipped Copy)' + else: + shutil.copy2(temp_file_path, final_path) + logger.info(f"Asset '{asset_name_for_log}': Copied {temp_file_path} to {final_path} for map '{processed_map_key}'.") + final_output_files.append(str(final_path)) + details['status'] = 'Organized' + + details['final_output_path'] = str(final_path) + + # Update asset_metadata for metadata.json + # Use base_map_type (e.g., "COL") as the key for the map entry + map_metadata_entry = context.asset_metadata.setdefault('maps', {}).setdefault(base_map_type, {}) + # map_type is now the key, so no need to store it inside the entry + # map_metadata_entry['map_type'] = base_map_type + # Store single path in variant_paths, keyed by its resolution string + # Store only the filename, as it's relative to the metadata.json location + map_metadata_entry.setdefault('variant_paths', {})[resolution_str] = output_filename + # Remove old cleanup logic, as variant_paths is now the standard + # if 'variant_paths' in map_metadata_entry: + # del map_metadata_entry['variant_paths'] + + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy {temp_file_path} for map key '{processed_map_key}'. Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error)" + details['status'] = 'Organization Failed' + + # --- Handle other statuses (Skipped, Failed, etc.) --- + else: # Catches statuses not explicitly handled above + logger.debug(f"Asset '{asset_name_for_log}': Skipping map key '{processed_map_key}' (status: '{map_status}') for organization as it's not a recognized final processed state or variant state.") + continue + else: + logger.debug(f"Asset '{asset_name_for_log}': No processed individual maps to organize.") + + # B. 
Organize Merged Maps (OBSOLETE BLOCK - Merged maps are handled by the main loop processing context.processed_maps_details) + # The log "No merged maps to organize" will no longer appear from here. + # If merged maps are not appearing, the issue is likely that they are not being added + # to context.processed_maps_details with 'saved_files_info' by the orchestrator/SaveVariantsStage. + + # C. Organize Extra Files (e.g., previews, text files) + logger.debug(f"Asset '{asset_name_for_log}': Checking for EXTRA files to organize.") + extra_files_organized_count = 0 + if hasattr(context, 'files_to_process') and context.files_to_process: + extra_subdir_name = getattr(context.config_obj, 'extra_files_subdir', 'Extra') # Default to 'Extra' + + for file_rule in context.files_to_process: + if file_rule.item_type == 'EXTRA': + source_file_path = context.workspace_path / file_rule.file_path + if not source_file_path.is_file(): + logger.warning(f"Asset '{asset_name_for_log}': EXTRA file '{source_file_path}' not found. Skipping.") + continue + + # Basic token data for the asset's base output directory + # We don't use map_type, resolution, or ext for the base directory of extras. + # However, generate_path_from_pattern might expect them or handle their absence. + # For the base asset directory, only assetname and supplier are typically primary. 
+ base_token_data = { + "assetname": asset_name_for_log, + "supplier": context.effective_supplier or "DefaultSupplier", + # Add other tokens if your output_directory_pattern uses them at the asset level + "incrementingvalue": getattr(context, 'incrementing_value', None), + "sha5": getattr(context, 'sha5_value', None) + } + base_token_data_cleaned = {k: v for k, v in base_token_data.items() if v is not None} + + try: + asset_base_output_dir_str = generate_path_from_pattern( + pattern_string=output_dir_pattern, # Uses the same pattern as other maps for base dir + token_data=base_token_data_cleaned + ) + # Destination: output_base_path / asset_base_output_dir / extra_subdir_name / original_filename + final_dest_path = (Path(context.output_base_path) / + Path(asset_base_output_dir_str) / + Path(extra_subdir_name) / + source_file_path.name) # Use original filename + + final_dest_path.parent.mkdir(parents=True, exist_ok=True) + + if final_dest_path.exists() and not overwrite_existing: + logger.info(f"Asset '{asset_name_for_log}': EXTRA file destination {final_dest_path} exists and overwrite is disabled. Skipping copy.") + else: + shutil.copy2(source_file_path, final_dest_path) + logger.info(f"Asset '{asset_name_for_log}': Copied EXTRA file {source_file_path} to {final_dest_path}") + final_output_files.append(str(final_dest_path)) + extra_files_organized_count += 1 + + # Optionally, add more detailed tracking for extra files in context.asset_metadata + # For example: + # if 'extra_files_details' not in context.asset_metadata: + # context.asset_metadata['extra_files_details'] = [] + # context.asset_metadata['extra_files_details'].append({ + # 'source_path': str(source_file_path), + # 'destination_path': str(final_dest_path), + # 'status': 'Organized' + # }) + + except Exception as e: + logger.error(f"Asset '{asset_name_for_log}': Failed to copy EXTRA file {source_file_path} to destination. 
Error: {e}", exc_info=True) + context.status_flags['output_organization_error'] = True + context.asset_metadata['status'] = "Failed (Output Organization Error - Extra Files)" + # Optionally, update status for the specific file_rule if tracked + + if extra_files_organized_count > 0: + logger.info(f"Asset '{asset_name_for_log}': Successfully organized {extra_files_organized_count} EXTRA file(s).") + else: + logger.debug(f"Asset '{asset_name_for_log}': No EXTRA files were processed or found to organize.") + + + context.asset_metadata['final_output_files'] = final_output_files + + if context.status_flags.get('output_organization_error'): + logger.error(f"Asset '{asset_name_for_log}': Output organization encountered errors. Status: {context.asset_metadata['status']}") + else: + logger.info(f"Asset '{asset_name_for_log}': Output organization complete. {len(final_output_files)} files placed.") + + logger.debug(f"Asset '{asset_name_for_log}': Output organization stage finished.") + return context \ No newline at end of file diff --git a/processing/pipeline/stages/prepare_processing_items.py b/processing/pipeline/stages/prepare_processing_items.py new file mode 100644 index 0000000..cdfc2ac --- /dev/null +++ b/processing/pipeline/stages/prepare_processing_items.py @@ -0,0 +1,105 @@ +import logging +from typing import List, Union, Optional + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext, MergeTaskDefinition +from rule_structure import FileRule # Assuming FileRule is imported correctly + +log = logging.getLogger(__name__) + +class PrepareProcessingItemsStage(ProcessingStage): + """ + Identifies and prepares a unified list of items (FileRule, MergeTaskDefinition) + to be processed in subsequent stages. Performs initial validation. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Populates context.processing_items with FileRule and MergeTaskDefinition objects. 
+ """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + log.info(f"Asset '{asset_name_for_log}': Preparing processing items...") + + if context.status_flags.get('skip_asset', False): + log.info(f"Asset '{asset_name_for_log}': Skipping item preparation due to skip_asset flag.") + context.processing_items = [] + return context + + items_to_process: List[Union[FileRule, MergeTaskDefinition]] = [] + preparation_failed = False + + # --- Add regular files --- + if context.files_to_process: + # Validate source path early for regular files + source_path_valid = True + if not context.source_rule or not context.source_rule.input_path: + log.error(f"Asset '{asset_name_for_log}': SourceRule or SourceRule.input_path is not set. Cannot process regular files.") + source_path_valid = False + preparation_failed = True # Mark as failed if source path is missing + context.status_flags['prepare_items_failed_reason'] = "SourceRule.input_path missing" + elif not context.workspace_path or not context.workspace_path.is_dir(): + log.error(f"Asset '{asset_name_for_log}': Workspace path '{context.workspace_path}' is not a valid directory. 
Cannot process regular files.") + source_path_valid = False + preparation_failed = True # Mark as failed if workspace path is bad + context.status_flags['prepare_items_failed_reason'] = "Workspace path invalid" + + if source_path_valid: + for file_rule in context.files_to_process: + # Basic validation for FileRule itself + if not file_rule.file_path: + log.warning(f"Asset '{asset_name_for_log}': Skipping FileRule with empty file_path.") + continue # Skip this specific rule, but don't fail the whole stage + items_to_process.append(file_rule) + log.debug(f"Asset '{asset_name_for_log}': Added {len(context.files_to_process)} potential FileRule items.") + else: + log.warning(f"Asset '{asset_name_for_log}': Skipping addition of all FileRule items due to invalid source/workspace path.") + + + # --- Add merged tasks --- + # --- Add merged tasks from global configuration --- + # merged_image_tasks are expected to be loaded into context.config_obj + # by the Configuration class from app_settings.json. + + merged_tasks_list = getattr(context.config_obj, 'map_merge_rules', None) + + if merged_tasks_list and isinstance(merged_tasks_list, list): + log.debug(f"Asset '{asset_name_for_log}': Found {len(merged_tasks_list)} merge tasks in global config.") + for task_idx, task_data in enumerate(merged_tasks_list): + if isinstance(task_data, dict): + task_key = f"merged_task_{task_idx}" + # Basic validation for merge task data: requires output_map_type and an inputs dictionary + if not task_data.get('output_map_type') or not isinstance(task_data.get('inputs'), dict): + log.warning(f"Asset '{asset_name_for_log}', Task Index {task_idx}: Skipping merge task due to missing 'output_map_type' or valid 'inputs' dictionary. 
Task data: {task_data}") + continue # Skip this specific task + log.debug(f"Asset '{asset_name_for_log}', Preparing Merge Task Index {task_idx}: Raw task_data: {task_data}") + merge_def = MergeTaskDefinition(task_data=task_data, task_key=task_key) + log.debug(f"Asset '{asset_name_for_log}': Created MergeTaskDefinition object: {merge_def}") + log.info(f"Asset '{asset_name_for_log}': Successfully CREATED MergeTaskDefinition: Key='{merge_def.task_key}', OutputType='{merge_def.task_data.get('output_map_type', 'N/A')}'") + items_to_process.append(merge_def) + else: + log.warning(f"Asset '{asset_name_for_log}': Item at index {task_idx} in config_obj.merged_image_tasks is not a dictionary. Skipping. Item: {task_data}") + # The log for "Added X potential MergeTaskDefinition items" will be covered by the final log. + elif merged_tasks_list is None: + log.debug(f"Asset '{asset_name_for_log}': 'merged_image_tasks' not found in config_obj. No global merge tasks to add.") + elif not isinstance(merged_tasks_list, list): + log.warning(f"Asset '{asset_name_for_log}': 'merged_image_tasks' in config_obj is not a list. Skipping global merge tasks. Type: {type(merged_tasks_list)}") + else: # Empty list + log.debug(f"Asset '{asset_name_for_log}': 'merged_image_tasks' in config_obj is empty. No global merge tasks to add.") + + + if not items_to_process: + log.info(f"Asset '{asset_name_for_log}': No valid items found to process after preparation.") + + log.debug(f"Asset '{asset_name_for_log}': Final items_to_process before assigning to context: {items_to_process}") + context.processing_items = items_to_process + context.intermediate_results = {} # Initialize intermediate results storage + + if preparation_failed: + # Set a flag indicating failure during preparation, even if some items might have been added before failure + context.status_flags['prepare_items_failed'] = True + log.error(f"Asset '{asset_name_for_log}': Item preparation failed. 
Reason: {context.status_flags.get('prepare_items_failed_reason', 'Unknown')}") + # Optionally, clear items if failure means nothing should proceed + # context.processing_items = [] + + log.info(f"Asset '{asset_name_for_log}': Finished preparing items. Found {len(context.processing_items)} valid items.") + return context \ No newline at end of file diff --git a/processing/pipeline/stages/regular_map_processor.py b/processing/pipeline/stages/regular_map_processor.py new file mode 100644 index 0000000..964aaf8 --- /dev/null +++ b/processing/pipeline/stages/regular_map_processor.py @@ -0,0 +1,213 @@ +import logging +import re +from pathlib import Path +from typing import List, Optional, Tuple, Dict + +import cv2 +import numpy as np + +from .base_stage import ProcessingStage # Assuming base_stage is in the same directory +from ..asset_context import AssetProcessingContext, ProcessedRegularMapData +from rule_structure import FileRule, AssetRule +from processing.utils import image_processing_utils as ipu # Absolute import +from utils.path_utils import get_filename_friendly_map_type # Absolute import + +log = logging.getLogger(__name__) + + +class RegularMapProcessorStage(ProcessingStage): + """ + Processes a single regular texture map defined by a FileRule. + Loads the image, determines map type, applies transformations, + and returns the processed data. + """ + + # --- Helper Methods (Adapted from IndividualMapProcessingStage) --- + + def _get_suffixed_internal_map_type( + self, + asset_rule: Optional[AssetRule], + current_file_rule: FileRule, + initial_internal_map_type: str, + respect_variant_map_types: List[str], + asset_name_for_log: str + ) -> str: + """ + Determines the potentially suffixed internal map type (e.g., MAP_COL-1). 
+ """ + final_internal_map_type = initial_internal_map_type # Default + + base_map_type_match = re.match(r"(MAP_[A-Z]{3})", initial_internal_map_type) + if not base_map_type_match or not asset_rule or not asset_rule.files: + return final_internal_map_type # Cannot determine suffix without base type or asset rule files + + true_base_map_type = base_map_type_match.group(1) # This is "MAP_XXX" + + # Find all FileRules in the asset with the same base map type + peers_of_same_base_type = [] + for fr_asset in asset_rule.files: + fr_asset_item_type = fr_asset.item_type_override or fr_asset.item_type or "UnknownMapType" + fr_asset_base_match = re.match(r"(MAP_[A-Z]{3})", fr_asset_item_type) + if fr_asset_base_match and fr_asset_base_match.group(1) == true_base_map_type: + peers_of_same_base_type.append(fr_asset) + + num_occurrences = len(peers_of_same_base_type) + current_instance_index = 0 # 1-based index + + try: + # Find the index based on the FileRule object itself (requires object identity) + current_instance_index = peers_of_same_base_type.index(current_file_rule) + 1 + except ValueError: + # Fallback: try matching by file_path if object identity fails (less reliable) + try: + current_instance_index = [fr.file_path for fr in peers_of_same_base_type].index(current_file_rule.file_path) + 1 + log.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Found peer index using file_path fallback for suffixing.") + except (ValueError, AttributeError): # Catch AttributeError if file_path is None + log.warning( + f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}' (Initial Type: '{initial_internal_map_type}', Base: '{true_base_map_type}'): " + f"Could not find its own instance in the list of {num_occurrences} peers from asset_rule.files using object identity or path. Suffixing may be incorrect." 
+ ) + # Keep index 0, suffix logic below will handle it + + # Determine Suffix + map_type_for_respect_check = true_base_map_type.replace("MAP_", "") # e.g., "COL" + is_in_respect_list = map_type_for_respect_check in respect_variant_map_types + + suffix_to_append = "" + if num_occurrences > 1: + if current_instance_index > 0: + suffix_to_append = f"-{current_instance_index}" + else: + # If index is still 0 (not found), don't add suffix to avoid ambiguity + log.warning(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Index for multi-occurrence map type '{true_base_map_type}' (count: {num_occurrences}) not determined. Omitting numeric suffix.") + elif num_occurrences == 1 and is_in_respect_list: + suffix_to_append = "-1" # Add suffix even for single instance if in respect list + + if suffix_to_append: + final_internal_map_type = true_base_map_type + suffix_to_append + + if final_internal_map_type != initial_internal_map_type: + log.debug(f"Asset '{asset_name_for_log}', FileRule path '{current_file_rule.file_path}': Suffixed internal map type determined: '{initial_internal_map_type}' -> '{final_internal_map_type}'") + + return final_internal_map_type + + + # --- Execute Method --- + + def execute( + self, + context: AssetProcessingContext, + file_rule: FileRule # Specific item passed by orchestrator + ) -> ProcessedRegularMapData: + """ + Processes the given FileRule item. 
+ """ + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + log_prefix = f"Asset '{asset_name_for_log}', File '{file_rule.file_path}'" + log.info(f"{log_prefix}: Processing Regular Map.") + + # Initialize output object with default failure state + result = ProcessedRegularMapData( + processed_image_data=np.array([]), # Placeholder + final_internal_map_type="Unknown", + source_file_path=Path(file_rule.file_path or "InvalidPath"), + original_bit_depth=None, + original_dimensions=None, + transformations_applied=[], + status="Failed", + error_message="Initialization error" + ) + + try: + # --- Configuration --- + config = context.config_obj + file_type_definitions = getattr(config, "FILE_TYPE_DEFINITIONS", {}) + respect_variant_map_types = getattr(config, "respect_variant_map_types", []) + invert_normal_green = config.invert_normal_green_globally + + # --- Determine Map Type (with suffix) --- + initial_internal_map_type = file_rule.item_type_override or file_rule.item_type or "UnknownMapType" + if not initial_internal_map_type or initial_internal_map_type == "UnknownMapType": + result.error_message = "Map type (item_type) not defined in FileRule." + log.error(f"{log_prefix}: {result.error_message}") + return result # Early exit + + # Explicitly skip if the determined type doesn't start with "MAP_" + if not initial_internal_map_type.startswith("MAP_"): + result.status = "Skipped (Invalid Type)" + result.error_message = f"FileRule item_type '{initial_internal_map_type}' does not start with 'MAP_'. Skipping processing." 
+ log.warning(f"{log_prefix}: {result.error_message}") + return result # Early exit + + processing_map_type = self._get_suffixed_internal_map_type( + context.asset_rule, file_rule, initial_internal_map_type, respect_variant_map_types, asset_name_for_log + ) + result.final_internal_map_type = processing_map_type # Store initial suffixed type + + # --- Find and Load Source File --- + if not file_rule.file_path: # Should have been caught by Prepare stage, but double-check + result.error_message = "FileRule has empty file_path." + log.error(f"{log_prefix}: {result.error_message}") + return result + + source_base_path = context.workspace_path + potential_source_path = source_base_path / file_rule.file_path + source_file_path_found: Optional[Path] = None + + if potential_source_path.is_file(): + source_file_path_found = potential_source_path + log.info(f"{log_prefix}: Found source file: {source_file_path_found}") + else: + # Optional: Add globbing fallback if needed, similar to original stage + log.warning(f"{log_prefix}: Source file not found directly at '{potential_source_path}'. Add globbing if necessary.") + result.error_message = f"Source file not found at '{potential_source_path}'" + log.error(f"{log_prefix}: {result.error_message}") + return result + + result.source_file_path = source_file_path_found # Update result with found path + + # Load image + source_image_data = ipu.load_image(str(source_file_path_found)) + if source_image_data is None: + result.error_message = f"Failed to load image from '{source_file_path_found}'." 
+ log.error(f"{log_prefix}: {result.error_message}") + return result + + original_height, original_width = source_image_data.shape[:2] + result.original_dimensions = (original_width, original_height) + log.debug(f"{log_prefix}: Loaded image {result.original_dimensions[0]}x{result.original_dimensions[1]}.") + + # Get original bit depth + try: + result.original_bit_depth = ipu.get_image_bit_depth(str(source_file_path_found)) + log.info(f"{log_prefix}: Determined source bit depth: {result.original_bit_depth}") + except Exception as e: + log.warning(f"{log_prefix}: Could not determine source bit depth for {source_file_path_found}: {e}. Setting to None.") + result.original_bit_depth = None # Indicate failure to determine + + # --- Apply Transformations --- + transformed_image_data, final_map_type, transform_notes = ipu.apply_common_map_transformations( + source_image_data.copy(), # Pass a copy to avoid modifying original load + processing_map_type, + invert_normal_green, + file_type_definitions, + log_prefix + ) + result.processed_image_data = transformed_image_data + result.final_internal_map_type = final_map_type # Update if Gloss->Rough changed it + result.transformations_applied = transform_notes + + # --- Success --- + result.status = "Processed" + result.error_message = None + log.info(f"{log_prefix}: Successfully processed regular map. 
Final type: '{result.final_internal_map_type}'.") + + except Exception as e: + log.exception(f"{log_prefix}: Unhandled exception during processing: {e}") + result.status = "Failed" + result.error_message = f"Unhandled exception: {e}" + # Ensure image data is empty on failure if it wasn't set + if result.processed_image_data is None or result.processed_image_data.size == 0: + result.processed_image_data = np.array([]) + + return result \ No newline at end of file diff --git a/processing/pipeline/stages/save_variants.py b/processing/pipeline/stages/save_variants.py new file mode 100644 index 0000000..482b1cc --- /dev/null +++ b/processing/pipeline/stages/save_variants.py @@ -0,0 +1,89 @@ +import logging +from typing import List, Dict, Optional # Added Optional + +import numpy as np + +from .base_stage import ProcessingStage +# Import necessary context classes and utils +from ..asset_context import SaveVariantsInput, SaveVariantsOutput +from processing.utils import image_saving_utils as isu # Absolute import +from utils.path_utils import get_filename_friendly_map_type # Absolute import + +log = logging.getLogger(__name__) + + +class SaveVariantsStage(ProcessingStage): + """ + Takes final processed image data and configuration, calls the + save_image_variants utility, and returns the results. + """ + + def execute(self, input_data: SaveVariantsInput) -> SaveVariantsOutput: + """ + Calls isu.save_image_variants with data from input_data. + """ + internal_map_type = input_data.internal_map_type + log_prefix = f"Save Variants Stage (Type: {internal_map_type})" + log.info(f"{log_prefix}: Starting.") + + # Initialize output object with default failure state + result = SaveVariantsOutput( + saved_files_details=[], + status="Failed", + error_message="Initialization error" + ) + + if input_data.image_data is None or input_data.image_data.size == 0: + result.error_message = "Input image data is None or empty." 
+ log.error(f"{log_prefix}: {result.error_message}") + return result + + try: + # --- Prepare arguments for save_image_variants --- + + # Get the filename-friendly base map type using the helper + # This assumes the save utility expects the friendly type. Adjust if needed. + base_map_type_friendly = get_filename_friendly_map_type( + internal_map_type, input_data.file_type_defs + ) + log.debug(f"{log_prefix}: Using filename-friendly base type '{base_map_type_friendly}' for saving.") + + save_args = { + "source_image_data": input_data.image_data, + "base_map_type": base_map_type_friendly, # Use the friendly type + "source_bit_depth_info": input_data.source_bit_depth_info, + "image_resolutions": input_data.image_resolutions, + "file_type_defs": input_data.file_type_defs, + "output_format_8bit": input_data.output_format_8bit, + "output_format_16bit_primary": input_data.output_format_16bit_primary, + "output_format_16bit_fallback": input_data.output_format_16bit_fallback, + "png_compression_level": input_data.png_compression_level, + "jpg_quality": input_data.jpg_quality, + "output_filename_pattern_tokens": input_data.output_filename_pattern_tokens, + "output_filename_pattern": input_data.output_filename_pattern, + "resolution_threshold_for_jpg": input_data.resolution_threshold_for_jpg, # Added + } + + log.debug(f"{log_prefix}: Calling save_image_variants utility.") + saved_files_details: List[Dict] = isu.save_image_variants(**save_args) + + if saved_files_details: + log.info(f"{log_prefix}: Save utility completed successfully. Saved {len(saved_files_details)} variants.") + result.saved_files_details = saved_files_details + result.status = "Processed" + result.error_message = None + else: + # This might not be an error, maybe no variants were configured? + log.warning(f"{log_prefix}: Save utility returned no saved file details. 
This might be expected if no resolutions/formats matched.") + result.saved_files_details = [] + result.status = "Processed (No Output)" # Indicate processing happened but nothing saved + result.error_message = "Save utility reported no files saved (check configuration/resolutions)." + + + except Exception as e: + log.exception(f"{log_prefix}: Error calling or executing save_image_variants: {e}") + result.status = "Failed" + result.error_message = f"Save utility call failed: {e}" + result.saved_files_details = [] # Ensure empty list on error + + return result \ No newline at end of file diff --git a/processing/pipeline/stages/supplier_determination.py b/processing/pipeline/stages/supplier_determination.py new file mode 100644 index 0000000..15f5e5d --- /dev/null +++ b/processing/pipeline/stages/supplier_determination.py @@ -0,0 +1,67 @@ +import logging + +from .base_stage import ProcessingStage +from ..asset_context import AssetProcessingContext + +class SupplierDeterminationStage(ProcessingStage): + """ + Determines the effective supplier for an asset based on asset and source rules. + """ + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + """ + Determines and validates the effective supplier for the asset. + + Args: + context: The asset processing context. + + Returns: + The updated asset processing context. + """ + effective_supplier = None + logger = logging.getLogger(__name__) # Using a logger specific to this module + asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset" + + # 1. Check source_rule.supplier_override (highest precedence) + if context.source_rule and context.source_rule.supplier_override: + effective_supplier = context.source_rule.supplier_override + logger.debug(f"Asset '{asset_name_for_log}': Supplier override from source_rule found: '{effective_supplier}'.") + # 2. 
If not overridden, check source_rule.supplier_identifier + elif context.source_rule and context.source_rule.supplier_identifier: + effective_supplier = context.source_rule.supplier_identifier + logger.debug(f"Asset '{asset_name_for_log}': Supplier identifier from source_rule found: '{effective_supplier}'.") + + # 3. Validation + if not effective_supplier: + logger.error(f"Asset '{asset_name_for_log}': No supplier defined in source_rule (override or identifier).") + context.effective_supplier = None + if 'status_flags' not in context: # Ensure status_flags exists + context.status_flags = {} + context.status_flags['supplier_error'] = True + # Assuming context.config_obj.suppliers is a valid way to get the list of configured suppliers. + # This might need further investigation if errors occur here later. + elif context.config_obj and hasattr(context.config_obj, 'suppliers') and effective_supplier not in context.config_obj.suppliers: + logger.warning( + f"Asset '{asset_name_for_log}': Determined supplier '{effective_supplier}' not found in global supplier configuration. " + f"Available: {list(context.config_obj.suppliers.keys()) if context.config_obj.suppliers else 'None'}" + ) + context.effective_supplier = None + if 'status_flags' not in context: # Ensure status_flags exists + context.status_flags = {} + context.status_flags['supplier_error'] = True + else: + context.effective_supplier = effective_supplier + logger.info(f"Asset '{asset_name_for_log}': Effective supplier set to '{effective_supplier}'.") + # Optionally clear the error flag if previously set and now resolved. + if 'supplier_error' in context.status_flags: + del context.status_flags['supplier_error'] + + # merged_image_tasks are loaded from app_settings.json into Configuration object, + # not from supplier-specific presets. + # Ensure the attribute exists on context for PrepareProcessingItemsStage, + # which will get it from context.config_obj. 
+ if not hasattr(context, 'merged_image_tasks'): + context.merged_image_tasks = [] + + + return context \ No newline at end of file diff --git a/processing/utils/__init__.py b/processing/utils/__init__.py new file mode 100644 index 0000000..5f3ceb7 --- /dev/null +++ b/processing/utils/__init__.py @@ -0,0 +1 @@ +# This file makes the 'utils' directory a Python package. \ No newline at end of file diff --git a/processing/utils/image_processing_utils.py b/processing/utils/image_processing_utils.py new file mode 100644 index 0000000..70da34a --- /dev/null +++ b/processing/utils/image_processing_utils.py @@ -0,0 +1,515 @@ +import cv2 +import numpy as np +from pathlib import Path +import math +from typing import Optional, Union, List, Tuple, Dict + +# --- Basic Power-of-Two Utilities --- + +def is_power_of_two(n: int) -> bool: + """Checks if a number is a power of two.""" + return (n > 0) and (n & (n - 1) == 0) + +def get_nearest_pot(value: int) -> int: + """Finds the nearest power of two to the given value.""" + if value <= 0: + return 1 # POT must be positive, return 1 as a fallback + if is_power_of_two(value): + return value + + lower_pot = 1 << (value.bit_length() - 1) + upper_pot = 1 << value.bit_length() + + if (value - lower_pot) < (upper_pot - value): + return lower_pot + else: + return upper_pot + +def get_nearest_power_of_two_downscale(value: int) -> int: + """ + Finds the nearest power of two that is less than or equal to the given value. + If the value is already a power of two, it returns the value itself. + Returns 1 if the value is less than 1. + """ + if value < 1: + return 1 + if is_power_of_two(value): + return value + # Find the largest power of two strictly less than value, + # unless value itself is POT. + # (1 << (value.bit_length() - 1)) achieves this. + # Example: value=7 (0111, bl=3), 1<<2 = 4. + # Example: value=8 (1000, bl=4), 1<<3 = 8. + # Example: value=9 (1001, bl=4), 1<<3 = 8. 
+ return 1 << (value.bit_length() - 1) +# --- Dimension Calculation --- + +def calculate_target_dimensions( + original_width: int, + original_height: int, + target_width: Optional[int] = None, + target_height: Optional[int] = None, + resize_mode: str = "fit", # e.g., "fit", "stretch", "max_dim_pot" + ensure_pot: bool = False, + allow_upscale: bool = False, + target_max_dim_for_pot_mode: Optional[int] = None # Specific for "max_dim_pot" +) -> Tuple[int, int]: + """ + Calculates target dimensions based on various modes and constraints. + + Args: + original_width: Original width of the image. + original_height: Original height of the image. + target_width: Desired target width. + target_height: Desired target height. + resize_mode: + - "fit": Scales to fit within target_width/target_height, maintaining aspect ratio. + Requires at least one of target_width or target_height. + - "stretch": Scales to exactly target_width and target_height, ignoring aspect ratio. + Requires both target_width and target_height. + - "max_dim_pot": Scales to fit target_max_dim_for_pot_mode while maintaining aspect ratio, + then finds nearest POT for each dimension. Requires target_max_dim_for_pot_mode. + ensure_pot: If True, final dimensions will be adjusted to the nearest power of two. + allow_upscale: If False, dimensions will not exceed original dimensions unless ensure_pot forces it. + target_max_dim_for_pot_mode: Max dimension to use when resize_mode is "max_dim_pot". + + Returns: + A tuple (new_width, new_height). 
+ """ + if original_width <= 0 or original_height <= 0: + # Fallback for invalid original dimensions + fallback_dim = 1 + if ensure_pot: + if target_width and target_height: + fallback_dim = get_nearest_pot(max(target_width, target_height, 1)) + elif target_width: + fallback_dim = get_nearest_pot(target_width) + elif target_height: + fallback_dim = get_nearest_pot(target_height) + elif target_max_dim_for_pot_mode: + fallback_dim = get_nearest_pot(target_max_dim_for_pot_mode) + else: # Default POT if no target given + fallback_dim = 256 + return (fallback_dim, fallback_dim) + return (target_width or 1, target_height or 1) + + + w, h = original_width, original_height + + if resize_mode == "max_dim_pot": + if target_max_dim_for_pot_mode is None: + raise ValueError("target_max_dim_for_pot_mode must be provided for 'max_dim_pot' resize_mode.") + + # Logic adapted from old processing_engine.calculate_target_dimensions + ratio = w / h + if ratio > 1: # Width is dominant + scaled_w = target_max_dim_for_pot_mode + scaled_h = max(1, round(scaled_w / ratio)) + else: # Height is dominant or square + scaled_h = target_max_dim_for_pot_mode + scaled_w = max(1, round(scaled_h * ratio)) + + # Upscale check for this mode is implicitly handled by target_max_dim + # If ensure_pot is true (as it was in the original logic), it's applied here + # For this mode, ensure_pot is effectively always true for the final step + w = get_nearest_pot(scaled_w) + h = get_nearest_pot(scaled_h) + return int(w), int(h) + + elif resize_mode == "fit": + if target_width is None and target_height is None: + raise ValueError("At least one of target_width or target_height must be provided for 'fit' mode.") + + if target_width and target_height: + ratio_orig = w / h + ratio_target = target_width / target_height + if ratio_orig > ratio_target: # Original is wider than target aspect + w_new = target_width + h_new = max(1, round(w_new / ratio_orig)) + else: # Original is taller or same aspect + h_new = 
target_height + w_new = max(1, round(h_new * ratio_orig)) + elif target_width: + w_new = target_width + h_new = max(1, round(w_new / (w / h))) + else: # target_height is not None + h_new = target_height + w_new = max(1, round(h_new * (w / h))) + w, h = w_new, h_new + + elif resize_mode == "stretch": + if target_width is None or target_height is None: + raise ValueError("Both target_width and target_height must be provided for 'stretch' mode.") + w, h = target_width, target_height + + else: + raise ValueError(f"Unsupported resize_mode: {resize_mode}") + + if not allow_upscale: + if w > original_width: w = original_width + if h > original_height: h = original_height + + if ensure_pot: + w = get_nearest_pot(w) + h = get_nearest_pot(h) + # Re-check upscale if POT adjustment made it larger than original and not allowed + if not allow_upscale: + if w > original_width: w = get_nearest_pot(original_width) # Get closest POT to original + if h > original_height: h = get_nearest_pot(original_height) + + + return int(max(1, w)), int(max(1, h)) + + +# --- Image Statistics --- + +def get_image_bit_depth(image_path_str: str) -> Optional[int]: + """ + Determines the bit depth of an image file. + """ + try: + # Use IMREAD_UNCHANGED to preserve original bit depth + img = cv2.imread(image_path_str, cv2.IMREAD_UNCHANGED) + if img is None: + # logger.error(f"Failed to read image for bit depth: {image_path_str}") # Use print for utils + print(f"Warning: Failed to read image for bit depth: {image_path_str}") + return None + + dtype_to_bit_depth = { + np.dtype('uint8'): 8, + np.dtype('uint16'): 16, + np.dtype('float32'): 32, # Typically for EXR etc. 
+ np.dtype('int8'): 8, # Unlikely for images but good to have + np.dtype('int16'): 16, # Unlikely + # Add other dtypes if necessary + } + bit_depth = dtype_to_bit_depth.get(img.dtype) + if bit_depth is None: + # logger.warning(f"Unknown dtype {img.dtype} for image {image_path_str}, cannot determine bit depth.") # Use print for utils + print(f"Warning: Unknown dtype {img.dtype} for image {image_path_str}, cannot determine bit depth.") + pass # Return None + return bit_depth + except Exception as e: + # logger.error(f"Error getting bit depth for {image_path_str}: {e}") # Use print for utils + print(f"Error getting bit depth for {image_path_str}: {e}") + return None + +def calculate_image_stats(image_data: np.ndarray) -> Optional[Dict]: + """ + Calculates min, max, mean for a given numpy image array. + Handles grayscale and multi-channel images. Converts to float64 for calculation. + Normalizes uint8/uint16 data to 0-1 range before calculating stats. + """ + if image_data is None: + return None + try: + data_float = image_data.astype(np.float64) + + if image_data.dtype == np.uint16: + data_float /= 65535.0 + elif image_data.dtype == np.uint8: + data_float /= 255.0 + + stats = {} + if len(data_float.shape) == 2: # Grayscale (H, W) + stats["min"] = float(np.min(data_float)) + stats["max"] = float(np.max(data_float)) + stats["mean"] = float(np.mean(data_float)) + stats["median"] = float(np.median(data_float)) + elif len(data_float.shape) == 3: # Color (H, W, C) + stats["min"] = [float(v) for v in np.min(data_float, axis=(0, 1))] + stats["max"] = [float(v) for v in np.max(data_float, axis=(0, 1))] + stats["mean"] = [float(v) for v in np.mean(data_float, axis=(0, 1))] + stats["median"] = [float(v) for v in np.median(data_float, axis=(0, 1))] + else: + return None # Unsupported shape + return stats + except Exception: + return {"error": "Error calculating image stats"} + +# --- Aspect Ratio String --- + +def normalize_aspect_ratio_change(original_width: int, 
original_height: int, resized_width: int, resized_height: int, decimals: int = 2) -> str: + """ + Calculates the aspect ratio change string (e.g., "EVEN", "X133"). + """ + if original_width <= 0 or original_height <= 0: + return "InvalidInput" + if resized_width <= 0 or resized_height <= 0: + return "InvalidResize" + + width_change_percentage = ((resized_width - original_width) / original_width) * 100 + height_change_percentage = ((resized_height - original_height) / original_height) * 100 + + normalized_width_change = width_change_percentage / 100 + normalized_height_change = height_change_percentage / 100 + + normalized_width_change = min(max(normalized_width_change + 1, 0), 2) + normalized_height_change = min(max(normalized_height_change + 1, 0), 2) + + epsilon = 1e-9 + if abs(normalized_width_change) < epsilon and abs(normalized_height_change) < epsilon: + closest_value_to_one = 1.0 + elif abs(normalized_width_change) < epsilon: + closest_value_to_one = abs(normalized_height_change) + elif abs(normalized_height_change) < epsilon: + closest_value_to_one = abs(normalized_width_change) + else: + closest_value_to_one = min(abs(normalized_width_change), abs(normalized_height_change)) + + scale_factor = 1 / (closest_value_to_one + epsilon) if abs(closest_value_to_one) < epsilon else 1 / closest_value_to_one + + scaled_normalized_width_change = scale_factor * normalized_width_change + scaled_normalized_height_change = scale_factor * normalized_height_change + + output_width = round(scaled_normalized_width_change, decimals) + output_height = round(scaled_normalized_height_change, decimals) + + if abs(output_width - 1.0) < epsilon: output_width = 1 + if abs(output_height - 1.0) < epsilon: output_height = 1 + + # Helper to format the number part + def format_value(val, dec): + # Multiply by 10^decimals, convert to int to keep trailing zeros in effect + # e.g. val=1.1, dec=2 -> 1.1 * 100 = 110 + # e.g. 
val=1.0, dec=2 -> 1.0 * 100 = 100 (though this might become "1" if it's exactly 1.0 before this) + # The existing logic already handles output_width/height being 1.0 to produce "EVEN" or skip a component. + # This formatting is for when output_width/height is NOT 1.0. + return str(int(round(val * (10**dec)))) + + if abs(output_width - output_height) < epsilon: # Handles original square or aspect maintained + output = "EVEN" + elif output_width != 1 and abs(output_height - 1.0) < epsilon : # Width changed, height maintained relative to width + output = f"X{format_value(output_width, decimals)}" + elif output_height != 1 and abs(output_width - 1.0) < epsilon: # Height changed, width maintained relative to height + output = f"Y{format_value(output_height, decimals)}" + else: # Both changed relative to each other + output = f"X{format_value(output_width, decimals)}Y{format_value(output_height, decimals)}" + return output + +# --- Image Loading, Conversion, Resizing --- + +def load_image(image_path: Union[str, Path], read_flag: int = cv2.IMREAD_UNCHANGED) -> Optional[np.ndarray]: + """Loads an image from the specified path. 
Converts BGR/BGRA to RGB/RGBA if color.""" + try: + img = cv2.imread(str(image_path), read_flag) + if img is None: + # print(f"Warning: Failed to load image: {image_path}") # Optional: for debugging utils + return None + + # Ensure RGB/RGBA for color images + if len(img.shape) == 3: + if img.shape[2] == 4: # BGRA from OpenCV + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + elif img.shape[2] == 3: # BGR from OpenCV + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return img + except Exception: # as e: + # print(f"Error loading image {image_path}: {e}") # Optional: for debugging utils + return None + +def convert_bgr_to_rgb(image: np.ndarray) -> np.ndarray: + """Converts an image from BGR/BGRA to RGB/RGBA color space.""" + if image is None or len(image.shape) < 3: + return image # Return as is if not a color image or None + + if image.shape[2] == 4: # BGRA + return cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA) # Keep alpha, convert to RGBA + elif image.shape[2] == 3: # BGR + return cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + return image # Return as is if not 3 or 4 channels + +def convert_rgb_to_bgr(image: np.ndarray) -> np.ndarray: + """Converts an image from RGB/RGBA to BGR/BGRA color space.""" + if image is None or len(image.shape) < 3: + return image # Return as is if not a color image or None + + if image.shape[2] == 4: # RGBA + return cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA) + elif image.shape[2] == 3: # RGB + return cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + return image # Return as is if not 3 or 4 channels + + +def resize_image(image: np.ndarray, target_width: int, target_height: int, interpolation: Optional[int] = None) -> np.ndarray: + """Resizes an image to target_width and target_height.""" + if image is None: + raise ValueError("Cannot resize a None image.") + if target_width <= 0 or target_height <= 0: + raise ValueError("Target width and height must be positive.") + + original_height, original_width = image.shape[:2] + + if interpolation is None: + # Default 
interpolation: Lanczos for downscaling, Cubic for upscaling/same + if (target_width * target_height) < (original_width * original_height): + interpolation = cv2.INTER_LANCZOS4 + else: + interpolation = cv2.INTER_CUBIC + + return cv2.resize(image, (target_width, target_height), interpolation=interpolation) + +# --- Image Saving --- + +def save_image( + image_path: Union[str, Path], + image_data: np.ndarray, + output_format: Optional[str] = None, # e.g. "png", "jpg", "exr" + output_dtype_target: Optional[np.dtype] = None, # e.g. np.uint8, np.uint16, np.float16 + params: Optional[List[int]] = None, + convert_to_bgr_before_save: bool = True # True for most formats except EXR +) -> bool: + """ + Saves image data to a file. Handles data type and color space conversions. + + Args: + image_path: Path to save the image. + image_data: NumPy array of the image. + output_format: Desired output format (e.g., 'png', 'jpg'). If None, derived from extension. + output_dtype_target: Target NumPy dtype for saving (e.g., np.uint8, np.uint16). + If None, tries to use image_data.dtype or a sensible default. + params: OpenCV imwrite parameters (e.g., [cv2.IMWRITE_JPEG_QUALITY, 90]). + convert_to_bgr_before_save: If True and image is 3-channel, converts RGB to BGR. + Set to False for formats like EXR that expect RGB. + + Returns: + True if saving was successful, False otherwise. + """ + if image_data is None: + return False + + img_to_save = image_data.copy() + path_obj = Path(image_path) + path_obj.parent.mkdir(parents=True, exist_ok=True) + + # 1. 
Data Type Conversion + if output_dtype_target is not None: + if output_dtype_target == np.uint8 and img_to_save.dtype != np.uint8: + if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0 * 255.0).astype(np.uint8) + elif img_to_save.dtype in [np.float16, np.float32, np.float64]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 255.0).astype(np.uint8) + else: img_to_save = img_to_save.astype(np.uint8) + elif output_dtype_target == np.uint16 and img_to_save.dtype != np.uint16: + if img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0 * 65535.0).astype(np.uint16) # More accurate + elif img_to_save.dtype in [np.float16, np.float32, np.float64]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 65535.0).astype(np.uint16) + else: img_to_save = img_to_save.astype(np.uint16) + elif output_dtype_target == np.float16 and img_to_save.dtype != np.float16: + if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0).astype(np.float16) + elif img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0).astype(np.float16) + elif img_to_save.dtype in [np.float32, np.float64]: img_to_save = img_to_save.astype(np.float16) + # else: cannot convert to float16 easily + elif output_dtype_target == np.float32 and img_to_save.dtype != np.float32: + if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0) + elif img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0) + elif img_to_save.dtype == np.float16: img_to_save = img_to_save.astype(np.float32) + + + # 2. Color Space Conversion (Internal RGB/RGBA -> BGR/BGRA for OpenCV) + # Input `image_data` is assumed to be in RGB/RGBA format (due to `load_image` changes). + # OpenCV's `imwrite` typically expects BGR/BGRA for formats like PNG, JPG. + # EXR format usually expects RGB/RGBA. + # The `convert_to_bgr_before_save` flag controls this behavior. 
+ current_format = output_format if output_format else path_obj.suffix.lower().lstrip('.') + + if convert_to_bgr_before_save and current_format != 'exr': + # If image is 3-channel (RGB) or 4-channel (RGBA), convert to BGR/BGRA. + if len(img_to_save.shape) == 3 and (img_to_save.shape[2] == 3 or img_to_save.shape[2] == 4): + img_to_save = convert_rgb_to_bgr(img_to_save) # Handles RGB->BGR and RGBA->BGRA + # If `convert_to_bgr_before_save` is False or format is 'exr', + # the image (assumed RGB/RGBA) is saved as is. + + # 3. Save Image + try: + if params: + cv2.imwrite(str(path_obj), img_to_save, params) + else: + cv2.imwrite(str(path_obj), img_to_save) + return True + except Exception: # as e: + # print(f"Error saving image {path_obj}: {e}") # Optional: for debugging utils + return False + +# --- Common Map Transformations --- + +import re +import logging + +ipu_log = logging.getLogger(__name__) + +def apply_common_map_transformations( + image_data: np.ndarray, + processing_map_type: str, # The potentially suffixed internal type + invert_normal_green: bool, + file_type_definitions: Dict[str, Dict], + log_prefix: str +) -> Tuple[np.ndarray, str, List[str]]: + """ + Applies common in-memory transformations (Gloss-to-Rough, Normal Green Invert). + Returns potentially transformed image data, potentially updated map type, and notes. 
+ """ + transformation_notes = [] + current_image_data = image_data # Start with original data + updated_processing_map_type = processing_map_type # Start with original type + + # Gloss-to-Rough + # Check if the base type is Gloss (before suffix) + base_map_type_match = re.match(r"(MAP_GLOSS)", processing_map_type) + if base_map_type_match: + ipu_log.info(f"{log_prefix}: Applying Gloss-to-Rough conversion.") + inversion_succeeded = False + if np.issubdtype(current_image_data.dtype, np.floating): + current_image_data = 1.0 - current_image_data + current_image_data = np.clip(current_image_data, 0.0, 1.0) + ipu_log.debug(f"{log_prefix}: Inverted float image data for Gloss->Rough.") + inversion_succeeded = True + elif np.issubdtype(current_image_data.dtype, np.integer): + max_val = np.iinfo(current_image_data.dtype).max + current_image_data = max_val - current_image_data + ipu_log.debug(f"{log_prefix}: Inverted integer image data (max_val: {max_val}) for Gloss->Rough.") + inversion_succeeded = True + else: + ipu_log.error(f"{log_prefix}: Unsupported image data type {current_image_data.dtype} for GLOSS map. 
Cannot invert.") + transformation_notes.append("Gloss-to-Rough FAILED (unsupported dtype)") + + if inversion_succeeded: + # Update the type string itself (e.g., MAP_GLOSS-1 -> MAP_ROUGH-1) + updated_processing_map_type = processing_map_type.replace("GLOSS", "ROUGH") + ipu_log.info(f"{log_prefix}: Map type updated: '{processing_map_type}' -> '{updated_processing_map_type}'") + transformation_notes.append("Gloss-to-Rough applied") + + # Normal Green Invert + # Check if the base type is Normal (before suffix) + base_map_type_match_nrm = re.match(r"(MAP_NRM)", processing_map_type) + if base_map_type_match_nrm and invert_normal_green: + ipu_log.info(f"{log_prefix}: Applying Normal Map Green Channel Inversion (Global Setting).") + current_image_data = invert_normal_map_green_channel(current_image_data) + transformation_notes.append("Normal Green Inverted (Global)") + + return current_image_data, updated_processing_map_type, transformation_notes + +# --- Normal Map Utilities --- + +def invert_normal_map_green_channel(normal_map: np.ndarray) -> np.ndarray: + """ + Inverts the green channel of a normal map. + Assumes the normal map is in RGB or RGBA format (channel order R, G, B, A). 
+ """ + if normal_map is None or len(normal_map.shape) < 3 or normal_map.shape[2] < 3: + # Not a valid color image with at least 3 channels + return normal_map + + # Ensure data is mutable + inverted_map = normal_map.copy() + + # Invert the green channel (index 1) + # Handle different data types + if np.issubdtype(inverted_map.dtype, np.floating): + inverted_map[:, :, 1] = 1.0 - inverted_map[:, :, 1] + elif np.issubdtype(inverted_map.dtype, np.integer): + max_val = np.iinfo(inverted_map.dtype).max + inverted_map[:, :, 1] = max_val - inverted_map[:, :, 1] + else: + # Unsupported dtype, return original + print(f"Warning: Unsupported dtype {inverted_map.dtype} for normal map green channel inversion.") + return normal_map + + return inverted_map \ No newline at end of file diff --git a/processing/utils/image_saving_utils.py b/processing/utils/image_saving_utils.py new file mode 100644 index 0000000..9147fc5 --- /dev/null +++ b/processing/utils/image_saving_utils.py @@ -0,0 +1,297 @@ +import logging +import cv2 +import numpy as np +from pathlib import Path +from typing import List, Dict, Any, Tuple, Optional + +# Potentially import ipu from ...utils import image_processing_utils as ipu +# Assuming ipu is available in the same utils directory or parent +try: + from . import image_processing_utils as ipu +except ImportError: + # Fallback for different import structures if needed, adjust based on actual project structure + # For this project structure, the relative import should work. + logging.warning("Could not import image_processing_utils using relative path. 
Attempting absolute import.") + try: + from processing.utils import image_processing_utils as ipu + except ImportError: + logging.error("Could not import image_processing_utils.") + ipu = None # Handle case where ipu is not available + +logger = logging.getLogger(__name__) + +def save_image_variants( + source_image_data: np.ndarray, + base_map_type: str, # Filename-friendly map type + source_bit_depth_info: List[Optional[int]], + image_resolutions: Dict[str, int], + file_type_defs: Dict[str, Dict[str, Any]], + output_format_8bit: str, + output_format_16bit_primary: str, + output_format_16bit_fallback: str, + png_compression_level: int, + jpg_quality: int, + output_filename_pattern_tokens: Dict[str, Any], # Must include 'output_base_directory': Path and 'asset_name': str + output_filename_pattern: str, + resolution_threshold_for_jpg: Optional[int] = None, # Added + # Consider adding ipu or relevant parts of it if not importing globally +) -> List[Dict[str, Any]]: + """ + Centralizes image saving logic, generating and saving various resolution variants + according to configuration. + + Args: + source_image_data (np.ndarray): High-res image data (in memory, potentially transformed). + base_map_type (str): Final map type (e.g., "COL", "ROUGH", "NORMAL", "MAP_NRMRGH"). + This is the filename-friendly map type. + source_bit_depth_info (List[Optional[int]]): List of original source bit depth(s) + (e.g., [8], [16], [8, 16]). Can contain None. + image_resolutions (Dict[str, int]): Dictionary mapping resolution keys (e.g., "4K") + to max dimensions (e.g., 4096). + file_type_defs (Dict[str, Dict[str, Any]]): Dictionary defining properties for map types, + including 'bit_depth_rule'. + output_format_8bit (str): File extension for 8-bit output (e.g., "jpg", "png"). + output_format_16bit_primary (str): Primary file extension for 16-bit output (e.g., "png", "tif"). + output_format_16bit_fallback (str): Fallback file extension for 16-bit output. 
+ png_compression_level (int): Compression level for PNG output (0-9). + jpg_quality (int): Quality level for JPG output (0-100). + output_filename_pattern_tokens (Dict[str, Any]): Dictionary of tokens for filename + pattern replacement. Must include + 'output_base_directory' (Path) and + 'asset_name' (str). + output_filename_pattern (str): Pattern string for generating output filenames + (e.g., "[assetname]_[maptype]_[resolution].[ext]"). + + Returns: + List[Dict[str, Any]]: A list of dictionaries, each containing details about a saved file. + Example: [{'path': str, 'resolution_key': str, 'format': str, + 'bit_depth': int, 'dimensions': (w,h)}, ...] + """ + if ipu is None: + logger.error("image_processing_utils is not available. Cannot save images.") + return [] + + saved_file_details = [] + source_h, source_w = source_image_data.shape[:2] + source_max_dim = max(source_h, source_w) + + # 1. Use provided configuration inputs (already available as function arguments) + logger.info(f"SaveImageVariants: Starting for map type: {base_map_type}. Source shape: {source_image_data.shape}, Source bit depths: {source_bit_depth_info}") + logger.debug(f"SaveImageVariants: Resolutions: {image_resolutions}, File Type Defs: {file_type_defs.keys()}, Output Formats: 8bit={output_format_8bit}, 16bit_pri={output_format_16bit_primary}, 16bit_fall={output_format_16bit_fallback}") + logger.debug(f"SaveImageVariants: PNG Comp: {png_compression_level}, JPG Qual: {jpg_quality}") + logger.debug(f"SaveImageVariants: Output Tokens: {output_filename_pattern_tokens}, Output Pattern: {output_filename_pattern}") + logger.debug(f"SaveImageVariants: Received resolution_threshold_for_jpg: {resolution_threshold_for_jpg}") # Log received threshold + + # 2. 
Determine Target Bit Depth + target_bit_depth = 8 # Default + bit_depth_rule = file_type_defs.get(base_map_type, {}).get('bit_depth_rule', 'force_8bit') + if bit_depth_rule not in ['force_8bit', 'respect_inputs']: + logger.warning(f"Unknown bit_depth_rule '{bit_depth_rule}' for map type '{base_map_type}'. Defaulting to 'force_8bit'.") + bit_depth_rule = 'force_8bit' + + if bit_depth_rule == 'respect_inputs': + # Check if any source bit depth is > 8, ignoring None + if any(depth is not None and depth > 8 for depth in source_bit_depth_info): + target_bit_depth = 16 + else: + target_bit_depth = 8 + logger.info(f"Bit depth rule 'respect_inputs' applied. Source bit depths: {source_bit_depth_info}. Target bit depth: {target_bit_depth}") + else: # force_8bit + target_bit_depth = 8 + logger.info(f"Bit depth rule 'force_8bit' applied. Target bit depth: {target_bit_depth}") + + + # 3. Determine Output File Format(s) + if target_bit_depth == 8: + output_ext = output_format_8bit.lstrip('.').lower() + elif target_bit_depth == 16: + # Prioritize primary, fallback to fallback if primary is not supported/desired + # For now, just use primary. More complex logic might be needed later. + output_ext = output_format_16bit_primary.lstrip('.').lower() + # Basic fallback logic example (can be expanded) + if output_ext not in ['png', 'tif']: # Assuming common 16-bit formats + output_ext = output_format_16bit_fallback.lstrip('.').lower() + logger.warning(f"Primary 16-bit format '{output_format_16bit_primary}' might not be suitable. Using fallback '{output_format_16bit_fallback}'.") + else: + logger.error(f"Unsupported target bit depth: {target_bit_depth}. Defaulting to 8-bit format.") + output_ext = output_format_8bit.lstrip('.').lower() + + current_output_ext = output_ext # Store the initial extension based on bit depth + + logger.info(f"SaveImageVariants: Determined target bit depth: {target_bit_depth}, Initial output format: {current_output_ext} for map type {base_map_type}") + + # 4. 
Generate and Save Resolution Variants + # Sort resolutions by max dimension descending + sorted_resolutions = sorted(image_resolutions.items(), key=lambda item: item[1], reverse=True) + + for res_key, res_max_dim in sorted_resolutions: + logger.info(f"SaveImageVariants: Processing variant {res_key} ({res_max_dim}px) for {base_map_type}") + + # --- Prevent Upscaling --- + # Skip this resolution variant if its target dimension is larger than the source image's largest dimension. + if res_max_dim > source_max_dim: + logger.info(f"SaveImageVariants: Skipping variant {res_key} ({res_max_dim}px) for {base_map_type} because target resolution is larger than source ({source_max_dim}px).") + continue # Skip to the next resolution + + # Calculate target dimensions for valid variants (equal or smaller than source) + if source_max_dim == res_max_dim: + # Use source dimensions if target is equal + target_w_res, target_h_res = source_w, source_h + logger.info(f"SaveImageVariants: Using source resolution ({source_w}x{source_h}) for {res_key} variant of {base_map_type} as target matches source.") + else: # Downscale (source_max_dim > res_max_dim) + # Downscale, maintaining aspect ratio + aspect_ratio = source_w / source_h + if source_w >= source_h: # Use >= to handle square images correctly + target_w_res = res_max_dim + target_h_res = max(1, int(res_max_dim / aspect_ratio)) # Ensure height is at least 1 + else: + target_h_res = res_max_dim + target_w_res = max(1, int(res_max_dim * aspect_ratio)) # Ensure width is at least 1 + logger.info(f"SaveImageVariants: Calculated downscale for {base_map_type} {res_key}: from ({source_w}x{source_h}) to ({target_w_res}x{target_h_res})") + + + # Resize source_image_data (only if necessary) + if (target_w_res, target_h_res) == (source_w, source_h): + # No resize needed if dimensions match + variant_data = source_image_data.copy() # Copy to avoid modifying original if needed later + logger.debug(f"SaveImageVariants: No resize needed for 
{base_map_type} {res_key}, using copy of source data.") + else: + # Perform resize only if dimensions differ (i.e., downscaling) + interpolation_method = cv2.INTER_AREA # Good for downscaling + try: + variant_data = ipu.resize_image(source_image_data, target_w_res, target_h_res, interpolation=interpolation_method) + if variant_data is None: # Check if resize failed + raise ValueError("ipu.resize_image returned None") + logger.debug(f"SaveImageVariants: Resized variant data shape for {base_map_type} {res_key}: {variant_data.shape}") + except Exception as e: + logger.error(f"SaveImageVariants: Error resizing image for {base_map_type} {res_key} variant: {e}") + continue # Skip this variant if resizing fails + + # Filename Construction + current_tokens = output_filename_pattern_tokens.copy() + current_tokens['maptype'] = base_map_type + current_tokens['resolution'] = res_key + + # Determine final extension for this variant, considering JPG threshold + final_variant_ext = current_output_ext + + # --- Start JPG Threshold Logging --- + logger.debug(f"SaveImageVariants: JPG Threshold Check for {base_map_type} {res_key}:") + logger.debug(f" - target_bit_depth: {target_bit_depth}") + logger.debug(f" - resolution_threshold_for_jpg: {resolution_threshold_for_jpg}") + logger.debug(f" - target_w_res: {target_w_res}, target_h_res: {target_h_res}") + logger.debug(f" - max(target_w_res, target_h_res): {max(target_w_res, target_h_res)}") + logger.debug(f" - current_output_ext: {current_output_ext}") + + cond_bit_depth = target_bit_depth == 8 + cond_threshold_not_none = resolution_threshold_for_jpg is not None + cond_res_exceeded = False + if cond_threshold_not_none: # Avoid comparison if threshold is None + cond_res_exceeded = max(target_w_res, target_h_res) > resolution_threshold_for_jpg + cond_is_png = current_output_ext == 'png' + + logger.debug(f" - Condition (target_bit_depth == 8): {cond_bit_depth}") + logger.debug(f" - Condition (resolution_threshold_for_jpg is not None): 
{cond_threshold_not_none}") + logger.debug(f" - Condition (max(res) > threshold): {cond_res_exceeded}") + logger.debug(f" - Condition (current_output_ext == 'png'): {cond_is_png}") + # --- End JPG Threshold Logging --- + + if cond_bit_depth and cond_threshold_not_none and cond_res_exceeded and cond_is_png: + final_variant_ext = 'jpg' + logger.info(f"SaveImageVariants: Overriding 8-bit PNG to JPG for {base_map_type} {res_key} due to resolution {max(target_w_res, target_h_res)}px > threshold {resolution_threshold_for_jpg}px.") + + current_tokens['ext'] = final_variant_ext + + try: + # Replace placeholders in the pattern + filename = output_filename_pattern + for token, value in current_tokens.items(): + # Ensure value is string for replacement, handle Path objects later + filename = filename.replace(f"[{token}]", str(value)) + + # Construct full output path + output_base_directory = current_tokens.get('output_base_directory') + if not isinstance(output_base_directory, Path): + logger.error(f"'output_base_directory' token is missing or not a Path object: {output_base_directory}. 
Cannot save file.") + continue # Skip this variant + + output_path = output_base_directory / filename + logger.info(f"SaveImageVariants: Constructed output path for {base_map_type} {res_key}: {output_path}") + + # Ensure parent directory exists + output_path.parent.mkdir(parents=True, exist_ok=True) + logger.debug(f"SaveImageVariants: Ensured directory exists for {base_map_type} {res_key}: {output_path.parent}") + + except Exception as e: + logger.error(f"SaveImageVariants: Error constructing filepath for {base_map_type} {res_key} variant: {e}") + continue # Skip this variant if path construction fails + + + # Prepare Save Parameters + save_params_cv2 = [] + if final_variant_ext == 'jpg': # Check against final_variant_ext + save_params_cv2.append(cv2.IMWRITE_JPEG_QUALITY) + save_params_cv2.append(jpg_quality) + logger.debug(f"SaveImageVariants: Using JPG quality: {jpg_quality} for {base_map_type} {res_key}") + elif final_variant_ext == 'png': # Check against final_variant_ext + save_params_cv2.append(cv2.IMWRITE_PNG_COMPRESSION) + save_params_cv2.append(png_compression_level) + logger.debug(f"SaveImageVariants: Using PNG compression level: {png_compression_level} for {base_map_type} {res_key}") + # Add other format specific parameters if needed (e.g., TIFF compression) + + + # Bit Depth Conversion is handled by ipu.save_image via output_dtype_target + image_data_for_save = variant_data # Use the resized variant data directly + + # Determine the target dtype for ipu.save_image + output_dtype_for_save: Optional[np.dtype] = None + if target_bit_depth == 8: + output_dtype_for_save = np.uint8 + elif target_bit_depth == 16: + output_dtype_for_save = np.uint16 + # Add other target bit depths like float16/float32 if necessary + # elif target_bit_depth == 32: # Assuming float32 for EXR etc. 
+ # output_dtype_for_save = np.float32 + + + # Saving + try: + # ipu.save_image is expected to handle the actual cv2.imwrite call + logger.debug(f"SaveImageVariants: Attempting to save {base_map_type} {res_key} to {output_path} with params {save_params_cv2}, target_dtype: {output_dtype_for_save}") + success = ipu.save_image( + str(output_path), + image_data_for_save, + output_dtype_target=output_dtype_for_save, # Pass the target dtype + params=save_params_cv2 + ) + if success: + logger.info(f"SaveImageVariants: Successfully saved {base_map_type} {res_key} variant to {output_path}") + # Collect details for the returned list + saved_file_details.append({ + 'path': str(output_path), + 'resolution_key': res_key, + 'format': final_variant_ext, # Log the actual saved format + 'bit_depth': target_bit_depth, + 'dimensions': (target_w_res, target_h_res) + }) + else: + logger.error(f"SaveImageVariants: Failed to save {base_map_type} {res_key} variant to {output_path} (ipu.save_image returned False)") + + except Exception as e: + logger.error(f"SaveImageVariants: Error during ipu.save_image for {base_map_type} {res_key} variant to {output_path}: {e}", exc_info=True) + # Continue to next variant even if one fails + + + # Discard in-memory variant after saving (Python's garbage collection handles this) + del variant_data + del image_data_for_save + + + # 5. Return List of Saved File Details + logger.info(f"Finished saving variants for map type: {base_map_type}. Saved {len(saved_file_details)} variants.") + return saved_file_details + +# Optional Helper Functions (can be added here if needed) +# def _determine_target_bit_depth(...): ... +# def _determine_output_format(...): ... +# def _construct_variant_filepath(...): ... 
\ No newline at end of file diff --git a/processing_engine.py b/processing_engine.py index f4d0812..441284d 100644 --- a/processing_engine.py +++ b/processing_engine.py @@ -5,40 +5,31 @@ import math import shutil import tempfile import logging -import json -import re -import time from pathlib import Path from typing import List, Dict, Tuple, Optional, Set -from collections import defaultdict - +log = logging.getLogger(__name__) # Attempt to import image processing libraries try: import cv2 import numpy as np -except ImportError: +except ImportError as e: + log.error(f"Failed to import cv2 or numpy in processing_engine.py: {e}", exc_info=True) print("ERROR: Missing required image processing libraries. Please install opencv-python and numpy:") print("pip install opencv-python numpy") # Allow import to fail but log error; execution will likely fail later cv2 = None np = None -# Attempt to import OpenEXR - Check if needed for advanced EXR flags/types -try: - import OpenEXR - import Imath - _HAS_OPENEXR = True -except ImportError: - _HAS_OPENEXR = False - # Log this information - basic EXR might still work via OpenCV - logging.debug("Optional 'OpenEXR' python package not found. 
EXR saving relies on OpenCV's built-in support.") - try: from configuration import Configuration, ConfigurationError from rule_structure import SourceRule, AssetRule, FileRule - from utils.path_utils import generate_path_from_pattern -except ImportError: + from utils.path_utils import generate_path_from_pattern, sanitize_filename + from processing.utils import image_processing_utils as ipu # Corrected import +except ImportError as e: + # Temporarily print to console as log might not be initialized yet + print(f"ERROR during initial imports in processing_engine.py: {e}") + # log.error(f"Failed to import Configuration or rule_structure classes in processing_engine.py: {e}", exc_info=True) # Log will be used after init print("ERROR: Cannot import Configuration or rule_structure classes.") print("Ensure configuration.py and rule_structure.py are in the same directory or Python path.") # Allow import to fail but log error; execution will likely fail later @@ -48,195 +39,35 @@ except ImportError: FileRule = None -# Use logger defined in main.py (or configure one here if run standalone) +# Initialize logger early log = logging.getLogger(__name__) # Basic config if logger hasn't been set up elsewhere (e.g., during testing) if not log.hasHandlers(): logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') +# Use logger defined in main.py (or configure one here if run standalone) + +from processing.pipeline.orchestrator import PipelineOrchestrator +# from processing.pipeline.asset_context import AssetProcessingContext # AssetProcessingContext is used by the orchestrator +# Import stages that will be passed to the orchestrator (outer stages) +from processing.pipeline.stages.supplier_determination import SupplierDeterminationStage +from processing.pipeline.stages.asset_skip_logic import AssetSkipLogicStage +from processing.pipeline.stages.metadata_initialization import MetadataInitializationStage +from 
processing.pipeline.stages.file_rule_filter import FileRuleFilterStage +from processing.pipeline.stages.gloss_to_rough_conversion import GlossToRoughConversionStage +from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage +from processing.pipeline.stages.normal_map_green_channel import NormalMapGreenChannelStage +# Removed: from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage +# Removed: from processing.pipeline.stages.map_merging import MapMergingStage +from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage +from processing.pipeline.stages.output_organization import OutputOrganizationStage # --- Custom Exception --- class ProcessingEngineError(Exception): """Custom exception for errors during processing engine operations.""" pass -# --- Helper Functions (Moved from AssetProcessor or kept static) --- - -def _is_power_of_two(n: int) -> bool: - """Checks if a number is a power of two.""" - return (n > 0) and (n & (n - 1) == 0) - -def get_nearest_pot(value: int) -> int: - """Finds the nearest power of two to the given value.""" - if value <= 0: - return 1 # Or raise error, POT must be positive - if _is_power_of_two(value): - return value - - # Calculate the powers of two below and above the value - lower_pot = 1 << (value.bit_length() - 1) - upper_pot = 1 << value.bit_length() - - # Determine which power of two is closer - if (value - lower_pot) < (upper_pot - value): - return lower_pot - else: - return upper_pot - -def calculate_target_dimensions(orig_w, orig_h, target_max_dim) -> tuple[int, int]: - """ - Calculates target dimensions by first scaling to fit target_max_dim - while maintaining aspect ratio, then finding the nearest power-of-two - value for each resulting dimension (Stretch/Squash to POT). 
- """ - if orig_w <= 0 or orig_h <= 0: - # Fallback to target_max_dim if original dimensions are invalid - pot_dim = get_nearest_pot(target_max_dim) - log.warning(f"Invalid original dimensions ({orig_w}x{orig_h}). Falling back to nearest POT of target_max_dim: {pot_dim}x{pot_dim}") - return (pot_dim, pot_dim) - - # Step 1: Calculate intermediate dimensions maintaining aspect ratio - ratio = orig_w / orig_h - if ratio > 1: # Width is dominant - scaled_w = target_max_dim - scaled_h = max(1, round(scaled_w / ratio)) - else: # Height is dominant or square - scaled_h = target_max_dim - scaled_w = max(1, round(scaled_h * ratio)) - - # Step 2: Find the nearest power of two for each scaled dimension - pot_w = get_nearest_pot(scaled_w) - pot_h = get_nearest_pot(scaled_h) - - log.debug(f"POT Calc: Orig=({orig_w}x{orig_h}), MaxDim={target_max_dim} -> Scaled=({scaled_w}x{scaled_h}) -> POT=({pot_w}x{pot_h})") - - return int(pot_w), int(pot_h) - -def _calculate_image_stats(image_data: np.ndarray) -> dict | None: - """ - Calculates min, max, mean for a given numpy image array. - Handles grayscale and multi-channel images. Converts to float64 for calculation. 
- """ - if image_data is None: - log.warning("Attempted to calculate stats on None image data.") - return None - if np is None: - log.error("Numpy not available for stats calculation.") - return None - try: - # Use float64 for calculations to avoid potential overflow/precision issues - data_float = image_data.astype(np.float64) - - # Normalize data_float based on original dtype before calculating stats - if image_data.dtype == np.uint16: - log.debug("Stats calculation: Normalizing uint16 data to 0-1 range.") - data_float /= 65535.0 - elif image_data.dtype == np.uint8: - log.debug("Stats calculation: Normalizing uint8 data to 0-1 range.") - data_float /= 255.0 - # Assuming float inputs are already in 0-1 range or similar - - log.debug(f"Stats calculation: data_float dtype: {data_float.dtype}, shape: {data_float.shape}") - # Log a few sample values to check range after normalization - if data_float.size > 0: - sample_values = data_float.flatten()[:10] # Get first 10 values - log.debug(f"Stats calculation: Sample values (first 10) after normalization: {sample_values.tolist()}") - - - if len(data_float.shape) == 2: # Grayscale (H, W) - min_val = float(np.min(data_float)) - max_val = float(np.max(data_float)) - mean_val = float(np.mean(data_float)) - stats = {"min": min_val, "max": max_val, "mean": mean_val} - log.debug(f"Calculated Grayscale Stats: Min={min_val:.4f}, Max={max_val:.4f}, Mean={mean_val:.4f}") - elif len(data_float.shape) == 3: # Color (H, W, C) - channels = data_float.shape[2] - min_val = [float(v) for v in np.min(data_float, axis=(0, 1))] - max_val = [float(v) for v in np.max(data_float, axis=(0, 1))] - mean_val = [float(v) for v in np.mean(data_float, axis=(0, 1))] - # Assume data is RGB order after potential conversion in _load_and_transform_source - stats = {"min": min_val, "max": max_val, "mean": mean_val} - log.debug(f"Calculated {channels}-Channel Stats (RGB order): Min={min_val}, Max={max_val}, Mean={mean_val}") - else: - log.warning(f"Cannot 
calculate stats for image with unsupported shape {data_float.shape}") - return None - return stats - except Exception as e: - log.error(f"Error calculating image stats: {e}", exc_info=True) # Log exception info - return {"error": str(e)} - -def _sanitize_filename(name: str) -> str: - """Removes or replaces characters invalid for filenames/directory names.""" - if not isinstance(name, str): name = str(name) - name = re.sub(r'[^\w.\-]+', '_', name) # Allow alphanumeric, underscore, hyphen, dot - name = re.sub(r'_+', '_', name) - name = name.strip('_') - if not name: name = "invalid_name" - return name - -def _normalize_aspect_ratio_change(original_width, original_height, resized_width, resized_height, decimals=2): - """ - Calculates the aspect ratio change string (e.g., "EVEN", "X133"). - Returns the string representation. - """ - if original_width <= 0 or original_height <= 0: - log.warning("Cannot calculate aspect ratio change with zero original dimensions.") - return "InvalidInput" - - # Avoid division by zero if resize resulted in zero dimensions (shouldn't happen with checks) - if resized_width <= 0 or resized_height <= 0: - log.warning("Cannot calculate aspect ratio change with zero resized dimensions.") - return "InvalidResize" - - # Original logic from user feedback - width_change_percentage = ((resized_width - original_width) / original_width) * 100 - height_change_percentage = ((resized_height - original_height) / original_height) * 100 - - normalized_width_change = width_change_percentage / 100 - normalized_height_change = height_change_percentage / 100 - - normalized_width_change = min(max(normalized_width_change + 1, 0), 2) - normalized_height_change = min(max(normalized_height_change + 1, 0), 2) - - # Handle potential zero division if one dimension change is exactly -100% (normalized to 0) - # If both are 0, aspect ratio is maintained. If one is 0, the other dominates. 
- if normalized_width_change == 0 and normalized_height_change == 0: - closest_value_to_one = 1.0 # Avoid division by zero, effectively scale_factor = 1 - elif normalized_width_change == 0: - closest_value_to_one = abs(normalized_height_change) - elif normalized_height_change == 0: - closest_value_to_one = abs(normalized_width_change) - else: - closest_value_to_one = min(abs(normalized_width_change), abs(normalized_height_change)) - - # Add a small epsilon to avoid division by zero if closest_value_to_one is extremely close to 0 - epsilon = 1e-9 - scale_factor = 1 / (closest_value_to_one + epsilon) if abs(closest_value_to_one) < epsilon else 1 / closest_value_to_one - - scaled_normalized_width_change = scale_factor * normalized_width_change - scaled_normalized_height_change = scale_factor * normalized_height_change - - output_width = round(scaled_normalized_width_change, decimals) - output_height = round(scaled_normalized_height_change, decimals) - - # Convert to int if exactly 1.0 after rounding - if abs(output_width - 1.0) < epsilon: output_width = 1 - if abs(output_height - 1.0) < epsilon: output_height = 1 - - # Determine output string - if original_width == original_height or abs(output_width - output_height) < epsilon: - output = "EVEN" - elif output_width != 1 and output_height == 1: - output = f"X{str(output_width).replace('.', '')}" - elif output_height != 1 and output_width == 1: - output = f"Y{str(output_height).replace('.', '')}" - else: - # Both changed relative to each other - output = f"X{str(output_width).replace('.', '')}Y{str(output_height).replace('.', '')}" - - log.debug(f"Aspect ratio change calculated: Orig=({original_width}x{original_height}), Resized=({resized_width}x{resized_height}) -> String='{output}'") - return output - +# Helper functions moved to processing.utils.image_processing_utils # --- Processing Engine Class --- class ProcessingEngine: @@ -262,6 +93,38 @@ class ProcessingEngine: self.temp_dir: Path | None = None # Path to the 
temporary working directory for a process run self.loaded_data_cache: dict = {} # Cache for loaded/resized data within a single process call + # --- Pipeline Orchestrator Setup --- + # Define pre-item and post-item processing stages + pre_item_stages = [ + SupplierDeterminationStage(), + AssetSkipLogicStage(), + MetadataInitializationStage(), + FileRuleFilterStage(), + GlossToRoughConversionStage(), # Assumed to run on context.files_to_process if needed by old logic + AlphaExtractionToMaskStage(), # Same assumption as above + NormalMapGreenChannelStage(), # Same assumption as above + # Note: The new RegularMapProcessorStage and MergedTaskProcessorStage handle their own transformations + # on the specific items they process. These global transformation stages might need review + # if they were intended to operate on a broader scope or if their logic is now fully + # encapsulated in the new item-specific processor stages. For now, keeping them as pre-stages. + ] + + post_item_stages = [ + OutputOrganizationStage(), # Must run after all items are saved to temp + MetadataFinalizationAndSaveStage(),# Must run after output organization to have final paths + ] + + try: + self.pipeline_orchestrator = PipelineOrchestrator( + config_obj=self.config_obj, + pre_item_stages=pre_item_stages, + post_item_stages=post_item_stages + ) + log.info("PipelineOrchestrator initialized successfully in ProcessingEngine with pre and post stages.") + except Exception as e: + log.error(f"Failed to initialize PipelineOrchestrator in ProcessingEngine: {e}", exc_info=True) + self.pipeline_orchestrator = None # Ensure it's None if init fails + log.debug("ProcessingEngine initialized.") @@ -312,111 +175,21 @@ class ProcessingEngine: try: self.temp_dir = Path(tempfile.mkdtemp(prefix=self.config_obj.temp_dir_prefix)) log.debug(f"Created temporary workspace for engine: {self.temp_dir}") - # --- Loop through each asset defined in the SourceRule --- - for asset_rule in source_rule.assets: - asset_name = 
asset_rule.asset_name - log.info(f"--- Processing asset: '{asset_name}' ---") - asset_processed = False - asset_skipped = False - asset_failed = False - temp_metadata_path_asset = None # Track metadata file for this asset - - try: - # --- Determine Effective Supplier (Override > Identifier > Fallback) --- - effective_supplier = source_rule.supplier_override # Prioritize override - if effective_supplier is None: - effective_supplier = source_rule.supplier_identifier # Fallback to original identifier - if not effective_supplier: # Check if still None or empty - log.warning(f"Asset '{asset_name}': Supplier identifier missing from rule and override. Using fallback 'UnknownSupplier'.") - effective_supplier = "UnknownSupplier" # Final fallback - - log.debug(f"Asset '{asset_name}': Effective supplier determined as '{effective_supplier}' (Override: '{source_rule.supplier_override}', Original: '{source_rule.supplier_identifier}')") - - # --- Skip Check (using effective supplier) --- - supplier_sanitized = _sanitize_filename(effective_supplier) - asset_name_sanitized = _sanitize_filename(asset_name) - final_dir = output_base_path / supplier_sanitized / asset_name_sanitized - metadata_file_path = final_dir / self.config_obj.metadata_filename # Metadata filename still comes from config - - log.debug(f"Checking for existing output/overwrite at: {final_dir} (using effective supplier: '{effective_supplier}')") - - if not overwrite and final_dir.exists(): - log.info(f"Output directory found for asset '{asset_name_sanitized}' (Supplier: '{effective_supplier}') and overwrite is False. Skipping.") - overall_status["skipped"].append(asset_name) - asset_skipped = True - continue # Skip to the next asset - - elif overwrite and final_dir.exists(): - log.warning(f"Output directory exists for '{asset_name_sanitized}' (Supplier: '{effective_supplier}') and overwrite is True. 
Removing existing directory: {final_dir}") - try: - shutil.rmtree(final_dir) - except Exception as rm_err: - raise ProcessingEngineError(f"Failed to remove existing output directory {final_dir} during overwrite: {rm_err}") from rm_err - - # --- Prepare Asset Metadata --- - # Start with common metadata from the rule, add asset name - current_asset_metadata = asset_rule.common_metadata.copy() - current_asset_metadata["asset_name"] = asset_name - # Use the EFFECTIVE supplier here - current_asset_metadata["supplier_name"] = effective_supplier - # Add other fields that will be populated - current_asset_metadata["maps_present"] = [] - current_asset_metadata["merged_maps"] = [] - current_asset_metadata["shader_features"] = [] - current_asset_metadata["source_files_in_extra"] = [] - current_asset_metadata["image_stats_1k"] = {} - current_asset_metadata["map_details"] = {} - current_asset_metadata["aspect_ratio_change_string"] = "N/A" - current_asset_metadata["merged_map_channel_stats"] = {} - - # --- Process Individual Maps --- - processed_maps_details_asset, image_stats_asset, aspect_ratio_change_string_asset = self._process_individual_maps( - asset_rule=asset_rule, - workspace_path=workspace_path, # Use the workspace path received by process() (contains prepared files) - current_asset_metadata=current_asset_metadata # Pass mutable dict - ) - # Update metadata with results (stats and aspect ratio are updated directly in current_asset_metadata by the method) - # map_details are also updated directly in current_asset_metadata - - # --- Merge Maps --- - merged_maps_details_asset = self._merge_maps( - asset_rule=asset_rule, - workspace_path=workspace_path, - processed_maps_details_asset=processed_maps_details_asset, # Needed to find resolutions - current_asset_metadata=current_asset_metadata # Pass mutable dict for stats - ) - - # --- Generate Metadata --- - # Pass effective_supplier instead of the whole source_rule - temp_metadata_path_asset = self._generate_metadata_file( - 
effective_supplier=effective_supplier, # Pass the determined supplier - asset_rule=asset_rule, - current_asset_metadata=current_asset_metadata, # Pass the populated dict - processed_maps_details_asset=processed_maps_details_asset, - merged_maps_details_asset=merged_maps_details_asset - ) - - # --- Organize Output --- - # Pass effective_supplier instead of source_rule.supplier_identifier - self._organize_output_files( - asset_rule=asset_rule, - workspace_path=workspace_path, # Pass the original workspace path - supplier_identifier=effective_supplier, # Pass the determined supplier - output_base_path=output_base_path, # Pass output path - processed_maps_details_asset=processed_maps_details_asset, - merged_maps_details_asset=merged_maps_details_asset, - temp_metadata_info=temp_metadata_path_asset - ) - - log.info(f"--- Asset '{asset_name}' processed successfully (Supplier: {effective_supplier}). ---") - overall_status["processed"].append(asset_name) - asset_processed = True - - except Exception as asset_err: - log.error(f"--- Failed processing asset '{asset_name}': {asset_err} ---", exc_info=True) - overall_status["failed"].append(asset_name) - asset_failed = True - # Continue to the next asset + # --- NEW PIPELINE ORCHESTRATOR LOGIC --- + if hasattr(self, 'pipeline_orchestrator') and self.pipeline_orchestrator: + log.info("Processing source rule using PipelineOrchestrator.") + overall_status = self.pipeline_orchestrator.process_source_rule( + source_rule=source_rule, + workspace_path=workspace_path, # This is the path to the source files (e.g. extracted archive) + output_base_path=output_base_path, + overwrite=overwrite, + incrementing_value=self.current_incrementing_value, + sha5_value=self.current_sha5_value + ) + else: + log.error(f"PipelineOrchestrator not available for SourceRule '{source_rule.input_path}'. 
Marking all {len(source_rule.assets)} assets as failed.") + for asset_rule in source_rule.assets: + overall_status["failed"].append(asset_rule.asset_name) log.info(f"ProcessingEngine finished. Summary: {overall_status}") return overall_status @@ -446,1243 +219,3 @@ class ProcessingEngine: log.error(f"Failed to remove engine temporary workspace {self.temp_dir}: {e}", exc_info=True) self.loaded_data_cache = {} # Clear cache after cleanup - def _get_ftd_key_from_override(self, override_string: str) -> Optional[str]: - """ - Attempts to derive a base FILE_TYPE_DEFINITIONS key from an override string - which might have a variant suffix (e.g., "MAP_COL-1" -> "MAP_COL"). - """ - if not override_string: # Handle empty or None override_string - return None - if override_string in self.config_obj.FILE_TYPE_DEFINITIONS: - return override_string - - # Regex to remove trailing suffixes like -, -, _ - # e.g., "MAP_COL-1" -> "MAP_COL", "MAP_ROUGH_variantA" -> "MAP_ROUGH" - base_candidate = re.sub(r"(-[\w\d]+|_[\w\d]+)$", "", override_string) - if base_candidate in self.config_obj.FILE_TYPE_DEFINITIONS: - return base_candidate - - return None - - def _get_map_variant_suffix(self, map_identifier: str, base_ftd_key: str) -> str: - """ - Extracts a variant suffix (e.g., "-1", "_variantA") from a map_identifier - if the base_ftd_key is a prefix of it and the suffix indicates a variant. 
- Example: map_identifier="MAP_COL-1", base_ftd_key="MAP_COL" -> returns "-1" - map_identifier="MAP_COL_variant", base_ftd_key="MAP_COL" -> returns "_variant" - map_identifier="MAP_COL", base_ftd_key="MAP_COL" -> returns "" - """ - if not base_ftd_key: # Ensure base_ftd_key is not empty - return "" - if map_identifier.startswith(base_ftd_key): - suffix = map_identifier[len(base_ftd_key):] - # Ensure suffix looks like a variant (starts with - or _) or is empty - if not suffix or suffix.startswith(('-', '_')): - return suffix - return "" # Default to no suffix - - def _get_base_map_type(self, map_identifier: str) -> str: - """ - Gets the base standard type (e.g., "COL") from a map identifier (e.g., "MAP_COL-1", "COL-1"), - or returns the identifier itself if it's a merged type (e.g., "NRMRGH") or not resolvable to a standard type. - """ - if not map_identifier: # Handle empty or None map_identifier - return "" - - # Try to get FTD key from "MAP_COL-1" -> "MAP_COL" or "MAP_COL" -> "MAP_COL" - ftd_key = self._get_ftd_key_from_override(map_identifier) - if ftd_key: - definition = self.config_obj.FILE_TYPE_DEFINITIONS.get(ftd_key) - if definition and definition.get("standard_type"): # Check if standard_type exists and is not empty - return definition["standard_type"] # Returns "COL" - - # If map_identifier was like "COL-1" or "ROUGH" (a standard_type itself, possibly with suffix) - # Strip suffix and check if the base is a known standard_type - # Regex to get the initial part of the string composed of uppercase letters and underscores - base_candidate_match = re.match(r"([A-Z_]+)", map_identifier.upper()) - if base_candidate_match: - potential_std_type = base_candidate_match.group(1) - for _, definition_val in self.config_obj.FILE_TYPE_DEFINITIONS.items(): - if definition_val.get("standard_type") == potential_std_type: - return potential_std_type # Found "COL" - - # If it's a merged map type (e.g., "NRMRGH"), it won't be in FTDs as a key or standard_type. 
- # Check if it's one of the output_map_types from MAP_MERGE_RULES. - for rule in self.config_obj.map_merge_rules: - if rule.get("output_map_type") == map_identifier: - return map_identifier # Return "NRMRGH" as is - - # Fallback: return the original identifier, uppercased. - log.debug(f"_get_base_map_type: Could not determine standard base for '{map_identifier}'. Returning as is (uppercase).") - return map_identifier.upper() - - def _load_and_transform_source(self, source_path_abs: Path, map_type: str, target_resolution_key: str, is_gloss_source: bool) -> Tuple[Optional[np.ndarray], Optional[np.dtype]]: - """ - Loads a source image file, performs initial prep (BGR->RGB, Gloss->Rough if applicable), - resizes it to the target resolution, and caches the result. - Uses static configuration from self.config_obj. - - Args: - source_path_abs: Absolute path to the source file in the workspace. - map_type: The item_type_override (e.g., "MAP_NRM", "MAP_ROUGH-1"). - target_resolution_key: The key for the target resolution (e.g., "4K"). - is_gloss_source: Boolean indicating if this source should be treated as gloss for inversion (if map_type is ROUGH). - - Returns: - Tuple containing: - - Resized NumPy array (float32 for gloss-inverted, original type otherwise) or None if loading/processing fails. - - Original source NumPy dtype or None if loading fails. 
- """ - if cv2 is None or np is None: - log.error("OpenCV or NumPy not available for image loading.") - return None, None - - cache_key = (source_path_abs, target_resolution_key) # Use absolute path for cache key - if cache_key in self.loaded_data_cache: - log.debug(f"CACHE HIT: Returning cached data for {source_path_abs.name} at {target_resolution_key}") - return self.loaded_data_cache[cache_key] # Return tuple (image_data, source_dtype) - - log.debug(f"CACHE MISS: Loading and transforming {source_path_abs.name} for {target_resolution_key} (map_type: {map_type})") - img_prepared = None - source_dtype = None - - try: - # --- 1. Load Source Image --- - # Determine read flag based on is_grayscale from FTD - ftd_key = self._get_ftd_key_from_override(map_type) # map_type is item_type_override - is_map_grayscale = False - standard_type_for_checks = None # For MASK check - - if ftd_key: - ftd_definition = self.config_obj.FILE_TYPE_DEFINITIONS.get(ftd_key, {}) - is_map_grayscale = ftd_definition.get("is_grayscale", False) - standard_type_for_checks = ftd_definition.get("standard_type") - log.debug(f"For map_type '{map_type}' (FTD key '{ftd_key}'), is_grayscale: {is_map_grayscale}, standard_type: {standard_type_for_checks}") - else: - log.warning(f"Could not determine FTD key for map_type '{map_type}' to check is_grayscale. 
Assuming not grayscale.") - - read_flag = cv2.IMREAD_GRAYSCALE if is_map_grayscale else cv2.IMREAD_UNCHANGED - - # Special case for MASK: always load unchanged first to check alpha - if standard_type_for_checks == 'MASK': - log.debug(f"Map type '{map_type}' (standard_type 'MASK') will be loaded with IMREAD_UNCHANGED for alpha check.") - read_flag = cv2.IMREAD_UNCHANGED - - log.debug(f"Loading source {source_path_abs.name} with flag: {'GRAYSCALE' if read_flag == cv2.IMREAD_GRAYSCALE else 'UNCHANGED'}") - img_loaded = cv2.imread(str(source_path_abs), read_flag) - if img_loaded is None: - raise ProcessingEngineError(f"Failed to load image file: {source_path_abs.name} with flag {read_flag}") - source_dtype = img_loaded.dtype - log.debug(f"Loaded source {source_path_abs.name}, dtype: {source_dtype}, shape: {img_loaded.shape}") - - # --- 2. Initial Preparation (BGR->RGB, Gloss Inversion, MASK handling) --- - img_prepared = img_loaded # Start with loaded image - - # MASK Handling (Extract alpha or convert) - Do this BEFORE general color conversions - if standard_type_for_checks == 'MASK': - log.debug(f"Processing as MASK type for {source_path_abs.name}.") - shape = img_prepared.shape - if len(shape) == 3 and shape[2] == 4: # BGRA or RGBA (OpenCV loads BGRA) - log.debug("MASK processing: Extracting alpha channel (4-channel source).") - img_prepared = img_prepared[:, :, 3] # Extract alpha - elif len(shape) == 3 and shape[2] == 3: # BGR or RGB - log.debug("MASK processing: Converting 3-channel source to Grayscale.") - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_BGR2GRAY if read_flag != cv2.IMREAD_GRAYSCALE else cv2.COLOR_RGB2GRAY) # If loaded UNCHANGED and 3-channel, assume BGR - elif len(shape) == 2: - log.debug("MASK processing: Source is already grayscale.") - else: - log.warning(f"MASK processing: Unexpected source shape {shape}. 
Cannot reliably extract mask.") - img_prepared = None # Cannot process - else: - # BGR -> RGB conversion (only for 3/4-channel images not loaded as grayscale) - if len(img_prepared.shape) == 3 and img_prepared.shape[2] >= 3 and read_flag != cv2.IMREAD_GRAYSCALE: - log.debug(f"Converting loaded image from BGR to RGB for {source_path_abs.name}.") - if img_prepared.shape[2] == 4: # BGRA -> RGBA (then to RGB) - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_BGRA2RGB) # OpenCV BGRA to RGB - else: # BGR -> RGB - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_BGR2RGB) - elif len(img_prepared.shape) == 2: - log.debug(f"Image {source_path_abs.name} is grayscale or loaded as such, no BGR->RGB conversion needed.") - - if img_prepared is None: raise ProcessingEngineError("Image data is None after MASK/Color prep.") - - # Gloss -> Roughness Inversion (if map_type is ROUGH and is_gloss_source is True) - # This is triggered by the new filename logic in _process_individual_maps - if standard_type_for_checks == 'ROUGH' and is_gloss_source: - log.info(f"Performing filename-triggered Gloss->Roughness inversion for {source_path_abs.name} (map_type: {map_type})") - if len(img_prepared.shape) == 3: - log.debug("Gloss Inversion: Converting 3-channel image to grayscale before inversion.") - img_prepared = cv2.cvtColor(img_prepared, cv2.COLOR_RGB2GRAY) # Should be RGB at this point if 3-channel - - stats_before = _calculate_image_stats(img_prepared) - log.debug(f"Gloss Inversion: Image stats BEFORE inversion: {stats_before}") - - if source_dtype == np.uint16: - img_float = 1.0 - (img_prepared.astype(np.float32) / 65535.0) - elif source_dtype == np.uint8: - img_float = 1.0 - (img_prepared.astype(np.float32) / 255.0) - else: # Assuming float input is already 0-1 range - img_float = 1.0 - img_prepared.astype(np.float32) - - img_prepared = np.clip(img_float, 0.0, 1.0) # Result is float32 - - stats_after = _calculate_image_stats(img_prepared) - log.debug(f"Gloss Inversion: Image stats 
AFTER inversion (float32): {stats_after}") - log.debug(f"Inverted gloss map stored as float32 for ROUGH, original dtype: {source_dtype}") - - # Ensure data is float32/uint8/uint16 for resizing compatibility - if isinstance(img_prepared, np.ndarray) and img_prepared.dtype not in [np.uint8, np.uint16, np.float32, np.float16]: - log.warning(f"Converting unexpected dtype {img_prepared.dtype} to float32 before resizing for {source_path_abs.name}.") - img_prepared = img_prepared.astype(np.float32) - - # --- 3. Resize --- - if img_prepared is None: raise ProcessingEngineError(f"Image data is None after initial prep for {source_path_abs.name}.") - orig_h, orig_w = img_prepared.shape[:2] - # Get resolutions from static config - target_dim_px = self.config_obj.image_resolutions.get(target_resolution_key) - if not target_dim_px: - raise ProcessingEngineError(f"Target resolution key '{target_resolution_key}' not found in config.") - - # Avoid upscaling check (using static config) - max_original_dimension = max(orig_w, orig_h) - if target_dim_px > max_original_dimension: - log.warning(f"Target dimension {target_dim_px}px is larger than original {max_original_dimension}px for {source_path_abs.name}. 
Skipping resize for {target_resolution_key}.") - # Store None in cache for this specific resolution to avoid retrying - self.loaded_data_cache[cache_key] = (None, source_dtype) - return None, source_dtype # Indicate resize was skipped - - if orig_w <= 0 or orig_h <= 0: - raise ProcessingEngineError(f"Invalid original dimensions ({orig_w}x{orig_h}) for {source_path_abs.name}.") - - target_w, target_h = calculate_target_dimensions(orig_w, orig_h, target_dim_px) - interpolation = cv2.INTER_LANCZOS4 if (target_w * target_h) < (orig_w * orig_h) else cv2.INTER_CUBIC - log.debug(f"Resizing {source_path_abs.name} from ({orig_w}x{orig_h}) to ({target_w}x{target_h}) for {target_resolution_key}") - img_resized = cv2.resize(img_prepared, (target_w, target_h), interpolation=interpolation) - - # --- 4. Cache and Return --- - # Keep resized dtype unless it was gloss-inverted (which is float32) - final_data_to_cache = img_resized - # Ensure gloss-inverted maps are float32 - if standard_type_for_checks == 'ROUGH' and is_gloss_source and final_data_to_cache.dtype != np.float32: - log.debug(f"Ensuring gloss-inverted ROUGH map ({map_type}) is float32.") - final_data_to_cache = final_data_to_cache.astype(np.float32) - - log.debug(f"CACHING result for {cache_key}. 
Shape: {final_data_to_cache.shape}, Dtype: {final_data_to_cache.dtype}") - self.loaded_data_cache[cache_key] = (final_data_to_cache, source_dtype) - return final_data_to_cache, source_dtype - - except Exception as e: - log.error(f"Error in _load_and_transform_source for {source_path_abs.name} at {target_resolution_key}: {e}", exc_info=True) - # Cache None to prevent retrying on error for this specific key - self.loaded_data_cache[cache_key] = (None, None) - return None, None - - - def _save_image(self, image_data: np.ndarray, supplier_name: str, asset_name: str, current_map_identifier: str, resolution_key: str, source_info: dict, output_bit_depth_rule: str) -> Optional[Dict]: - """ - Handles saving an image NumPy array to a temporary file within the engine's temp_dir using token-based path generation. - Uses static configuration from self.config_obj for formats, quality, etc. - The 'maptype' token for the filename is derived based on standard_type and variants. - - Args: - image_data: NumPy array containing the image data to save. - supplier_name: The effective supplier name for the asset. - asset_name: The name of the asset. - current_map_identifier: The map type being saved (e.g., "MAP_COL", "MAP_ROUGH-1", "NRMRGH"). This is item_type_override or merged map type. - resolution_key: The resolution key (e.g., "4K"). - source_info: Dictionary containing details about the source(s). - output_bit_depth_rule: Rule for determining output bit depth. - - Returns: - A dictionary containing details of the saved file or None if saving failed. 
- """ - if cv2 is None or np is None: - log.error("OpenCV or NumPy not available for image saving.") - return None - if image_data is None: - log.error(f"Cannot save image for {current_map_identifier} ({resolution_key}): image_data is None.") - return None - if not self.temp_dir or not self.temp_dir.exists(): - log.error(f"Cannot save image for {current_map_identifier} ({resolution_key}): Engine temp_dir is invalid.") - return None - - try: - h, w = image_data.shape[:2] - current_dtype = image_data.dtype - log.debug(f"Saving {current_map_identifier} ({resolution_key}) for asset '{asset_name}'. Input shape: {image_data.shape}, dtype: {current_dtype}") - - config = self.config_obj - primary_fmt_16, fallback_fmt_16 = config.get_16bit_output_formats() - fmt_8bit_config = config.get_8bit_output_format() - threshold = config.resolution_threshold_for_jpg - force_lossless_map_types = config.force_lossless_map_types - jpg_quality = config.jpg_quality - png_compression_level = config._core_settings.get('PNG_COMPRESSION_LEVEL', 6) - image_resolutions = config.image_resolutions - output_directory_pattern = config.output_directory_pattern - output_filename_pattern = config.output_filename_pattern - - # --- 1. Determine Output Bit Depth --- - source_bpc = source_info.get('source_bit_depth', 8) - max_input_bpc = source_info.get('max_input_bit_depth', source_bpc) - output_dtype_target, output_bit_depth = np.uint8, 8 - - if output_bit_depth_rule == 'force_8bit': output_dtype_target, output_bit_depth = np.uint8, 8 - elif output_bit_depth_rule == 'force_16bit': output_dtype_target, output_bit_depth = np.uint16, 16 - elif output_bit_depth_rule == 'respect': - if source_bpc == 16: output_dtype_target, output_bit_depth = np.uint16, 16 - elif output_bit_depth_rule == 'respect_inputs': - if max_input_bpc == 16: output_dtype_target, output_bit_depth = np.uint16, 16 - else: - log.warning(f"Unknown output_bit_depth_rule '{output_bit_depth_rule}'. 
Defaulting to 8-bit.") - output_dtype_target, output_bit_depth = np.uint8, 8 - log.debug(f"Target output bit depth: {output_bit_depth}-bit for {current_map_identifier}") - - # --- 2. Determine Output Format --- - output_format, output_ext, save_params, needs_float16 = "", "", [], False - # Use the (potentially suffixed) standard_type for lossless check - base_standard_type_for_lossless_check = self._get_base_map_type(current_map_identifier) # "COL", "NRM", "DISP-Detail" -> "DISP" - - # Check if the pure standard type (without suffix) is in force_lossless_map_types - pure_standard_type = self._get_ftd_key_from_override(base_standard_type_for_lossless_check) # Get FTD key if possible - std_type_from_ftd = None - if pure_standard_type and pure_standard_type in self.config_obj.FILE_TYPE_DEFINITIONS: - std_type_from_ftd = self.config_obj.FILE_TYPE_DEFINITIONS[pure_standard_type].get("standard_type") - - # Use std_type_from_ftd if available and non-empty, else base_standard_type_for_lossless_check - check_type_for_lossless = std_type_from_ftd if std_type_from_ftd else base_standard_type_for_lossless_check - - force_lossless = check_type_for_lossless in force_lossless_map_types - original_extension = source_info.get('original_extension', '.png') - involved_extensions = source_info.get('involved_extensions', {original_extension}) - target_dim_px = image_resolutions.get(resolution_key, 0) - - if force_lossless: - log.debug(f"Format forced to lossless for map type '{current_map_identifier}' (checked as '{check_type_for_lossless}').") - if output_bit_depth == 16: - output_format = primary_fmt_16 - if output_format.startswith("exr"): output_ext, needs_float16 = ".exr", True; save_params.extend([cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF]) - else: output_format = fallback_fmt_16 if fallback_fmt_16 == "png" else "png"; output_ext = ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - else: output_format, output_ext = "png", ".png"; save_params 
= [cv2.IMWRITE_PNG_COMPRESSION, png_compression_level] - elif output_bit_depth == 8 and target_dim_px >= threshold: - output_format = 'jpg'; output_ext = '.jpg'; save_params.extend([cv2.IMWRITE_JPEG_QUALITY, jpg_quality]) - else: - highest_format_str = 'jpg' - if '.exr' in involved_extensions: highest_format_str = 'exr' - elif '.tif' in involved_extensions: highest_format_str = 'tif' - elif '.png' in involved_extensions: highest_format_str = 'png' - - if highest_format_str == 'exr': - if output_bit_depth == 16: output_format, output_ext, needs_float16 = "exr", ".exr", True; save_params.extend([cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF]) - else: output_format, output_ext = "png", ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - elif highest_format_str == 'tif' or highest_format_str == 'png': - if output_bit_depth == 16: - output_format = primary_fmt_16 - if output_format.startswith("exr"): output_ext, needs_float16 = ".exr", True; save_params.extend([cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF]) - else: output_format = "png"; output_ext = ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - else: output_format, output_ext = "png", ".png"; save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - else: - output_format = fmt_8bit_config; output_ext = f".{output_format}" - if output_format == "png": save_params.extend([cv2.IMWRITE_PNG_COMPRESSION, png_compression_level]) - elif output_format == "jpg": save_params.extend([cv2.IMWRITE_JPEG_QUALITY, jpg_quality]) - - if output_format == "jpg" and output_bit_depth == 16: - log.warning(f"Output format JPG, but target 16-bit. Forcing 8-bit for {current_map_identifier}.") - output_dtype_target, output_bit_depth = np.uint8, 8 - log.debug(f"Determined save format for {current_map_identifier}: {output_format}, ext: {output_ext}, bit_depth: {output_bit_depth}") - - # --- 3. 
Final Data Type Conversion --- - img_to_save = image_data.copy() - if output_dtype_target == np.uint8 and img_to_save.dtype != np.uint8: - if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0 * 255.0).astype(np.uint8) - elif img_to_save.dtype in [np.float16, np.float32]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 255.0).astype(np.uint8) - else: img_to_save = img_to_save.astype(np.uint8) - elif output_dtype_target == np.uint16 and img_to_save.dtype != np.uint16: - if img_to_save.dtype == np.uint8: img_to_save = img_to_save.astype(np.uint16) * 257 - elif img_to_save.dtype in [np.float16, np.float32]: img_to_save = (np.clip(img_to_save, 0.0, 1.0) * 65535.0).astype(np.uint16) - else: img_to_save = img_to_save.astype(np.uint16) - if needs_float16 and img_to_save.dtype != np.float16: - if img_to_save.dtype == np.uint16: img_to_save = (img_to_save.astype(np.float32) / 65535.0).astype(np.float16) - elif img_to_save.dtype == np.uint8: img_to_save = (img_to_save.astype(np.float32) / 255.0).astype(np.float16) - elif img_to_save.dtype == np.float32: img_to_save = img_to_save.astype(np.float16) - else: log.warning(f"Cannot convert {img_to_save.dtype} to float16 for EXR save."); return None - - img_save_final = img_to_save - if len(img_to_save.shape) == 3 and img_to_save.shape[2] == 3 and not output_format.startswith("exr"): - try: img_save_final = cv2.cvtColor(img_to_save, cv2.COLOR_RGB2BGR) - except Exception as cvt_err: log.error(f"RGB->BGR conversion failed for {current_map_identifier}: {cvt_err}. 
Saving original."); - - filename_map_type_token: str - is_merged_map = any(rule.get("output_map_type") == current_map_identifier for rule in self.config_obj.map_merge_rules) - - if is_merged_map: - filename_map_type_token = current_map_identifier # e.g., "NRMRGH" - else: - base_ftd_key = self._get_ftd_key_from_override(current_map_identifier) # e.g., "MAP_COL" - if base_ftd_key: - definition = self.config_obj.FILE_TYPE_DEFINITIONS.get(base_ftd_key) - if definition and "standard_type" in definition: - standard_type_alias = definition["standard_type"] # e.g., "COL" - if standard_type_alias: # Ensure not empty - variant_suffix = self._get_map_variant_suffix(current_map_identifier, base_ftd_key) # e.g., "-1" or "" - if standard_type_alias in self.config_obj.respect_variant_map_types: - filename_map_type_token = standard_type_alias + variant_suffix # e.g., "COL-1" - else: - filename_map_type_token = standard_type_alias # e.g., "COL" - else: - log.warning(f"Empty standard_type for FTD key '{base_ftd_key}'. Using identifier '{current_map_identifier}' for maptype token.") - filename_map_type_token = current_map_identifier - else: - log.warning(f"No definition or standard_type for FTD key '{base_ftd_key}'. Using identifier '{current_map_identifier}' for maptype token.") - filename_map_type_token = current_map_identifier - else: - log.warning(f"Could not derive FTD key from '{current_map_identifier}'. Using it directly for maptype token.") - filename_map_type_token = current_map_identifier - - log.debug(f"Filename maptype token for '{current_map_identifier}' is '{filename_map_type_token}'") - - # --- 6. 
Construct Path using Token Pattern & Save --- - token_data = { - "supplier": _sanitize_filename(supplier_name), - "assetname": _sanitize_filename(asset_name), - "maptype": filename_map_type_token, - "resolution": resolution_key, - "width": w, "height": h, - "bitdepth": output_bit_depth, - "ext": output_ext.lstrip('.') - } - if hasattr(self, 'current_incrementing_value') and self.current_incrementing_value is not None: - token_data['incrementingvalue'] = self.current_incrementing_value - if hasattr(self, 'current_sha5_value') and self.current_sha5_value is not None: - token_data['sha5'] = self.current_sha5_value - - try: - relative_dir_path_str = generate_path_from_pattern(output_directory_pattern, token_data) - filename_str = generate_path_from_pattern(output_filename_pattern, token_data) - full_relative_path_str = str(Path(relative_dir_path_str) / filename_str) - except Exception as path_gen_err: - log.error(f"Failed to generate output path for {current_map_identifier} with data {token_data}: {path_gen_err}", exc_info=True) - return None - - output_path_temp = self.temp_dir / full_relative_path_str - log.debug(f"Attempting to save {current_map_identifier} to temporary path: {output_path_temp}") - - try: - output_path_temp.parent.mkdir(parents=True, exist_ok=True) - except Exception as mkdir_err: - log.error(f"Failed to create temporary directory {output_path_temp.parent}: {mkdir_err}", exc_info=True) - return None - - saved_successfully = False - actual_format_saved = output_format - try: - cv2.imwrite(str(output_path_temp), img_save_final, save_params) - saved_successfully = True - log.info(f" > Saved {current_map_identifier} ({resolution_key}, {output_bit_depth}-bit) as {output_format}") - except Exception as save_err: - log.error(f"Save failed ({output_format}) for {current_map_identifier} {resolution_key}: {save_err}") - if output_bit_depth == 16 and output_format.startswith("exr") and fallback_fmt_16 != output_format and fallback_fmt_16 == "png": - 
log.warning(f"Attempting fallback PNG save for {current_map_identifier} {resolution_key}") - actual_format_saved = "png"; output_ext = ".png" - # Regenerate path with .png extension for fallback - token_data_fallback = token_data.copy() - token_data_fallback["ext"] = "png" - try: - # Regenerate directory and filename separately for fallback - relative_dir_path_str_fb = generate_path_from_pattern(output_directory_pattern, token_data_fallback) - filename_str_fb = generate_path_from_pattern(output_filename_pattern, token_data_fallback) - full_relative_path_str_fb = str(Path(relative_dir_path_str_fb) / filename_str_fb) - output_path_temp = self.temp_dir / full_relative_path_str_fb # Update temp path for fallback - output_path_temp.parent.mkdir(parents=True, exist_ok=True) - except Exception as path_gen_err_fb: - log.error(f"Failed to generate fallback PNG path: {path_gen_err_fb}", exc_info=True) - return None - - save_params_fallback = [cv2.IMWRITE_PNG_COMPRESSION, png_compression_level] - img_fallback = None; target_fallback_dtype = np.uint16 - - if img_to_save.dtype == np.float16: - img_scaled = np.clip(img_to_save.astype(np.float32) * 65535.0, 0, 65535) - img_fallback = img_scaled.astype(target_fallback_dtype) - elif img_to_save.dtype == target_fallback_dtype: img_fallback = img_to_save - else: log.error(f"Cannot convert {img_to_save.dtype} for PNG fallback."); return None - - img_fallback_save_final = img_fallback - is_3_channel_fallback = len(img_fallback.shape) == 3 and img_fallback.shape[2] == 3 - if is_3_channel_fallback: # PNG is non-EXR - log.debug(f"Converting RGB to BGR for fallback PNG save {current_map_identifier} ({resolution_key})") - try: img_fallback_save_final = cv2.cvtColor(img_fallback, cv2.COLOR_RGB2BGR) - except Exception as cvt_err_fb: log.error(f"Failed RGB->BGR conversion for fallback PNG: {cvt_err_fb}. 
Saving original."); - - try: - cv2.imwrite(str(output_path_temp), img_fallback_save_final, save_params_fallback) - saved_successfully = True - log.info(f" > Saved {current_map_identifier} ({resolution_key}) using fallback PNG") - except Exception as fallback_err: - log.error(f"Fallback PNG save failed for {current_map_identifier} {resolution_key}: {fallback_err}", exc_info=True) - else: - log.error(f"No suitable fallback available or applicable for failed save of {current_map_identifier} ({resolution_key}) as {output_format}.") - - - # --- 6. Return Result --- - if saved_successfully: - # Return the full relative path string generated by the patterns - final_relative_path_str = full_relative_path_str_fb if actual_format_saved == "png" and output_format.startswith("exr") else full_relative_path_str - return { - "path": final_relative_path_str, # Store relative path string - "resolution": resolution_key, - "width": w, "height": h, - "bit_depth": output_bit_depth, - "format": actual_format_saved - } - else: - return None # Indicate save failure - - except Exception as e: - log.error(f"Unexpected error in _save_image for {current_map_identifier} ({resolution_key}): {e}", exc_info=True) - return None - - - def _process_individual_maps(self, asset_rule: AssetRule, workspace_path: Path, current_asset_metadata: Dict) -> Tuple[Dict[str, Dict[str, Dict]], Dict[str, Dict], str]: - """ - Processes, resizes, and saves individual map files for a specific asset - based on the provided AssetRule and static configuration. - - Args: - asset_rule: The AssetRule object containing file rules for this asset. - workspace_path: Path to the directory containing the source files. - current_asset_metadata: Mutable metadata dictionary for the current asset (updated directly). - - Returns: - Tuple containing: - - processed_maps_details_asset: Dict mapping map_type to resolution details. 
- - image_stats_asset: Dict mapping map_type to calculated image statistics (also added to current_asset_metadata). - - aspect_ratio_change_string_asset: String indicating aspect ratio change (also added to current_asset_metadata). - """ - if not self.temp_dir: raise ProcessingEngineError("Engine workspace (temp_dir) not setup.") - asset_name = asset_rule.asset_name - log.info(f"Processing individual map files for asset '{asset_name}'...") - - # Initialize results specific to this asset - processed_maps_details_asset: Dict[str, Dict[str, Dict]] = defaultdict(dict) - image_stats_asset: Dict[str, Dict] = {} # Local dict for stats - map_details_asset: Dict[str, Dict] = {} # Store details like source bit depth, gloss inversion - aspect_ratio_change_string_asset: str = "N/A" - - # --- Settings retrieval from static config --- - resolutions = self.config_obj.image_resolutions - stats_res_key = self.config_obj.calculate_stats_resolution - stats_target_dim = resolutions.get(stats_res_key) - if not stats_target_dim: log.warning(f"Stats resolution key '{stats_res_key}' not found in config. 
Stats skipped for '{asset_name}'.") - base_name = asset_name # Use the asset name from the rule - - # --- Aspect Ratio Calculation Setup --- - first_map_rule_for_aspect = next((fr for fr in asset_rule.files if fr.item_type_override is not None and fr.item_type_override != "EXTRA"), None) # Exclude EXTRA - orig_w_aspect, orig_h_aspect = None, None - if first_map_rule_for_aspect: - first_res_key = next(iter(resolutions)) # Use first resolution key - source_path_abs = workspace_path / first_map_rule_for_aspect.file_path - temp_img_for_dims, _ = self._load_and_transform_source( - source_path_abs, - first_map_rule_for_aspect.item_type_override, - first_res_key, - is_gloss_source=False # Not relevant for dimension check - # self.loaded_data_cache is used internally by the method - ) - if temp_img_for_dims is not None: - orig_h_aspect, orig_w_aspect = temp_img_for_dims.shape[:2] - log.debug(f"Got original dimensions ({orig_w_aspect}x{orig_h_aspect}) for aspect ratio calculation from {first_map_rule_for_aspect.file_path}") - else: - log.warning(f"Could not load image {first_map_rule_for_aspect.file_path} to get original dimensions for aspect ratio.") - else: - log.warning("No map files found in AssetRule, cannot calculate aspect ratio string.") - - - # --- Process Each Individual Map defined in the AssetRule --- - for file_rule in asset_rule.files: - should_skip = ( - file_rule.item_type_override is None or - file_rule.item_type_override == "EXTRA" or - getattr(file_rule, 'skip_processing', False) or - file_rule.item_type == "FILE_IGNORE" # Consolidated check: Use item_type for base classification - ) - if should_skip: - skip_reason = [] - if file_rule.item_type_override is None: skip_reason.append("No ItemTypeOverride") - if file_rule.item_type_override == "EXTRA": skip_reason.append("Explicitly EXTRA type") - if getattr(file_rule, 'skip_processing', False): skip_reason.append("SkipProcessing flag set") - if file_rule.item_type == "FILE_IGNORE": 
skip_reason.append("ItemType is FILE_IGNORE") - - log.debug(f"Skipping individual processing for {file_rule.file_path} ({', '.join(skip_reason)})") - continue # Skip to the next file_rule - - # --- Proceed with processing for this file_rule --- - source_path_rel = Path(file_rule.file_path) # Ensure it's a Path object - # IMPORTANT: Use the ENGINE's workspace_path (self.temp_dir) for loading, - # as individual maps should have been copied there by the caller (ProcessingTask) - # Correction: _process_individual_maps receives the *engine's* temp_dir as workspace_path - source_path_abs = workspace_path / source_path_rel - # Store original rule-based type and gloss flag - original_item_type_override = file_rule.item_type_override - # original_is_gloss_source_context removed as it's part of deprecated logic - - # --- New gloss map filename logic --- - filename_str = source_path_rel.name - is_filename_gloss_map = "map_gloss" in filename_str.lower() - - effective_map_type_for_processing = original_item_type_override - effective_is_gloss_source_for_load = False # Default to False, new filename logic will set to True if applicable - map_was_retagged_from_filename_gloss = False - - if is_filename_gloss_map: - log.info(f"-- Asset '{asset_name}': Filename '{filename_str}' contains 'MAP_GLOSS'. Applying new gloss handling. 
Original type from rule: '{original_item_type_override}'.") - effective_is_gloss_source_for_load = True # Force inversion if type becomes ROUGH (handled by filename logic below) - map_was_retagged_from_filename_gloss = True - - # Attempt to retag original_item_type_override from GLOSS to ROUGH, preserving MAP_ prefix case and suffix - if original_item_type_override and "gloss" in original_item_type_override.lower(): - match = re.match(r"(MAP_)(GLOSS)((?:[-_]\w+)*)", original_item_type_override, re.IGNORECASE) - if match: - prefix = match.group(1) # e.g., "MAP_" - suffix = match.group(3) if match.group(3) else "" # e.g., "-variant1_detail" or "" - effective_map_type_for_processing = f"{prefix}ROUGH{suffix}" - log.debug(f"Retagged filename gloss: original FTD key '{original_item_type_override}' to '{effective_map_type_for_processing}' for processing.") - else: - log.warning(f"Filename gloss '{original_item_type_override}' matched 'gloss' but not the expected 'MAP_GLOSS' pattern for precise retagging. Defaulting to 'MAP_ROUGH'.") - effective_map_type_for_processing = "MAP_ROUGH" - else: - # If original_item_type_override was None or didn't contain "gloss" (e.g., file was untyped but filename had MAP_GLOSS) - log.debug(f"Filename '{filename_str}' identified as gloss, but original type override ('{original_item_type_override}') was not GLOSS-specific. 
Setting type to 'MAP_ROUGH' for processing.") - effective_map_type_for_processing = "MAP_ROUGH" - # --- End of new gloss map filename logic --- - - log.debug(f"DEBUG POST-RETAG: effective_map_type_for_processing='{effective_map_type_for_processing}' for file '{source_path_rel.name}'") - original_extension = source_path_rel.suffix.lower() # Get from path - - log.info(f"-- Asset '{asset_name}': Processing Individual Map: {effective_map_type_for_processing} (Source: {source_path_rel.name}, EffectiveIsGlossSourceForLoad: {effective_is_gloss_source_for_load}, OriginalRuleItemType: {original_item_type_override}) --") - - current_map_details = {} # Old "derived_from_gloss_context" removed - if map_was_retagged_from_filename_gloss: - current_map_details["derived_from_gloss_filename"] = True - current_map_details["original_item_type_override_before_gloss_filename_retag"] = original_item_type_override - current_map_details["effective_item_type_override_after_gloss_filename_retag"] = effective_map_type_for_processing - source_bit_depth_found = None # Track if we've found the bit depth for this map type - - try: - # --- Loop through target resolutions from static config --- - for res_key, target_dim_px in resolutions.items(): - log.debug(f"Processing {effective_map_type_for_processing} for resolution: {res_key}...") - - # --- 1. 
Load and Transform Source (using helper + cache) --- - # This now only runs for files that have an item_type_override - img_resized, source_dtype = self._load_and_transform_source( - source_path_abs=source_path_abs, - map_type=effective_map_type_for_processing, # Use effective type - target_resolution_key=res_key, - is_gloss_source=effective_is_gloss_source_for_load # Pass the flag determined by filename logic - # self.loaded_data_cache is used internally - ) - - if img_resized is None: - # This warning now correctly indicates a failure for a map we *intended* to process - log.warning(f"Failed to load/transform source map {source_path_rel} (processed as {effective_map_type_for_processing}) for {res_key}. Skipping resolution.") - continue # Skip this resolution - - # Store source bit depth once found - if source_dtype is not None and source_bit_depth_found is None: - source_bit_depth_found = 16 if source_dtype == np.uint16 else (8 if source_dtype == np.uint8 else 8) # Default non-uint to 8 - current_map_details["source_bit_depth"] = source_bit_depth_found - log.debug(f"Stored source bit depth for {effective_map_type_for_processing}: {source_bit_depth_found}") - - # --- 2. Calculate Stats (if applicable) --- - if res_key == stats_res_key and stats_target_dim: - log.debug(f"Calculating stats for {effective_map_type_for_processing} using {res_key} image...") - stats = _calculate_image_stats(img_resized) - if stats: image_stats_asset[effective_map_type_for_processing] = stats # Store locally first - else: log.warning(f"Stats calculation failed for {effective_map_type_for_processing} at {res_key}.") - - # --- 3. 
Calculate Aspect Ratio Change String (once per asset) --- - if aspect_ratio_change_string_asset == "N/A" and orig_w_aspect is not None and orig_h_aspect is not None: - target_w_aspect, target_h_aspect = img_resized.shape[1], img_resized.shape[0] # Use current resized dims - try: - aspect_string = _normalize_aspect_ratio_change(orig_w_aspect, orig_h_aspect, target_w_aspect, target_h_aspect) - aspect_ratio_change_string_asset = aspect_string - log.debug(f"Stored aspect ratio change string using {res_key}: '{aspect_string}'") - except Exception as aspect_err: - log.error(f"Failed to calculate aspect ratio change string using {res_key}: {aspect_err}", exc_info=True) - aspect_ratio_change_string_asset = "Error" - elif aspect_ratio_change_string_asset == "N/A": - aspect_ratio_change_string_asset = "Unknown" # Set to unknown if original dims failed - - # --- 4. Save Image (using helper) --- - source_info = { - 'original_extension': original_extension, - 'source_bit_depth': source_bit_depth_found or 8, # Use found depth or default - 'involved_extensions': {original_extension} # Only self for individual maps - } - # Get bit depth rule solely from the static configuration using the correct method signature - bit_depth_rule = self.config_obj.get_bit_depth_rule(effective_map_type_for_processing) # Use effective type - - # Determine the map_type to use for saving (use effective_map_type_for_processing) - save_map_type_for_filename = effective_map_type_for_processing - # If effective_map_type_for_processing is None, this file shouldn't be saved as an individual map. - # This case should ideally be caught by the skip logic earlier, but adding a check here for safety. 
- if save_map_type_for_filename is None: - log.warning(f"Skipping save for {file_rule.file_path}: effective_map_type_for_processing is None.") - continue # Skip saving this file - - # Get supplier name from metadata (set in process method) - supplier_name = current_asset_metadata.get("supplier_name", "UnknownSupplier") - - save_result = self._save_image( - image_data=img_resized, - supplier_name=supplier_name, - asset_name=base_name, - current_map_identifier=save_map_type_for_filename, # Pass the effective map type to be saved - resolution_key=res_key, - source_info=source_info, - output_bit_depth_rule=bit_depth_rule - ) - - # --- 5. Store Result --- - if save_result: - processed_maps_details_asset.setdefault(effective_map_type_for_processing, {})[res_key] = save_result - # Update overall map detail (e.g., final format) if needed - current_map_details["output_format"] = save_result.get("format") - else: - log.error(f"Failed to save {effective_map_type_for_processing} at {res_key}.") - processed_maps_details_asset.setdefault(effective_map_type_for_processing, {})[f'error_{res_key}'] = "Save failed" - - - except Exception as map_proc_err: - log.error(f"Failed processing map {effective_map_type_for_processing} from {source_path_rel.name}: {map_proc_err}", exc_info=True) - processed_maps_details_asset.setdefault(effective_map_type_for_processing, {})['error'] = str(map_proc_err) - - # Store collected details for this map type (using effective_map_type_for_processing as the key) - map_details_asset[effective_map_type_for_processing] = current_map_details - - # --- Final Metadata Updates --- - # Update the passed-in current_asset_metadata dictionary directly - current_asset_metadata["map_details"] = map_details_asset - current_asset_metadata["image_stats_1k"] = image_stats_asset # Add collected stats - current_asset_metadata["aspect_ratio_change_string"] = aspect_ratio_change_string_asset # Add collected aspect string - - log.info(f"Finished processing individual map 
files for asset '{asset_name}'.") - # Return details needed for organization, stats and aspect ratio are updated in-place - return processed_maps_details_asset, image_stats_asset, aspect_ratio_change_string_asset - - - def _merge_maps(self, asset_rule: AssetRule, workspace_path: Path, processed_maps_details_asset: Dict[str, Dict[str, Dict]], current_asset_metadata: Dict) -> Dict[str, Dict[str, Dict]]: - """ - Merges channels from different source maps for a specific asset based on static - merge rules in configuration, using explicit file paths from the AssetRule. - - Args: - asset_rule: The AssetRule object containing file rules for this asset. - workspace_path: Path to the directory containing the source files. - processed_maps_details_asset: Details of processed maps (used to find common resolutions). - current_asset_metadata: Mutable metadata dictionary for the current asset (updated for stats). - - - Returns: - Dict[str, Dict[str, Dict]]: Details of the merged maps created for this asset. - """ - if not self.temp_dir: raise ProcessingEngineError("Engine workspace (temp_dir) not setup.") - asset_name = asset_rule.asset_name - # Get merge rules from static config - merge_rules = self.config_obj.map_merge_rules - log.info(f"Asset '{asset_name}': Applying {len(merge_rules)} map merging rule(s) from static config...") - - # Initialize results for this asset - merged_maps_details_asset: Dict[str, Dict[str, Dict]] = defaultdict(dict) - - for rule_index, rule in enumerate(merge_rules): - output_map_type = rule.get("output_map_type") - inputs_mapping = rule.get("inputs") # e.g., {"R": "AO", "G": "ROUGH", "B": "METAL"} - defaults = rule.get("defaults", {}) - rule_bit_depth = rule.get("output_bit_depth", "respect_inputs") - - if not output_map_type or not inputs_mapping: - log.warning(f"Asset '{asset_name}': Skipping static merge rule #{rule_index+1}: Missing 'output_map_type' or 'inputs'. 
Rule: {rule}") - continue - - log.info(f"-- Asset '{asset_name}': Applying merge rule for '{output_map_type}' --") - - # --- Find required SOURCE FileRules within the AssetRule --- - required_input_file_rules: Dict[str, FileRule] = {} # map_type -> FileRule - possible_to_find_sources = True - input_types_needed = set(inputs_mapping.values()) # e.g., {"AO", "ROUGH", "METAL"} - - for input_type in input_types_needed: - found_rule_for_type = False - # Search in the asset_rule's files - for file_rule in asset_rule.files: - # Check if the file_rule's item_type_override matches the required input type - item_override = getattr(file_rule, 'item_type_override', None) - item_base_type = getattr(file_rule, 'item_type', None) # Get base type for ignore check - - # Check if override matches the required input type AND the base type is not FILE_IGNORE - if item_override == input_type and item_base_type != "FILE_IGNORE": - # Found a valid match based on item_type_override and not ignored - required_input_file_rules[input_type] = file_rule - found_rule_for_type = True - # Update log message (see step 2) - log.debug(f"Found source FileRule for merge input '{input_type}': {file_rule.file_path} (ItemTypeOverride: {item_override}, ItemType: {item_base_type})") - break # Take the first valid match found - if not found_rule_for_type: - log.warning(f"Asset '{asset_name}': Required source FileRule for input map type '{input_type}' not found in AssetRule. 
Cannot perform merge for '{output_map_type}'.") - possible_to_find_sources = False - break - - if not possible_to_find_sources: - continue # Skip this merge rule - - # --- Determine common resolutions based on *processed* maps --- - # This still seems the most reliable way to know which sizes are actually available - possible_resolutions_per_input: List[Set[str]] = [] - resolutions_config = self.config_obj.image_resolutions # Static config - - for input_type in input_types_needed: - # Find the corresponding processed map details (might be ROUGH-1, ROUGH-2 etc.) - processed_details_for_input = None - input_file_rule = required_input_file_rules.get(input_type) - if input_file_rule: - processed_details_for_input = processed_maps_details_asset.get(input_file_rule.item_type_override) # Use the correct attribute - - if processed_details_for_input: - res_keys = {res for res, details in processed_details_for_input.items() if isinstance(details, dict) and 'error' not in details} - if not res_keys: - log.warning(f"Asset '{asset_name}': Input map type '{input_type}' (using {input_file_rule.item_type_override if input_file_rule else 'N/A'}) for merge rule '{output_map_type}' has no successfully processed resolutions.") # Use item_type_override - possible_resolutions_per_input = [] # Invalidate if any input has no resolutions - break - possible_resolutions_per_input.append(res_keys) - else: - # If the input map wasn't processed individually (used_for_merge_only=True) - # Assume all configured resolutions are potentially available. Loading will handle skips. - log.debug(f"Input map type '{input_type}' for merge rule '{output_map_type}' might not have been processed individually. Assuming all configured resolutions possible.") - possible_resolutions_per_input.append(set(resolutions_config.keys())) - - - if not possible_resolutions_per_input: - log.warning(f"Asset '{asset_name}': Cannot determine common resolutions for '{output_map_type}'. 
Skipping rule.") - continue - - common_resolutions = set.intersection(*possible_resolutions_per_input) - - if not common_resolutions: - log.warning(f"Asset '{asset_name}': No common resolutions found among required inputs {input_types_needed} for merge rule '{output_map_type}'. Skipping rule.") - continue - log.debug(f"Asset '{asset_name}': Common resolutions for '{output_map_type}': {common_resolutions}") - - # --- Loop through common resolutions --- - res_order = {k: resolutions_config[k] for k in common_resolutions if k in resolutions_config} - if not res_order: - log.warning(f"Asset '{asset_name}': Common resolutions {common_resolutions} do not match config. Skipping merge for '{output_map_type}'.") - continue - - sorted_res_keys = sorted(res_order.keys(), key=lambda k: res_order[k], reverse=True) - base_name = asset_name # Use current asset's name - - for current_res_key in sorted_res_keys: - log.debug(f"Asset '{asset_name}': Merging '{output_map_type}' for resolution: {current_res_key}") - try: - loaded_inputs_data = {} # map_type -> loaded numpy array - source_info_for_save = {'involved_extensions': set(), 'max_input_bit_depth': 8} - - # --- Load required SOURCE maps using helper --- - possible_to_load = True - target_channels = list(inputs_mapping.keys()) # e.g., ['R', 'G', 'B'] - - for map_type_needed in input_types_needed: # e.g., {"AO", "ROUGH", "METAL"} - file_rule = required_input_file_rules.get(map_type_needed) - if not file_rule: - log.error(f"Internal Error: FileRule missing for '{map_type_needed}' during merge load.") - possible_to_load = False; break - - source_path_rel_str = file_rule.file_path # Keep original string if needed - source_path_rel = Path(source_path_rel_str) # Convert to Path object - source_path_abs = workspace_path / source_path_rel - original_ext = source_path_rel.suffix.lower() # Now works on Path object - source_info_for_save['involved_extensions'].add(original_ext) - - # Determine if this specific source for merge should be 
treated as gloss - # based on its filename, aligning with the new primary rule. - filename_str_for_merge_input = source_path_rel.name - is_gloss_for_merge_input = "map_gloss" in filename_str_for_merge_input.lower() - if is_gloss_for_merge_input: - log.debug(f"Merge input '{filename_str_for_merge_input}' for '{map_type_needed}' identified as gloss by filename. Will pass is_gloss_source=True.") - - log.debug(f"Loading source '{source_path_rel}' for merge input '{map_type_needed}' at {current_res_key} (is_gloss_for_merge_input: {is_gloss_for_merge_input})") - img_resized, source_dtype = self._load_and_transform_source( - source_path_abs=source_path_abs, - map_type=file_rule.item_type_override, # Use the specific type override from rule (e.g., ROUGH-1) - target_resolution_key=current_res_key, - is_gloss_source=is_gloss_for_merge_input # Pass determined gloss state - # self.loaded_data_cache used internally - ) - - if img_resized is None: - log.warning(f"Asset '{asset_name}': Failed to load/transform source '{source_path_rel}' for merge input '{map_type_needed}' at {current_res_key}. 
Skipping resolution.") - possible_to_load = False; break - - loaded_inputs_data[map_type_needed] = img_resized # Store by base type (AO, ROUGH) - - # Track max source bit depth - if source_dtype == np.uint16: - source_info_for_save['max_input_bit_depth'] = max(source_info_for_save['max_input_bit_depth'], 16) - # Add other dtype checks if needed - - if not possible_to_load: continue - - # --- Calculate Stats for ROUGH source if used and at stats resolution --- - stats_res_key = self.config_obj.calculate_stats_resolution - if current_res_key == stats_res_key: - log.debug(f"Asset '{asset_name}': Checking for ROUGH source stats for '{output_map_type}' at {stats_res_key}") - for target_channel, source_map_type in inputs_mapping.items(): - if source_map_type == 'ROUGH' and source_map_type in loaded_inputs_data: - log.debug(f"Asset '{asset_name}': Calculating stats for ROUGH source (mapped to channel '{target_channel}') for '{output_map_type}' at {stats_res_key}") - rough_image_data = loaded_inputs_data[source_map_type] - rough_stats = _calculate_image_stats(rough_image_data) - if rough_stats: - # Update the mutable metadata dict passed in - stats_dict = current_asset_metadata.setdefault("merged_map_channel_stats", {}).setdefault(output_map_type, {}).setdefault(target_channel, {}) - stats_dict[stats_res_key] = rough_stats - log.debug(f"Asset '{asset_name}': Stored ROUGH stats for '{output_map_type}' channel '{target_channel}' at {stats_res_key}: {rough_stats}") - else: - log.warning(f"Asset '{asset_name}': Failed to calculate ROUGH stats for '{output_map_type}' channel '{target_channel}' at {stats_res_key}.") - - - # --- Determine dimensions --- - first_map_type = next(iter(loaded_inputs_data)) - h, w = loaded_inputs_data[first_map_type].shape[:2] - num_target_channels = len(target_channels) - - # --- Prepare and Merge Channels --- - merged_channels_float32 = [] - for target_channel in target_channels: # e.g., 'R', 'G', 'B' - source_map_type = 
inputs_mapping.get(target_channel) # e.g., "AO", "ROUGH", "METAL" - channel_data_float32 = None - - if source_map_type and source_map_type in loaded_inputs_data: - img_input = loaded_inputs_data[source_map_type] # Get the loaded NumPy array - - # Ensure input is float32 0-1 range for merging - if img_input.dtype == np.uint16: img_float = img_input.astype(np.float32) / 65535.0 - elif img_input.dtype == np.uint8: img_float = img_input.astype(np.float32) / 255.0 - elif img_input.dtype == np.float16: img_float = img_input.astype(np.float32) # Assume float16 is 0-1 - else: img_float = img_input.astype(np.float32) # Assume other floats are 0-1 - - num_source_channels = img_float.shape[2] if len(img_float.shape) == 3 else 1 - - # Extract the correct channel - if num_source_channels >= 3: - if target_channel == 'R': channel_data_float32 = img_float[:, :, 0] - elif target_channel == 'G': channel_data_float32 = img_float[:, :, 1] - elif target_channel == 'B': channel_data_float32 = img_float[:, :, 2] - elif target_channel == 'A' and num_source_channels == 4: channel_data_float32 = img_float[:, :, 3] - else: log.warning(f"Target channel '{target_channel}' invalid for 3/4 channel source '{source_map_type}'.") - elif num_source_channels == 1 or len(img_float.shape) == 2: - # If source is grayscale, use it for R, G, B, or A target channels - channel_data_float32 = img_float.reshape(h, w) - else: - log.warning(f"Unexpected shape {img_float.shape} for source '{source_map_type}'.") - - # Apply default if channel data couldn't be extracted - if channel_data_float32 is None: - default_val = defaults.get(target_channel) - if default_val is None: - raise ProcessingEngineError(f"Missing input/default for target channel '{target_channel}' in merge rule '{output_map_type}'.") - log.debug(f"Using default value {default_val} for target channel '{target_channel}' in '{output_map_type}'.") - channel_data_float32 = np.full((h, w), float(default_val), dtype=np.float32) - - 
merged_channels_float32.append(channel_data_float32) - - if not merged_channels_float32 or len(merged_channels_float32) != num_target_channels: - raise ProcessingEngineError(f"Channel count mismatch during merge for '{output_map_type}'. Expected {num_target_channels}, got {len(merged_channels_float32)}.") - - merged_image_float32 = cv2.merge(merged_channels_float32) - log.debug(f"Merged channels for '{output_map_type}' ({current_res_key}). Result shape: {merged_image_float32.shape}, dtype: {merged_image_float32.dtype}") - - # --- Save Merged Map using Helper --- - # Get supplier name from metadata (set in process method) - supplier_name = current_asset_metadata.get("supplier_name", "UnknownSupplier") - - save_result = self._save_image( - image_data=merged_image_float32, - supplier_name=supplier_name, - asset_name=base_name, - current_map_identifier=output_map_type, # Merged map type - resolution_key=current_res_key, - source_info=source_info_for_save, - output_bit_depth_rule=rule_bit_depth - ) - - # --- Record details locally --- - if save_result: - merged_maps_details_asset[output_map_type][current_res_key] = save_result - else: - log.error(f"Asset '{asset_name}': Failed to save merged map '{output_map_type}' at resolution '{current_res_key}'.") - merged_maps_details_asset.setdefault(output_map_type, {})[f'error_{current_res_key}'] = "Save failed via helper" - - - except Exception as merge_res_err: - log.error(f"Asset '{asset_name}': Failed merging '{output_map_type}' at resolution '{current_res_key}': {merge_res_err}", exc_info=True) - # Store error locally for this asset - merged_maps_details_asset.setdefault(output_map_type, {})[f'error_{current_res_key}'] = str(merge_res_err) - - log.info(f"Asset '{asset_name}': Finished applying map merging rules.") - # Return the details for this asset - return merged_maps_details_asset - - - def _generate_metadata_file(self, effective_supplier: str, asset_rule: AssetRule, current_asset_metadata: Dict, 
processed_maps_details_asset: Dict[str, Dict[str, Dict]], merged_maps_details_asset: Dict[str, Dict[str, Dict]]) -> Tuple[Path, str]: - """ - Gathers metadata for a specific asset based on the AssetRule and processing results, - and writes it to a temporary JSON file in the engine's temp_dir using separate directory/filename patterns. - - Args: - effective_supplier: The supplier name to use (override or original). - asset_rule: The AssetRule object for this asset. - current_asset_metadata: Base metadata dictionary (already contains name, category, archetype, stats, aspect ratio, map_details). - processed_maps_details_asset: Details of processed maps for this asset. - merged_maps_details_asset: Details of merged maps for this asset. - - Returns: - Tuple[Path, str]: A tuple containing the relative directory Path object and the filename string within the temp_dir. - """ - if not self.temp_dir: raise ProcessingEngineError("Engine workspace (temp_dir) not setup.") - asset_name = asset_rule.asset_name - if not asset_name: - log.warning("Asset name missing during metadata generation, file may be incomplete or incorrectly named.") - asset_name = "UnknownAsset_Metadata" # Fallback for filename - - log.info(f"Generating metadata file for asset '{asset_name}' (Supplier: {effective_supplier})...") - - # Start with the base metadata passed in (already contains name, category, archetype, stats, aspect, map_details) - final_metadata = current_asset_metadata.copy() - final_metadata["category"] = asset_rule.asset_type # Ensure standardized asset type is in metadata - - # Use the effective supplier passed as argument - final_metadata["supplier_name"] = effective_supplier # Already determined in process() - - # Populate map resolution details from processing results - final_metadata["processed_map_resolutions"] = {} - for map_type, res_dict in processed_maps_details_asset.items(): - keys = [res for res, d in res_dict.items() if isinstance(d, dict) and 'error' not in d] - if keys: 
final_metadata["processed_map_resolutions"][map_type] = sorted(keys) - - final_metadata["merged_map_resolutions"] = {} - for map_type, res_dict in merged_maps_details_asset.items(): - keys = [res for res, d in res_dict.items() if isinstance(d, dict) and 'error' not in d] - if keys: final_metadata["merged_map_resolutions"][map_type] = sorted(keys) - - # Determine maps present based on successful processing for this asset - final_metadata["maps_present"] = sorted(list(processed_maps_details_asset.keys())) - final_metadata["merged_maps"] = sorted(list(merged_maps_details_asset.keys())) - - # Determine shader features based on this asset's maps and rules - features = set() - map_details_asset = final_metadata.get("map_details", {}) # Get from metadata dict - for map_type, details in map_details_asset.items(): # map_type here is item_type_override like "MAP_COL-1" - base_standard_type = self._get_base_map_type(map_type) # Should give "COL" - # Check standard feature types - if base_standard_type in ["SSS", "FUZZ", "MASK", "TRANSMISSION", "EMISSION", "CLEARCOAT"]: - features.add(base_standard_type) - if details.get("derived_from_gloss"): features.add("InvertedGloss") - # Check if any resolution was saved as 16-bit - res_details = processed_maps_details_asset.get(map_type, {}) - if any(res_info.get("bit_depth") == 16 for res_info in res_details.values() if isinstance(res_info, dict)): - features.add(f"16bit_{base_standard_type}") - # Check merged maps for 16-bit output - for map_type, res_dict in merged_maps_details_asset.items(): # map_type here is "NRMRGH" - base_standard_type = self._get_base_map_type(map_type) # Should give "NRMRGH" - if any(res_info.get("bit_depth") == 16 for res_info in res_dict.values() if isinstance(res_info, dict)): - features.add(f"16bit_{base_standard_type}") - - final_metadata["shader_features"] = sorted(list(features)) - - # Determine source files in this asset's Extra folder based on FileRule category - source_files_in_extra_set = set() - 
for file_rule in asset_rule.files: - if file_rule.item_type_override is None: # Assume files without an assigned type are extra/ignored/unmatched - source_files_in_extra_set.add(str(file_rule.file_path)) - final_metadata["source_files_in_extra"] = sorted(list(source_files_in_extra_set)) - - # Add processing info - final_metadata["_processing_info"] = { - "preset_used": self.config_obj.preset_name, # Preset name comes from the engine's config - "timestamp_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - "input_source": effective_supplier, # Use the effective supplier - } - - # Sort lists just before writing - for key in ["maps_present", "merged_maps", "shader_features", "source_files_in_extra"]: - if key in final_metadata and isinstance(final_metadata[key], list): final_metadata[key].sort() - - # --- Generate Path and Save --- - # Get the new separate patterns from config - output_directory_pattern = self.config_obj.output_directory_pattern - output_filename_pattern = self.config_obj.output_filename_pattern - metadata_filename_base = self.config_obj.metadata_filename # e.g., "metadata.json" - metadata_ext = Path(metadata_filename_base).suffix.lstrip('.') or 'json' - metadata_maptype = Path(metadata_filename_base).stem # Use filename stem as maptype token - - token_data = { - "supplier": _sanitize_filename(effective_supplier), - "assetname": _sanitize_filename(asset_name), - "maptype": metadata_maptype, - "resolution": "meta", - "width": 0, - "height": 0, - "bitdepth": 0, - "ext": metadata_ext - } - if hasattr(self, 'current_incrementing_value') and self.current_incrementing_value is not None: - token_data['incrementingvalue'] = self.current_incrementing_value - if hasattr(self, 'current_sha5_value') and self.current_sha5_value is not None: - token_data['sha5'] = self.current_sha5_value - log.debug(f"Token data for _generate_metadata_file path generation: {token_data}") # DEBUG LOG - - - try: - # Generate directory and filename separately - 
relative_dir_path_str = generate_path_from_pattern(output_directory_pattern, token_data) - filename_str = generate_path_from_pattern(output_filename_pattern, token_data) - # Combine for the full temporary path - full_relative_path_str = str(Path(relative_dir_path_str) / filename_str) - relative_dir_path = Path(relative_dir_path_str) # Keep the directory Path object - except Exception as path_gen_err: - log.error(f"Failed to generate metadata path using patterns '{output_directory_pattern}' / '{output_filename_pattern}' and data {token_data}: {path_gen_err}", exc_info=True) - raise ProcessingEngineError(f"Failed to generate metadata path for asset '{asset_name}'") from path_gen_err - - output_path_temp_abs = self.temp_dir / full_relative_path_str # Save to engine's temp dir, preserving structure - log.debug(f"Writing metadata for asset '{asset_name}' to temporary file: {output_path_temp_abs}") - - # Ensure parent directory exists in temp (using the full path) - try: - output_path_temp_abs.parent.mkdir(parents=True, exist_ok=True) - except Exception as mkdir_err: - log.error(f"Failed to create temporary directory {output_path_temp_abs.parent} for metadata: {mkdir_err}", exc_info=True) - raise ProcessingEngineError(f"Failed to create temporary directory for metadata for asset '{asset_name}'") from mkdir_err - - try: - with open(output_path_temp_abs, 'w', encoding='utf-8') as f: - json.dump(final_metadata, f, indent=4, ensure_ascii=False, sort_keys=True) - log.info(f"Metadata file '{filename_str}' generated successfully for asset '{asset_name}' at relative temp path '{full_relative_path_str}'.") - # Return the RELATIVE directory Path object and the filename string - return relative_dir_path, filename_str - except Exception as e: - raise ProcessingEngineError(f"Failed to write metadata file {output_path_temp_abs} for asset '{asset_name}': {e}") from e - - - def _organize_output_files(self, asset_rule: AssetRule, workspace_path: Path, supplier_identifier: str, 
output_base_path: Path, processed_maps_details_asset: Dict[str, Dict[str, Dict]], merged_maps_details_asset: Dict[str, Dict[str, Dict]], temp_metadata_info: Tuple[Path, str]): - """ - Moves/copies processed files for a specific asset from the engine's temp dir - and copies EXTRA files from the original workspace to the final output structure, - using the relative paths generated by the token pattern. - - Args: - asset_rule: The AssetRule object for this asset. - workspace_path: Path to the original workspace containing source files. - supplier_identifier: The supplier identifier from the SourceRule. - output_base_path: The final base output directory. - processed_maps_details_asset: Details of processed maps for this asset. - merged_maps_details_asset: Details of merged maps for this asset. - temp_metadata_info: Tuple containing the relative directory Path and filename string for the metadata file within temp_dir. - """ - if not self.temp_dir or not self.temp_dir.exists(): raise ProcessingEngineError("Engine temp workspace missing.") - asset_name = asset_rule.asset_name - if not asset_name: raise ProcessingEngineError("Asset name missing for organization.") - - if not asset_name: raise ProcessingEngineError("Asset name missing for organization.") - asset_name_sanitized = _sanitize_filename(asset_name) # Still useful for logging - - # Get structure names from static config - extra_subdir_name = self.config_obj.extra_files_subdir - - log.info(f"Organizing output files for asset '{asset_name_sanitized}' using generated paths relative to: {output_base_path}") - - # --- Helper for moving files from engine's temp dir to final output --- - def _safe_move_to_final(src_rel_path_str: str | None, file_desc: str): - """Moves a file from temp to its final location based on its relative path string.""" - if not src_rel_path_str: - log.warning(f"Asset '{asset_name_sanitized}': Missing src relative path string for {file_desc}. 
Cannot move.") - return - - source_abs = self.temp_dir / src_rel_path_str # Absolute path in temp - dest_abs = output_base_path / src_rel_path_str # Final absolute path - - try: - if source_abs.exists(): - # Ensure final destination directory exists - dest_abs.parent.mkdir(parents=True, exist_ok=True) - log.debug(f"Asset '{asset_name_sanitized}': Moving {file_desc}: {src_rel_path_str} -> {dest_abs.relative_to(output_base_path)}") - shutil.move(str(source_abs), str(dest_abs)) - else: - log.warning(f"Asset '{asset_name_sanitized}': Source file missing in engine temp for {file_desc}: {source_abs}") - except Exception as e: - log.error(f"Asset '{asset_name_sanitized}': Failed moving {file_desc} '{src_rel_path_str}': {e}", exc_info=True) - - # --- Move Processed/Merged Maps --- - moved_map_count = 0 - for details_dict in [processed_maps_details_asset, merged_maps_details_asset]: - for map_type, res_dict in details_dict.items(): - # Skip if the whole map type failed (e.g., merge rule source missing) - if isinstance(res_dict, dict) and 'error' in res_dict and len(res_dict) == 1: - log.warning(f"Skipping move for map type '{map_type}' due to processing error: {res_dict['error']}") - continue - for res_key, details in res_dict.items(): - # Skip specific resolution errors - if isinstance(details, str) and details.startswith("error_"): - log.warning(f"Skipping move for {map_type} ({res_key}) due to error: {details}") - continue - if isinstance(details, dict) and 'path' in details: - # details['path'] is the relative path string within temp_dir - relative_path_str = details['path'] - _safe_move_to_final(relative_path_str, f"{map_type} ({res_key})") - moved_map_count += 1 - log.debug(f"Asset '{asset_name_sanitized}': Moved {moved_map_count} map files.") - - # --- Move Metadata File --- - if temp_metadata_info: - relative_dir_path, filename = temp_metadata_info - metadata_rel_path_str = str(relative_dir_path / filename) - _safe_move_to_final(metadata_rel_path_str, "metadata 
file") - else: - log.warning(f"Asset '{asset_name_sanitized}': Temporary metadata info missing. Cannot move metadata file.") - - # --- Handle "EXTRA" Files (copy from original workspace to final asset dir) --- - # Determine the final asset directory based on the metadata's relative directory path - final_asset_relative_dir = relative_dir_path if temp_metadata_info else None - if final_asset_relative_dir is not None: # Check explicitly for None - final_extra_dir_abs = output_base_path / final_asset_relative_dir / extra_subdir_name - log.debug(f"Asset '{asset_name_sanitized}': Determined final EXTRA directory: {final_extra_dir_abs}") - copied_extra_files = [] - for file_rule in asset_rule.files: - # Copy files explicitly marked as EXTRA or those with no item_type_override (unmatched) - if file_rule.item_type_override == "EXTRA" or file_rule.item_type_override is None: - try: - source_rel_path = Path(file_rule.file_path) - source_abs = workspace_path / source_rel_path - # Place in Extra subdir within the final asset dir, keep original name - dest_abs = final_extra_dir_abs / source_rel_path.name - - if source_abs.is_file(): - log.debug(f"Asset '{asset_name_sanitized}': Copying EXTRA/unmatched file: {source_rel_path} -> {final_extra_dir_abs.relative_to(output_base_path)}/") - final_extra_dir_abs.mkdir(parents=True, exist_ok=True) - shutil.copy2(str(source_abs), str(dest_abs)) # copy2 preserves metadata - copied_extra_files.append(source_rel_path.name) - elif source_abs.is_dir(): - log.debug(f"Asset '{asset_name_sanitized}': Skipping EXTRA/unmatched directory: {source_rel_path}") - else: - log.warning(f"Asset '{asset_name_sanitized}': Source file marked as EXTRA/unmatched not found in workspace: {source_abs}") - except Exception as copy_err: - log.error(f"Asset '{asset_name_sanitized}': Failed copying EXTRA/unmatched file '{file_rule.file_path}': {copy_err}", exc_info=True) - - if copied_extra_files: - log.info(f"Asset '{asset_name_sanitized}': Copied 
{len(copied_extra_files)} EXTRA/unmatched file(s) to '{final_extra_dir_abs.relative_to(output_base_path)}' subdirectory.") - else: - log.warning(f"Asset '{asset_name_sanitized}': Could not determine final asset directory from metadata info '{temp_metadata_info}'. Skipping EXTRA file copying.") - - - log.info(f"Finished organizing output for asset '{asset_name_sanitized}'.") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..2e70fad --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# This file makes the 'tests' directory a Python package. \ No newline at end of file diff --git a/tests/processing/pipeline/__init__.py b/tests/processing/pipeline/__init__.py new file mode 100644 index 0000000..f178d82 --- /dev/null +++ b/tests/processing/pipeline/__init__.py @@ -0,0 +1 @@ +# This file makes Python treat the directory as a package. \ No newline at end of file diff --git a/tests/processing/pipeline/stages/__init__.py b/tests/processing/pipeline/stages/__init__.py new file mode 100644 index 0000000..f178d82 --- /dev/null +++ b/tests/processing/pipeline/stages/__init__.py @@ -0,0 +1 @@ +# This file makes Python treat the directory as a package. 
\ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py b/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py new file mode 100644 index 0000000..198097b --- /dev/null +++ b/tests/processing/pipeline/stages/test_alpha_extraction_to_mask.py @@ -0,0 +1,273 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np + +from processing.pipeline.stages.alpha_extraction_to_mask import AlphaExtractionToMaskStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule +from configuration import Configuration, GeneralSettings +import processing.utils.image_processing_utils as ipu # Ensure ipu is available for mocking + +# Helper Functions +def create_mock_file_rule_for_alpha_test( + id_val: uuid.UUID = None, + map_type: str = "ALBEDO", + filename_pattern: str = "albedo.png", + item_type: str = "MAP_COL", + active: bool = True +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = item_type + mock_fr.active = active + mock_fr.transform_settings = mock.MagicMock(spec=TransformSettings) + return mock_fr + +def create_alpha_extraction_mock_context( + initial_file_rules: list = None, + initial_processed_details: dict = None, + skip_asset_flag: bool = False, + asset_name: str = "AlphaAsset", + # extract_alpha_globally: bool = True # If stage checks this +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + + mock_source_rule = mock.MagicMock(spec=SourceRule) + + mock_gs = mock.MagicMock(spec=GeneralSettings) + # if your stage uses a global flag: + # mock_gs.extract_alpha_to_mask_globally = extract_alpha_globally + + mock_config = mock.MagicMock(spec=Configuration) + 
mock_config.general_settings = mock_gs + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, + files_to_process=list(initial_file_rules) if initial_file_rules else [], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, + sha5_value=None + ) + return context + +# Unit Tests +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') # Mock logging to avoid console output during tests +def test_asset_skipped(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + context = create_alpha_extraction_mock_context(skip_asset_flag=True) + + updated_context = stage.execute(context) + + assert updated_context == context # Context should be unchanged + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 0 + assert not updated_context.processed_maps_details + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_existing_mask_map(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + existing_mask_rule = create_mock_file_rule_for_alpha_test(map_type="MASK", filename_pattern="mask.png") + context = create_alpha_extraction_mock_context(initial_file_rules=[existing_mask_rule]) + + updated_context = 
stage.execute(context) + + assert updated_context == context + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 + assert updated_context.files_to_process[0].map_type == "MASK" + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_alpha_extraction_success(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_albedo.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_rgba_data = np.zeros((10, 10, 4), dtype=np.uint8) + mock_rgba_data[:, :, 3] = 128 # Example alpha data + mock_load_image.side_effect = [mock_rgba_data, mock_rgba_data] + + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 2 + # First call to check for alpha, second to get data for saving + mock_load_image.assert_any_call(Path('/fake/temp_engine_dir/processed_albedo.png')) + + mock_save_image.assert_called_once() + saved_path_arg = mock_save_image.call_args[0][0] + saved_data_arg = mock_save_image.call_args[0][1] + + assert isinstance(saved_path_arg, Path) + assert "mask_from_alpha_" in saved_path_arg.name + assert np.array_equal(saved_data_arg, mock_rgba_data[:, :, 3]) + + assert len(updated_context.files_to_process) == 2 + new_mask_rule = None + for fr in updated_context.files_to_process: + if fr.map_type == "MASK": + new_mask_rule = fr 
+ break + assert new_mask_rule is not None + assert new_mask_rule.item_type == "MAP_DER" # Derived map + + assert new_mask_rule.id.hex in updated_context.processed_maps_details + new_mask_detail = updated_context.processed_maps_details[new_mask_rule.id.hex] + assert new_mask_detail['map_type'] == "MASK" + assert "mask_from_alpha_" in new_mask_detail['temp_processed_file'] + assert "Generated from alpha of ALBEDO" in new_mask_detail['notes'] # Check for specific note + assert new_mask_detail['status'] == 'Processed' + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_no_alpha_channel_in_source(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_rgb_albedo.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo_rgb.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_rgb_data = np.zeros((10, 10, 3), dtype=np.uint8) # RGB, no alpha + mock_load_image.return_value = mock_rgb_data # Only called once for check + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/temp_engine_dir/processed_rgb_albedo.png')) + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 # No new MASK rule + assert albedo_fr.id.hex in updated_context.processed_maps_details + assert len(updated_context.processed_maps_details) == 1 + + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') 
+@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.info') +def test_no_suitable_source_map_type(mock_log_info, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_alpha_test(id_val=normal_rule_id, map_type="NORMAL") + initial_processed_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal.png', 'status': 'Processed', 'map_type': 'NORMAL'} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 + assert normal_fr.id.hex in updated_context.processed_maps_details + assert len(updated_context.processed_maps_details) == 1 + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.warning') # Expect a warning log +def test_load_image_fails(mock_log_warning, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_albedo_load_fail.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo_load_fail.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_load_image.return_value = None # Simulate load failure + + updated_context = stage.execute(context) + + 
mock_load_image.assert_called_once_with(Path('/fake/temp_engine_dir/processed_albedo_load_fail.png')) + mock_save_image.assert_not_called() + assert len(updated_context.files_to_process) == 1 + assert albedo_fr.id.hex in updated_context.processed_maps_details + assert len(updated_context.processed_maps_details) == 1 + mock_log_warning.assert_called_once() # Check that a warning was logged + +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.save_image') +@mock.patch('processing.pipeline.stages.alpha_extraction_to_mask.ipu.load_image') +@mock.patch('logging.error') # Expect an error log +def test_save_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = AlphaExtractionToMaskStage() + + albedo_rule_id = uuid.uuid4() + albedo_fr = create_mock_file_rule_for_alpha_test(id_val=albedo_rule_id, map_type="ALBEDO") + initial_processed_details = { + albedo_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_albedo_save_fail.png', 'status': 'Processed', 'map_type': 'ALBEDO', 'source_file_path': Path('/fake/source/albedo_save_fail.png')} + } + context = create_alpha_extraction_mock_context( + initial_file_rules=[albedo_fr], + initial_processed_details=initial_processed_details + ) + + mock_rgba_data = np.zeros((10, 10, 4), dtype=np.uint8) + mock_rgba_data[:, :, 3] = 128 + mock_load_image.side_effect = [mock_rgba_data, mock_rgba_data] # Load succeeds + + mock_save_image.return_value = False # Simulate save failure + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 2 + mock_save_image.assert_called_once() # Save was attempted + + assert len(updated_context.files_to_process) == 1 # No new MASK rule should be successfully added and detailed + + # Check that no new MASK details were added, or if they were, they reflect failure. + # The current stage logic returns context early, so no new rule or details should be present. 
+ mask_rule_found = any(fr.map_type == "MASK" for fr in updated_context.files_to_process) + assert not mask_rule_found + + mask_details_found = any( + details['map_type'] == "MASK" + for fr_id, details in updated_context.processed_maps_details.items() + if fr_id != albedo_fr.id.hex # Exclude the original albedo + ) + assert not mask_details_found + mock_log_error.assert_called_once() # Check that an error was logged \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_asset_skip_logic.py b/tests/processing/pipeline/stages/test_asset_skip_logic.py new file mode 100644 index 0000000..388cc8c --- /dev/null +++ b/tests/processing/pipeline/stages/test_asset_skip_logic.py @@ -0,0 +1,213 @@ +import pytest +from unittest import mock +from pathlib import Path +from typing import Dict, Optional, Any + +from processing.pipeline.stages.asset_skip_logic import AssetSkipLogicStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule +from configuration import Configuration, GeneralSettings + +# Helper function to create a mock AssetProcessingContext +def create_skip_logic_mock_context( + effective_supplier: Optional[str] = "ValidSupplier", + asset_process_status: str = "PENDING", + overwrite_existing: bool = False, + asset_name: str = "TestAssetSkip" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.process_status = asset_process_status + mock_asset_rule.source_path = "fake/source" # Added for completeness + mock_asset_rule.output_path = "fake/output" # Added for completeness + mock_asset_rule.maps = [] # Added for completeness + mock_asset_rule.metadata = {} # Added for completeness + mock_asset_rule.material_name = None # Added for completeness + mock_asset_rule.notes = None # Added for completeness + mock_asset_rule.tags = [] # Added for completeness + mock_asset_rule.enabled = True # Added for 
completeness + + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = "TestSourceRule" # Added for completeness + mock_source_rule.path = "fake/source_rule_path" # Added for completeness + mock_source_rule.default_supplier = None # Added for completeness + mock_source_rule.assets = [mock_asset_rule] # Added for completeness + mock_source_rule.enabled = True # Added for completeness + + mock_general_settings = mock.MagicMock(spec=GeneralSettings) + mock_general_settings.overwrite_existing = overwrite_existing + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_general_settings + mock_config.suppliers = {"ValidSupplier": mock.MagicMock()} + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier=effective_supplier, + asset_metadata={}, + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha256_value if that's the actual field + ) + # Ensure status_flags is initialized if AssetSkipLogicStage expects it + # context.status_flags = {} # Already done in constructor + return context +@mock.patch('logging.info') +def test_skip_due_to_missing_supplier(mock_log_info): + """ + Test that the asset is skipped if effective_supplier is None. 
+ """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context(effective_supplier=None, asset_name="MissingSupplierAsset") + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset') is True + assert updated_context.status_flags.get('skip_reason') == "Invalid or missing supplier" + mock_log_info.assert_any_call(f"Asset 'MissingSupplierAsset': Skipping due to missing or invalid supplier.") + +@mock.patch('logging.info') +def test_skip_due_to_process_status_skip(mock_log_info): + """ + Test that the asset is skipped if asset_rule.process_status is "SKIP". + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context(asset_process_status="SKIP", asset_name="SkipStatusAsset") + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset') is True + assert updated_context.status_flags.get('skip_reason') == "Process status set to SKIP" + mock_log_info.assert_any_call(f"Asset 'SkipStatusAsset': Skipping because process_status is 'SKIP'.") + +@mock.patch('logging.info') +def test_skip_due_to_processed_and_overwrite_disabled(mock_log_info): + """ + Test that the asset is skipped if asset_rule.process_status is "PROCESSED" + and overwrite_existing is False. 
+ """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PROCESSED", + overwrite_existing=False, + asset_name="ProcessedNoOverwriteAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset') is True + assert updated_context.status_flags.get('skip_reason') == "Already processed, overwrite disabled" + mock_log_info.assert_any_call(f"Asset 'ProcessedNoOverwriteAsset': Skipping because already processed and overwrite is disabled.") + +@mock.patch('logging.info') +def test_no_skip_when_processed_and_overwrite_enabled(mock_log_info): + """ + Test that the asset is NOT skipped if asset_rule.process_status is "PROCESSED" + but overwrite_existing is True. + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PROCESSED", + overwrite_existing=True, + effective_supplier="ValidSupplier", # Ensure supplier is valid + asset_name="ProcessedOverwriteAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset', False) is False # Default to False if key not present + # No specific skip_reason to check if not skipped + # Check that no skip log message was called for this specific reason + for call_args in mock_log_info.call_args_list: + assert "Skipping because already processed and overwrite is disabled" not in call_args[0][0] + assert "Skipping due to missing or invalid supplier" not in call_args[0][0] + assert "Skipping because process_status is 'SKIP'" not in call_args[0][0] + + +@mock.patch('logging.info') +def test_no_skip_when_process_status_pending(mock_log_info): + """ + Test that the asset is NOT skipped if asset_rule.process_status is "PENDING". 
+ """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PENDING", + effective_supplier="ValidSupplier", # Ensure supplier is valid + asset_name="PendingAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset', False) is False + # Check that no skip log message was called + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] + + +@mock.patch('logging.info') +def test_no_skip_when_process_status_failed_previously(mock_log_info): + """ + Test that the asset is NOT skipped if asset_rule.process_status is "FAILED_PREVIOUSLY". + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="FAILED_PREVIOUSLY", + effective_supplier="ValidSupplier", # Ensure supplier is valid + asset_name="FailedPreviouslyAsset" + ) + + updated_context = stage.execute(context) + + assert updated_context.status_flags.get('skip_asset', False) is False + # Check that no skip log message was called + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] + +@mock.patch('logging.info') +def test_no_skip_when_process_status_other_valid_status(mock_log_info): + """ + Test that the asset is NOT skipped for other valid, non-skip process statuses. + """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="READY_FOR_PROCESSING", # Example of another non-skip status + effective_supplier="ValidSupplier", + asset_name="ReadyAsset" + ) + updated_context = stage.execute(context) + assert updated_context.status_flags.get('skip_asset', False) is False + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] + +@mock.patch('logging.info') +def test_skip_asset_flag_initialized_if_not_present(mock_log_info): + """ + Test that 'skip_asset' is initialized to False in status_flags if not skipped and not present. 
+ """ + stage = AssetSkipLogicStage() + context = create_skip_logic_mock_context( + asset_process_status="PENDING", + effective_supplier="ValidSupplier", + asset_name="InitFlagAsset" + ) + # Ensure status_flags is empty before execute + context.status_flags = {} + + updated_context = stage.execute(context) + + # If not skipped, 'skip_asset' should be explicitly False. + assert updated_context.status_flags.get('skip_asset') is False + # No skip reason should be set + assert 'skip_reason' not in updated_context.status_flags + for call_args in mock_log_info.call_args_list: + assert "Skipping" not in call_args[0][0] \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_file_rule_filter.py b/tests/processing/pipeline/stages/test_file_rule_filter.py new file mode 100644 index 0000000..4a79308 --- /dev/null +++ b/tests/processing/pipeline/stages/test_file_rule_filter.py @@ -0,0 +1,330 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +from typing import Optional # Added Optional for type hinting + +from processing.pipeline.stages.file_rule_filter import FileRuleFilterStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule # FileRule is key here +from configuration import Configuration # Minimal config needed + +def create_mock_file_rule( + id_val: Optional[uuid.UUID] = None, + map_type: str = "Diffuse", + filename_pattern: str = "*.tif", + item_type: str = "MAP_COL", # e.g., MAP_COL, FILE_IGNORE + active: bool = True +) -> mock.MagicMock: # Return MagicMock to easily set other attributes if needed + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = item_type + mock_fr.active = active + return mock_fr + +def create_file_filter_mock_context( + file_rules_list: Optional[list] = None, # List of mock 
FileRule objects + skip_asset_flag: bool = False, + asset_name: str = "FileFilterAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.file_rules = file_rules_list if file_rules_list is not None else [] + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_config = mock.MagicMock(spec=Configuration) + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", # Assume valid for this stage + asset_metadata={'asset_name': asset_name}, # Assume metadata init happened + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], # Stage will populate this + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha256_value based on AssetProcessingContext + ) + return context +# Test Cases for FileRuleFilterStage.execute() + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_asset_skipped(mock_log_debug, mock_log_info): + """ + Test case: Asset Skipped - status_flags['skip_asset'] is True. + Assert context.files_to_process remains empty. + """ + stage = FileRuleFilterStage() + context = create_file_filter_mock_context(skip_asset_flag=True) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 0 + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule filtering as 'skip_asset' is True.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_no_file_rules(mock_log_debug, mock_log_info): + """ + Test case: No File Rules - asset_rule.file_rules is empty. + Assert context.files_to_process is empty. 
+ """ + stage = FileRuleFilterStage() + context = create_file_filter_mock_context(file_rules_list=[]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 0 + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': No file rules defined. Skipping file rule filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_only_active_processable_rules(mock_log_debug, mock_log_info): + """ + Test case: Only Active, Processable Rules - All FileRules are active=True and item_type="MAP_COL". + Assert all are added to context.files_to_process. + """ + stage = FileRuleFilterStage() + fr1 = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr2 = create_mock_file_rule(filename_pattern="normal.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr1, fr2]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr1 in updated_context.files_to_process + assert fr2 in updated_context.files_to_process + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_inactive_rules(mock_log_debug, mock_log_info): + """ + Test case: Inactive Rules - Some FileRules have active=False. + Assert only active rules are added. 
+ """ + stage = FileRuleFilterStage() + fr_active = create_mock_file_rule(filename_pattern="active.png", item_type="MAP_COL", active=True) + fr_inactive = create_mock_file_rule(filename_pattern="inactive.png", item_type="MAP_COL", active=False) + fr_another_active = create_mock_file_rule(filename_pattern="another_active.jpg", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_active, fr_inactive, fr_another_active]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr_active in updated_context.files_to_process + assert fr_another_active in updated_context.files_to_process + assert fr_inactive not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping inactive file rule: '{fr_inactive.filename_pattern}'") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_file_ignore_simple_match(mock_log_debug, mock_log_info): + """ + Test case: FILE_IGNORE Rule (Simple Match). + One FILE_IGNORE rule with filename_pattern="*_ignore.png". + One MAP_COL rule with filename_pattern="diffuse_ignore.png". + One MAP_COL rule with filename_pattern="normal_process.png". + Assert only "normal_process.png" rule is added. 
+ """ + stage = FileRuleFilterStage() + fr_ignore = create_mock_file_rule(filename_pattern="*_ignore.png", item_type="FILE_IGNORE", active=True) + fr_ignored_map = create_mock_file_rule(filename_pattern="diffuse_ignore.png", item_type="MAP_COL", active=True) + fr_process_map = create_mock_file_rule(filename_pattern="normal_process.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_ignore, fr_ignored_map, fr_process_map]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_process_map in updated_context.files_to_process + assert fr_ignored_map not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_ignored_map.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_file_ignore_glob_pattern(mock_log_debug, mock_log_info): + """ + Test case: FILE_IGNORE Rule (Glob Pattern). + One FILE_IGNORE rule with filename_pattern="*_ignore.*". + MAP_COL rules: "tex_ignore.tif", "tex_process.png". + Assert only "tex_process.png" rule is added. 
+ """ + stage = FileRuleFilterStage() + fr_ignore_glob = create_mock_file_rule(filename_pattern="*_ignore.*", item_type="FILE_IGNORE", active=True) + fr_ignored_tif = create_mock_file_rule(filename_pattern="tex_ignore.tif", item_type="MAP_COL", active=True) + fr_process_png = create_mock_file_rule(filename_pattern="tex_process.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_ignore_glob, fr_ignored_tif, fr_process_png]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_process_png in updated_context.files_to_process + assert fr_ignored_tif not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore_glob.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_ignored_tif.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_multiple_file_ignore_rules(mock_log_debug, mock_log_info): + """ + Test case: Multiple FILE_IGNORE Rules. + Test with several ignore patterns and ensure they are all respected. 
+ """ + stage = FileRuleFilterStage() + fr_ignore1 = create_mock_file_rule(filename_pattern="*.tmp", item_type="FILE_IGNORE", active=True) + fr_ignore2 = create_mock_file_rule(filename_pattern="backup_*", item_type="FILE_IGNORE", active=True) + fr_ignore3 = create_mock_file_rule(filename_pattern="*_old.png", item_type="FILE_IGNORE", active=True) + + fr_map_ignored1 = create_mock_file_rule(filename_pattern="data.tmp", item_type="MAP_COL", active=True) + fr_map_ignored2 = create_mock_file_rule(filename_pattern="backup_diffuse.jpg", item_type="MAP_COL", active=True) + fr_map_ignored3 = create_mock_file_rule(filename_pattern="normal_old.png", item_type="MAP_COL", active=True) + fr_map_process = create_mock_file_rule(filename_pattern="final_texture.tif", item_type="MAP_COL", active=True) + + context = create_file_filter_mock_context(file_rules_list=[ + fr_ignore1, fr_ignore2, fr_ignore3, + fr_map_ignored1, fr_map_ignored2, fr_map_ignored3, fr_map_process + ]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_map_process in updated_context.files_to_process + assert fr_map_ignored1 not in updated_context.files_to_process + assert fr_map_ignored2 not in updated_context.files_to_process + assert fr_map_ignored3 not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore1.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore2.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore3.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_map_ignored1.filename_pattern}' due to matching ignore pattern.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule 
'{fr_map_ignored2.filename_pattern}' due to matching ignore pattern.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_map_ignored3.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_file_ignore_rule_is_inactive(mock_log_debug, mock_log_info): + """ + Test case: FILE_IGNORE Rule is Inactive. + An ignore rule itself is active=False. Assert its pattern is NOT used for filtering. + """ + stage = FileRuleFilterStage() + fr_inactive_ignore = create_mock_file_rule(filename_pattern="*_ignore.tif", item_type="FILE_IGNORE", active=False) + fr_should_process1 = create_mock_file_rule(filename_pattern="diffuse_ignore.tif", item_type="MAP_COL", active=True) # Should be processed + fr_should_process2 = create_mock_file_rule(filename_pattern="normal_ok.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_inactive_ignore, fr_should_process1, fr_should_process2]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr_should_process1 in updated_context.files_to_process + assert fr_should_process2 in updated_context.files_to_process + # Ensure the inactive ignore rule's pattern was not registered + # We check this by ensuring no debug log for registering *that specific* pattern was made. + # A more robust way would be to check mock_log_debug.call_args_list, but this is simpler for now. 
+ for call in mock_log_debug.call_args_list: + args, kwargs = call + if "Registering ignore pattern" in args[0] and fr_inactive_ignore.filename_pattern in args[0]: + pytest.fail(f"Inactive ignore pattern '{fr_inactive_ignore.filename_pattern}' was incorrectly registered.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping inactive file rule: '{fr_inactive_ignore.filename_pattern}' (type: FILE_IGNORE)") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_no_file_ignore_rules(mock_log_debug, mock_log_info): + """ + Test case: No FILE_IGNORE Rules. + All rules are MAP_COL or other processable types. + Assert all active, processable rules are included. + """ + stage = FileRuleFilterStage() + fr1 = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr2 = create_mock_file_rule(filename_pattern="normal.png", item_type="MAP_COL", active=True) + fr_other_type = create_mock_file_rule(filename_pattern="spec.tif", item_type="MAP_SPEC", active=True) # Assuming MAP_SPEC is processable + fr_inactive = create_mock_file_rule(filename_pattern="ao.jpg", item_type="MAP_AO", active=False) + + context = create_file_filter_mock_context(file_rules_list=[fr1, fr2, fr_other_type, fr_inactive]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 3 + assert fr1 in updated_context.files_to_process + assert fr2 in updated_context.files_to_process + assert fr_other_type in updated_context.files_to_process + assert fr_inactive not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping inactive file rule: '{fr_inactive.filename_pattern}'") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 3 file rules queued for processing after filtering.") + 
+@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_item_type_not_processable(mock_log_debug, mock_log_info): + """ + Test case: Item type is not processable (e.g., not MAP_COL, MAP_AO etc., but something else like 'METADATA_ONLY'). + Assert such rules are not added to files_to_process, unless they are FILE_IGNORE. + """ + stage = FileRuleFilterStage() + fr_processable = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr_not_processable = create_mock_file_rule(filename_pattern="info.txt", item_type="METADATA_ONLY", active=True) + fr_ignore = create_mock_file_rule(filename_pattern="*.bak", item_type="FILE_IGNORE", active=True) + fr_ignored_by_bak = create_mock_file_rule(filename_pattern="diffuse.bak", item_type="MAP_COL", active=True) + + context = create_file_filter_mock_context(file_rules_list=[fr_processable, fr_not_processable, fr_ignore, fr_ignored_by_bak]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_processable in updated_context.files_to_process + assert fr_not_processable not in updated_context.files_to_process + assert fr_ignored_by_bak not in updated_context.files_to_process + + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_not_processable.filename_pattern}' as its item_type '{fr_not_processable.item_type}' is not processable.") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_ignored_by_bak.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") + +# Example tests from instructions (can be adapted or used as a base) +@mock.patch('logging.info') +@mock.patch('logging.debug') +def 
test_file_rule_filter_basic_active_example(mock_log_debug, mock_log_info): # Renamed to avoid conflict + stage = FileRuleFilterStage() + fr1 = create_mock_file_rule(filename_pattern="diffuse.png", item_type="MAP_COL", active=True) + fr2 = create_mock_file_rule(filename_pattern="normal.png", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr1, fr2]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 2 + assert fr1 in updated_context.files_to_process + assert fr2 in updated_context.files_to_process + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 2 file rules queued for processing after filtering.") + +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_file_rule_filter_with_file_ignore_example(mock_log_debug, mock_log_info): # Renamed to avoid conflict + stage = FileRuleFilterStage() + fr_ignore = create_mock_file_rule(filename_pattern="*_ignore.tif", item_type="FILE_IGNORE", active=True) + fr_process = create_mock_file_rule(filename_pattern="diffuse_ok.tif", item_type="MAP_COL", active=True) + fr_skip = create_mock_file_rule(filename_pattern="normal_ignore.tif", item_type="MAP_COL", active=True) + context = create_file_filter_mock_context(file_rules_list=[fr_ignore, fr_process, fr_skip]) + + updated_context = stage.execute(context) + + assert len(updated_context.files_to_process) == 1 + assert fr_process in updated_context.files_to_process + assert fr_skip not in updated_context.files_to_process + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Registering ignore pattern: '{fr_ignore.filename_pattern}'") + mock_log_debug.assert_any_call(f"Asset '{context.asset_rule.name}': Skipping file rule '{fr_skip.filename_pattern}' due to matching ignore pattern.") + mock_log_info.assert_any_call(f"Asset '{context.asset_rule.name}': 1 file rules queued for processing after filtering.") \ No newline at end of file diff --git 
a/tests/processing/pipeline/stages/test_gloss_to_rough_conversion.py b/tests/processing/pipeline/stages/test_gloss_to_rough_conversion.py new file mode 100644 index 0000000..934ad2c --- /dev/null +++ b/tests/processing/pipeline/stages/test_gloss_to_rough_conversion.py @@ -0,0 +1,486 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +from typing import Optional, List, Dict + +from processing.pipeline.stages.gloss_to_rough_conversion import GlossToRoughConversionStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule +from configuration import Configuration, GeneralSettings +# No direct ipu import needed in test if we mock its usage by the stage + +def create_mock_file_rule_for_gloss_test( + id_val: Optional[uuid.UUID] = None, + map_type: str = "GLOSS", # Test with GLOSS and other types + filename_pattern: str = "gloss.png" +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = "MAP_COL" + mock_fr.active = True + return mock_fr + +def create_gloss_conversion_mock_context( + initial_file_rules: Optional[List[FileRule]] = None, # Type hint corrected + initial_processed_details: Optional[Dict] = None, # Type hint corrected + skip_asset_flag: bool = False, + asset_name: str = "GlossAsset", + # Add a mock for general_settings if your stage checks a global flag + # convert_gloss_globally: bool = True +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.file_rules = initial_file_rules if initial_file_rules is not None else [] + + mock_source_rule = mock.MagicMock(spec=SourceRule) + + mock_gs = mock.MagicMock(spec=GeneralSettings) + # if your stage uses a global flag: + # 
mock_gs.convert_gloss_to_rough_globally = convert_gloss_globally + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_gs + + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), # Important for new temp file paths + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, + files_to_process=list(initial_file_rules) if initial_file_rules else [], # Stage modifies this list + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, # Added as per AssetProcessingContext definition + sha5_value=None # Added as per AssetProcessingContext definition + ) + return context + +# Unit tests will be added below +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_asset_skipped(mock_load_image, mock_save_image): + """ + Test that if 'skip_asset' is True, no processing occurs. 
+ """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS") + + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_gloss_map.png', 'status': 'Processed', 'map_type': 'GLOSS'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr], + initial_processed_details=initial_details, + skip_asset_flag=True # Asset is skipped + ) + + # Keep a copy of files_to_process and processed_maps_details to compare + original_files_to_process = list(context.files_to_process) + original_processed_maps_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert updated_context.files_to_process == original_files_to_process, "files_to_process should not change if asset is skipped" + assert updated_context.processed_maps_details == original_processed_maps_details, "processed_maps_details should not change if asset is skipped" + assert updated_context.status_flags['skip_asset'] is True +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_no_gloss_map_present(mock_load_image, mock_save_image): + """ + Test that if no GLOSS maps are in files_to_process, no conversion occurs. 
+ """ + stage = GlossToRoughConversionStage() + + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_gloss_test(id_val=normal_rule_id, map_type="NORMAL", filename_pattern="normal.png") + albedo_fr = create_mock_file_rule_for_gloss_test(map_type="ALBEDO", filename_pattern="albedo.jpg") + + initial_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal_map.png', 'status': 'Processed', 'map_type': 'NORMAL'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[normal_fr, albedo_fr], + initial_processed_details=initial_details + ) + + original_files_to_process = list(context.files_to_process) + original_processed_maps_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert updated_context.files_to_process == original_files_to_process, "files_to_process should not change if no GLOSS maps are present" + assert updated_context.processed_maps_details == original_processed_maps_details, "processed_maps_details should not change if no GLOSS maps are present" + + # Ensure map types of existing rules are unchanged + for fr_in_list in updated_context.files_to_process: + if fr_in_list.id == normal_fr.id: + assert fr_in_list.map_type == "NORMAL" + elif fr_in_list.id == albedo_fr.id: + assert fr_in_list.map_type == "ALBEDO" +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') # Mock logging +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_gloss_conversion_uint8_success(mock_load_image, mock_save_image, mock_logging): + """ + Test successful conversion of a GLOSS map (uint8 data) to ROUGHNESS. 
+ """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + # Use a distinct filename for the gloss map to ensure correct path construction + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="my_gloss_map.png") + other_fr_id = uuid.uuid4() + other_fr = create_mock_file_rule_for_gloss_test(id_val=other_fr_id, map_type="NORMAL", filename_pattern="normal_map.png") + + initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_map.png") + initial_other_temp_path = Path("/fake/temp_engine_dir/processed_normal_map.png") + + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': str(initial_gloss_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'}, + other_fr.id.hex: {'temp_processed_file': str(initial_other_temp_path), 'status': 'Processed', 'map_type': 'NORMAL'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr, other_fr], + initial_processed_details=initial_details + ) + + mock_loaded_gloss_data = np.array([10, 50, 250], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_gloss_data + mock_save_image.return_value = True # Simulate successful save + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_gloss_temp_path) + + # Check that save_image was called with inverted data and correct path + expected_inverted_data = 255 - mock_loaded_gloss_data + + # call_args[0] is a tuple of positional args, call_args[1] is a dict of kwargs + saved_path_arg = mock_save_image.call_args[0][0] + saved_data_arg = mock_save_image.call_args[0][1] + + assert np.array_equal(saved_data_arg, expected_inverted_data), "Image data passed to save_image is not correctly inverted." + assert "rough_from_gloss_" in saved_path_arg.name, "Saved file name should indicate conversion from gloss." + assert saved_path_arg.parent == Path("/fake/temp_engine_dir"), "Saved file should be in the engine temp directory." 
+ # Ensure the new filename is based on the original gloss map's ID for uniqueness + assert gloss_fr.id.hex in saved_path_arg.name + + # Check context.files_to_process + assert len(updated_context.files_to_process) == 2, "Number of file rules in context should remain the same." + converted_rule_found = False + other_rule_untouched = False + for fr_in_list in updated_context.files_to_process: + if fr_in_list.id == gloss_fr.id: # Should be the same rule object, modified + assert fr_in_list.map_type == "ROUGHNESS", "GLOSS map_type should be changed to ROUGHNESS." + # Check if filename_pattern was updated (optional, depends on stage logic) + # For now, assume it might not be, as the primary identifier is map_type and ID + converted_rule_found = True + elif fr_in_list.id == other_fr.id: + assert fr_in_list.map_type == "NORMAL", "Other map_type should remain unchanged." + other_rule_untouched = True + assert converted_rule_found, "The converted GLOSS rule was not found or not updated correctly in files_to_process." + assert other_rule_untouched, "The non-GLOSS rule was modified unexpectedly." + + # Check context.processed_maps_details + assert len(updated_context.processed_maps_details) == 2, "Number of entries in processed_maps_details should remain the same." + + gloss_detail = updated_context.processed_maps_details[gloss_fr.id.hex] + assert "rough_from_gloss_" in gloss_detail['temp_processed_file'], "temp_processed_file for gloss map not updated." + assert Path(gloss_detail['temp_processed_file']).name == saved_path_arg.name, "Path in details should match saved path." + assert gloss_detail['original_map_type_before_conversion'] == "GLOSS", "original_map_type_before_conversion not set correctly." + assert "Converted from GLOSS to ROUGHNESS" in gloss_detail['notes'], "Conversion notes not added or incorrect." + assert gloss_detail['map_type'] == "ROUGHNESS", "map_type in details not updated to ROUGHNESS." 
+ + + other_detail = updated_context.processed_maps_details[other_fr.id.hex] + assert other_detail['temp_processed_file'] == str(initial_other_temp_path), "Other map's temp_processed_file should be unchanged." + assert other_detail['map_type'] == "NORMAL", "Other map's map_type should be unchanged." + assert 'original_map_type_before_conversion' not in other_detail, "Other map should not have conversion history." + assert 'notes' not in other_detail or "Converted from GLOSS" not in other_detail['notes'], "Other map should not have conversion notes." + + mock_logging.info.assert_any_call(f"Successfully converted GLOSS map {gloss_fr.id.hex} to ROUGHNESS.") +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') # Mock logging +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_gloss_conversion_float_success(mock_load_image, mock_save_image, mock_logging): + """ + Test successful conversion of a GLOSS map (float data) to ROUGHNESS. 
+ """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="gloss_float.hdr") # Example float format + + initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_float.hdr") + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': str(initial_gloss_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr], + initial_processed_details=initial_details + ) + + mock_loaded_gloss_data = np.array([0.1, 0.5, 0.9], dtype=np.float32) + mock_load_image.return_value = mock_loaded_gloss_data + mock_save_image.return_value = True # Simulate successful save + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_gloss_temp_path) + + expected_inverted_data = 1.0 - mock_loaded_gloss_data + + saved_path_arg = mock_save_image.call_args[0][0] + saved_data_arg = mock_save_image.call_args[0][1] + + assert np.allclose(saved_data_arg, expected_inverted_data), "Image data (float) passed to save_image is not correctly inverted." + assert "rough_from_gloss_" in saved_path_arg.name, "Saved file name should indicate conversion from gloss." + assert saved_path_arg.parent == Path("/fake/temp_engine_dir"), "Saved file should be in the engine temp directory." 
+ assert gloss_fr.id.hex in saved_path_arg.name + + assert len(updated_context.files_to_process) == 1 + converted_rule = updated_context.files_to_process[0] + assert converted_rule.id == gloss_fr.id + assert converted_rule.map_type == "ROUGHNESS" + + gloss_detail = updated_context.processed_maps_details[gloss_fr.id.hex] + assert "rough_from_gloss_" in gloss_detail['temp_processed_file'] + assert Path(gloss_detail['temp_processed_file']).name == saved_path_arg.name + assert gloss_detail['original_map_type_before_conversion'] == "GLOSS" + assert "Converted from GLOSS to ROUGHNESS" in gloss_detail['notes'] + assert gloss_detail['map_type'] == "ROUGHNESS" + + mock_logging.info.assert_any_call(f"Successfully converted GLOSS map {gloss_fr.id.hex} to ROUGHNESS.") +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_load_image_fails(mock_load_image, mock_save_image, mock_logging): + """ + Test behavior when ipu.load_image fails (returns None). + The original FileRule should be kept, and an error logged. 
+ """ + stage = GlossToRoughConversionStage() + + gloss_rule_id = uuid.uuid4() + gloss_fr = create_mock_file_rule_for_gloss_test(id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="gloss_fails_load.png") + + initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_fails_load.png") + initial_details = { + gloss_fr.id.hex: {'temp_processed_file': str(initial_gloss_temp_path), 'status': 'Processed', 'map_type': 'GLOSS'} + } + context = create_gloss_conversion_mock_context( + initial_file_rules=[gloss_fr], + initial_processed_details=initial_details + ) + + # Keep a copy for comparison + original_file_rule_map_type = gloss_fr.map_type + original_details_entry = context.processed_maps_details[gloss_fr.id.hex].copy() + + mock_load_image.return_value = None # Simulate load failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_gloss_temp_path) + mock_save_image.assert_not_called() # Save should not be attempted + + # Check context.files_to_process: rule should be unchanged + assert len(updated_context.files_to_process) == 1 + processed_rule = updated_context.files_to_process[0] + assert processed_rule.id == gloss_fr.id + assert processed_rule.map_type == original_file_rule_map_type, "FileRule map_type should not change if load fails." 
+ assert processed_rule.map_type == "GLOSS" # Explicitly check it's still GLOSS + + # Check context.processed_maps_details: details should be unchanged + current_details_entry = updated_context.processed_maps_details[gloss_fr.id.hex] + assert current_details_entry['temp_processed_file'] == str(initial_gloss_temp_path) + assert current_details_entry['map_type'] == "GLOSS" + assert 'original_map_type_before_conversion' not in current_details_entry + assert 'notes' not in current_details_entry or "Converted from GLOSS" not in current_details_entry['notes'] + + mock_logging.error.assert_called_once_with( + f"Failed to load image data for GLOSS map {gloss_fr.id.hex} from {initial_gloss_temp_path}. Skipping conversion for this map." + ) +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image') +@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image') +def test_save_image_fails(mock_load_image, mock_save_image, mock_logging): + """ + Test behavior when ipu.save_image fails (returns False). + The original FileRule should be kept, and an error logged. 
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging')
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image')
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image')
def test_save_image_fails(mock_load_image, mock_save_image, mock_logging):
    """
    Test behavior when ipu.save_image fails (returns False).

    The inverted data must still be handed to save_image, but the GLOSS
    FileRule and its processed_maps_details entry must keep their original
    values, and exactly one error must be logged.
    """
    stage = GlossToRoughConversionStage()

    gloss_rule_id = uuid.uuid4()
    gloss_fr = create_mock_file_rule_for_gloss_test(
        id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="gloss_fails_save.png"
    )

    initial_gloss_temp_path = Path("/fake/temp_engine_dir/processed_gloss_fails_save.png")
    initial_details = {
        gloss_fr.id.hex: {
            'temp_processed_file': str(initial_gloss_temp_path),
            'status': 'Processed',
            'map_type': 'GLOSS',
        }
    }
    context = create_gloss_conversion_mock_context(
        initial_file_rules=[gloss_fr],
        initial_processed_details=initial_details
    )

    # Captured before execute() so an in-place mutation of the rule is detected.
    original_file_rule_map_type = gloss_fr.map_type

    mock_loaded_gloss_data = np.array([10, 50, 250], dtype=np.uint8)
    mock_load_image.return_value = mock_loaded_gloss_data
    mock_save_image.return_value = False  # Simulate save failure

    updated_context = stage.execute(context)

    mock_load_image.assert_called_once_with(initial_gloss_temp_path)

    # Assert the call happened before indexing call_args: without this, a
    # stage that never calls save_image fails with an opaque
    # "'NoneType' object is not subscriptable" instead of a clear message.
    mock_save_image.assert_called_once()

    # call_args[0] is the tuple of positional args: (path, data).
    expected_inverted_data = 255 - mock_loaded_gloss_data
    saved_path_arg, saved_data_arg = mock_save_image.call_args[0][:2]

    assert np.array_equal(saved_data_arg, expected_inverted_data), "Image data passed to save_image is not correctly inverted even on failure."
    assert "rough_from_gloss_" in saved_path_arg.name, "Attempted save file name should indicate conversion from gloss."
    assert saved_path_arg.parent == Path("/fake/temp_engine_dir"), "Attempted save file should be in the engine temp directory."

    # Check context.files_to_process: rule should be unchanged
    assert len(updated_context.files_to_process) == 1
    processed_rule = updated_context.files_to_process[0]
    assert processed_rule.id == gloss_fr.id
    assert processed_rule.map_type == original_file_rule_map_type, "FileRule map_type should not change if save fails."
    assert processed_rule.map_type == "GLOSS"

    # Check context.processed_maps_details: details should be unchanged
    current_details_entry = updated_context.processed_maps_details[gloss_fr.id.hex]
    assert current_details_entry['temp_processed_file'] == str(initial_gloss_temp_path)
    assert current_details_entry['map_type'] == "GLOSS"
    assert 'original_map_type_before_conversion' not in current_details_entry
    assert 'notes' not in current_details_entry or "Converted from GLOSS" not in current_details_entry['notes']

    mock_logging.error.assert_called_once_with(
        f"Failed to save inverted GLOSS map {gloss_fr.id.hex} to {saved_path_arg}. Retaining original GLOSS map."
    )
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging')
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image')
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image')
def test_gloss_map_in_files_to_process_but_not_in_details(mock_load_image, mock_save_image, mock_logging):
    """
    Test behavior when a GLOSS FileRule is in files_to_process but its details
    are missing from processed_maps_details.

    The stage should log an error and skip this FileRule without touching
    the image I/O helpers.
    """
    stage = GlossToRoughConversionStage()

    gloss_rule_id = uuid.uuid4()
    # This FileRule is in files_to_process
    gloss_fr_in_list = create_mock_file_rule_for_gloss_test(
        id_val=gloss_rule_id, map_type="GLOSS", filename_pattern="orphan_gloss.png"
    )

    # processed_maps_details deliberately has no entry for gloss_fr_in_list.id.hex
    context = create_gloss_conversion_mock_context(
        initial_file_rules=[gloss_fr_in_list],
        initial_processed_details={}
    )

    # Snapshot taken before execute() so the comparison below is meaningful
    # even if the stage mutates the dict in place.
    original_processed_maps_details = context.processed_maps_details.copy()

    updated_context = stage.execute(context)

    mock_load_image.assert_not_called()  # Load should not be attempted if details are missing
    mock_save_image.assert_not_called()  # Save should not be attempted

    # Check context.files_to_process: rule should be unchanged
    assert len(updated_context.files_to_process) == 1
    processed_rule = updated_context.files_to_process[0]
    assert processed_rule.id == gloss_fr_in_list.id
    assert processed_rule.map_type == "GLOSS", "FileRule map_type should not change if its details are missing."

    # Check context.processed_maps_details: should remain unchanged
    assert updated_context.processed_maps_details == original_processed_maps_details, "processed_maps_details should not change."

    mock_logging.error.assert_called_once_with(
        f"GLOSS map {gloss_fr_in_list.id.hex} found in files_to_process but missing from processed_maps_details. Skipping conversion."
    )

# Test for Case 8.2 (GLOSS map ID in processed_maps_details but no corresponding FileRule in files_to_process)
# This case is implicitly handled because the stage iterates files_to_process.
# If a FileRule isn't in files_to_process, its corresponding entry in processed_maps_details (if any) won't be acted upon.
# We can add a simple test to ensure no errors occur and non-relevant details are untouched.
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.logging')
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.save_image')
@mock.patch('processing.pipeline.stages.gloss_to_rough_conversion.ipu.load_image')
def test_gloss_detail_exists_but_not_in_files_to_process(mock_load_image, mock_save_image, mock_logging):
    """
    An orphan GLOSS entry in processed_maps_details (no matching FileRule in
    files_to_process) must be left alone, while the GLOSS rule that *is*
    listed gets converted as usual and no error mentions the orphan.
    """
    stage = GlossToRoughConversionStage()

    # Rule that is listed in files_to_process and therefore gets converted.
    convert_rule_id = uuid.uuid4()
    convert_fr = create_mock_file_rule_for_gloss_test(
        id_val=convert_rule_id, map_type="GLOSS", filename_pattern="convert_me.png"
    )
    convert_initial_temp_path = Path("/fake/temp_engine_dir/processed_convert_me.png")
    convert_key = convert_fr.id.hex

    # Detail entry whose rule is NOT listed in files_to_process.
    orphan_key = uuid.uuid4().hex

    details_before = {}
    details_before[convert_key] = {
        'temp_processed_file': str(convert_initial_temp_path),
        'status': 'Processed',
        'map_type': 'GLOSS',
    }
    details_before[orphan_key] = {
        'temp_processed_file': '/fake/temp_engine_dir/orphan.png',
        'status': 'Processed',
        'map_type': 'GLOSS',
        'notes': 'This is an orphan',
    }

    context = create_gloss_conversion_mock_context(
        initial_file_rules=[convert_fr],  # only the convertible rule is listed
        initial_processed_details=details_before
    )

    mock_load_image.return_value = np.array([100], dtype=np.uint8)
    mock_save_image.return_value = True

    result = stage.execute(context)
    details_after = result.processed_maps_details

    # Image I/O happened exactly once, for the listed rule only.
    mock_load_image.assert_called_once_with(convert_initial_temp_path)
    mock_save_image.assert_called_once()  # arguments are covered by other tests

    # The orphan entry is still present and untouched.
    assert orphan_key in details_after
    orphan_entry = details_after[orphan_key]
    assert orphan_entry['temp_processed_file'] == '/fake/temp_engine_dir/orphan.png'
    assert orphan_entry['map_type'] == 'GLOSS'
    assert orphan_entry['notes'] == 'This is an orphan'
    assert 'original_map_type_before_conversion' not in orphan_entry

    # The listed rule was converted.
    assert convert_key in details_after
    converted_entry = details_after[convert_key]
    assert converted_entry['map_type'] == 'ROUGHNESS'
    assert "rough_from_gloss_" in converted_entry['temp_processed_file']

    # None of the logged errors may reference the orphan entry.
    for logged_call in mock_logging.error.call_args_list:
        first_positional = logged_call[0][0]
        assert str(orphan_key) not in first_positional, "Error logged for orphan detail"
orphan_detail_id.hex in updated_context.processed_maps_details + orphan_entry = updated_context.processed_maps_details[orphan_detail_id.hex] + assert orphan_entry['temp_processed_file'] == '/fake/temp_engine_dir/orphan.png' + assert orphan_entry['map_type'] == 'GLOSS' + assert orphan_entry['notes'] == 'This is an orphan' + assert 'original_map_type_before_conversion' not in orphan_entry + + # Check that the processed rule was indeed converted + assert convert_fr.id.hex in updated_context.processed_maps_details + converted_entry = updated_context.processed_maps_details[convert_fr.id.hex] + assert converted_entry['map_type'] == 'ROUGHNESS' + assert "rough_from_gloss_" in converted_entry['temp_processed_file'] + + # No errors should have been logged regarding the orphan detail + for call_args in mock_logging.error.call_args_list: + assert str(orphan_detail_id.hex) not in call_args[0][0], "Error logged for orphan detail" \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_individual_map_processing.py b/tests/processing/pipeline/stages/test_individual_map_processing.py new file mode 100644 index 0000000..8af2d19 --- /dev/null +++ b/tests/processing/pipeline/stages/test_individual_map_processing.py @@ -0,0 +1,555 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +from typing import Optional # Added for type hinting in helper functions + +from processing.pipeline.stages.individual_map_processing import IndividualMapProcessingStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule # Key models +from configuration import Configuration, GeneralSettings +# cv2 might be imported by the stage for interpolation constants, ensure it's mockable if so. +# For now, assume ipu handles interpolation details. 
def create_mock_transform_settings(
    target_width=0, target_height=0, resize_mode="FIT",
    ensure_pot=False, allow_upscale=True, target_color_profile="RGB",
    resize_filter="AREA"
) -> mock.MagicMock:
    """Build a stand-in for a FileRule's transform settings.

    The mock is spec'd from an explicit list of attribute names rather than
    from a ``TransformSettings`` class: that name is not imported by this
    module, so referencing it raised ``NameError`` on the first call.
    Reading an attribute outside the list raises ``AttributeError``, which
    keeps the typo protection a class spec would have given.

    Args:
        target_width: Requested output width.
        target_height: Requested output height.
        resize_mode: Resize mode name handed through to the stage.
        ensure_pot: Value exposed as ``ensure_pot``.
        allow_upscale: Value exposed as ``allow_upscale``.
        target_color_profile: Value exposed as ``target_color_profile``.
        resize_filter: Value exposed as ``resize_filter``; tests may
            overwrite it before running the stage.

    Returns:
        A ``MagicMock`` exposing exactly the attributes listed above.
    """
    # NOTE(review): extend this mapping if the stage reads further settings.
    field_values = {
        "target_width": target_width,
        "target_height": target_height,
        "resize_mode": resize_mode,
        "ensure_pot": ensure_pot,
        "allow_upscale": allow_upscale,
        "target_color_profile": target_color_profile,
        "resize_filter": resize_filter,
    }
    ts = mock.MagicMock(spec=list(field_values))
    for field_name, field_value in field_values.items():
        setattr(ts, field_name, field_value)
    return ts


def create_mock_file_rule_for_individual_processing(
    id_val: Optional[uuid.UUID] = None,
    map_type: str = "ALBEDO",
    filename_pattern: str = "albedo_*.png",  # Pattern for glob
    item_type: str = "MAP_COL",
    active: bool = True,
    transform_settings: Optional[mock.MagicMock] = None
) -> mock.MagicMock:
    """Build a ``FileRule``-spec'd mock for the individual map processing tests.

    Args:
        id_val: Rule id; a fresh ``uuid4`` is generated when omitted.
        map_type: Value exposed as ``map_type``.
        filename_pattern: Glob pattern exposed as ``filename_pattern``.
        item_type: Value exposed as ``item_type``.
        active: Value exposed as ``active``.
        transform_settings: Settings mock; defaults to
            ``create_mock_transform_settings()``.

    Returns:
        The configured ``MagicMock``.
    """
    mock_fr = mock.MagicMock(spec=FileRule)
    # Explicit None checks: the defaults must not depend on the truthiness
    # of whatever object the caller passes in.
    mock_fr.id = uuid.uuid4() if id_val is None else id_val
    mock_fr.map_type = map_type
    mock_fr.filename_pattern = filename_pattern
    mock_fr.item_type = item_type
    mock_fr.active = active
    if transform_settings is None:
        transform_settings = create_mock_transform_settings()
    mock_fr.transform_settings = transform_settings
    return mock_fr
def create_individual_map_proc_mock_context(
    initial_file_rules: Optional[list] = None,
    asset_source_path_str: str = "/fake/asset_source",
    skip_asset_flag: bool = False,
    asset_name: str = "IndividualMapAsset"
) -> AssetProcessingContext:
    """Assemble an AssetProcessingContext backed by mocks and fake paths.

    ``files_to_process`` receives a copy of ``initial_file_rules`` (empty when
    none are given) and ``status_flags['skip_asset']`` mirrors
    ``skip_asset_flag``; the detail dictionaries start out empty.
    """
    # Only name and source_path are configured on the asset rule here;
    # the rules to process travel via files_to_process instead.
    asset_rule = mock.MagicMock(spec=AssetRule)
    asset_rule.name = asset_name
    asset_rule.source_path = Path(asset_source_path_str)

    files_to_process = []
    if initial_file_rules:
        files_to_process = list(initial_file_rules)

    return AssetProcessingContext(
        source_rule=mock.MagicMock(spec=SourceRule),
        asset_rule=asset_rule,
        workspace_path=Path("/fake/workspace"),
        engine_temp_dir=Path("/fake/temp_engine_dir"),
        output_base_path=Path("/fake/output"),
        effective_supplier="ValidSupplier",
        asset_metadata={'asset_name': asset_name},
        processed_maps_details={},
        merged_maps_details={},
        files_to_process=files_to_process,
        loaded_data_cache={},
        config_obj=mock.MagicMock(spec=Configuration),
        status_flags={'skip_asset': skip_asset_flag},
        incrementing_value=None,
        # NOTE(review): keyword kept exactly as written (`sha5_value`);
        # confirm it matches the AssetProcessingContext field name.
        sha5_value=None
    )


@mock.patch('processing.pipeline.stages.individual_map_processing.ipu')
@mock.patch('logging.info')
def test_asset_skipped_if_flag_is_true(mock_log_info, mock_ipu):
    """With status_flags['skip_asset'] set, no image is loaded or saved."""
    stage = IndividualMapProcessingStage()
    skipped_context = create_individual_map_proc_mock_context(skip_asset_flag=True)

    # A rule is queued on purpose: the skip flag must win over pending work.
    skipped_context.files_to_process = [create_mock_file_rule_for_individual_processing()]

    result = stage.execute(skipped_context)

    mock_ipu.load_image.assert_not_called()
    mock_ipu.save_image.assert_not_called()
    assert not result.processed_maps_details  # nothing may be recorded
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu')
@mock.patch('logging.info')
def test_no_processing_if_no_map_col_rules(mock_log_info, mock_ipu):
    """A rule whose item_type is not MAP_COL triggers no image I/O."""
    stage = IndividualMapProcessingStage()

    # The only queued rule is a METADATA rule, i.e. not a MAP_COL one.
    metadata_rule = create_mock_file_rule_for_individual_processing(item_type="METADATA")
    context = create_individual_map_proc_mock_context(initial_file_rules=[metadata_rule])

    result = stage.execute(context)

    mock_ipu.load_image.assert_not_called()
    mock_ipu.save_image.assert_not_called()
    assert not result.processed_maps_details
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image')
@mock.patch('pathlib.Path.glob')  # source files are located through Path.glob
@mock.patch('logging.info')
@mock.patch('logging.error')
def test_individual_map_processing_success_no_resize(
    mock_log_error, mock_log_info, mock_path_glob, mock_load_image,
    mock_calc_dims, mock_resize_image, mock_save_image
):
    """Happy path: the calculated size equals the source size, so the image
    is saved as loaded and resize_image is never called."""
    stage = IndividualMapProcessingStage()

    asset_dir = Path("/fake/asset_source")
    found_source = asset_dir / "albedo_source.png"

    # Path.glob is replaced on the class by a plain MagicMock, so the call is
    # recorded with the pattern only (no Path instance as first argument).
    mock_path_glob.return_value = [found_source]

    ts = create_mock_transform_settings(target_width=100, target_height=100)
    file_rule = create_mock_file_rule_for_individual_processing(
        map_type="ALBEDO", filename_pattern="albedo_*.png", transform_settings=ts
    )
    context = create_individual_map_proc_mock_context(
        initial_file_rules=[file_rule],
        asset_source_path_str=str(asset_dir)
    )

    loaded_pixels = np.zeros((100, 100, 3), dtype=np.uint8)
    mock_load_image.return_value = loaded_pixels
    mock_calc_dims.return_value = (100, 100)  # same as source: no resize
    mock_save_image.return_value = True

    result = stage.execute(context)

    mock_path_glob.assert_called_once_with(file_rule.filename_pattern)
    mock_load_image.assert_called_once_with(found_source)
    mock_calc_dims.assert_called_once_with(
        (100, 100), ts.target_width, ts.target_height,
        ts.resize_mode, ts.ensure_pot, ts.allow_upscale
    )
    mock_resize_image.assert_not_called()  # the point of this test
    mock_save_image.assert_called_once()

    # save_image receives (image, path) positionally.
    saved_pixels, saved_path = mock_save_image.call_args[0]
    rule_key = file_rule.id.hex
    assert np.array_equal(saved_pixels, loaded_pixels)
    assert "processed_ALBEDO_" in saved_path.name  # name carries the map type
    assert rule_key in saved_path.name  # and the FileRule id
    assert saved_path.parent == context.engine_temp_dir

    assert rule_key in result.processed_maps_details
    details = result.processed_maps_details[rule_key]
    assert details['status'] == 'Processed'
    assert details['source_file'] == str(found_source)
    assert Path(details['temp_processed_file']) == saved_path
    assert details['original_dimensions'] == (100, 100)
    assert details['processed_dimensions'] == (100, 100)
    assert details['map_type'] == file_rule.map_type

    mock_log_error.assert_not_called()
    mock_log_info.assert_any_call(f"Successfully processed map {file_rule.map_type} (ID: {file_rule.id.hex}) for asset {context.asset_rule.name}. Output: {saved_path}")
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image')
@mock.patch('pathlib.Path.glob')
@mock.patch('logging.info')
@mock.patch('logging.error')
def test_source_file_not_found(
    mock_log_error, mock_log_info, mock_path_glob, mock_load_image,
    mock_calc_dims, mock_resize_image, mock_save_image
):
    """When the glob matches nothing, the rule is recorded as
    'Source Not Found' and none of the image helpers run."""
    stage = IndividualMapProcessingStage()
    asset_dir = Path("/fake/asset_source")

    mock_path_glob.return_value = []  # glob finds no files

    file_rule = create_mock_file_rule_for_individual_processing(filename_pattern="nonexistent_*.png")
    context = create_individual_map_proc_mock_context(
        initial_file_rules=[file_rule],
        asset_source_path_str=str(asset_dir)
    )

    result = stage.execute(context)

    mock_path_glob.assert_called_once_with(file_rule.filename_pattern)
    for untouched_helper in (mock_load_image, mock_calc_dims, mock_resize_image, mock_save_image):
        untouched_helper.assert_not_called()

    rule_key = file_rule.id.hex
    assert rule_key in result.processed_maps_details
    details = result.processed_maps_details[rule_key]
    assert details['status'] == 'Source Not Found'
    assert details['source_file'] is None
    assert details['temp_processed_file'] is None
    assert details['error_message'] is not None  # some explanation must be stored
    mock_log_error.assert_called_once()  # the message text is not pinned here
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image')
@mock.patch('pathlib.Path.glob')
@mock.patch('logging.info')
@mock.patch('logging.error')
def test_load_image_fails(
    mock_log_error, mock_log_info, mock_path_glob, mock_load_image,
    mock_calc_dims, mock_resize_image, mock_save_image
):
    """When load_image returns None, the rule is recorded as 'Load Failed'
    and no sizing, resizing or saving is attempted."""
    stage = IndividualMapProcessingStage()

    asset_dir = Path("/fake/asset_source")
    found_source = asset_dir / "albedo_corrupt.png"
    mock_path_glob.return_value = [found_source]
    mock_load_image.return_value = None  # simulate load failure

    file_rule = create_mock_file_rule_for_individual_processing(filename_pattern="albedo_*.png")
    context = create_individual_map_proc_mock_context(
        initial_file_rules=[file_rule],
        asset_source_path_str=str(asset_dir)
    )

    result = stage.execute(context)

    mock_path_glob.assert_called_once_with(file_rule.filename_pattern)
    mock_load_image.assert_called_once_with(found_source)
    for untouched_helper in (mock_calc_dims, mock_resize_image, mock_save_image):
        untouched_helper.assert_not_called()

    rule_key = file_rule.id.hex
    assert rule_key in result.processed_maps_details
    details = result.processed_maps_details[rule_key]
    assert details['status'] == 'Load Failed'
    assert details['source_file'] == str(found_source)
    assert details['temp_processed_file'] is None
    assert details['error_message'] is not None
    mock_log_error.assert_called_once()  # the message text is not pinned here
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image')
@mock.patch('pathlib.Path.glob')
@mock.patch('logging.info')
@mock.patch('logging.error')
def test_resize_occurs_when_dimensions_differ(
    mock_log_error, mock_log_info, mock_path_glob, mock_load_image,
    mock_calc_dims, mock_resize_image, mock_save_image
):
    """When the calculated size differs from the source size, the image is
    resized with the rule's resize filter and the resized data is saved."""
    stage = IndividualMapProcessingStage()
    source_file_name = "albedo_resize.png"
    mock_asset_source_path = Path("/fake/asset_source")
    mock_found_source_path = mock_asset_source_path / source_file_name
    mock_path_glob.return_value = [mock_found_source_path]

    original_dims = (100, 100)
    target_dims = (50, 50)  # Different dimensions
    mock_img_data = np.zeros((*original_dims, 3), dtype=np.uint8)
    mock_resized_img_data = np.zeros((*target_dims, 3), dtype=np.uint8)

    mock_load_image.return_value = mock_img_data
    ts = create_mock_transform_settings(target_width=target_dims[0], target_height=target_dims[1])
    # The filter must be in place BEFORE the stage runs. Assigning it after
    # execute() (as this test used to) means the stage saw a different value
    # than the one asserted below, so the assertion could never hold.
    ts.resize_filter = "LANCZOS4"  # Example filter
    file_rule = create_mock_file_rule_for_individual_processing(transform_settings=ts)
    context = create_individual_map_proc_mock_context(
        initial_file_rules=[file_rule],
        asset_source_path_str=str(mock_asset_source_path)
    )

    mock_calc_dims.return_value = target_dims  # Simulate calc_dims returning new dimensions
    mock_resize_image.return_value = mock_resized_img_data  # Simulate resize returning new image data
    mock_save_image.return_value = True

    updated_context = stage.execute(context)

    mock_load_image.assert_called_once_with(mock_found_source_path)
    mock_calc_dims.assert_called_once_with(
        original_dims, ts.target_width, ts.target_height, ts.resize_mode, ts.ensure_pot, ts.allow_upscale
    )
    # NOTE(review): assumes the stage calls
    # ipu.resize_image(image, target_dims, transform_settings.resize_filter),
    # as the earlier comments in this test described - confirm against the stage.
    mock_resize_image.assert_called_once_with(mock_img_data, target_dims, ts.resize_filter)

    # Guard the call_args access so a missing save fails with a clear message.
    mock_save_image.assert_called_once()
    saved_image_arg, saved_path_arg = mock_save_image.call_args[0]
    assert np.array_equal(saved_image_arg, mock_resized_img_data)  # Check resized data is saved
    assert "processed_ALBEDO_" in saved_path_arg.name
    assert saved_path_arg.parent == context.engine_temp_dir

    assert file_rule.id.hex in updated_context.processed_maps_details
    details = updated_context.processed_maps_details[file_rule.id.hex]
    assert details['status'] == 'Processed'
    assert details['original_dimensions'] == original_dims
    assert details['processed_dimensions'] == target_dims
    mock_log_error.assert_not_called()
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image')
@mock.patch('pathlib.Path.glob')
@mock.patch('logging.info')
@mock.patch('logging.error')
def test_save_image_fails(
    mock_log_error, mock_log_info, mock_path_glob, mock_load_image,
    mock_calc_dims, mock_resize_image, mock_save_image
):
    """When save_image returns False, the rule is recorded as 'Save Failed'
    with the attempted output path and an error message."""
    stage = IndividualMapProcessingStage()

    asset_dir = Path("/fake/asset_source")
    found_source = asset_dir / "albedo_save_fail.png"
    mock_path_glob.return_value = [found_source]

    mock_load_image.return_value = np.zeros((100, 100, 3), dtype=np.uint8)
    mock_calc_dims.return_value = (100, 100)  # no resize
    mock_save_image.return_value = False  # simulate save failure

    file_rule = create_mock_file_rule_for_individual_processing(
        transform_settings=create_mock_transform_settings()
    )
    context = create_individual_map_proc_mock_context(
        initial_file_rules=[file_rule],
        asset_source_path_str=str(asset_dir)
    )

    result = stage.execute(context)

    mock_save_image.assert_called_once()  # the save must still be attempted

    rule_key = file_rule.id.hex
    assert rule_key in result.processed_maps_details
    details = result.processed_maps_details[rule_key]
    assert details['status'] == 'Save Failed'
    assert details['source_file'] == str(found_source)
    assert details['temp_processed_file'] is not None  # a path was generated
    assert details['error_message'] is not None
    mock_log_error.assert_called_once()  # the message text is not pinned here
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.convert_bgr_to_rgb')
@mock.patch('pathlib.Path.glob')
@mock.patch('logging.info')
@mock.patch('logging.error')
def test_color_conversion_bgr_to_rgb(
    mock_log_error, mock_log_info, mock_path_glob, mock_convert_bgr, mock_load_image,
    mock_calc_dims, mock_resize_image, mock_save_image
):
    """A three-channel image with target_color_profile "RGB" is passed through
    ipu.convert_bgr_to_rgb, and the converted data is what gets saved."""
    stage = IndividualMapProcessingStage()
    source_file_name = "albedo_bgr.png"
    mock_asset_source_path = Path("/fake/asset_source")
    mock_found_source_path = mock_asset_source_path / source_file_name
    mock_path_glob.return_value = [mock_found_source_path]

    # The two arrays must differ in content. With both all-zero (as before),
    # the saved-data assertion below also passed when the stage saved the
    # unconverted image, so it proved nothing.
    mock_bgr_img_data = np.zeros((100, 100, 3), dtype=np.uint8)  # Loaded as BGR
    mock_rgb_img_data = np.full((100, 100, 3), 255, dtype=np.uint8)  # After conversion

    mock_load_image.return_value = mock_bgr_img_data
    mock_convert_bgr.return_value = mock_rgb_img_data  # Mock the conversion
    mock_calc_dims.return_value = (100, 100)  # No resize
    mock_save_image.return_value = True

    # Transform settings request RGB
    ts = create_mock_transform_settings(target_color_profile="RGB")
    file_rule = create_mock_file_rule_for_individual_processing(transform_settings=ts)
    context = create_individual_map_proc_mock_context(
        initial_file_rules=[file_rule],
        asset_source_path_str=str(mock_asset_source_path)
    )

    updated_context = stage.execute(context)

    mock_load_image.assert_called_once_with(mock_found_source_path)
    mock_convert_bgr.assert_called_once_with(mock_bgr_img_data)
    mock_resize_image.assert_not_called()

    # Guard the call_args access so a missing save fails with a clear message.
    mock_save_image.assert_called_once()
    saved_image_arg, _ = mock_save_image.call_args[0]
    assert np.array_equal(saved_image_arg, mock_rgb_img_data)  # Ensure RGB data is saved
    assert not np.array_equal(saved_image_arg, mock_bgr_img_data)  # ...and not the raw load
    mock_log_error.assert_not_called()
    mock_log_info.assert_any_call(f"Attempting to convert image from BGR to RGB for {file_rule.id.hex}")
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.save_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.resize_image')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.calculate_target_dimensions')
@mock.patch('processing.pipeline.stages.individual_map_processing.ipu.load_image')
@mock.patch('pathlib.Path.glob')
@mock.patch('logging.info')
@mock.patch('logging.error')
def test_multiple_map_col_rules_processed(
    mock_log_error, mock_log_info, mock_path_glob, mock_load_image,
    mock_calc_dims, mock_resize_image, mock_save_image
):
    """Two MAP_COL rules are handled in order: the albedo map is saved
    as loaded, the roughness map is resized first."""
    stage = IndividualMapProcessingStage()
    asset_dir = Path("/fake/asset_source")

    # Rule 1: albedo, already at its target size.
    albedo_ts = create_mock_transform_settings(target_width=100, target_height=100)
    albedo_id = uuid.uuid4()
    albedo_rule = create_mock_file_rule_for_individual_processing(
        id_val=albedo_id, map_type="ALBEDO", filename_pattern="albedo_*.png", transform_settings=albedo_ts
    )
    albedo_source = asset_dir / "albedo_map.png"
    albedo_pixels = np.zeros((100, 100, 3), dtype=np.uint8)

    # Rule 2: roughness, 200x200 source that must shrink to 50x50.
    rough_ts = create_mock_transform_settings(target_width=50, target_height=50)
    rough_ts.resize_filter = "AREA"
    rough_id = uuid.uuid4()
    rough_rule = create_mock_file_rule_for_individual_processing(
        id_val=rough_id, map_type="ROUGHNESS", filename_pattern="rough_*.png", transform_settings=rough_ts
    )
    rough_source = asset_dir / "rough_map.png"
    rough_pixels = np.zeros((200, 200, 1), dtype=np.uint8)
    rough_resized = np.zeros((50, 50, 1), dtype=np.uint8)

    context = create_individual_map_proc_mock_context(
        initial_file_rules=[albedo_rule, rough_rule],
        asset_source_path_str=str(asset_dir)
    )

    # One answer per rule, consumed in processing order.
    mock_path_glob.side_effect = [[albedo_source], [rough_source]]
    mock_load_image.side_effect = [albedo_pixels, rough_pixels]
    mock_calc_dims.side_effect = [(100, 100), (50, 50)]
    mock_resize_image.return_value = rough_resized  # only needed for rule 2
    mock_save_image.return_value = True

    result = stage.execute(context)

    # Lookup, load and size calculation, per rule and in order.
    expected_inputs = (
        (albedo_rule, albedo_source, (100, 100), albedo_ts),
        (rough_rule, rough_source, (200, 200), rough_ts),
    )
    for index, (rule, source, source_dims, ts) in enumerate(expected_inputs):
        assert mock_path_glob.call_args_list[index][0][0] == rule.filename_pattern
        assert mock_load_image.call_args_list[index][0][0] == source
        assert mock_calc_dims.call_args_list[index][0] == (
            source_dims, ts.target_width, ts.target_height,
            ts.resize_mode, ts.ensure_pot, ts.allow_upscale
        )

    # Only the roughness map goes through resize_image.
    mock_resize_image.assert_called_once_with(rough_pixels, (50, 50), rough_ts.resize_filter)

    # Both maps are saved, each under a name carrying its map type and rule id.
    assert mock_save_image.call_count == 2
    expected_saves = (
        (albedo_pixels, "processed_ALBEDO_", albedo_id),
        (rough_resized, "processed_ROUGHNESS_", rough_id),
    )
    for index, (pixels, name_prefix, rule_id) in enumerate(expected_saves):
        saved_pixels, saved_path = mock_save_image.call_args_list[index][0]
        assert np.array_equal(saved_pixels, pixels)
        assert name_prefix in saved_path.name
        assert rule_id.hex in saved_path.name

    # Context bookkeeping for both rules.
    expected_details = (
        (albedo_id, (100, 100), (100, 100)),
        (rough_id, (200, 200), (50, 50)),
    )
    for rule_id, original_dims, processed_dims in expected_details:
        assert rule_id.hex in result.processed_maps_details
        details = result.processed_maps_details[rule_id.hex]
        assert details['status'] == 'Processed'
        assert details['original_dimensions'] == original_dims
        assert details['processed_dimensions'] == processed_dims

    mock_log_error.assert_not_called()
updated_context.processed_maps_details + details1 = updated_context.processed_maps_details[file_rule1_id.hex] + assert details1['status'] == 'Processed' + assert details1['original_dimensions'] == (100, 100) + assert details1['processed_dimensions'] == (100, 100) + + assert file_rule2_id.hex in updated_context.processed_maps_details + details2 = updated_context.processed_maps_details[file_rule2_id.hex] + assert details2['status'] == 'Processed' + assert details2['original_dimensions'] == (200, 200) # Original dims of img_data2_orig + assert details2['processed_dimensions'] == (50, 50) + + mock_log_error.assert_not_called() \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_map_merging.py b/tests/processing/pipeline/stages/test_map_merging.py new file mode 100644 index 0000000..f7c0c56 --- /dev/null +++ b/tests/processing/pipeline/stages/test_map_merging.py @@ -0,0 +1,538 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +from typing import Optional # Added Optional for type hinting + +from processing.pipeline.stages.map_merging import MapMergingStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule +from configuration import Configuration + +# Mock Helper Functions +def create_mock_merge_input_channel( + file_rule_id: uuid.UUID, source_channel: int = 0, target_channel: int = 0, invert: bool = False +) -> mock.MagicMock: + mic = mock.MagicMock(spec=MergeInputChannel) + mic.file_rule_id = file_rule_id + mic.source_channel = source_channel + mic.target_channel = target_channel + mic.invert_source_channel = invert + mic.default_value_if_missing = 0 # Or some other default + return mic + +def create_mock_merge_settings( + input_maps: Optional[list] = None, # List of mock MergeInputChannel + output_channels: int = 3 +) -> mock.MagicMock: + ms = mock.MagicMock(spec=MergeSettings) + ms.input_maps = input_maps if 
input_maps is not None else [] + ms.output_channels = output_channels + return ms + +def create_mock_file_rule_for_merging( + id_val: Optional[uuid.UUID] = None, + map_type: str = "ORM", # Output map type + item_type: str = "MAP_MERGE", + merge_settings: Optional[mock.MagicMock] = None +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = f"{map_type.lower()}_merged.png" # Placeholder + mock_fr.item_type = item_type + mock_fr.active = True + mock_fr.merge_settings = merge_settings if merge_settings else create_mock_merge_settings() + return mock_fr + +def create_map_merging_mock_context( + initial_file_rules: Optional[list] = None, # Will contain the MAP_MERGE rule + initial_processed_details: Optional[dict] = None, # Pre-processed inputs for merge + skip_asset_flag: bool = False, + asset_name: str = "MergeAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_config = mock.MagicMock(spec=Configuration) + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, # Stage populates this + files_to_process=list(initial_file_rules) if initial_file_rules else [], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha_value based on AssetProcessingContext + ) + return context +def test_asset_skipped(): + stage = MapMergingStage() + 
context = create_map_merging_mock_context(skip_asset_flag=True) + + updated_context = stage.execute(context) + + assert updated_context == context # No changes expected + assert not updated_context.merged_maps_details # No maps should be merged + +def test_no_map_merge_rules(): + stage = MapMergingStage() + # Context with a non-MAP_MERGE rule + non_merge_rule = create_mock_file_rule_for_merging(item_type="TEXTURE_MAP", map_type="Diffuse") + context = create_map_merging_mock_context(initial_file_rules=[non_merge_rule]) + + updated_context = stage.execute(context) + + assert updated_context == context # No changes expected + assert not updated_context.merged_maps_details # No maps should be merged + +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') # If testing resize +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_rgb_success(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRules (mocked as already processed) + r_id, g_id, b_id = uuid.uuid4(), uuid.uuid4(), uuid.uuid4() + processed_details = { + r_id.hex: {'temp_processed_file': '/fake/red.png', 'status': 'Processed', 'map_type': 'RED_SRC'}, + g_id.hex: {'temp_processed_file': '/fake/green.png', 'status': 'Processed', 'map_type': 'GREEN_SRC'}, + b_id.hex: {'temp_processed_file': '/fake/blue.png', 'status': 'Processed', 'map_type': 'BLUE_SRC'} + } + # Mock loaded image data (grayscale for inputs) + mock_r_data = np.full((10, 10), 200, dtype=np.uint8) + mock_g_data = np.full((10, 10), 100, dtype=np.uint8) + mock_b_data = np.full((10, 10), 50, dtype=np.uint8) + mock_load_image.side_effect = [mock_r_data, mock_g_data, mock_b_data] + + # Merge Rule setup + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=r_id, source_channel=0, 
target_channel=0), # R to R + create_mock_merge_input_channel(file_rule_id=g_id, source_channel=0, target_channel=1), # G to G + create_mock_merge_input_channel(file_rule_id=b_id, source_channel=0, target_channel=2) # B to B + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=3) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="RGB_Combined", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 3 + mock_resize_image.assert_not_called() # Assuming all inputs are same size for this test + mock_save_image.assert_called_once() + + # Check that the correct filename was passed to save_image + # The filename is constructed as: f"{context.asset_rule.name}_merged_{merge_rule.map_type}{Path(first_input_path).suffix}" + # In this case, first_input_path is '/fake/red.png', so suffix is '.png' + # Asset name is "MergeAsset" + expected_filename_part = f"{context.asset_rule.name}_merged_{merge_rule.map_type}.png" + saved_path_arg = mock_save_image.call_args[0][0] + assert expected_filename_part in str(saved_path_arg) + + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 10, 3) + assert np.all(saved_data[:,:,0] == 200) # Red channel + assert np.all(saved_data[:,:,1] == 100) # Green channel + assert np.all(saved_data[:,:,2] == 50) # Blue channel + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + # The temp_merged_file path will be under engine_temp_dir / asset_name / filename + assert f"{context.engine_temp_dir / context.asset_rule.name / expected_filename_part}" == details['temp_merged_file'] + 
mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") + +# Unit tests will be added below this line +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_channel_inversion(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRule + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Processed', 'map_type': 'SOURCE_MAP'} + } + # Mock loaded image data (single channel for simplicity, to be inverted) + mock_source_data = np.array([[0, 100], [155, 255]], dtype=np.uint8) + mock_load_image.return_value = mock_source_data + + # Merge Rule setup: one input, inverted, to one output channel + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0, invert=True) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="Inverted_Gray", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/source.png')) + mock_resize_image.assert_not_called() + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (2, 2) # Grayscale output + + # Expected inverted data: 255-original + expected_inverted_data = 
np.array([[255, 155], [100, 0]], dtype=np.uint8) + assert np.all(saved_data == expected_inverted_data) + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + assert "merged_Inverted_Gray" in details['temp_merged_file'] + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_input_map_missing(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRule ID that will be missing from processed_details + missing_input_id = uuid.uuid4() + + # Merge Rule setup + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=missing_input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestMissing", merge_settings=merge_settings) + + # processed_details is empty, so missing_input_id will not be found + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details={} + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert f"Input map FileRule ID {missing_input_id.hex} not found in processed_maps_details or not successfully processed" in details['error_message'] + + mock_log_error.assert_called_once() 
+ assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert f"Input map FileRule ID {missing_input_id.hex} not found in processed_maps_details or not successfully processed" in mock_log_error.call_args[0][0] + +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_input_map_status_not_processed(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + # Status is 'Failed', not 'Processed' + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Failed', 'map_type': 'SOURCE_MAP'} + } + + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestNotProcessed", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert f"Input map FileRule ID {input_id.hex} not found in processed_maps_details or not successfully processed" in details['error_message'] + + mock_log_error.assert_called_once() + assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert f"Input map FileRule ID {input_id.hex} not found in 
processed_maps_details or not successfully processed" in mock_log_error.call_args[0][0] +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_load_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Processed', 'map_type': 'SOURCE_MAP'} + } + + # Configure mock_load_image to raise an exception + mock_load_image.side_effect = Exception("Failed to load image") + + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestLoadFail", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/source.png')) + mock_save_image.assert_not_called() + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert "Failed to load image for merge input" in details['error_message'] + assert str(Path('/fake/source.png')) in details['error_message'] + + mock_log_error.assert_called_once() + assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert "Failed to load image for merge input" in mock_log_error.call_args[0][0] +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') 
+@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_save_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/source.png', 'status': 'Processed', 'map_type': 'SOURCE_MAP'} + } + mock_source_data = np.full((10, 10), 128, dtype=np.uint8) + mock_load_image.return_value = mock_source_data + + # Configure mock_save_image to return False (indicating failure) + mock_save_image.return_value = False + + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=0) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="TestSaveFail", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/source.png')) + mock_save_image.assert_called_once() # save_image is called, but returns False + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Failed' + assert 'error_message' in details + assert "Failed to save merged map" in details['error_message'] + + mock_log_error.assert_called_once() + assert f"Failed to merge map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'" in mock_log_error.call_args[0][0] + assert "Failed to save merged map" in mock_log_error.call_args[0][0] +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') 
+@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_dimension_mismatch_handling(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRules + id1, id2 = uuid.uuid4(), uuid.uuid4() + processed_details = { + id1.hex: {'temp_processed_file': '/fake/img1.png', 'status': 'Processed', 'map_type': 'IMG1_SRC'}, + id2.hex: {'temp_processed_file': '/fake/img2.png', 'status': 'Processed', 'map_type': 'IMG2_SRC'} + } + + # Mock loaded image data with different dimensions + mock_img1_data = np.full((10, 10), 100, dtype=np.uint8) # 10x10 + mock_img2_data_original = np.full((5, 5), 200, dtype=np.uint8) # 5x5, will be resized + + mock_load_image.side_effect = [mock_img1_data, mock_img2_data_original] + + # Mock resize_image to return an image of the target dimensions + # For simplicity, it just creates a new array of the target size filled with a value. + mock_img2_data_resized = np.full((10, 10), 210, dtype=np.uint8) # Resized to 10x10 + mock_resize_image.return_value = mock_img2_data_resized + + # Merge Rule setup: two inputs, one output channel (e.g., averaging them) + # Target channel 0 for both, the stage should handle combining them if they map to the same target. + # However, the current stage logic for multiple inputs to the same target channel is to take the last one. + # Let's make them target different channels for a clearer test of resize. 
+ merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=id1, source_channel=0, target_channel=0), + create_mock_merge_input_channel(file_rule_id=id2, source_channel=0, target_channel=1) + ] + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=2) # Outputting 2 channels + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="ResizedMerge", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + assert mock_load_image.call_count == 2 + mock_load_image.assert_any_call(Path('/fake/img1.png')) + mock_load_image.assert_any_call(Path('/fake/img2.png')) + + # Assert resize_image was called for the second image to match the first's dimensions + mock_resize_image.assert_called_once() + # The first argument to resize_image is the image data, second is target_shape tuple (height, width) + # np.array_equal is needed for comparing numpy arrays in mock calls + assert np.array_equal(mock_resize_image.call_args[0][0], mock_img2_data_original) + assert mock_resize_image.call_args[0][1] == (10, 10) + + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 10, 2) # 2 output channels + assert np.all(saved_data[:,:,0] == mock_img1_data) # First channel from img1 + assert np.all(saved_data[:,:,1] == mock_img2_data_resized) # Second channel from resized img2 + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + assert "merged_ResizedMerge" in details['temp_merged_file'] + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Resized input map from {Path('/fake/img2.png')} from 
{mock_img2_data_original.shape} to {(10,10)} to match first loaded map.") + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.resize_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_map_merging_to_grayscale_output(mock_log_error, mock_log_info, mock_load_image, mock_resize_image, mock_save_image): + stage = MapMergingStage() + + # Input FileRule (e.g., an RGB image) + input_id = uuid.uuid4() + processed_details = { + input_id.hex: {'temp_processed_file': '/fake/rgb_source.png', 'status': 'Processed', 'map_type': 'RGB_SRC'} + } + # Mock loaded image data (3 channels) + mock_rgb_data = np.full((10, 10, 3), [50, 100, 150], dtype=np.uint8) + mock_load_image.return_value = mock_rgb_data + + # Merge Rule setup: take the Green channel (source_channel=1) from input and map it to the single output channel (target_channel=0) + merge_inputs = [ + create_mock_merge_input_channel(file_rule_id=input_id, source_channel=1, target_channel=0) # G to Grayscale + ] + # output_channels = 1 for grayscale + merge_settings = create_mock_merge_settings(input_maps=merge_inputs, output_channels=1) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="GrayscaleFromGreen", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/rgb_source.png')) + mock_resize_image.assert_not_called() + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 
10) # Grayscale output (2D) + assert np.all(saved_data == 100) # Green channel's value + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + assert "merged_GrayscaleFromGreen" in details['temp_merged_file'] + mock_log_error.assert_not_called() + mock_log_info.assert_any_call(f"Successfully merged map '{merge_rule.map_type}' for asset '{context.asset_rule.name}'.") + +@mock.patch('processing.pipeline.stages.map_merging.ipu.save_image') +@mock.patch('processing.pipeline.stages.map_merging.ipu.load_image') +@mock.patch('logging.error') +def test_map_merging_default_value_if_missing_channel(mock_log_error, mock_load_image, mock_save_image): + stage = MapMergingStage() + + input_id = uuid.uuid4() + processed_details = { + # Input is a grayscale image (1 channel) + input_id.hex: {'temp_processed_file': '/fake/gray_source.png', 'status': 'Processed', 'map_type': 'GRAY_SRC'} + } + mock_gray_data = np.full((10, 10), 50, dtype=np.uint8) + mock_load_image.return_value = mock_gray_data + + # Merge Rule: try to read source_channel 1 (which doesn't exist in grayscale) + # and use default_value_if_missing for target_channel 0. + # Also, read source_channel 0 (which exists) for target_channel 1. 
+ mic1 = create_mock_merge_input_channel(file_rule_id=input_id, source_channel=1, target_channel=0) + mic1.default_value_if_missing = 128 # Set a specific default value + mic2 = create_mock_merge_input_channel(file_rule_id=input_id, source_channel=0, target_channel=1) + + merge_settings = create_mock_merge_settings(input_maps=[mic1, mic2], output_channels=2) + merge_rule_id = uuid.uuid4() + merge_rule = create_mock_file_rule_for_merging(id_val=merge_rule_id, map_type="DefaultValueTest", merge_settings=merge_settings) + + context = create_map_merging_mock_context( + initial_file_rules=[merge_rule], + initial_processed_details=processed_details + ) + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path('/fake/gray_source.png')) + mock_save_image.assert_called_once() + + saved_data = mock_save_image.call_args[0][1] + assert saved_data.shape == (10, 10, 2) + assert np.all(saved_data[:,:,0] == 128) # Default value for missing source channel 1 + assert np.all(saved_data[:,:,1] == 50) # Value from existing source channel 0 + + assert merge_rule.id.hex in updated_context.merged_maps_details + details = updated_context.merged_maps_details[merge_rule.id.hex] + assert details['status'] == 'Processed' + mock_log_error.assert_not_called() \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_metadata_finalization_save.py b/tests/processing/pipeline/stages/test_metadata_finalization_save.py new file mode 100644 index 0000000..68741ce --- /dev/null +++ b/tests/processing/pipeline/stages/test_metadata_finalization_save.py @@ -0,0 +1,359 @@ +import pytest +from unittest import mock +from pathlib import Path +import datetime +import json # For comparing dumped content +import uuid +from typing import Optional, Dict, Any + +from processing.pipeline.stages.metadata_finalization_save import MetadataFinalizationAndSaveStage +from processing.pipeline.asset_context import 
AssetProcessingContext +from rule_structure import AssetRule, SourceRule +from configuration import Configuration, GeneralSettings # Added GeneralSettings as it's in the helper + + +def create_metadata_save_mock_context( + status_flags: Optional[Dict[str, Any]] = None, + initial_asset_metadata: Optional[Dict[str, Any]] = None, + processed_details: Optional[Dict[str, Any]] = None, + merged_details: Optional[Dict[str, Any]] = None, + asset_name: str = "MetaSaveAsset", + output_path_pattern_val: str = "{asset_name}/metadata/{filename}", + # ... other common context fields ... +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.output_path_pattern = output_path_pattern_val + mock_asset_rule.id = uuid.uuid4() # Needed for generate_path_from_pattern if it uses it + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = "MetaSaveSource" + + mock_config = mock.MagicMock(spec=Configuration) + # mock_config.general_settings = mock.MagicMock(spec=GeneralSettings) # If needed + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output_base"), # For generate_path + effective_supplier="ValidSupplier", + asset_metadata=initial_asset_metadata if initial_asset_metadata is not None else {}, + processed_maps_details=processed_details if processed_details is not None else {}, + merged_maps_details=merged_details if merged_details is not None else {}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags=status_flags if status_flags is not None else {}, + incrementing_value="001", # Example for path generation + sha5_value="abc" # Example for path generation + ) + return context +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') 
+@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_asset_skipped_before_metadata_init( + mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests that if an asset is marked for skipping and has no initial metadata, + the stage returns early without attempting to save metadata. + """ + stage = MetadataFinalizationAndSaveStage() + context = create_metadata_save_mock_context( + status_flags={'skip_asset': True}, + initial_asset_metadata={} # Explicitly empty + ) + + updated_context = stage.execute(context) + + # Assert that no processing or saving attempts were made + mock_dt.now.assert_not_called() # Should not even try to set end time if no metadata + mock_gen_path.assert_not_called() + mock_mkdir.assert_not_called() + mock_file_open.assert_not_called() + mock_json_dump.assert_not_called() + + assert updated_context.asset_metadata == {} # Metadata remains empty + assert 'metadata_file_path' not in updated_context.asset_metadata + assert updated_context.status_flags.get('metadata_save_error') is None +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_asset_skipped_after_metadata_init( + mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests that if an asset is marked for skipping but has initial metadata, + the status is updated to 'Skipped' and metadata is saved. 
+ """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 0, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/SkippedAsset/metadata/SkippedAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + initial_meta = {'asset_name': "SkippedAsset", 'status': "Pending"} + + context = create_metadata_save_mock_context( + asset_name="SkippedAsset", + status_flags={'skip_asset': True}, + initial_asset_metadata=initial_meta + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once_with( + context.asset_rule.output_path_pattern, + context.asset_rule, + context.source_rule, + context.output_base_path, + context.asset_metadata, # Original metadata passed for path gen + context.incrementing_value, + context.sha5_value, + filename_override=f"{context.asset_rule.name}_metadata.json" + ) + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() + + dumped_data = mock_json_dump.call_args[0][0] + assert dumped_data['status'] == "Skipped" + assert dumped_data['processing_end_time'] == fixed_now.isoformat() + assert 'processed_map_details' not in dumped_data # Should not be present if skipped early + assert 'merged_map_details' not in dumped_data # Should not be present if skipped early + + assert updated_context.asset_metadata['status'] == "Skipped" + assert updated_context.asset_metadata['processing_end_time'] == fixed_now.isoformat() + assert updated_context.asset_metadata['metadata_file_path'] == fake_metadata_path_str + assert updated_context.status_flags.get('metadata_save_error') is None +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) # Mocks open() +@mock.patch('pathlib.Path.mkdir') 
+@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_metadata_save_success(mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump): + """ + Tests successful metadata finalization and saving, including serialization of Path objects. + """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 30, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/MetaSaveAsset/metadata/MetaSaveAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + initial_meta = {'asset_name': "MetaSaveAsset", 'status': "Pending", 'processing_start_time': "2023-01-01T12:00:00"} + # Example of a Path object that needs serialization + proc_details = {'map1': {'temp_processed_file': Path('/fake/temp_engine_dir/map1.png'), 'final_file_path': Path('/fake/output_base/MetaSaveAsset/map1.png')}} + merged_details = {'merged_map_A': {'output_path': Path('/fake/output_base/MetaSaveAsset/merged_A.png')}} + + context = create_metadata_save_mock_context( + initial_asset_metadata=initial_meta, + processed_details=proc_details, + merged_details=merged_details, + status_flags={} # No errors, no skip + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once_with( + context.asset_rule.output_path_pattern, + context.asset_rule, + context.source_rule, + context.output_base_path, + context.asset_metadata, # The metadata *before* adding end_time, status etc. 
+ context.incrementing_value, + context.sha5_value, + filename_override=f"{context.asset_rule.name}_metadata.json" + ) + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) # Checks parent dir of fake_metadata_path_str + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() + + # Check what was passed to json.dump + dumped_data = mock_json_dump.call_args[0][0] + assert dumped_data['status'] == "Processed" + assert dumped_data['processing_end_time'] == fixed_now.isoformat() + assert 'processing_start_time' in dumped_data # Ensure existing fields are preserved + + # Verify processed_map_details and Path serialization + assert 'processed_map_details' in dumped_data + assert dumped_data['processed_map_details']['map1']['temp_processed_file'] == '/fake/temp_engine_dir/map1.png' + assert dumped_data['processed_map_details']['map1']['final_file_path'] == '/fake/output_base/MetaSaveAsset/map1.png' + + # Verify merged_map_details and Path serialization + assert 'merged_map_details' in dumped_data + assert dumped_data['merged_map_details']['merged_map_A']['output_path'] == '/fake/output_base/MetaSaveAsset/merged_A.png' + + assert updated_context.asset_metadata['metadata_file_path'] == fake_metadata_path_str + assert updated_context.asset_metadata['status'] == "Processed" + assert updated_context.status_flags.get('metadata_save_error') is None +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +def test_processing_failed_due_to_previous_error( + mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests that if a previous stage set an error flag, the status is 'Failed' + and metadata (including any existing details) is saved. 
+ """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 45, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/FailedAsset/metadata/FailedAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + initial_meta = {'asset_name': "FailedAsset", 'status': "Processing"} + # Simulate some details might exist even if a later stage failed + proc_details = {'map1_partial': {'temp_processed_file': Path('/fake/temp_engine_dir/map1_partial.png')}} + + context = create_metadata_save_mock_context( + asset_name="FailedAsset", + initial_asset_metadata=initial_meta, + processed_details=proc_details, + merged_details={}, # No merged details if processing failed before that + status_flags={'file_processing_error': True, 'error_message': "Something went wrong"} + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once() # Path generation should still occur + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() + + dumped_data = mock_json_dump.call_args[0][0] + assert dumped_data['status'] == "Failed" + assert dumped_data['processing_end_time'] == fixed_now.isoformat() + assert 'error_message' in dumped_data # Assuming error messages from status_flags are copied + assert dumped_data['error_message'] == "Something went wrong" + + # Check that existing details are included + assert 'processed_map_details' in dumped_data + assert dumped_data['processed_map_details']['map1_partial']['temp_processed_file'] == '/fake/temp_engine_dir/map1_partial.png' + assert 'merged_map_details' in dumped_data # Should be present, even if empty + assert dumped_data['merged_map_details'] == {} + + assert updated_context.asset_metadata['status'] == "Failed" + assert updated_context.asset_metadata['metadata_file_path'] == 
fake_metadata_path_str + assert updated_context.status_flags.get('metadata_save_error') is None + # Ensure the original error flag is preserved + assert updated_context.status_flags['file_processing_error'] is True +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +@mock.patch('logging.error') # To check if error is logged +def test_generate_path_fails( + mock_log_error, mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests behavior when generate_path_from_pattern raises an exception. + Ensures status is updated, error flag is set, and no save is attempted. + """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 50, 0) + mock_dt.now.return_value = fixed_now + + mock_gen_path.side_effect = Exception("Simulated path generation error") + + initial_meta = {'asset_name': "PathFailAsset", 'status': "Processing"} + context = create_metadata_save_mock_context( + asset_name="PathFailAsset", + initial_asset_metadata=initial_meta, + status_flags={} + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() # Time is set before path generation + mock_gen_path.assert_called_once() # generate_path_from_pattern is called + + # File operations should NOT be called if path generation fails + mock_mkdir.assert_not_called() + mock_file_open.assert_not_called() + mock_json_dump.assert_not_called() + + mock_log_error.assert_called_once() # Check that an error was logged + # Example: check if the log message contains relevant info, if needed + # assert "Failed to generate metadata path" in mock_log_error.call_args[0][0] + + assert updated_context.asset_metadata['status'] == "Failed" # Or a more specific error status + assert 
'processing_end_time' in updated_context.asset_metadata # End time should still be set + assert updated_context.asset_metadata['processing_end_time'] == fixed_now.isoformat() + assert 'metadata_file_path' not in updated_context.asset_metadata # Path should not be set + + assert updated_context.status_flags.get('metadata_save_error') is True + assert 'error_message' in updated_context.asset_metadata # Check if error message is populated + assert "Simulated path generation error" in updated_context.asset_metadata['error_message'] +@mock.patch('processing.pipeline.stages.metadata_finalization_save.json.dump') +@mock.patch('builtins.open', new_callable=mock.mock_open) +@mock.patch('pathlib.Path.mkdir') +@mock.patch('processing.pipeline.stages.metadata_finalization_save.generate_path_from_pattern') +@mock.patch('datetime.datetime') +@mock.patch('logging.error') # To check if error is logged +def test_json_dump_fails( + mock_log_error, mock_dt, mock_gen_path, mock_mkdir, mock_file_open, mock_json_dump +): + """ + Tests behavior when json.dump raises an exception during saving. + Ensures status is updated, error flag is set, and error is logged. 
+ """ + stage = MetadataFinalizationAndSaveStage() + + fixed_now = datetime.datetime(2023, 1, 1, 12, 55, 0) + mock_dt.now.return_value = fixed_now + + fake_metadata_path_str = "/fake/output_base/JsonDumpFailAsset/metadata/JsonDumpFailAsset_metadata.json" + mock_gen_path.return_value = fake_metadata_path_str + + mock_json_dump.side_effect = IOError("Simulated JSON dump error") # Or TypeError for non-serializable + + initial_meta = {'asset_name': "JsonDumpFailAsset", 'status': "Processing"} + context = create_metadata_save_mock_context( + asset_name="JsonDumpFailAsset", + initial_asset_metadata=initial_meta, + status_flags={} + ) + + updated_context = stage.execute(context) + + mock_dt.now.assert_called_once() + mock_gen_path.assert_called_once() + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_file_open.assert_called_once_with(Path(fake_metadata_path_str), 'w') + mock_json_dump.assert_called_once() # json.dump was attempted + + mock_log_error.assert_called_once() + # assert "Failed to save metadata JSON" in mock_log_error.call_args[0][0] + + assert updated_context.asset_metadata['status'] == "Failed" # Or specific "Metadata Save Failed" + assert 'processing_end_time' in updated_context.asset_metadata + assert updated_context.asset_metadata['processing_end_time'] == fixed_now.isoformat() + # metadata_file_path might be set if path generation succeeded, even if dump failed. + # Depending on desired behavior, this could be asserted or not. + # For now, let's assume it's set if path generation was successful. 
+ assert updated_context.asset_metadata['metadata_file_path'] == fake_metadata_path_str + + assert updated_context.status_flags.get('metadata_save_error') is True + assert 'error_message' in updated_context.asset_metadata + assert "Simulated JSON dump error" in updated_context.asset_metadata['error_message'] \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_metadata_initialization.py b/tests/processing/pipeline/stages/test_metadata_initialization.py new file mode 100644 index 0000000..5b358fd --- /dev/null +++ b/tests/processing/pipeline/stages/test_metadata_initialization.py @@ -0,0 +1,169 @@ +import pytest +from unittest import mock +from pathlib import Path +import datetime +import uuid +from typing import Optional + +from processing.pipeline.stages.metadata_initialization import MetadataInitializationStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule +from configuration import Configuration, GeneralSettings + +# Helper function to create a mock AssetProcessingContext +def create_metadata_init_mock_context( + skip_asset_flag: bool = False, + asset_name: str = "MetaAsset", + asset_id: uuid.UUID = None, # Allow None to default to uuid.uuid4() + source_path_str: str = "source/meta_asset", + output_pattern: str = "{asset_name}/{map_type}", + tags: list = None, + custom_fields: dict = None, + source_rule_name: str = "MetaSource", + source_rule_id: uuid.UUID = None, # Allow None to default to uuid.uuid4() + eff_supplier: Optional[str] = "SupplierMeta", + app_version_str: str = "1.0.0-test", + inc_val: Optional[str] = None, + sha_val: Optional[str] = None +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.id = asset_id if asset_id is not None else uuid.uuid4() + mock_asset_rule.source_path = Path(source_path_str) + mock_asset_rule.output_path_pattern = output_pattern + 
mock_asset_rule.tags = tags if tags is not None else ["tag1", "test_tag"] + mock_asset_rule.custom_fields = custom_fields if custom_fields is not None else {"custom_key": "custom_value"} + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = source_rule_name + mock_source_rule.id = source_rule_id if source_rule_id is not None else uuid.uuid4() + + mock_general_settings = mock.MagicMock(spec=GeneralSettings) + mock_general_settings.app_version = app_version_str + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_general_settings + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier=eff_supplier, + asset_metadata={}, + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=inc_val, + sha5_value=sha_val + ) + return context + +@mock.patch('processing.pipeline.stages.metadata_initialization.datetime') +def test_metadata_initialization_not_skipped(mock_datetime_module): + stage = MetadataInitializationStage() + + fixed_now = datetime.datetime(2023, 10, 26, 12, 0, 0, tzinfo=datetime.timezone.utc) + mock_datetime_module.datetime.now.return_value = fixed_now + + asset_id_val = uuid.uuid4() + source_id_val = uuid.uuid4() + + context = create_metadata_init_mock_context( + skip_asset_flag=False, + asset_id=asset_id_val, + source_rule_id=source_id_val, + inc_val="001", + sha_val="abcde" + ) + + updated_context = stage.execute(context) + + assert isinstance(updated_context.asset_metadata, dict) + assert isinstance(updated_context.processed_maps_details, dict) + assert isinstance(updated_context.merged_maps_details, dict) + + md = updated_context.asset_metadata + assert md['asset_name'] == 
"MetaAsset" + assert md['asset_id'] == str(asset_id_val) + assert md['source_rule_name'] == "MetaSource" + assert md['source_rule_id'] == str(source_id_val) + assert md['source_path'] == "source/meta_asset" + assert md['effective_supplier'] == "SupplierMeta" + assert md['output_path_pattern'] == "{asset_name}/{map_type}" + assert md['processing_start_time'] == fixed_now.isoformat() + assert md['status'] == "Pending" + assert md['version'] == "1.0.0-test" + assert md['tags'] == ["tag1", "test_tag"] + assert md['custom_fields'] == {"custom_key": "custom_value"} + assert md['incrementing_value'] == "001" + assert md['sha5_value'] == "abcde" + +@mock.patch('processing.pipeline.stages.metadata_initialization.datetime') +def test_metadata_initialization_not_skipped_none_inc_sha(mock_datetime_module): + stage = MetadataInitializationStage() + + fixed_now = datetime.datetime(2023, 10, 26, 12, 0, 0, tzinfo=datetime.timezone.utc) + mock_datetime_module.datetime.now.return_value = fixed_now + + context = create_metadata_init_mock_context( + skip_asset_flag=False, + inc_val=None, + sha_val=None + ) + + updated_context = stage.execute(context) + + md = updated_context.asset_metadata + assert 'incrementing_value' not in md # Or assert md['incrementing_value'] is None, depending on desired behavior + assert 'sha5_value' not in md # Or assert md['sha5_value'] is None + +def test_metadata_initialization_skipped(): + stage = MetadataInitializationStage() + context = create_metadata_init_mock_context(skip_asset_flag=True) + + # Make copies of initial state to ensure they are not modified + initial_asset_metadata = dict(context.asset_metadata) + initial_processed_maps = dict(context.processed_maps_details) + initial_merged_maps = dict(context.merged_maps_details) + + updated_context = stage.execute(context) + + assert updated_context.asset_metadata == initial_asset_metadata + assert updated_context.processed_maps_details == initial_processed_maps + assert 
updated_context.merged_maps_details == initial_merged_maps + assert not updated_context.asset_metadata # Explicitly check it's empty as per initial setup + assert not updated_context.processed_maps_details + assert not updated_context.merged_maps_details + +@mock.patch('processing.pipeline.stages.metadata_initialization.datetime') +def test_tags_and_custom_fields_are_copies(mock_datetime_module): + stage = MetadataInitializationStage() + fixed_now = datetime.datetime(2023, 10, 26, 12, 0, 0, tzinfo=datetime.timezone.utc) + mock_datetime_module.datetime.now.return_value = fixed_now + + original_tags = ["original_tag"] + original_custom_fields = {"original_key": "original_value"} + + context = create_metadata_init_mock_context( + skip_asset_flag=False, + tags=original_tags, + custom_fields=original_custom_fields + ) + + # Modify originals after context creation but before stage execution + original_tags.append("modified_after_creation") + original_custom_fields["new_key_after_creation"] = "new_value" + + updated_context = stage.execute(context) + + md = updated_context.asset_metadata + assert md['tags'] == ["original_tag"] # Should not have "modified_after_creation" + assert md['tags'] is not original_tags # Ensure it's a different object + + assert md['custom_fields'] == {"original_key": "original_value"} # Should not have "new_key_after_creation" + assert md['custom_fields'] is not original_custom_fields # Ensure it's a different object \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_normal_map_green_channel.py b/tests/processing/pipeline/stages/test_normal_map_green_channel.py new file mode 100644 index 0000000..3120655 --- /dev/null +++ b/tests/processing/pipeline/stages/test_normal_map_green_channel.py @@ -0,0 +1,323 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import numpy as np +import logging # Added for mocking logger + +from processing.pipeline.stages.normal_map_green_channel import 
NormalMapGreenChannelStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule +from configuration import Configuration, GeneralSettings + +# Helper functions +def create_mock_file_rule_for_normal_test( + id_val: uuid.UUID = None, # Corrected type hint from Optional[uuid.UUID] + map_type: str = "NORMAL", + filename_pattern: str = "normal.png" +) -> mock.MagicMock: + mock_fr = mock.MagicMock(spec=FileRule) + mock_fr.id = id_val if id_val else uuid.uuid4() + mock_fr.map_type = map_type + mock_fr.filename_pattern = filename_pattern + mock_fr.item_type = "MAP_COL" # As per example, though not directly used by stage + mock_fr.active = True # As per example + return mock_fr + +def create_normal_map_mock_context( + initial_file_rules: list = None, # Corrected type hint + initial_processed_details: dict = None, # Corrected type hint + invert_green_globally: bool = False, + skip_asset_flag: bool = False, + asset_name: str = "NormalMapAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + + mock_source_rule = mock.MagicMock(spec=SourceRule) + + mock_gs = mock.MagicMock(spec=GeneralSettings) + mock_gs.invert_normal_map_green_channel_globally = invert_green_globally + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_gs + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output"), + effective_supplier="ValidSupplier", + asset_metadata={'asset_name': asset_name}, + processed_maps_details=initial_processed_details if initial_processed_details is not None else {}, + merged_maps_details={}, + files_to_process=list(initial_file_rules) if initial_file_rules else [], + loaded_data_cache={}, + config_obj=mock_config, + 
status_flags={'skip_asset': skip_asset_flag}, + incrementing_value=None, # Added as per AssetProcessingContext constructor + sha5_value=None # Added as per AssetProcessingContext constructor + ) + return context + +# Unit tests will be added below +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +def test_asset_skipped(mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_fr = create_mock_file_rule_for_normal_test(map_type="NORMAL") + initial_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal.png', 'status': 'Processed', 'map_type': 'NORMAL', 'notes': ''} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True, + skip_asset_flag=True # Asset is skipped + ) + original_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert updated_context.processed_maps_details == original_details + assert normal_fr in updated_context.files_to_process # Ensure rule is still there + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +def test_no_normal_map_present(mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + # Create a non-normal map rule + diffuse_fr = create_mock_file_rule_for_normal_test(map_type="DIFFUSE", filename_pattern="diffuse.png") + initial_details = { + diffuse_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_diffuse.png', 'status': 'Processed', 'map_type': 'DIFFUSE', 'notes': ''} + } + context = create_normal_map_mock_context( + initial_file_rules=[diffuse_fr], + 
initial_processed_details=initial_details, + invert_green_globally=True # Inversion enabled, but no normal map + ) + original_details = context.processed_maps_details.copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert updated_context.processed_maps_details == original_details + assert diffuse_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +def test_normal_map_present_inversion_disabled(mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_details = { + normal_fr.id.hex: {'temp_processed_file': '/fake/temp_engine_dir/processed_normal.png', 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Initial note'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=False # Inversion disabled + ) + original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + updated_context = stage.execute(context) + + mock_load_image.assert_not_called() + mock_save_image.assert_not_called() + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.debug') +def test_normal_map_inversion_uint8_success(mock_log_debug, mock_log_info, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + + normal_rule_id = uuid.uuid4() + normal_fr = 
create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + + initial_temp_path = Path('/fake/temp_engine_dir/processed_normal.png') + initial_details = { + normal_fr.id.hex: {'temp_processed_file': str(initial_temp_path), 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Initial note'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True # Enable inversion + ) + + # R=10, G=50, B=100 + mock_loaded_normal_data = np.array([[[10, 50, 100]]], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_normal_data + mock_save_image.return_value = True # Simulate successful save + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_temp_path) + + # Check that save_image was called with green channel inverted + assert mock_save_image.call_count == 1 + saved_path_arg, saved_data_arg = mock_save_image.call_args[0] + + assert saved_data_arg[0,0,0] == 10 # R unchanged + assert saved_data_arg[0,0,1] == 255 - 50 # G inverted + assert saved_data_arg[0,0,2] == 100 # B unchanged + + assert isinstance(saved_path_arg, Path) + assert "normal_g_inv_" in saved_path_arg.name + assert saved_path_arg.parent == initial_temp_path.parent # Should be in same temp dir + + normal_detail = updated_context.processed_maps_details[normal_fr.id.hex] + assert "normal_g_inv_" in normal_detail['temp_processed_file'] + assert Path(normal_detail['temp_processed_file']).name == saved_path_arg.name + assert "Green channel inverted" in normal_detail['notes'] + assert "Initial note" in normal_detail['notes'] # Check existing notes preserved + + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.info') +@mock.patch('logging.debug') +def 
test_normal_map_inversion_float_success(mock_log_debug, mock_log_info, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path = Path('/fake/temp_engine_dir/processed_normal_float.png') + initial_details = { + normal_fr.id.hex: {'temp_processed_file': str(initial_temp_path), 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Float image'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True + ) + + # R=0.1, G=0.25, B=0.75 + mock_loaded_normal_data = np.array([[[0.1, 0.25, 0.75]]], dtype=np.float32) + mock_load_image.return_value = mock_loaded_normal_data + mock_save_image.return_value = True + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_temp_path) + + assert mock_save_image.call_count == 1 + saved_path_arg, saved_data_arg = mock_save_image.call_args[0] + + assert np.isclose(saved_data_arg[0,0,0], 0.1) # R unchanged + assert np.isclose(saved_data_arg[0,0,1], 1.0 - 0.25) # G inverted + assert np.isclose(saved_data_arg[0,0,2], 0.75) # B unchanged + + assert "normal_g_inv_" in saved_path_arg.name + normal_detail = updated_context.processed_maps_details[normal_fr.id.hex] + assert "normal_g_inv_" in normal_detail['temp_processed_file'] + assert "Green channel inverted" in normal_detail['notes'] + assert "Float image" in normal_detail['notes'] + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.error') +def test_load_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = 
create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path_str = '/fake/temp_engine_dir/processed_normal_load_fail.png' + initial_details = { + normal_fr.id.hex: {'temp_processed_file': initial_temp_path_str, 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Load fail test'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True + ) + original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + mock_load_image.return_value = None # Simulate load failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path(initial_temp_path_str)) + mock_save_image.assert_not_called() + mock_log_error.assert_called_once() + assert f"Failed to load image {Path(initial_temp_path_str)} for green channel inversion." in mock_log_error.call_args[0][0] + + # Details should be unchanged + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.error') +def test_save_image_fails(mock_log_error, mock_load_image, mock_save_image): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path = Path('/fake/temp_engine_dir/processed_normal_save_fail.png') + initial_details = { + normal_fr.id.hex: {'temp_processed_file': str(initial_temp_path), 'status': 'Processed', 'map_type': 'NORMAL', 'notes': 'Save fail test'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + invert_green_globally=True + ) + 
original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + mock_loaded_normal_data = np.array([[[10, 50, 100]]], dtype=np.uint8) + mock_load_image.return_value = mock_loaded_normal_data + mock_save_image.return_value = False # Simulate save failure + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(initial_temp_path) + mock_save_image.assert_called_once() # Save is attempted + + saved_path_arg = mock_save_image.call_args[0][0] # Get the path it tried to save to + mock_log_error.assert_called_once() + assert f"Failed to save green channel inverted image to {saved_path_arg}." in mock_log_error.call_args[0][0] + + # Details should be unchanged + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process + +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.save_image') +@mock.patch('processing.pipeline.stages.normal_map_green_channel.ipu.load_image') +@mock.patch('logging.error') +@pytest.mark.parametrize("unsuitable_data, description", [ + (np.array([[1, 2], [3, 4]], dtype=np.uint8), "2D array"), # 2D array + (np.array([[[1, 2]]], dtype=np.uint8), "2-channel image") # Image with less than 3 channels +]) +def test_image_not_suitable_for_inversion(mock_log_error, mock_load_image, mock_save_image, unsuitable_data, description): + stage = NormalMapGreenChannelStage() + normal_rule_id = uuid.uuid4() + normal_fr = create_mock_file_rule_for_normal_test(id_val=normal_rule_id, map_type="NORMAL") + initial_temp_path_str = f'/fake/temp_engine_dir/unsuitable_{description.replace(" ", "_")}.png' + initial_details = { + normal_fr.id.hex: {'temp_processed_file': initial_temp_path_str, 'status': 'Processed', 'map_type': 'NORMAL', 'notes': f'Unsuitable: {description}'} + } + context = create_normal_map_mock_context( + initial_file_rules=[normal_fr], + initial_processed_details=initial_details, + 
invert_green_globally=True + ) + original_details_entry = context.processed_maps_details[normal_fr.id.hex].copy() + + mock_load_image.return_value = unsuitable_data + + updated_context = stage.execute(context) + + mock_load_image.assert_called_once_with(Path(initial_temp_path_str)) + mock_save_image.assert_not_called() # Save should not be attempted + mock_log_error.assert_called_once() + assert f"Image at {Path(initial_temp_path_str)} is not suitable for green channel inversion (e.g., not RGB/RGBA)." in mock_log_error.call_args[0][0] + + # Details should be unchanged + assert updated_context.processed_maps_details[normal_fr.id.hex] == original_details_entry + assert normal_fr in updated_context.files_to_process \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_output_organization.py b/tests/processing/pipeline/stages/test_output_organization.py new file mode 100644 index 0000000..ccf6c08 --- /dev/null +++ b/tests/processing/pipeline/stages/test_output_organization.py @@ -0,0 +1,417 @@ +import pytest +from unittest import mock +from pathlib import Path +import shutil # To check if shutil.copy2 is called +import uuid +from typing import Optional # Added for type hinting in helper + +from processing.pipeline.stages.output_organization import OutputOrganizationStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule # For context setup +from configuration import Configuration, GeneralSettings + +def create_output_org_mock_context( + status_flags: Optional[dict] = None, + asset_metadata_status: str = "Processed", # Default to processed for testing copy + processed_map_details: Optional[dict] = None, + merged_map_details: Optional[dict] = None, + overwrite_setting: bool = False, + asset_name: str = "OutputOrgAsset", + output_path_pattern_val: str = "{asset_name}/{map_type}/{filename}" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) 
+ mock_asset_rule.name = asset_name + mock_asset_rule.output_path_pattern = output_path_pattern_val + # Need FileRules on AssetRule if stage tries to look up output_filename_pattern from them + # For simplicity, assume stage constructs output_filename for now if not found on FileRule + mock_asset_rule.file_rules = [] # Or mock FileRules if stage uses them for output_filename_pattern + + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.name = "OutputOrgSource" + + mock_gs = mock.MagicMock(spec=GeneralSettings) + mock_gs.overwrite_existing = overwrite_setting + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock_gs + + # Ensure asset_metadata has a status + initial_asset_metadata = {'asset_name': asset_name, 'status': asset_metadata_status} + + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp_engine_dir"), + output_base_path=Path("/fake/output_final"), + effective_supplier="ValidSupplier", + asset_metadata=initial_asset_metadata, + processed_maps_details=processed_map_details if processed_map_details is not None else {}, + merged_maps_details=merged_map_details if merged_map_details is not None else {}, + files_to_process=[], # Not directly used by this stage, but good to have + loaded_data_cache={}, + config_obj=mock_config, + status_flags=status_flags if status_flags is not None else {}, + incrementing_value="001", + sha5_value="xyz" # Corrected from sha5_value to sha256_value if that's the actual param, or ensure it's a valid param. Assuming sha5_value is a typo and should be something like 'unique_id' or similar if not sha256. For now, keeping as sha5_value as per instructions. 
+ ) + return context +@mock.patch('shutil.copy2') +@mock.patch('logging.info') # To check for log messages +def test_output_organization_asset_skipped_by_status_flag(mock_log_info, mock_shutil_copy): + stage = OutputOrganizationStage() + context = create_output_org_mock_context(status_flags={'skip_asset': True}) + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() + # Check if a log message indicates skipping, if applicable + # e.g., mock_log_info.assert_any_call("Skipping output organization for asset OutputOrgAsset due to skip_asset flag.") + assert 'final_output_files' not in updated_context.asset_metadata # Or assert it's empty + assert updated_context.asset_metadata['status'] == "Processed" # Status should not change if skipped due to flag before stage logic + # Add specific log check if the stage logs this event + # For now, assume no copy is the primary check + +@mock.patch('shutil.copy2') +@mock.patch('logging.warning') # Or info, depending on how failure is logged +def test_output_organization_asset_failed_by_metadata_status(mock_log_warning, mock_shutil_copy): + stage = OutputOrganizationStage() + context = create_output_org_mock_context(asset_metadata_status="Failed") + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() + # Check for a log message indicating skipping due to failure status + # e.g., mock_log_warning.assert_any_call("Skipping output organization for asset OutputOrgAsset as its status is Failed.") + assert 'final_output_files' not in updated_context.asset_metadata # Or assert it's empty + assert updated_context.asset_metadata['status'] == "Failed" # Status remains Failed + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_output_organization_success_no_overwrite( + 
mock_log_error, mock_log_info, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + + proc_id_1 = uuid.uuid4().hex + merged_id_1 = uuid.uuid4().hex + + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc1.png', 'map_type': 'Diffuse', 'output_filename': 'OutputOrgAsset_Diffuse.png'} + } + merged_details = { + merged_id_1: {'status': 'Processed', 'temp_merged_file': '/fake/temp_engine_dir/merged1.png', 'map_type': 'ORM', 'output_filename': 'OutputOrgAsset_ORM.png'} + } + + context = create_output_org_mock_context( + processed_map_details=processed_details, + merged_map_details=merged_details, + overwrite_setting=False + ) + + # Mock generate_path_from_pattern to return different paths for each call + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Diffuse/OutputOrgAsset_Diffuse.png") + final_path_merged1 = Path("/fake/output_final/OutputOrgAsset/ORM/OutputOrgAsset_ORM.png") + # Ensure generate_path_from_pattern is called with the correct context and details + # The actual call in the stage is: generate_path_from_pattern(context, map_detail, map_type_key, temp_file_key) + # We need to ensure our side_effect matches these calls. 
+ + def gen_path_side_effect(ctx, detail, map_type_key, temp_file_key, output_filename_key): + if detail['temp_processed_file'] == '/fake/temp_engine_dir/proc1.png': + return final_path_proc1 + elif detail['temp_merged_file'] == '/fake/temp_engine_dir/merged1.png': + return final_path_merged1 + raise ValueError("Unexpected call to generate_path_from_pattern") + + mock_gen_path.side_effect = gen_path_side_effect + + mock_path_exists.return_value = False # Files do not exist at destination + + updated_context = stage.execute(context) + + assert mock_shutil_copy.call_count == 2 + mock_shutil_copy.assert_any_call(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + mock_shutil_copy.assert_any_call(Path(merged_details[merged_id_1]['temp_merged_file']), final_path_merged1) + + # Check mkdir calls + # It should be called for each unique parent directory + expected_mkdir_calls = [ + mock.call(Path("/fake/output_final/OutputOrgAsset/Diffuse"), parents=True, exist_ok=True), + mock.call(Path("/fake/output_final/OutputOrgAsset/ORM"), parents=True, exist_ok=True) + ] + mock_mkdir.assert_has_calls(expected_mkdir_calls, any_order=True) + # Ensure mkdir was called for the parent of each file + assert mock_mkdir.call_count >= 1 # Could be 1 or 2 if paths share a base that's created once + + assert len(updated_context.asset_metadata['final_output_files']) == 2 + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert str(final_path_merged1) in updated_context.asset_metadata['final_output_files'] + + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + assert updated_context.merged_maps_details[merged_id_1]['final_output_path'] == str(final_path_merged1) + mock_log_error.assert_not_called() + # Check for specific info logs if necessary + # mock_log_info.assert_any_call(f"Copying {processed_details[proc_id_1]['temp_processed_file']} to {final_path_proc1}") + # 
mock_log_info.assert_any_call(f"Copying {merged_details[merged_id_1]['temp_merged_file']} to {final_path_merged1}") +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') # Still might be called if other files are processed +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.info') +def test_output_organization_overwrite_disabled_file_exists( + mock_log_info, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_exists.png', 'map_type': 'Diffuse', 'output_filename': 'OutputOrgAsset_Diffuse_Exists.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Diffuse/OutputOrgAsset_Diffuse_Exists.png") + mock_gen_path.return_value = final_path_proc1 # Only one file + mock_path_exists.return_value = True # File exists at destination + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() + mock_log_info.assert_any_call( + f"Skipping copy for {final_path_proc1} as it already exists and overwrite is disabled." 
+ ) + # final_output_files should still be populated if the file exists and is considered "organized" + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.info') +@mock.patch('logging.error') +def test_output_organization_overwrite_enabled_file_exists( + mock_log_error, mock_log_info, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_overwrite.png', 'map_type': 'Diffuse', 'output_filename': 'OutputOrgAsset_Diffuse_Overwrite.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=True # Overwrite is enabled + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Diffuse/OutputOrgAsset_Diffuse_Overwrite.png") + mock_gen_path.return_value = final_path_proc1 + mock_path_exists.return_value = True # File exists, but we should overwrite + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + mock_mkdir.assert_called_once_with(final_path_proc1.parent, parents=True, exist_ok=True) + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + mock_log_error.assert_not_called() + # Optionally check for a log message indicating overwrite, if implemented + # mock_log_info.assert_any_call(f"Overwriting existing file 
{final_path_proc1}...") + + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_only_processed_maps( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_only.png', 'map_type': 'Albedo', 'output_filename': 'OutputOrgAsset_Albedo.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + merged_map_details={}, # No merged maps + overwrite_setting=False + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/Albedo/OutputOrgAsset_Albedo.png") + mock_gen_path.return_value = final_path_proc1 + mock_path_exists.return_value = False + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + mock_mkdir.assert_called_once_with(final_path_proc1.parent, parents=True, exist_ok=True) + assert len(updated_context.asset_metadata['final_output_files']) == 1 + assert str(final_path_proc1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.processed_maps_details[proc_id_1]['final_output_path'] == str(final_path_proc1) + assert not updated_context.merged_maps_details # Should remain empty + mock_log_error.assert_not_called() + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_only_merged_maps( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = 
OutputOrganizationStage() + merged_id_1 = uuid.uuid4().hex + merged_details = { + merged_id_1: {'status': 'Processed', 'temp_merged_file': '/fake/temp_engine_dir/merged_only.png', 'map_type': 'Metallic', 'output_filename': 'OutputOrgAsset_Metallic.png'} + } + context = create_output_org_mock_context( + processed_map_details={}, # No processed maps + merged_map_details=merged_details, + overwrite_setting=False + ) + + final_path_merged1 = Path("/fake/output_final/OutputOrgAsset/Metallic/OutputOrgAsset_Metallic.png") + mock_gen_path.return_value = final_path_merged1 + mock_path_exists.return_value = False + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_called_once_with(Path(merged_details[merged_id_1]['temp_merged_file']), final_path_merged1) + mock_mkdir.assert_called_once_with(final_path_merged1.parent, parents=True, exist_ok=True) + assert len(updated_context.asset_metadata['final_output_files']) == 1 + assert str(final_path_merged1) in updated_context.asset_metadata['final_output_files'] + assert updated_context.merged_maps_details[merged_id_1]['final_output_path'] == str(final_path_merged1) + assert not updated_context.processed_maps_details # Should remain empty + mock_log_error.assert_not_called() + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.warning') # Expect a warning for skipped map +@mock.patch('logging.error') +def test_output_organization_map_status_not_processed( + mock_log_error, mock_log_warning, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + + proc_id_1_failed = uuid.uuid4().hex + proc_id_2_ok = uuid.uuid4().hex + + processed_details = { + proc_id_1_failed: {'status': 'Failed', 'temp_processed_file': '/fake/temp_engine_dir/proc_failed.png', 'map_type': 'Diffuse', 'output_filename': 
'OutputOrgAsset_Diffuse_Failed.png'}, + proc_id_2_ok: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_ok.png', 'map_type': 'Normal', 'output_filename': 'OutputOrgAsset_Normal_OK.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + final_path_proc_ok = Path("/fake/output_final/OutputOrgAsset/Normal/OutputOrgAsset_Normal_OK.png") + # generate_path_from_pattern should only be called for the 'Processed' map + mock_gen_path.return_value = final_path_proc_ok + mock_path_exists.return_value = False + + updated_context = stage.execute(context) + + # Assert copy was only called for the 'Processed' map + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_2_ok]['temp_processed_file']), final_path_proc_ok) + mock_mkdir.assert_called_once_with(final_path_proc_ok.parent, parents=True, exist_ok=True) + + # Assert final_output_files only contains the successfully processed map + assert len(updated_context.asset_metadata['final_output_files']) == 1 + assert str(final_path_proc_ok) in updated_context.asset_metadata['final_output_files'] + + # Assert final_output_path is set for the processed map + assert updated_context.processed_maps_details[proc_id_2_ok]['final_output_path'] == str(final_path_proc_ok) + # Assert final_output_path is NOT set for the failed map + assert 'final_output_path' not in updated_context.processed_maps_details[proc_id_1_failed] + + mock_log_warning.assert_any_call( + f"Skipping output organization for map with ID {proc_id_1_failed} (type: Diffuse) as its status is 'Failed'." 
+ ) + mock_log_error.assert_not_called() +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_generate_path_fails( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_path_fail.png', 'map_type': 'Roughness', 'output_filename': 'OutputOrgAsset_Roughness_PathFail.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + mock_gen_path.side_effect = Exception("Simulated path generation error") + mock_path_exists.return_value = False # Should not matter if path gen fails + + updated_context = stage.execute(context) + + mock_shutil_copy.assert_not_called() # No copy if path generation fails + mock_mkdir.assert_not_called() # No mkdir if path generation fails + + assert not updated_context.asset_metadata.get('final_output_files') # No files should be listed + assert 'final_output_path' not in updated_context.processed_maps_details[proc_id_1] + + assert updated_context.status_flags.get('output_organization_error') is True + assert updated_context.asset_metadata['status'] == "Error" # Or "Failed" depending on desired behavior + + mock_log_error.assert_any_call( + f"Error generating output path for map ID {proc_id_1} (type: Roughness): Simulated path generation error" + ) + +@mock.patch('shutil.copy2') +@mock.patch('pathlib.Path.mkdir') +@mock.patch('pathlib.Path.exists') +@mock.patch('processing.pipeline.stages.output_organization.generate_path_from_pattern') +@mock.patch('logging.error') +def test_output_organization_shutil_copy_fails( + mock_log_error, mock_gen_path, mock_path_exists, mock_mkdir, 
mock_shutil_copy +): + stage = OutputOrganizationStage() + proc_id_1 = uuid.uuid4().hex + processed_details = { + proc_id_1: {'status': 'Processed', 'temp_processed_file': '/fake/temp_engine_dir/proc_copy_fail.png', 'map_type': 'AO', 'output_filename': 'OutputOrgAsset_AO_CopyFail.png'} + } + context = create_output_org_mock_context( + processed_map_details=processed_details, + overwrite_setting=False + ) + + final_path_proc1 = Path("/fake/output_final/OutputOrgAsset/AO/OutputOrgAsset_AO_CopyFail.png") + mock_gen_path.return_value = final_path_proc1 + mock_path_exists.return_value = False + mock_shutil_copy.side_effect = shutil.Error("Simulated copy error") # Can also be IOError, OSError + + updated_context = stage.execute(context) + + mock_mkdir.assert_called_once_with(final_path_proc1.parent, parents=True, exist_ok=True) # mkdir would be called before copy + mock_shutil_copy.assert_called_once_with(Path(processed_details[proc_id_1]['temp_processed_file']), final_path_proc1) + + # Even if copy fails, the path might be added to final_output_files before the error is caught, + # or the design might be to not add it. Let's assume it's not added on error. + # Check the stage's actual behavior for this. + # If the intention is to record the *attempted* path, this assertion might change. + # For now, assume failure means it's not a "final" output. 
+ assert not updated_context.asset_metadata.get('final_output_files') + assert 'final_output_path' not in updated_context.processed_maps_details[proc_id_1] # Or it might contain the path but status is error + + assert updated_context.status_flags.get('output_organization_error') is True + assert updated_context.asset_metadata['status'] == "Error" # Or "Failed" + + mock_log_error.assert_any_call( + f"Error copying file {processed_details[proc_id_1]['temp_processed_file']} to {final_path_proc1}: Simulated copy error" + ) \ No newline at end of file diff --git a/tests/processing/pipeline/stages/test_supplier_determination.py b/tests/processing/pipeline/stages/test_supplier_determination.py new file mode 100644 index 0000000..a1613b1 --- /dev/null +++ b/tests/processing/pipeline/stages/test_supplier_determination.py @@ -0,0 +1,213 @@ +import pytest +from unittest import mock +from pathlib import Path +from typing import Dict, List, Optional, Any + +# Assuming pytest is run from project root, adjust if necessary +from processing.pipeline.stages.supplier_determination import SupplierDeterminationStage +from processing.pipeline.asset_context import AssetProcessingContext +from rule_structure import AssetRule, SourceRule, FileRule # For constructing mock context +from configuration import Configuration, GeneralSettings, Supplier # For mock config + +# Example helper (can be a pytest fixture too) +def create_mock_context( + asset_rule_supplier_override: Optional[str] = None, + source_rule_supplier: Optional[str] = None, + config_suppliers: Optional[Dict[str, Any]] = None, # Mocked Supplier objects or dicts + asset_name: str = "TestAsset" +) -> AssetProcessingContext: + mock_asset_rule = mock.MagicMock(spec=AssetRule) + mock_asset_rule.name = asset_name + mock_asset_rule.supplier_override = asset_rule_supplier_override + # ... other AssetRule fields if needed by the stage ... 
+ + mock_source_rule = mock.MagicMock(spec=SourceRule) + mock_source_rule.supplier = source_rule_supplier + # ... other SourceRule fields ... + + mock_config = mock.MagicMock(spec=Configuration) + mock_config.suppliers = config_suppliers if config_suppliers is not None else {} + + # Basic AssetProcessingContext fields + context = AssetProcessingContext( + source_rule=mock_source_rule, + asset_rule=mock_asset_rule, + workspace_path=Path("/fake/workspace"), + engine_temp_dir=Path("/fake/temp"), + output_base_path=Path("/fake/output"), + effective_supplier=None, + asset_metadata={}, + processed_maps_details={}, + merged_maps_details={}, + files_to_process=[], + loaded_data_cache={}, + config_obj=mock_config, + status_flags={}, + incrementing_value=None, + sha5_value=None # Corrected from sha5_value to sha256_value if that's the actual field name + ) + return context + +@pytest.fixture +def supplier_stage(): + return SupplierDeterminationStage() + +@mock.patch('logging.error') +@mock.patch('logging.info') +def test_supplier_from_asset_rule_override_valid(mock_log_info, mock_log_error, supplier_stage): + mock_suppliers_config = {"SupplierA": mock.MagicMock(spec=Supplier)} + context = create_mock_context( + asset_rule_supplier_override="SupplierA", + config_suppliers=mock_suppliers_config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier == "SupplierA" + assert not updated_context.status_flags.get('supplier_error') + mock_log_info.assert_any_call("Effective supplier for asset 'TestAsset' set to 'SupplierA' from asset rule override.") + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.info') +def test_supplier_from_source_rule_fallback_valid(mock_log_info, mock_log_error, supplier_stage): + mock_suppliers_config = {"SupplierB": mock.MagicMock(spec=Supplier)} + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier="SupplierB", + 
config_suppliers=mock_suppliers_config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier == "SupplierB" + assert not updated_context.status_flags.get('supplier_error') + mock_log_info.assert_any_call("Effective supplier for asset 'TestAsset' set to 'SupplierB' from source rule.") + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') # supplier_determination uses logging.warning for invalid suppliers +def test_asset_rule_override_invalid_supplier(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override="InvalidSupplier", + config_suppliers={"SupplierA": mock.MagicMock(spec=Supplier)} # "InvalidSupplier" not in config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has supplier_override 'InvalidSupplier' which is not defined in global suppliers. No supplier set." + ) + mock_log_error.assert_not_called() + + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_source_rule_fallback_invalid_supplier(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier="InvalidSupplierB", + config_suppliers={"SupplierA": mock.MagicMock(spec=Supplier)} # "InvalidSupplierB" not in config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has source rule supplier 'InvalidSupplierB' which is not defined in global suppliers. No supplier set." 
+ ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_no_supplier_defined(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier=None, + config_suppliers={"SupplierA": mock.MagicMock(spec=Supplier)} + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "No supplier could be determined for asset 'TestAsset'. " + "AssetRule override is None and SourceRule supplier is None or empty." + ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_empty_config_suppliers_with_asset_override(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + asset_rule_supplier_override="SupplierX", + config_suppliers={} # Empty global supplier config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has supplier_override 'SupplierX' which is not defined in global suppliers. No supplier set." 
+ ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_empty_config_suppliers_with_source_rule(mock_log_warning, mock_log_error, supplier_stage): + context = create_mock_context( + source_rule_supplier="SupplierY", + config_suppliers={} # Empty global supplier config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "Asset 'TestAsset' has source rule supplier 'SupplierY' which is not defined in global suppliers. No supplier set." + ) + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.info') +def test_asset_rule_override_empty_string(mock_log_info, mock_log_error, supplier_stage): + # This scenario should fall back to source_rule.supplier if asset_rule.supplier_override is "" + mock_suppliers_config = {"SupplierB": mock.MagicMock(spec=Supplier)} + context = create_mock_context( + asset_rule_supplier_override="", # Empty string override + source_rule_supplier="SupplierB", + config_suppliers=mock_suppliers_config + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier == "SupplierB" # Falls back to SourceRule + assert not updated_context.status_flags.get('supplier_error') + mock_log_info.assert_any_call("Effective supplier for asset 'TestAsset' set to 'SupplierB' from source rule.") + mock_log_error.assert_not_called() + +@mock.patch('logging.error') +@mock.patch('logging.warning') +def test_source_rule_supplier_empty_string(mock_log_warning, mock_log_error, supplier_stage): + # This scenario should result in an error if asset_rule.supplier_override is None and source_rule.supplier is "" + context = create_mock_context( + asset_rule_supplier_override=None, + source_rule_supplier="", # Empty string source supplier + config_suppliers={"SupplierA": 
mock.MagicMock(spec=Supplier)} + ) + + updated_context = supplier_stage.execute(context) + + assert updated_context.effective_supplier is None + assert updated_context.status_flags.get('supplier_error') is True + mock_log_warning.assert_any_call( + "No supplier could be determined for asset 'TestAsset'. " + "AssetRule override is None and SourceRule supplier is None or empty." + ) + mock_log_error.assert_not_called() \ No newline at end of file diff --git a/tests/processing/pipeline/test_orchestrator.py b/tests/processing/pipeline/test_orchestrator.py new file mode 100644 index 0000000..3f52908 --- /dev/null +++ b/tests/processing/pipeline/test_orchestrator.py @@ -0,0 +1,383 @@ +import pytest +from unittest import mock +from pathlib import Path +import uuid +import shutil # For checking rmtree +import tempfile # For mocking mkdtemp + +from processing.pipeline.orchestrator import PipelineOrchestrator +from processing.pipeline.asset_context import AssetProcessingContext +from processing.pipeline.stages.base_stage import ProcessingStage # For mocking stages +from rule_structure import SourceRule, AssetRule, FileRule +from configuration import Configuration, GeneralSettings + +# Mock Stage that modifies context +class MockPassThroughStage(ProcessingStage): + def __init__(self, stage_name="mock_stage"): + self.stage_name = stage_name + self.execute_call_count = 0 + self.contexts_called_with = [] # To store contexts for verification + + def execute(self, context: AssetProcessingContext) -> AssetProcessingContext: + self.execute_call_count += 1 + self.contexts_called_with.append(context) + # Optionally, modify context for testing + context.asset_metadata[f'{self.stage_name}_executed'] = True + if self.stage_name == "skipper_stage": # Example conditional logic + context.status_flags['skip_asset'] = True + context.status_flags['skip_reason'] = "Skipped by skipper_stage" + elif self.stage_name == "error_stage": # Example error-raising stage + raise ValueError("Simulated 
error in error_stage") + + # Simulate status update based on stage execution + if not context.status_flags.get('skip_asset') and not context.status_flags.get('asset_failed'): + context.asset_metadata['status'] = "Processed" # Default to processed if not skipped/failed + return context + +def create_orchestrator_test_config() -> mock.MagicMock: + mock_config = mock.MagicMock(spec=Configuration) + mock_config.general_settings = mock.MagicMock(spec=GeneralSettings) + mock_config.general_settings.temp_dir_override = None # Default, can be overridden in tests + # Add other config details if orchestrator or stages depend on them directly + return mock_config + +def create_orchestrator_test_asset_rule(name: str, num_file_rules: int = 1) -> mock.MagicMock: + asset_rule = mock.MagicMock(spec=AssetRule) + asset_rule.name = name + asset_rule.id = uuid.uuid4() + asset_rule.source_path = Path(f"/fake/source/{name}") # Using Path object + asset_rule.file_rules = [mock.MagicMock(spec=FileRule) for _ in range(num_file_rules)] + asset_rule.enabled = True + asset_rule.map_types = {} # Initialize as dict + asset_rule.material_name_scheme = "{asset_name}" + asset_rule.texture_name_scheme = "{asset_name}_{map_type}" + asset_rule.output_path_scheme = "{source_name}/{asset_name}" + # ... other necessary AssetRule fields ... + return asset_rule + +def create_orchestrator_test_source_rule(name: str, num_assets: int = 1, asset_names: list = None) -> mock.MagicMock: + source_rule = mock.MagicMock(spec=SourceRule) + source_rule.name = name + source_rule.id = uuid.uuid4() + if asset_names: + source_rule.assets = [create_orchestrator_test_asset_rule(an) for an in asset_names] + else: + source_rule.assets = [create_orchestrator_test_asset_rule(f"Asset_{i+1}_in_{name}") for i in range(num_assets)] + source_rule.enabled = True + source_rule.source_path = Path(f"/fake/source_root/{name}") # Using Path object + # ... other necessary SourceRule fields ... 
+ return source_rule + +# --- Test Cases for PipelineOrchestrator.process_source_rule() --- + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_basic_flow_mock_stages(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_path" # Path for mkdtemp + + config = create_orchestrator_test_config() + stage1 = MockPassThroughStage("stage1") + stage2 = MockPassThroughStage("stage2") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1, stage2]) + + source_rule = create_orchestrator_test_source_rule("MySourceRule", num_assets=2) + asset1_name = source_rule.assets[0].name + asset2_name = source_rule.assets[1].name + + # Mock asset_metadata to be updated by stages for status check + # The MockPassThroughStage already sets a 'status' = "Processed" if not skipped/failed + # and adds '{stage_name}_executed' = True to asset_metadata. + + results = orchestrator.process_source_rule( + source_rule, Path("/ws"), Path("/out"), False, "inc_val_123", "sha_val_abc" + ) + + assert stage1.execute_call_count == 2 # Called for each asset + assert stage2.execute_call_count == 2 # Called for each asset + + assert asset1_name in results['processed'] + assert asset2_name in results['processed'] + assert not results['skipped'] + assert not results['failed'] + + # Verify context modifications by stages + for i in range(2): # For each asset + # Stage 1 context checks + s1_context_asset = stage1.contexts_called_with[i] + assert s1_context_asset.asset_metadata.get('stage1_executed') is True + assert s1_context_asset.asset_metadata.get('stage2_executed') is None # Stage 2 not yet run for this asset + + # Stage 2 context checks + s2_context_asset = stage2.contexts_called_with[i] + assert s2_context_asset.asset_metadata.get('stage1_executed') is True # From stage 1 + assert s2_context_asset.asset_metadata.get('stage2_executed') is True + assert s2_context_asset.asset_metadata.get('status') == "Processed" + + 
mock_mkdtemp.assert_called_once() + # The orchestrator creates a subdirectory within the mkdtemp path + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_asset_skipping_by_stage(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_path_skip" + + config = create_orchestrator_test_config() + skipper_stage = MockPassThroughStage("skipper_stage") # This stage will set skip_asset = True + stage_after_skip = MockPassThroughStage("stage_after_skip") + + orchestrator = PipelineOrchestrator(config_obj=config, stages=[skipper_stage, stage_after_skip]) + + source_rule = create_orchestrator_test_source_rule("SkipSourceRule", num_assets=1) + asset_to_skip_name = source_rule.assets[0].name + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_skip"), Path("/out_skip"), False, "inc_skip", "sha_skip" + ) + + assert skipper_stage.execute_call_count == 1 # Called for the asset + assert stage_after_skip.execute_call_count == 0 # Not called because asset was skipped + + assert asset_to_skip_name in results['skipped'] + assert not results['processed'] + assert not results['failed'] + + # Verify skip reason in context if needed (MockPassThroughStage stores contexts) + skipped_context = skipper_stage.contexts_called_with[0] + assert skipped_context.status_flags['skip_asset'] is True + assert skipped_context.status_flags['skip_reason'] == "Skipped by skipper_stage" + + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_no_assets_in_source_rule(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_no_assets" + 
+ config = create_orchestrator_test_config() + stage1 = MockPassThroughStage("stage1_no_assets") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1]) + + source_rule = create_orchestrator_test_source_rule("NoAssetSourceRule", num_assets=0) + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_no_assets"), Path("/out_no_assets"), False, "inc_no", "sha_no" + ) + + assert stage1.execute_call_count == 0 + assert not results['processed'] + assert not results['skipped'] + assert not results['failed'] + + # mkdtemp should still be called for the source rule processing, even if no assets + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_error_during_stage_execution(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_error" + + config = create_orchestrator_test_config() + error_stage = MockPassThroughStage("error_stage") # This stage will raise an error + stage_after_error = MockPassThroughStage("stage_after_error") + + orchestrator = PipelineOrchestrator(config_obj=config, stages=[error_stage, stage_after_error]) + + # Test with two assets, one fails, one processes (if orchestrator continues) + # The current orchestrator's process_asset is per asset, so an error in one + # should not stop processing of other assets in the same source_rule. 
+ source_rule = create_orchestrator_test_source_rule("ErrorSourceRule", asset_names=["AssetFails", "AssetSucceeds"]) + asset_fails_name = source_rule.assets[0].name + asset_succeeds_name = source_rule.assets[1].name + + # Make only the first asset's processing trigger the error + original_execute = error_stage.execute + def error_execute_side_effect(context: AssetProcessingContext): + if context.asset_rule.name == asset_fails_name: + # The MockPassThroughStage is already configured to raise ValueError for "error_stage" + # but we need to ensure it's only for the first asset. + # We can achieve this by modifying the stage_name temporarily or by checking asset_rule.name + # For simplicity, let's assume the mock stage's error logic is fine, + # and we just need to check the outcome. + # The error_stage will raise ValueError("Simulated error in error_stage") + # The orchestrator's _process_single_asset catches generic Exception. + return original_execute(context) # This will call the erroring logic + else: + # For the second asset, make it pass through without error + context.asset_metadata[f'{error_stage.stage_name}_executed'] = True + context.asset_metadata['status'] = "Processed" + return context + + error_stage.execute = mock.MagicMock(side_effect=error_execute_side_effect) + # stage_after_error should still be called for the successful asset + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_error"), Path("/out_error"), False, "inc_err", "sha_err" + ) + + assert error_stage.execute.call_count == 2 # Called for both assets + # stage_after_error is only called for the asset that didn't fail in error_stage + assert stage_after_error.execute_call_count == 1 + + assert asset_fails_name in results['failed'] + assert asset_succeeds_name in results['processed'] + assert not results['skipped'] + + # Verify the context of the failed asset + failed_context = None + for ctx in error_stage.contexts_called_with: + if ctx.asset_rule.name == 
asset_fails_name: + failed_context = ctx + break + assert failed_context is not None + assert failed_context.status_flags['asset_failed'] is True + assert "Simulated error in error_stage" in failed_context.status_flags['failure_reason'] + + # Verify the context of the successful asset after stage_after_error + successful_context_after_s2 = None + for ctx in stage_after_error.contexts_called_with: + if ctx.asset_rule.name == asset_succeeds_name: + successful_context_after_s2 = ctx + break + assert successful_context_after_s2 is not None + assert successful_context_after_s2.asset_metadata.get('error_stage_executed') is True # from the non-erroring path + assert successful_context_after_s2.asset_metadata.get('stage_after_error_executed') is True + assert successful_context_after_s2.asset_metadata.get('status') == "Processed" + + + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) + + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_asset_processing_context_initialization(mock_mkdtemp, mock_rmtree): + mock_engine_temp_dir = "/fake/engine_temp_dir_context_init" + mock_mkdtemp.return_value = mock_engine_temp_dir + + config = create_orchestrator_test_config() + mock_stage = MockPassThroughStage("context_check_stage") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[mock_stage]) + + source_rule = create_orchestrator_test_source_rule("ContextSourceRule", num_assets=1) + asset_rule = source_rule.assets[0] + + workspace_path = Path("/ws_context") + output_base_path = Path("/out_context") + incrementing_value = "inc_context_123" + sha5_value = "sha_context_abc" + + orchestrator.process_source_rule( + source_rule, workspace_path, output_base_path, False, incrementing_value, sha5_value + ) + + assert mock_stage.execute_call_count == 1 + + # Retrieve the context passed to the mock stage + 
captured_context = mock_stage.contexts_called_with[0] + + assert captured_context.source_rule == source_rule + assert captured_context.asset_rule == asset_rule + assert captured_context.workspace_path == workspace_path + + # engine_temp_dir for the asset is a sub-directory of the source_rule's temp dir + # which itself is a sub-directory of the main engine_temp_dir from mkdtemp + expected_source_rule_temp_dir = Path(mock_engine_temp_dir) / source_rule.id.hex + expected_asset_temp_dir = expected_source_rule_temp_dir / asset_rule.id.hex + assert captured_context.engine_temp_dir == expected_asset_temp_dir + + assert captured_context.output_base_path == output_base_path + assert captured_context.config_obj == config + assert captured_context.incrementing_value == incrementing_value + assert captured_context.sha5_value == sha5_value + + # Check initial state of other context fields + assert captured_context.asset_metadata == {} # Should be empty initially for an asset + assert captured_context.status_flags == {} # Should be empty initially + assert captured_context.shared_data == {} # Should be empty initially + assert captured_context.current_files == [] # Should be empty initially + + mock_mkdtemp.assert_called_once() + mock_rmtree.assert_called_once_with(expected_source_rule_temp_dir, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_temp_dir_override_from_config(mock_mkdtemp, mock_rmtree): + # This test verifies that if config.general_settings.temp_dir_override is set, + # mkdtemp is NOT called, and the override path is used and cleaned up. 
+ + config = create_orchestrator_test_config() + override_temp_path_str = "/override/temp/path" + config.general_settings.temp_dir_override = override_temp_path_str + + stage1 = MockPassThroughStage("stage_temp_override") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1]) + + source_rule = create_orchestrator_test_source_rule("TempOverrideRule", num_assets=1) + asset_rule = source_rule.assets[0] + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_override"), Path("/out_override"), False, "inc_override", "sha_override" + ) + + assert stage1.execute_call_count == 1 + assert asset_rule.name in results['processed'] + + mock_mkdtemp.assert_not_called() # mkdtemp should not be called due to override + + # The orchestrator should create its source-rule specific subdir within the override + expected_source_rule_temp_dir_in_override = Path(override_temp_path_str) / source_rule.id.hex + + # Verify the context passed to the stage uses the overridden path structure + captured_context = stage1.contexts_called_with[0] + expected_asset_temp_dir_in_override = expected_source_rule_temp_dir_in_override / asset_rule.id.hex + assert captured_context.engine_temp_dir == expected_asset_temp_dir_in_override + + # rmtree should be called on the source_rule's directory within the override path + mock_rmtree.assert_called_once_with(expected_source_rule_temp_dir_in_override, ignore_errors=True) + +@mock.patch('shutil.rmtree') +@mock.patch('tempfile.mkdtemp') +def test_orchestrator_disabled_asset_rule_is_skipped(mock_mkdtemp, mock_rmtree): + mock_mkdtemp.return_value = "/fake/engine_temp_dir_disabled_asset" + + config = create_orchestrator_test_config() + stage1 = MockPassThroughStage("stage_disabled_check") + orchestrator = PipelineOrchestrator(config_obj=config, stages=[stage1]) + + source_rule = create_orchestrator_test_source_rule("DisabledAssetSourceRule", asset_names=["EnabledAsset", "DisabledAsset"]) + enabled_asset = source_rule.assets[0] + 
disabled_asset = source_rule.assets[1] + disabled_asset.enabled = False # Disable this asset rule + + results = orchestrator.process_source_rule( + source_rule, Path("/ws_disabled"), Path("/out_disabled"), False, "inc_dis", "sha_dis" + ) + + assert stage1.execute_call_count == 1 # Only called for the enabled asset + + assert enabled_asset.name in results['processed'] + assert disabled_asset.name in results['skipped'] + assert not results['failed'] + + # Verify context for the processed asset + assert stage1.contexts_called_with[0].asset_rule.name == enabled_asset.name + + # Verify skip reason for the disabled asset (this is set by the orchestrator itself) + # The orchestrator's _process_single_asset checks asset_rule.enabled + # We need to inspect the results dictionary for the skip reason if it's stored there, + # or infer it. The current structure of `results` doesn't store detailed skip reasons directly, + # but the test ensures it's in the 'skipped' list. + # For a more detailed check, one might need to adjust how results are reported or mock deeper. + # For now, confirming it's in 'skipped' and stage1 wasn't called for it is sufficient. + + mock_mkdtemp.assert_called_once() + expected_temp_path = Path(mock_mkdtemp.return_value) / source_rule.id.hex + mock_rmtree.assert_called_once_with(expected_temp_path, ignore_errors=True) \ No newline at end of file diff --git a/tests/processing/utils/test_image_processing_utils.py b/tests/processing/utils/test_image_processing_utils.py new file mode 100644 index 0000000..e128b3f --- /dev/null +++ b/tests/processing/utils/test_image_processing_utils.py @@ -0,0 +1,504 @@ +import pytest +from unittest import mock +import numpy as np +from pathlib import Path +import sys + +# Attempt to import the module under test +# This assumes that the 'tests' directory is at the same level as the 'processing' directory, +# and pytest handles the PYTHONPATH correctly. 
+try: + from processing.utils import image_processing_utils as ipu + import cv2 # Import cv2 here if it's used for constants like cv2.COLOR_BGR2RGB +except ImportError: + # Fallback for environments where PYTHONPATH might not be set up as expected by pytest initially + # This adds the project root to sys.path to find the 'processing' module + # Adjust the number of Path.parent calls if your test structure is deeper or shallower + project_root = Path(__file__).parent.parent.parent.parent + sys.path.insert(0, str(project_root)) + from processing.utils import image_processing_utils as ipu + import cv2 # Import cv2 here as well + +# If cv2 is imported directly in image_processing_utils, you might need to mock it globally for some tests +# For example, at the top of the test file: +# sys.modules['cv2'] = mock.MagicMock() # Basic global mock if needed +# We will use more targeted mocks with @mock.patch where cv2 is used. + +# --- Tests for Mathematical Helpers --- + +def test_is_power_of_two(): + assert ipu.is_power_of_two(1) is True + assert ipu.is_power_of_two(2) is True + assert ipu.is_power_of_two(4) is True + assert ipu.is_power_of_two(16) is True + assert ipu.is_power_of_two(1024) is True + assert ipu.is_power_of_two(0) is False + assert ipu.is_power_of_two(-2) is False + assert ipu.is_power_of_two(3) is False + assert ipu.is_power_of_two(100) is False + +def test_get_nearest_pot(): + assert ipu.get_nearest_pot(1) == 1 + assert ipu.get_nearest_pot(2) == 2 + # Based on current implementation: + # For 3: lower=2, upper=4. (3-2)=1, (4-3)=1. Else branch returns upper_pot. So 4. + assert ipu.get_nearest_pot(3) == 4 + assert ipu.get_nearest_pot(50) == 64 # (50-32)=18, (64-50)=14 -> upper + assert ipu.get_nearest_pot(100) == 128 # (100-64)=36, (128-100)=28 -> upper + assert ipu.get_nearest_pot(256) == 256 + assert ipu.get_nearest_pot(0) == 1 + assert ipu.get_nearest_pot(-10) == 1 + # For 700: value.bit_length() = 10. lower_pot = 1<<(10-1) = 512. upper_pot = 1<<10 = 1024. 
+ # (700-512) = 188. (1024-700) = 324. (188 < 324) is True. Returns lower_pot. So 512. + assert ipu.get_nearest_pot(700) == 512 + assert ipu.get_nearest_pot(6) == 8 # (6-4)=2, (8-6)=2. Returns upper. + assert ipu.get_nearest_pot(5) == 4 # (5-4)=1, (8-5)=3. Returns lower. + + +@pytest.mark.parametrize( + "orig_w, orig_h, target_w, target_h, resize_mode, ensure_pot, allow_upscale, target_max_dim, expected_w, expected_h", + [ + # FIT mode + (1000, 800, 500, None, "fit", False, False, None, 500, 400), # Fit width + (1000, 800, None, 400, "fit", False, False, None, 500, 400), # Fit height + (1000, 800, 500, 500, "fit", False, False, None, 500, 400), # Fit to box (width constrained) + (800, 1000, 500, 500, "fit", False, False, None, 400, 500), # Fit to box (height constrained) + (100, 80, 200, None, "fit", False, False, None, 100, 80), # Fit width, no upscale + (100, 80, 200, None, "fit", False, True, None, 200, 160), # Fit width, allow upscale + (100, 80, 128, None, "fit", True, False, None, 128, 64), # Re-evaluated + (100, 80, 128, None, "fit", True, True, None, 128, 128), # Fit width, ensure_pot, allow upscale (128, 102 -> pot 128, 128) + + # STRETCH mode + (1000, 800, 500, 400, "stretch", False, False, None, 500, 400), + (100, 80, 200, 160, "stretch", False, True, None, 200, 160), # Stretch, allow upscale + (100, 80, 200, 160, "stretch", False, False, None, 100, 80), # Stretch, no upscale + (100, 80, 128, 128, "stretch", True, True, None, 128, 128), # Stretch, ensure_pot, allow upscale + (100, 80, 70, 70, "stretch", True, False, None, 64, 64), # Stretch, ensure_pot, no upscale (70,70 -> pot 64,64) + + # MAX_DIM_POT mode + (1000, 800, None, None, "max_dim_pot", True, False, 512, 512, 512), + (800, 1000, None, None, "max_dim_pot", True, False, 512, 512, 512), + (1920, 1080, None, None, "max_dim_pot", True, False, 1024, 1024, 512), + (100, 100, None, None, "max_dim_pot", True, False, 60, 64, 64), + # Edge cases for calculate_target_dimensions + (0, 0, 512, 512, "fit", 
False, False, None, 512, 512), + (10, 10, 512, 512, "fit", True, False, None, 8, 8), + (100, 100, 150, 150, "fit", True, False, None, 128, 128), + ] +) +def test_calculate_target_dimensions(orig_w, orig_h, target_w, target_h, resize_mode, ensure_pot, allow_upscale, target_max_dim, expected_w, expected_h): + if resize_mode == "max_dim_pot" and target_max_dim is None: + with pytest.raises(ValueError, match="target_max_dim_for_pot_mode must be provided"): + ipu.calculate_target_dimensions(orig_w, orig_h, target_width=target_w, target_height=target_h, + resize_mode=resize_mode, ensure_pot=ensure_pot, allow_upscale=allow_upscale, + target_max_dim_for_pot_mode=target_max_dim) + elif (resize_mode == "fit" and target_w is None and target_h is None) or \ + (resize_mode == "stretch" and (target_w is None or target_h is None)): + with pytest.raises(ValueError): + ipu.calculate_target_dimensions(orig_w, orig_h, target_width=target_w, target_height=target_h, + resize_mode=resize_mode, ensure_pot=ensure_pot, allow_upscale=allow_upscale, + target_max_dim_for_pot_mode=target_max_dim) + else: + actual_w, actual_h = ipu.calculate_target_dimensions( + orig_w, orig_h, target_width=target_w, target_height=target_h, + resize_mode=resize_mode, ensure_pot=ensure_pot, allow_upscale=allow_upscale, + target_max_dim_for_pot_mode=target_max_dim + ) + assert (actual_w, actual_h) == (expected_w, expected_h), \ + f"Input: ({orig_w},{orig_h}), T=({target_w},{target_h}), M={resize_mode}, POT={ensure_pot}, UPSC={allow_upscale}, TMAX={target_max_dim}" + + +def test_calculate_target_dimensions_invalid_mode(): + with pytest.raises(ValueError, match="Unsupported resize_mode"): + ipu.calculate_target_dimensions(100, 100, 50, 50, resize_mode="invalid_mode") + +@pytest.mark.parametrize( + "ow, oh, rw, rh, expected_str", + [ + (100, 100, 100, 100, "EVEN"), + (100, 100, 200, 200, "EVEN"), + (200, 200, 100, 100, "EVEN"), + (100, 100, 150, 100, "X15Y1"), + (100, 100, 50, 100, "X05Y1"), + (100, 100, 100, 150, 
"X1Y15"), + (100, 100, 100, 50, "X1Y05"), + (100, 50, 150, 75, "EVEN"), + (100, 50, 150, 50, "X15Y1"), + (100, 50, 100, 75, "X1Y15"), + (100, 50, 120, 60, "EVEN"), + (100, 50, 133, 66, "EVEN"), + (100, 100, 133, 100, "X133Y1"), + (100, 100, 100, 133, "X1Y133"), + (100, 100, 133, 133, "EVEN"), + (100, 100, 67, 100, "X067Y1"), + (100, 100, 100, 67, "X1Y067"), + (100, 100, 67, 67, "EVEN"), + (1920, 1080, 1024, 576, "EVEN"), + (1920, 1080, 1024, 512, "X112Y1"), + (0, 100, 50, 50, "InvalidInput"), + (100, 0, 50, 50, "InvalidInput"), + (100, 100, 0, 50, "InvalidResize"), + (100, 100, 50, 0, "InvalidResize"), + ] +) +def test_normalize_aspect_ratio_change(ow, oh, rw, rh, expected_str): + assert ipu.normalize_aspect_ratio_change(ow, oh, rw, rh) == expected_str + +# --- Tests for Image Manipulation --- + +@mock.patch('cv2.imread') +def test_load_image_success_str_path(mock_cv2_imread): + mock_img_data = np.array([[[1, 2, 3]]], dtype=np.uint8) + mock_cv2_imread.return_value = mock_img_data + + result = ipu.load_image("dummy/path.png") + + mock_cv2_imread.assert_called_once_with("dummy/path.png", cv2.IMREAD_UNCHANGED) + assert np.array_equal(result, mock_img_data) + +@mock.patch('cv2.imread') +def test_load_image_success_path_obj(mock_cv2_imread): + mock_img_data = np.array([[[1, 2, 3]]], dtype=np.uint8) + mock_cv2_imread.return_value = mock_img_data + dummy_path = Path("dummy/path.png") + + result = ipu.load_image(dummy_path) + + mock_cv2_imread.assert_called_once_with(str(dummy_path), cv2.IMREAD_UNCHANGED) + assert np.array_equal(result, mock_img_data) + +@mock.patch('cv2.imread') +def test_load_image_failure(mock_cv2_imread): + mock_cv2_imread.return_value = None + + result = ipu.load_image("dummy/path.png") + + mock_cv2_imread.assert_called_once_with("dummy/path.png", cv2.IMREAD_UNCHANGED) + assert result is None + +@mock.patch('cv2.imread', side_effect=Exception("CV2 Read Error")) +def test_load_image_exception(mock_cv2_imread): + result = 
ipu.load_image("dummy/path.png") + mock_cv2_imread.assert_called_once_with("dummy/path.png", cv2.IMREAD_UNCHANGED) + assert result is None + + +@mock.patch('cv2.cvtColor') +def test_convert_bgr_to_rgb_3_channel(mock_cv2_cvtcolor): + bgr_image = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + rgb_image_mock = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + mock_cv2_cvtcolor.return_value = rgb_image_mock + + result = ipu.convert_bgr_to_rgb(bgr_image) + + mock_cv2_cvtcolor.assert_called_once_with(bgr_image, cv2.COLOR_BGR2RGB) + assert np.array_equal(result, rgb_image_mock) + +@mock.patch('cv2.cvtColor') +def test_convert_bgr_to_rgb_4_channel_bgra(mock_cv2_cvtcolor): + bgra_image = np.random.randint(0, 255, (10, 10, 4), dtype=np.uint8) + rgb_image_mock = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) # cvtColor BGRA2RGB drops alpha + mock_cv2_cvtcolor.return_value = rgb_image_mock # Mocking the output of BGRA2RGB + + result = ipu.convert_bgr_to_rgb(bgra_image) + + mock_cv2_cvtcolor.assert_called_once_with(bgra_image, cv2.COLOR_BGRA2RGB) + assert np.array_equal(result, rgb_image_mock) + + +def test_convert_bgr_to_rgb_none_input(): + assert ipu.convert_bgr_to_rgb(None) is None + +def test_convert_bgr_to_rgb_grayscale_input(): + gray_image = np.random.randint(0, 255, (10, 10), dtype=np.uint8) + result = ipu.convert_bgr_to_rgb(gray_image) + assert np.array_equal(result, gray_image) # Should return as is + +@mock.patch('cv2.cvtColor') +def test_convert_rgb_to_bgr_3_channel(mock_cv2_cvtcolor): + rgb_image = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + bgr_image_mock = np.random.randint(0, 255, (10, 10, 3), dtype=np.uint8) + mock_cv2_cvtcolor.return_value = bgr_image_mock + + result = ipu.convert_rgb_to_bgr(rgb_image) + + mock_cv2_cvtcolor.assert_called_once_with(rgb_image, cv2.COLOR_RGB2BGR) + assert np.array_equal(result, bgr_image_mock) + +def test_convert_rgb_to_bgr_none_input(): + assert ipu.convert_rgb_to_bgr(None) is None + +def 
test_convert_rgb_to_bgr_grayscale_input(): + gray_image = np.random.randint(0, 255, (10, 10), dtype=np.uint8) + result = ipu.convert_rgb_to_bgr(gray_image) + assert np.array_equal(result, gray_image) # Should return as is + +def test_convert_rgb_to_bgr_4_channel_input(): + rgba_image = np.random.randint(0, 255, (10, 10, 4), dtype=np.uint8) + result = ipu.convert_rgb_to_bgr(rgba_image) + assert np.array_equal(result, rgba_image) # Should return as is + + +@mock.patch('cv2.resize') +def test_resize_image_downscale(mock_cv2_resize): + original_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + resized_image_mock = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) + mock_cv2_resize.return_value = resized_image_mock + target_w, target_h = 50, 50 + + result = ipu.resize_image(original_image, target_w, target_h) + + mock_cv2_resize.assert_called_once_with(original_image, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4) + assert np.array_equal(result, resized_image_mock) + +@mock.patch('cv2.resize') +def test_resize_image_upscale(mock_cv2_resize): + original_image = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) + resized_image_mock = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + mock_cv2_resize.return_value = resized_image_mock + target_w, target_h = 100, 100 + + result = ipu.resize_image(original_image, target_w, target_h) + + mock_cv2_resize.assert_called_once_with(original_image, (target_w, target_h), interpolation=cv2.INTER_CUBIC) + assert np.array_equal(result, resized_image_mock) + +@mock.patch('cv2.resize') +def test_resize_image_custom_interpolation(mock_cv2_resize): + original_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8) + resized_image_mock = np.random.randint(0, 255, (50, 50, 3), dtype=np.uint8) + mock_cv2_resize.return_value = resized_image_mock + target_w, target_h = 50, 50 + + result = ipu.resize_image(original_image, target_w, target_h, interpolation=cv2.INTER_NEAREST) + + 
@mock.patch('cv2.imwrite')
@mock.patch('pathlib.Path.mkdir')  # Mock mkdir to avoid actual directory creation
def test_save_image_success(mock_mkdir, mock_cv2_imwrite):
    """Saving an RGB PNG creates the parent directory and writes BGR pixels.

    ``cv2.imwrite`` and ``Path.mkdir`` are mocked; the colour conversion is
    not, so the array handed to ``imwrite`` is compared against an
    independently converted copy of the input.
    """
    mock_cv2_imwrite.return_value = True

    # Pure red in RGB. The channels must differ from each other: on a uniform
    # (e.g. all-zero) image RGB and BGR are identical, so a missing conversion
    # would go unnoticed.
    img_data = np.zeros((10, 10, 3), dtype=np.uint8)
    img_data[..., 0] = 255
    save_path = "output/test.png"

    # Computed before the call so the expectation cannot be affected by
    # anything save_image does to its input.
    expected_bgr_data = cv2.cvtColor(img_data, cv2.COLOR_RGB2BGR)
    assert not np.array_equal(expected_bgr_data, img_data)  # fixture sanity check

    success = ipu.save_image(save_path, img_data, convert_to_bgr_before_save=True)

    assert success is True
    mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)

    # Positional arguments of the imwrite call: (path, image data).
    args, _kwargs = mock_cv2_imwrite.call_args
    assert args[0] == str(Path(save_path))
    assert np.array_equal(args[1], expected_bgr_data)
# Test data type conversions in save_image
@pytest.mark.parametrize(
    "input_dtype, input_data_producer, output_dtype_target, expected_conversion_dtype, check_scaling",
    [
        # np.random.randint excludes its upper bound, so use max + 1 to make
        # the brightest value (255 / 65535) part of the sampled range.
        (np.uint16, lambda: np.random.randint(0, 65536, (10, 10, 3), dtype=np.uint16), np.uint8, np.uint8, True),
        (np.float32, lambda: np.random.rand(10, 10, 3).astype(np.float32), np.uint8, np.uint8, True),
        (np.uint8, lambda: np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8), np.uint16, np.uint16, True),
        (np.float32, lambda: np.random.rand(10, 10, 3).astype(np.float32), np.uint16, np.uint16, True),
        (np.uint8, lambda: np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8), np.float16, np.float16, True),
        (np.uint16, lambda: np.random.randint(0, 65536, (10, 10, 3), dtype=np.uint16), np.float32, np.float32, True),
    ]
)
@mock.patch('cv2.imwrite')
@mock.patch('pathlib.Path.mkdir')
def test_save_image_dtype_conversion(mock_mkdir, mock_cv2_imwrite, input_dtype, input_data_producer, output_dtype_target, expected_conversion_dtype, check_scaling):
    """The array passed to ``cv2.imwrite`` has the requested dtype and scale.

    The dtype is asserted for every case. Pixel values are additionally
    checked (within +/-1) when the target is ``uint8`` or ``uint16``; float
    targets are covered by the dtype assertion only. The expected values are
    channel-swapped with ``COLOR_RGB2BGR`` because ``save_image`` is called
    without ``convert_to_bgr_before_save``, i.e. the test relies on the
    function converting to BGR by default.
    """
    mock_cv2_imwrite.return_value = True
    img_data = input_data_producer()
    original_img_data_copy = img_data.copy()  # reference for the scaling check

    ipu.save_image("output/dtype_test.png", img_data, output_dtype_target=output_dtype_target)

    mock_cv2_imwrite.assert_called_once()
    saved_img_data = mock_cv2_imwrite.call_args[0][1]  # image data passed to imwrite

    assert saved_img_data.dtype == expected_conversion_dtype

    if not check_scaling or output_dtype_target not in (np.uint8, np.uint16):
        return

    # Basic check: map the input onto the full range of the integer target.
    target_max = float(np.iinfo(output_dtype_target).max)
    if input_dtype in (np.float16, np.float32, np.float64):
        # Float input is treated as 0-1 and clipped before scaling.
        scaled = np.clip(original_img_data_copy, 0.0, 1.0) * target_max
    else:
        source_max = float(np.iinfo(input_dtype).max)
        scaled = original_img_data_copy.astype(np.float32) / source_max * target_max

    expected_scaled_data = scaled.astype(output_dtype_target)
    # atol=1 absorbs rounding differences from float precision.
    assert np.allclose(saved_img_data, cv2.cvtColor(expected_scaled_data, cv2.COLOR_RGB2BGR), atol=1)
# Conceptual example of mocking ipu.load_image for a function that would use it.
# calculate_image_stats takes image data directly, so nothing in the module
# needs this yet; the test is skipped rather than reporting a vacuous pass.
@pytest.mark.skip(reason="Conceptual example only: no function that calls load_image exists to test yet")
@mock.patch('processing.utils.image_processing_utils.load_image')
def test_hypothetical_function_using_load_image(mock_load_image):
    """Template for testing a helper that calls ``ipu.load_image`` internally.

    Intended shape of such a helper::

        def process_image_from_path(path):
            img_data = ipu.load_image(path)
            return ipu.calculate_image_stats(img_data)

    Once it exists, remove the skip marker and enable the assertions below.
    """
    mock_load_image.return_value = np.array([[[0.5]]], dtype=np.float32)

    # result = ipu.hypothetical_process_image_from_path("dummy.png")
    # mock_load_image.assert_called_once_with("dummy.png")
    # assert result["mean"] == 0.5
# Tests for sanitize_filename
#
# Expected values follow the implementation in utils/path_utils.py:
#   * every run of characters other than word characters, '.' and '-' becomes
#     a single '_',
#   * consecutive underscores are collapsed and leading/trailing ones stripped,
#   * an empty result is replaced by "invalid_name",
#   * non-string input is coerced with str() instead of raising.

def test_sanitize_filename_valid():
    assert sanitize_filename("valid_filename.txt") == "valid_filename.txt"

def test_sanitize_filename_with_spaces():
    assert sanitize_filename("file name with spaces.txt") == "file_name_with_spaces.txt"

def test_sanitize_filename_with_special_characters():
    # Two runs of disallowed characters, separated by a '.', each collapse to
    # one underscore; the dots themselves are kept.
    assert sanitize_filename("file!@#$%^&*()[]{};:'\",.<>/?\\|.txt") == "file_._.txt"

def test_sanitize_filename_with_leading_trailing_whitespace():
    # The leading space becomes '_' and is stripped; the space before the
    # extension is interior, so its underscore stays.
    assert sanitize_filename(" filename_with_spaces .txt") == "filename_with_spaces_.txt"

def test_sanitize_filename_empty_string():
    assert sanitize_filename("") == "invalid_name"

def test_sanitize_filename_with_none():
    # Non-string input is converted with str(), it does not raise.
    assert sanitize_filename(None) == "None"

def test_sanitize_filename_mixed_case():
    assert sanitize_filename("MixedCaseFileName.PNG") == "MixedCaseFileName.PNG"

def test_sanitize_filename_long_filename():
    # The function sanitizes only; it does not truncate to OS length limits.
    long_name = "a" * 255 + ".txt"
    assert sanitize_filename(long_name) == long_name

def test_sanitize_filename_unicode_characters():
    # \w matches Unicode word characters for str patterns, so CJK text is kept.
    assert sanitize_filename("文件名前缀_文件名_后缀.jpg") == "文件名前缀_文件名_后缀.jpg"

def test_sanitize_filename_multiple_extensions():
    assert sanitize_filename("archive.tar.gz") == "archive.tar.gz"

def test_sanitize_filename_no_extension():
    assert sanitize_filename("filename") == "filename"

def test_sanitize_filename_only_special_chars():
    # The whole input collapses to '_', which is stripped, leaving nothing.
    assert sanitize_filename("!@#$%^") == "invalid_name"

def test_sanitize_filename_with_hyphens_and_underscores():
    assert sanitize_filename("file-name_with-hyphens_and_underscores.zip") == "file-name_with-hyphens_and_underscores.zip"
def test_generate_path_empty_pattern():
    """An empty pattern is expected to place the file directly under base_path."""
    # NOTE(review): the keyword names assume generate_path_from_pattern's
    # signature, which is not visible from this test module - confirm against
    # utils.path_utils.
    call_kwargs = dict(
        base_path="output",
        pattern="",
        asset_name="MyAsset",
        map_type="Diffuse",
        filename="MyAsset_Diffuse.png",
        source_rule_name="TestRule",
        incrementing_value=None,
        sha5_value=None,
    )

    generated = generate_path_from_pattern(**call_kwargs)

    assert Path(generated) == Path("output/MyAsset_Diffuse.png")
def test_generate_path_no_filename_provided():
    """Passing ``filename=None`` is expected to be rejected with ``TypeError``."""
    # NOTE(review): the exception type is an assumption carried over from the
    # original test (it might be ValueError, depending on the implementation);
    # verify against generate_path_from_pattern's handling of a missing filename.
    arguments = {
        "base_path": "output",
        "pattern": "{asset_name}/{map_type}",
        "asset_name": "MyAsset",
        "map_type": "Diffuse",
        "filename": None,
        "source_rule_name": "TestRule",
        "incrementing_value": None,
        "sha5_value": None,
    }
    with pytest.raises(TypeError):
        generate_path_from_pattern(**arguments)
Assuming they are treated as literal empty segments. + # Path("///empty_test.file") might resolve to "/empty_test.file" on POSIX + # or just "empty_test.file" if base_path is current dir. + # Let's assume Path() handles normalization. + # If base_path is "", it means current directory. + # So, "//empty_test.file" relative to current dir. + # Path objects normalize this. e.g. Path('//a') -> Path('/a') on POSIX + # Path('a//b') -> Path('a/b') + # Path('/a//b') -> Path('/a/b') + # Path('//a//b') -> Path('/a/b') + # If base_path is empty, it's like Path('.////empty_test.file') + expected = Path("empty_test.file") # Simplified, actual result might be OS dependent or Path lib norm. + # More robust check: + # result_path = Path(result) + # expected_path = Path.cwd() / "" / "" / "" / "" / "empty_test.file" # This is not quite right + # Let's assume the function joins them: "" + "/" + "" + "/" + "" + "/" + "" + "/" + "empty_test.file" + # which becomes "////empty_test.file" + # Path("////empty_test.file") on Windows becomes "\\empty_test.file" (network path attempt) + # Path("////empty_test.file") on Linux becomes "/empty_test.file" + # Given the function likely uses os.path.join or Path.joinpath, + # and base_path="", asset_name="", map_type="", inc_val="", sha5_val="" + # pattern = "{asset_name}/{map_type}/{incrementing_value}/{sha5_value}" -> "///" + # result = base_path / pattern_result / filename + # result = "" / "///" / "empty_test.file" + # Path("") / "///" / "empty_test.file" -> Path("///empty_test.file") + # This is tricky. Let's assume the function is robust. + # If all path segments are empty, it should ideally resolve to just the filename relative to base_path. + # If base_path is also empty, then filename relative to CWD. 
def sanitize_filename(name: str) -> str:
    """Reduce *name* to characters that are safe in file and directory names.

    Non-string input is coerced with ``str()``. Every run of characters other
    than word characters, ``.`` and ``-`` is replaced by a single underscore,
    consecutive underscores are collapsed, and leading/trailing underscores
    are stripped.

    Returns:
        The sanitized name, or ``"invalid_name"`` when nothing usable is left:
        an empty result, or a dots-only name such as ``"."`` / ``".."`` which
        would refer to the current/parent directory instead of naming an entry.
    """
    if not isinstance(name, str):
        name = str(name)
    name = re.sub(r'[^\w.\-]+', '_', name)  # Allow alphanumeric, underscore, hyphen, dot
    name = re.sub(r'_+', '_', name).strip('_')
    # Also true for the empty string.
    if not name.strip('.'):
        return "invalid_name"
    return name


def get_filename_friendly_map_type(internal_map_type: str, file_type_definitions: Optional[Dict[str, Dict]]) -> str:
    """Derive a filename-friendly map type from the internal map type.

    The longest key of *file_type_definitions* that prefixes
    *internal_map_type* is looked up; its ``"standard_type"`` alias (stripped)
    replaces that prefix and whatever followed the prefix is appended
    unchanged.

    Args:
        internal_map_type: Internal identifier to translate.
        file_type_definitions: Mapping of base keys to definition dicts.

    Returns:
        The translated name, or *internal_map_type* unchanged (with a logged
        warning) when the definitions are missing/not a dict/empty, no key
        matches, or the matching definition has no usable ``"standard_type"``.
    """
    if not isinstance(file_type_definitions, dict) or not file_type_definitions:
        logger.warning(f"Filename-friendly lookup: FILE_TYPE_DEFINITIONS not available or invalid. Falling back to internal type: {internal_map_type}")
        return internal_map_type

    # Longest key first so the most specific prefix wins
    # (e.g. MAP_ROUGHNESS before MAP_ROUGH).
    base_key = next(
        (
            key
            for key in sorted(file_type_definitions, key=len, reverse=True)
            if internal_map_type.startswith(key)
        ),
        None,
    )
    if not base_key:
        logger.warning(f"Filename-friendly lookup: Could not parse base key from '{internal_map_type}'. Falling back.")
        return internal_map_type

    definition = file_type_definitions.get(base_key)
    if not (definition and isinstance(definition, dict)):
        logger.warning(f"Filename-friendly lookup: No valid definition for '{base_key}'. Falling back.")
        return internal_map_type

    alias = definition.get("standard_type")
    if not (alias and isinstance(alias, str) and alias.strip()):
        logger.warning(f"Filename-friendly lookup: Standard type alias for '{base_key}' is missing or invalid. Falling back.")
        return internal_map_type

    # Keep whatever followed the base key (variant suffix) after the alias.
    friendly_name = alias.strip() + internal_map_type[len(base_key):]
    logger.debug(f"Filename-friendly lookup: Transformed '{internal_map_type}' -> '{friendly_name}'")
    return friendly_name