From 0de4db182626e9c2578c47ff3add7094ac94ecb3 Mon Sep 17 00:00:00 2001
From: Rusfort <theis@rusfort.dk>
Date: Tue, 13 May 2025 02:52:07 +0200
Subject: [PATCH] Fixed inconcistencies - only processes MAP_ files now

---
 ProjectNotes/MAP_Prefix_Enforcement_Plan.md   | 96 +++++++++++++++++++
 .../stages/alpha_extraction_to_mask.py        | 17 ++--
 .../stages/gloss_to_rough_conversion.py       | 16 ++--
 .../pipeline/stages/merged_task_processor.py  |  9 ++
 .../stages/normal_map_green_channel.py        |  4 +-
 .../pipeline/stages/regular_map_processor.py  |  7 ++
 6 files changed, 135 insertions(+), 14 deletions(-)
 create mode 100644 ProjectNotes/MAP_Prefix_Enforcement_Plan.md

diff --git a/ProjectNotes/MAP_Prefix_Enforcement_Plan.md b/ProjectNotes/MAP_Prefix_Enforcement_Plan.md
new file mode 100644
index 0000000..205918d
--- /dev/null
+++ b/ProjectNotes/MAP_Prefix_Enforcement_Plan.md
@@ -0,0 +1,96 @@
+# Plan: Enforcing "MAP_" Prefix for Internal Processing and Standard Type for Output Naming
+
+**Date:** 2025-05-13
+
+**I. Goal:**
+The primary goal is to ensure that for all internal processing, the system *exclusively* uses `FileRule.item_type` values that start with the "MAP_" prefix (e.g., "MAP_COL", "MAP_NRM"). The "standard type" (e.g., "COL", "NRM") associated with these "MAP_" types (as defined in `config/app_settings.json`) should *only* be used during the file saving stages for output naming. Any `FileRule` whose `item_type` does not start with "MAP_" (and isn't a special type like "EXTRA" or "MODEL") should be skipped by the relevant map processing stages.
+
+**II. Current State Analysis Summary:**
+
+*   **Output Naming:** The use of "standard type" for output filenames via the `get_filename_friendly_map_type` utility in `SaveVariantsStage` and `OutputOrganizationStage` is **correct** and already meets the requirement.
+*   **Internal "MAP_" Prefix Usage:**
+    *   Some stages like `GlossToRoughConversionStage` correctly check for "MAP_" prefixes (e.g., `processing_map_type.startswith("MAP_GLOSS")`).
+    *   Other stages like `RegularMapProcessorStage` and `MergedTaskProcessorStage` (and its helpers) implicitly expect "MAP_" prefixed types for their internal regex-based logic but lack explicit checks to skip items if the prefix is missing.
+    *   Stages like `AlphaExtractionToMaskStage` and `NormalMapGreenChannelStage` currently use non-"MAP_" prefixed "standard types" (e.g., "NORMAL", "ALBEDO") when reading from `context.processed_maps_details` for their decision-making logic.
+    *   The `PrepareProcessingItemsStage` adds `FileRule`s to the processing queue without filtering based on the "MAP_" prefix in `item_type`.
+*   **Data Consistency in `AssetProcessingContext`:**
+    *   `FileRule.item_type` is the field that should hold the "MAP_" prefixed type from the initial rule generation.
+    *   `context.processed_maps_details` entries can contain various map type representations:
+        *   `map_type`: Often stores the "standard type" (e.g., "Roughness", "MASK", "NORMAL").
+        *   `processing_map_type` / `internal_map_type`: Generally seem to store the "MAP_" prefixed type. This needs to be consistent.
+*   **Configuration (`config/app_settings.json`):**
+    *   `FILE_TYPE_DEFINITIONS` correctly use "MAP_" prefixed keys.
+    *   `MAP_MERGE_RULES` need to be reviewed to ensure their `output_map_type` and input map types are "MAP_" prefixed.
+
+**III. Proposed Changes (Code Identification & Recommendations):**
+
+**A. Enforce "MAP_" Prefix for Processing Items (Skipping Logic):**
+The core requirement is that processing stages should skip `FileRule` items if their `item_type` doesn't start with "MAP_".
+
+1.  **`RegularMapProcessorStage` (`processing/pipeline/stages/regular_map_processor.py`):**
+    *   **Identify:** In the `execute` method, `initial_internal_map_type` is derived from `file_rule.item_type_override` or `file_rule.item_type`.
+    *   **Recommend:** Add an explicit check after determining `initial_internal_map_type`. If `initial_internal_map_type` does not start with `"MAP_"`, the stage should log a warning, set the `result.status` to "Skipped (Invalid Type)" or similar, and return `result` early, effectively skipping processing for this item.
+
+2.  **`MergedTaskProcessorStage` (`processing/pipeline/stages/merged_task_processor.py`):**
+    *   **Identify:** This stage processes `MergeTaskDefinition`s. The definitions for these tasks (input types, output type) come from `MAP_MERGE_RULES` in `config/app_settings.json`. The stage uses `required_map_type_from_rule` for its inputs.
+    *   **Recommend:**
+        *   **Configuration First:** Review all entries in `MAP_MERGE_RULES` in `config/app_settings.json`.
+            *   Ensure the `output_map_type` for each rule (e.g., "MAP_NRMRGH") starts with "MAP_".
+            *   Ensure all map type values within the `inputs` dictionary (e.g., `"R": "MAP_NRM"`) start with "MAP_".
+        *   **Stage Logic:** In the `execute` method, when iterating through `merge_inputs_config.items()`, check if `required_map_type_from_rule` starts with `"MAP_"`. If not, log a warning and either:
+            *   Skip loading/processing this specific input channel (potentially using its fallback if the overall merge can still proceed).
+            *   Or, if a non-"MAP_" input is critical, fail the entire merge task for this asset.
+        *   The helper `_apply_in_memory_transformations` already uses regex expecting "MAP_" prefixes; this will naturally fail or misbehave if inputs are not "MAP_" prefixed, reinforcing the need for the check above.
+
+**B. Standardize Map Type Fields and Usage in `context.processed_maps_details`:**
+Ensure consistency in how "MAP_" prefixed types are stored and accessed within `context.processed_maps_details` for internal logic (not naming).
+
+1.  **Recommendation:** Establish a single, consistent field name within `context.processed_maps_details` to store the definitive "MAP_" prefixed internal map type (e.g., `internal_map_type` or `processing_map_type`). All stages that perform logic based on the specific *kind* of map (e.g., transformations, source selection) should read from this standardized field. The `map_type` field can continue to store the "standard type" (e.g., "Roughness") primarily for informational/metadata purposes if needed, but not for core processing logic.
+
+2.  **`AlphaExtractionToMaskStage` (`processing/pipeline/stages/alpha_extraction_to_mask.py`):**
+    *   **Identify:**
+        *   Checks for existing MASK map using `file_rule.map_type == "MASK"`. (Discrepancy: `FileRule` uses `item_type`).
+        *   Searches for suitable source maps using `details.get('map_type') in self.SUITABLE_SOURCE_MAP_TYPES` where `SUITABLE_SOURCE_MAP_TYPES` are standard types like "ALBEDO".
+        *   When adding new details, it sets `map_type: "MASK"` and the new `FileRule` gets `item_type="MAP_MASK"`.
+    *   **Recommend:**
+        *   Change the check for an existing MASK map to `file_rule.item_type == "MAP_MASK"`.
+        *   Modify the source map search to use the standardized "MAP_" prefixed field from `details` (e.g., `details.get('internal_map_type')`) and update `SUITABLE_SOURCE_MAP_TYPES` to be "MAP_" prefixed (e.g., "MAP_COL", "MAP_ALBEDO").
+        *   When adding new details for the created MASK map to `context.processed_maps_details`, ensure the standardized "MAP_" prefixed field is set to "MAP_MASK", and `map_type` (if kept) is "MASK".
+
+3.  **`NormalMapGreenChannelStage` (`processing/pipeline/stages/normal_map_green_channel.py`):**
+    *   **Identify:** Checks `map_details.get('map_type') == "NORMAL"`.
+    *   **Recommend:** Change this check to use the standardized "MAP_" prefixed field from `map_details` (e.g., `map_details.get('internal_map_type')`) and verify if it `startswith("MAP_NRM")`.
+
+4.  **`GlossToRoughConversionStage` (`processing/pipeline/stages/gloss_to_rough_conversion.py`):**
+    *   **Identify:** This stage already uses `processing_map_type.startswith("MAP_GLOSS")` and updates `processing_map_type` to "MAP_ROUGH" in `map_details`. It also updates the `FileRule.item_type` correctly.
+    *   **Recommend:** This stage is largely consistent. Ensure the field it reads/writes (`processing_map_type`) aligns with the chosen standardized "MAP_" prefixed field for `processed_maps_details`.
+
+**C. Review Orchestration Logic (Conceptual):**
+*   When the orchestrator populates `context.processed_maps_details` after stages like `SaveVariantsStage`, ensure it stores the "MAP_" prefixed `internal_map_type` (from `SaveVariantsInput`) into the chosen standardized field in `processed_maps_details`.
+
+**IV. Testing Recommendations:**
+
+*   Create test cases with `AssetRule`s containing `FileRule`s where `item_type` is intentionally set to a non-"MAP_" prefixed value (e.g., "COLOR_MAP", "TEXTURE_ROUGH"). Verify that `RegularMapProcessorStage` skips these.
+*   Modify `MAP_MERGE_RULES` in a test configuration:
+    *   Set an `output_map_type` to a non-"MAP_" value.
+    *   Set an input map type (e.g., for channel "R") to a non-"MAP_" value.
+    *   Verify that `MergedTaskProcessorStage` correctly handles these (e.g., fails the task, skips the input, logs warnings).
+*   Test `AlphaExtractionToMaskStage`:
+    *   With an existing `FileRule` having `item_type="MAP_MASK"` to ensure extraction is skipped.
+    *   With source maps having "MAP_COL" (with alpha) as their `internal_map_type` in `processed_maps_details` to ensure they are correctly identified as sources.
+*   Test `NormalMapGreenChannelStage` with a normal map having "MAP_NRM" as its `internal_map_type` in `processed_maps_details` to ensure it's processed.
+*   Verify that output filenames continue to use the "standard type" (e.g., "COL", "ROUGH", "NRM") correctly.
+
+**V. Mermaid Diagram (Illustrative Flow for `FileRule` Processing):**
+
+```mermaid
+graph TD
+    A[AssetRule with FileRules] --> B{FileRuleFilterStage};
+    B -- files_to_process --> C{PrepareProcessingItemsStage};
+    C -- processing_items (FileRule) --> D{PipelineOrchestrator};
+    D -- FileRule --> E(RegularMapProcessorStage);
+    E --> F{Check FileRule.item_type};
+    F -- Starts with "MAP_"? --> G[Process Map];
+    F -- No --> H[Skip Map / Log Warning];
+    G --> I[...subsequent stages...];
+    H --> I;
\ No newline at end of file
diff --git a/processing/pipeline/stages/alpha_extraction_to_mask.py b/processing/pipeline/stages/alpha_extraction_to_mask.py
index 8de310e..87aa3b6 100644
--- a/processing/pipeline/stages/alpha_extraction_to_mask.py
+++ b/processing/pipeline/stages/alpha_extraction_to_mask.py
@@ -18,7 +18,8 @@ class AlphaExtractionToMaskStage(ProcessingStage):
     Extracts an alpha channel from a suitable source map (e.g., Albedo, Diffuse)
     to generate a MASK map if one is not explicitly defined.
     """
-    SUITABLE_SOURCE_MAP_TYPES = ["ALBEDO", "DIFFUSE", "BASE_COLOR"] # Map types likely to have alpha
+    # Use MAP_ prefixed types for internal logic checks
+    SUITABLE_SOURCE_MAP_TYPES = ["MAP_COL", "MAP_ALBEDO", "MAP_BASECOLOR"] # Map types likely to have alpha
 
     def execute(self, context: AssetProcessingContext) -> AssetProcessingContext:
         asset_name_for_log = context.asset_rule.asset_name if context.asset_rule else "Unknown Asset"
@@ -38,7 +39,8 @@ class AlphaExtractionToMaskStage(ProcessingStage):
         # A. Check for Existing MASK Map
         for file_rule in context.files_to_process:
             # Assuming file_rule has 'map_type' and 'file_path' (instead of filename_pattern)
-            if hasattr(file_rule, 'map_type') and file_rule.map_type == "MASK":
+            # Check for existing MASK map using the correct item_type field and MAP_ prefix
+            if file_rule.item_type == "MAP_MASK":
                 file_path_for_log = file_rule.file_path if hasattr(file_rule, 'file_path') else "Unknown file path"
                 logger.info(
                     f"Asset '{asset_name_for_log}': MASK map already defined by FileRule "
@@ -51,8 +53,10 @@ class AlphaExtractionToMaskStage(ProcessingStage):
         source_file_rule_id_for_alpha: Optional[str] = None # This ID comes from processed_maps_details keys
 
         for file_rule_id, details in context.processed_maps_details.items():
+            # Check for suitable source map using the standardized internal_map_type field
+            internal_map_type = details.get('internal_map_type') # Use the standardized field
             if details.get('status') == 'Processed' and \
-               details.get('map_type') in self.SUITABLE_SOURCE_MAP_TYPES:
+               internal_map_type in self.SUITABLE_SOURCE_MAP_TYPES:
                 try:
                     temp_path = Path(details['temp_processed_file'])
                     if not temp_path.exists():
@@ -153,15 +157,16 @@ class AlphaExtractionToMaskStage(ProcessingStage):
 
 
         context.processed_maps_details[new_mask_processed_map_key] = {
-            'map_type': "MASK",
+            'internal_map_type': "MAP_MASK", # Use the standardized MAP_ prefixed field
+            'map_type': "MASK", # Keep standard type for metadata/naming consistency if needed
             'source_file': str(source_image_path),
             'temp_processed_file': str(mask_temp_path),
             'original_dimensions': original_dims,
             'processed_dimensions': (alpha_channel.shape[1], alpha_channel.shape[0]),
             'status': 'Processed',
             'notes': (
-                f"Generated from alpha of {source_map_details_for_alpha['map_type']} "
-                f"(Source Detail ID: {source_file_rule_id_for_alpha})" # Changed from Source Rule ID
+                f"Generated from alpha of {source_map_details_for_alpha.get('internal_map_type', 'unknown type')} " # Use internal_map_type for notes
+                f"(Source Detail ID: {source_file_rule_id_for_alpha})"
             ),
             # 'file_rule_id': new_mask_file_rule_id_str # FileRule doesn't have an ID to link here directly
         }
diff --git a/processing/pipeline/stages/gloss_to_rough_conversion.py b/processing/pipeline/stages/gloss_to_rough_conversion.py
index 2de863c..9c2f948 100644
--- a/processing/pipeline/stages/gloss_to_rough_conversion.py
+++ b/processing/pipeline/stages/gloss_to_rough_conversion.py
@@ -51,7 +51,8 @@ class GlossToRoughConversionStage(ProcessingStage):
 
         # Iterate using the index (map_key_index) as the key, which is now standard.
         for map_key_index, map_details in context.processed_maps_details.items():
-            processing_map_type = map_details.get('processing_map_type', '')
+            # Use the standardized internal_map_type field
+            internal_map_type = map_details.get('internal_map_type', '')
             map_status = map_details.get('status')
             original_temp_path_str = map_details.get('temp_processed_file')
             # source_file_rule_idx from details should align with map_key_index.
@@ -70,11 +71,12 @@ class GlossToRoughConversionStage(ProcessingStage):
                 processing_tag = f"mki_{map_key_index}_fallback_tag"
 
 
-            if not processing_map_type.startswith("MAP_GLOSS"):
-                # logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Type '{processing_map_type}' is not GLOSS. Skipping.")
+            # Check if the map is a GLOSS map using the standardized internal_map_type
+            if not internal_map_type.startswith("MAP_GLOSS"):
+                # logger.debug(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index}: Type '{internal_map_type}' is not GLOSS. Skipping.")
                 continue
 
-            logger.info(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Identified potential GLOSS map (Type: {processing_map_type}).")
+            logger.info(f"Asset '{asset_name_for_log}', Map Key Index {map_key_index} (Tag: {processing_tag}): Identified potential GLOSS map (Type: {internal_map_type}).")
 
             if map_status not in successful_conversion_statuses:
                 logger.warning(
@@ -163,9 +165,9 @@ class GlossToRoughConversionStage(ProcessingStage):
                 
                 # Update context.processed_maps_details for this map_key_index
                 map_details['temp_processed_file'] = str(new_temp_path)
-                map_details['original_map_type_before_conversion'] = processing_map_type
-                map_details['processing_map_type'] = "MAP_ROUGH"
-                map_details['map_type'] = "Roughness"
+                map_details['original_map_type_before_conversion'] = internal_map_type # Store the original internal type
+                map_details['internal_map_type'] = "MAP_ROUGH" # Use the standardized MAP_ prefixed field
+                map_details['map_type'] = "Roughness" # Keep standard type for metadata/naming consistency if needed
                 map_details['status'] = "Converted_To_Rough"
                 map_details['notes'] = map_details.get('notes', '') + "; Converted from GLOSS by GlossToRoughConversionStage"
                 if 'base_pot_resolution_name' in map_details:
diff --git a/processing/pipeline/stages/merged_task_processor.py b/processing/pipeline/stages/merged_task_processor.py
index e9a8eea..13f9281 100644
--- a/processing/pipeline/stages/merged_task_processor.py
+++ b/processing/pipeline/stages/merged_task_processor.py
@@ -125,6 +125,15 @@ class MergedTaskProcessorStage(ProcessingStage):
             # --- Load, Transform, and Prepare Inputs ---
             log.debug(f"{log_prefix}: Loading and preparing inputs...")
             for channel_char, required_map_type_from_rule in merge_inputs_config.items():
+                # Validate that the required input map type starts with "MAP_"
+                if not required_map_type_from_rule.startswith("MAP_"):
+                    result.error_message = (
+                        f"Invalid input map type '{required_map_type_from_rule}' for channel '{channel_char}'. "
+                        f"Input map types for merging must start with 'MAP_'."
+                    )
+                    log.error(f"{log_prefix}: {result.error_message}")
+                    return result # Fail the task if an input type is invalid
+
                 input_info = input_map_sources_from_task.get(required_map_type_from_rule)
                 input_image_data: Optional[np.ndarray] = None
                 input_source_desc = f"Fallback for {required_map_type_from_rule}"
diff --git a/processing/pipeline/stages/normal_map_green_channel.py b/processing/pipeline/stages/normal_map_green_channel.py
index 38d9034..636c1ec 100644
--- a/processing/pipeline/stages/normal_map_green_channel.py
+++ b/processing/pipeline/stages/normal_map_green_channel.py
@@ -38,7 +38,9 @@ class NormalMapGreenChannelStage(ProcessingStage):
 
         # Iterate through processed maps, as FileRule objects don't have IDs directly
         for map_id_hex, map_details in context.processed_maps_details.items():
-            if map_details.get('map_type') == "NORMAL" and map_details.get('status') == 'Processed':
+            # Check if the map is a processed normal map using the standardized internal_map_type
+            internal_map_type = map_details.get('internal_map_type')
+            if internal_map_type and internal_map_type.startswith("MAP_NRM") and map_details.get('status') == 'Processed':
                 
                 # Check configuration for inversion
                 # Assuming general_settings is an attribute of config_obj and might be a dict or an object
diff --git a/processing/pipeline/stages/regular_map_processor.py b/processing/pipeline/stages/regular_map_processor.py
index 2bb5b52..bb74321 100644
--- a/processing/pipeline/stages/regular_map_processor.py
+++ b/processing/pipeline/stages/regular_map_processor.py
@@ -183,6 +183,13 @@ class RegularMapProcessorStage(ProcessingStage):
                  log.error(f"{log_prefix}: {result.error_message}")
                  return result # Early exit
 
+            # Explicitly skip if the determined type doesn't start with "MAP_"
+            if not initial_internal_map_type.startswith("MAP_"):
+                result.status = "Skipped (Invalid Type)"
+                result.error_message = f"FileRule item_type '{initial_internal_map_type}' does not start with 'MAP_'. Skipping processing."
+                log.warning(f"{log_prefix}: {result.error_message}")
+                return result # Early exit
+
             processing_map_type = self._get_suffixed_internal_map_type(
                 context.asset_rule, file_rule, initial_internal_map_type, respect_variant_map_types, asset_name_for_log
             )