# gui/prediction_handler.py
import logging
from pathlib import Path
import time
import os
import re  # Import regex
import tempfile  # Added for temporary extraction directory
import zipfile  # Added for zip file handling
# import patoolib  # Potential import for rar/7z - Add later if zip works
from collections import defaultdict, Counter  # Added Counter
from typing import List, Dict, Any  # For type hinting

# --- PySide6 Imports ---
from PySide6.QtCore import QObject, Signal, QThread, Slot

# --- Backend Imports ---
import sys

# Ensure the project root is importable so the backend modules below resolve
# when this module is loaded from inside the gui/ package.
script_dir = Path(__file__).parent
project_root = script_dir.parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

try:
    # Import Configuration, ConfigurationError, and load_base_config
    from configuration import Configuration, ConfigurationError, load_base_config
    # AssetProcessor might not be needed directly anymore if logic is moved here
    # from asset_processor import AssetProcessor, AssetProcessingError
    from rule_structure import SourceRule, AssetRule, FileRule
    BACKEND_AVAILABLE = True
except ImportError as e:
    print(f"ERROR (PredictionHandler): Failed to import backend/config modules: {e}")
    # Define placeholders if imports fail so the module still loads and the
    # handler can report the problem at runtime instead of crashing on import.
    Configuration = None
    load_base_config = None  # Placeholder
    ConfigurationError = Exception
    # AssetProcessingError = Exception
    SourceRule, AssetRule, FileRule = (None,) * 3  # Placeholder for rule structures
    BACKEND_AVAILABLE = False

log = logging.getLogger(__name__)
# Basic config if logger hasn't been set up elsewhere
if not log.hasHandlers():
    logging.basicConfig(level=logging.INFO, format='%(levelname)s (PredictHandler): %(message)s')
# Helper function for classification (can be moved outside class if preferred)
def classify_files(file_list: List[str], config: Configuration) -> Dict[str, List[Dict[str, Any]]]:
    """
    Analyzes a list of files based on configuration rules using a two-pass
    approach to group them by asset and determine initial file properties.

    Pass 1: Identifies and classifies prioritized bit depth variants.
    Pass 2: Classifies extras, general maps (downgrading if primary exists),
            and ignores.

    Args:
        file_list: List of absolute file paths.
        config: The loaded Configuration object containing naming rules.

    Returns:
        A dictionary grouping file information by predicted asset name.
        Example:
        {
            'AssetName1': [
                {'file_path': '/path/to/AssetName1_DISP16.png', 'item_type': 'DISP', 'asset_name': 'AssetName1', 'is_gloss_source': False},
                {'file_path': '/path/to/AssetName1_DISP.png', 'item_type': 'EXTRA', 'asset_name': 'AssetName1', 'is_gloss_source': False},
                {'file_path': '/path/to/AssetName1_Color.png', 'item_type': 'COL', 'asset_name': 'AssetName1', 'is_gloss_source': False}
            ],
            # ... other assets
        }
        Returns an empty dict if classification fails or no files are provided.
    """
    temp_grouped_files = defaultdict(list)
    extra_files_to_associate = []   # Store tuples: (file_path_str, filename) for Pass 2 association
    primary_asset_names = set()     # Asset names derived *only* from primary map files (populated in Pass 1)
    primary_assignments = set()     # Tuples: (asset_name, target_type) (populated *only* in Pass 1)
    processed_in_pass1 = set()      # Files already handled in Pass 1

    # --- Validation ---
    if not file_list or not config:
        log.warning("Classification skipped: Missing file list or config.")
        return {}
    # Access compiled regex directly from the config object.
    # NOTE(review): this message says "skipped" but execution continues with an
    # empty pattern map via the getattr fallbacks below — confirm intended.
    if not hasattr(config, 'compiled_map_keyword_regex') or not config.compiled_map_keyword_regex:
        log.warning("Classification skipped: Missing compiled map keyword regex in config.")
    if not hasattr(config, 'compiled_extra_regex'):
        log.warning("Configuration object missing 'compiled_extra_regex'. Cannot classify extra files.")
    if not hasattr(config, 'compiled_bit_depth_regex_map'):
        log.warning("Configuration object missing 'compiled_bit_depth_regex_map'. Cannot prioritize bit depth variants.")

    compiled_map_regex = getattr(config, 'compiled_map_keyword_regex', {})
    compiled_extra_regex = getattr(config, 'compiled_extra_regex', [])
    compiled_bit_depth_regex_map = getattr(config, 'compiled_bit_depth_regex_map', {})

    num_map_rules = sum(len(patterns) for patterns in compiled_map_regex.values())
    num_extra_rules = len(compiled_extra_regex)
    num_bit_depth_rules = len(compiled_bit_depth_regex_map)
    log.debug(f"Starting classification for {len(file_list)} files using {num_map_rules} map keyword patterns, {num_bit_depth_rules} bit depth patterns, and {num_extra_rules} extra patterns.")

    # --- Asset Name Extraction Helper ---
    def get_asset_name(f_path: Path, cfg: Configuration) -> str:
        # Extract the asset's base name from a filename, preferring the
        # preset's separator/index rules and falling back to the stem prefix.
        filename = f_path.name
        asset_name = None
        try:
            separator = cfg.source_naming_separator
            indices = cfg.source_naming_indices
            base_name_index = indices.get('base_name')
            if separator is not None and base_name_index is not None:
                stem = f_path.stem
                parts = stem.split(separator)
                if 0 <= base_name_index < len(parts):
                    asset_name = parts[base_name_index]
                else:
                    log.warning(f"Preset base_name index {base_name_index} out of bounds for '{stem}' split by '{separator}'. Falling back.")
            else:
                log.debug(f"Preset rules for asset name extraction incomplete (separator: {separator}, index: {base_name_index}). Falling back for '(unknown)'.")
            if not asset_name:
                # Fallback: text before the first underscore, or the whole stem.
                asset_name = f_path.stem.split('_')[0] if '_' in f_path.stem else f_path.stem
                log.debug(f"Used fallback asset name extraction: '{asset_name}' for '(unknown)'.")
        except Exception as e:
            log.exception(f"Error extracting asset name for '(unknown)': {e}. Falling back to stem.")
            asset_name = f_path.stem
        if not asset_name:
            asset_name = f_path.stem
            log.warning(f"Asset name extraction resulted in empty string for '(unknown)'. Using stem: '{asset_name}'.")
        return asset_name

    # --- Pass 1: Prioritized Bit Depth Variants ---
    log.debug("--- Starting Classification Pass 1: Prioritized Variants ---")
    for file_path_str in file_list:
        file_path = Path(file_path_str)
        filename = file_path.name
        asset_name = get_asset_name(file_path, config)
        processed = False
        for target_type, variant_regex in compiled_bit_depth_regex_map.items():
            match = variant_regex.search(filename)
            if match:
                log.debug(f"PASS 1: File '(unknown)' matched PRIORITIZED bit depth variant for type '{target_type}'.")
                matched_item_type = target_type
                is_gloss_flag = False  # Bit depth variants are typically not gloss
                # Check if primary already assigned (safety for overlapping patterns)
                if (asset_name, matched_item_type) in primary_assignments:
                    log.warning(f"PASS 1: Primary assignment ({asset_name}, {matched_item_type}) already exists. File '(unknown)' will be handled in Pass 2.")
                    # Don't process here, let Pass 2 handle it as a general map or extra
                else:
                    primary_assignments.add((asset_name, matched_item_type))
                    log.debug(f" PASS 1: Added primary assignment: ({asset_name}, {matched_item_type})")
                    primary_asset_names.add(asset_name)
                    temp_grouped_files[asset_name].append({
                        'file_path': file_path_str,
                        'item_type': matched_item_type,
                        'asset_name': asset_name,
                        'is_gloss_source': is_gloss_flag
                    })
                    processed_in_pass1.add(file_path_str)
                    processed = True
                break  # Stop checking other variant patterns for this file
        # Log if not processed in this pass
        # if not processed:
        #     log.debug(f"PASS 1: File '(unknown)' did not match any prioritized variant.")
    log.debug(f"--- Finished Pass 1. Primary assignments made: {primary_assignments} ---")

    # --- Pass 2: Extras, General Maps, Ignores ---
    log.debug("--- Starting Classification Pass 2: Extras, General Maps, Ignores ---")
    for file_path_str in file_list:
        if file_path_str in processed_in_pass1:
            log.debug(f"PASS 2: Skipping '{Path(file_path_str).name}' (processed in Pass 1).")
            continue  # Skip files already classified as prioritized variants

        file_path = Path(file_path_str)
        filename = file_path.name
        asset_name = get_asset_name(file_path, config)
        is_extra = False
        is_map = False

        # 1. Check for Extra Files FIRST in Pass 2
        for extra_pattern in compiled_extra_regex:
            if extra_pattern.search(filename):
                log.debug(f"PASS 2: File '(unknown)' matched EXTRA pattern: {extra_pattern.pattern}")
                # Don't group yet, just collect for later association
                extra_files_to_associate.append((file_path_str, filename))
                is_extra = True
                break
        if is_extra:
            continue  # Move to the next file if it's an extra

        # 2. Check for General Map Files in Pass 2
        for target_type, patterns_list in compiled_map_regex.items():
            for compiled_regex, original_keyword, rule_index in patterns_list:
                match = compiled_regex.search(filename)
                if match:
                    # Access rule details
                    is_gloss_flag = False
                    try:
                        map_type_mapping_list = config.map_type_mapping
                        matched_rule_details = map_type_mapping_list[rule_index]
                        is_gloss_flag = matched_rule_details.get('is_gloss_source', False)
                        log.debug(f" PASS 2: Match found! Rule Index: {rule_index}, Keyword: '{original_keyword}', Target: '{target_type}', Gloss: {is_gloss_flag}")
                    except Exception as e:
                        log.exception(f" PASS 2: Error accessing rule details for index {rule_index}: {e}")
                    # *** Crucial Check: Has a prioritized variant claimed this type? ***
                    if (asset_name, target_type) in primary_assignments:
                        log.debug(f"PASS 2: File '(unknown)' matched '{original_keyword}' for type '{target_type}', but primary already assigned via Pass 1. Classifying as EXTRA.")
                        matched_item_type = "EXTRA"
                        is_gloss_flag = False  # Extras are not gloss sources
                    else:
                        # No prioritized variant exists, assign the general map type
                        log.debug(f"PASS 2: File '(unknown)' matched '{original_keyword}' for item_type '{target_type}'.")
                        matched_item_type = target_type
                        # Do NOT add to primary_assignments here - only Pass 1 does that.
                        # Do NOT add to primary_asset_names here either.
                    temp_grouped_files[asset_name].append({
                        'file_path': file_path_str,
                        'item_type': matched_item_type,  # Could be target_type or EXTRA
                        'asset_name': asset_name,
                        'is_gloss_source': is_gloss_flag
                    })
                    is_map = True
                    break  # Stop checking patterns for this file
            if is_map:
                break  # Stop checking target types for this file

        # 3. Handle Unmatched Files in Pass 2 (Not Extra, Not Map)
        if not is_extra and not is_map:
            log.debug(f"PASS 2: File '(unknown)' did not match any map/extra pattern. Grouping under asset '{asset_name}' as FILE_IGNORE.")
            temp_grouped_files[asset_name].append({
                'file_path': file_path_str,
                'item_type': "FILE_IGNORE",
                'asset_name': asset_name,
                'is_gloss_source': False
            })
    log.debug("--- Finished Pass 2 ---")

    # --- Determine Primary Asset Name for Extra Association (using Pass 1 results) ---
    final_primary_asset_name = None
    if primary_asset_names:  # Use names derived only from Pass 1 (prioritized variants)
        # Find the most common name among those derived from primary maps identified in Pass 1
        primary_map_asset_names_pass1 = [
            f_info['asset_name']
            for asset_files in temp_grouped_files.values()
            for f_info in asset_files
            if f_info['asset_name'] in primary_asset_names
            and (f_info['asset_name'], f_info['item_type']) in primary_assignments  # Ensure it was a Pass 1 assignment
        ]
        if primary_map_asset_names_pass1:
            name_counts = Counter(primary_map_asset_names_pass1)
            most_common_names = name_counts.most_common()
            final_primary_asset_name = most_common_names[0][0]
            # Deterministic tie-break: alphabetical order among tied names.
            if len(most_common_names) > 1 and most_common_names[0][1] == most_common_names[1][1]:
                tied_names = sorted([name for name, count in most_common_names if count == most_common_names[0][1]])
                final_primary_asset_name = tied_names[0]
                log.warning(f"Multiple primary asset names tied for most common based on Pass 1: {tied_names}. Using '{final_primary_asset_name}' for associating extra files.")
            log.debug(f"Determined primary asset name for extras based on Pass 1 primary maps: '{final_primary_asset_name}'")
        else:
            log.warning("Primary asset names set (from Pass 1) was populated, but no corresponding groups found. Falling back.")

    if not final_primary_asset_name:
        # Fallback: No primary maps found in Pass 1. Use the first asset group found overall.
        if temp_grouped_files and extra_files_to_associate:
            fallback_name = sorted(temp_grouped_files.keys())[0]
            final_primary_asset_name = fallback_name
            log.warning(f"No primary map files found in Pass 1. Associating extras with first group found alphabetically: '{final_primary_asset_name}'.")
        elif extra_files_to_associate:
            log.warning(f"Could not determine any asset name to associate {len(extra_files_to_associate)} extra file(s) with. They will be ignored.")
        else:
            log.debug("No primary asset name determined (no maps or extras found).")

    # --- Associate Extra Files (collected in Pass 2) ---
    if final_primary_asset_name and extra_files_to_associate:
        log.debug(f"Associating {len(extra_files_to_associate)} extra file(s) with primary asset '{final_primary_asset_name}'")
        for file_path_str, filename in extra_files_to_associate:
            # Check if file already exists in the group (e.g., if somehow classified twice)
            if not any(f['file_path'] == file_path_str for f in temp_grouped_files[final_primary_asset_name]):
                temp_grouped_files[final_primary_asset_name].append({
                    'file_path': file_path_str,
                    'item_type': "EXTRA",
                    'asset_name': final_primary_asset_name,
                    'is_gloss_source': False
                })
            else:
                log.debug(f"Skipping duplicate association of extra file: (unknown)")
    elif extra_files_to_associate:
        # Logged warning above if final_primary_asset_name couldn't be determined
        pass

    log.debug(f"Classification complete. Found {len(temp_grouped_files)} potential assets.")
    return dict(temp_grouped_files)


class PredictionHandler(QObject):
    """
    Handles running predictions in a separate thread to avoid GUI freezes.
    Generates the initial SourceRule hierarchy based on file lists and presets.
    """
""" # --- Signals --- # Emitted when the hierarchical rule structure is ready for a single source rule_hierarchy_ready = Signal(list) # Emits a LIST containing ONE SourceRule object # Emitted when prediction/hierarchy generation for a source is done (emits the input_source_identifier) prediction_finished = Signal(str) # Emitted for status updates status_message = Signal(str, int) def __init__(self, parent=None): super().__init__(parent) self._is_running = False @property def is_running(self): return self._is_running # Removed _predict_single_asset method @Slot(str, list, str) # Explicitly define types for the slot def run_prediction(self, input_source_identifier: str, original_input_paths: list[str], preset_name: str): """ Generates the initial SourceRule hierarchy for a given source identifier (which could be a folder or archive path), extracting the actual file list first. file list, and preset name. Populates only overridable fields based on classification and preset defaults. This method is intended to be run in a separate QThread. """ thread_id = QThread.currentThread() log.info(f"[{time.time():.4f}][T:{thread_id}] --> Entered PredictionHandler.run_prediction.") # Note: file_list argument is renamed to original_input_paths for clarity, # but the signal passes the list of source paths, not the content files yet. # We use input_source_identifier as the primary path to analyze. log.info(f"VERIFY: PredictionHandler received request. Source: '{input_source_identifier}', Original Paths: {original_input_paths}, Preset: '{preset_name}'") # DEBUG Verify log.info(f"Source Identifier: '{input_source_identifier}', Preset: '{preset_name}'") if self._is_running: log.warning("Prediction is already running for another source. Aborting this run.") # Don't emit finished, let the running one complete. return if not BACKEND_AVAILABLE: log.error("Backend/config modules not available. 
Cannot run prediction.") self.status_message.emit("Error: Backend components missing.", 5000) # self.prediction_finished.emit() # Don't emit finished if never started properly return if not preset_name: log.warning("No preset selected for prediction.") self.status_message.emit("No preset selected.", 3000) # self.prediction_finished.emit() return # Check the identifier path itself source_path = Path(input_source_identifier) if not source_path.exists(): log.warning(f"Input source path does not exist: '{input_source_identifier}'. Skipping prediction.") self.status_message.emit("Input path not found.", 3000) self.rule_hierarchy_ready.emit([]) self.prediction_finished.emit(input_source_identifier) return self._is_running = True self.status_message.emit(f"Analyzing '{source_path.name}'...", 0) config: Configuration | None = None # Removed: asset_type_definitions: Dict[str, Dict] = {} # Removed: file_type_definitions: Dict[str, Dict] = {} # These are ItemType names try: config = Configuration(preset_name) # Removed: Load allowed types from the project's config module (now dictionaries) # Removed: if app_config: # Removed: asset_type_definitions = getattr(app_config, 'ASSET_TYPE_DEFINITIONS', {}) # Removed: file_type_definitions = getattr(app_config, 'FILE_TYPE_DEFINITIONS', {}) # Removed: log.debug(f"Loaded AssetType Definitions: {list(asset_type_definitions.keys())}") # Removed: log.debug(f"Loaded FileType Definitions (ItemTypes): {list(file_type_definitions.keys())}") # Removed: else: # Removed: log.warning("Project config module not loaded. 
Cannot get type definitions.") except ConfigurationError as e: log.error(f"Failed to load configuration for preset '{preset_name}': {e}") self.status_message.emit(f"Error loading preset '{preset_name}': {e}", 5000) self.prediction_finished.emit(input_source_identifier) self._is_running = False return except Exception as e: log.exception(f"Unexpected error loading configuration or allowed types for preset '{preset_name}': {e}") self.status_message.emit(f"Unexpected error loading preset '{preset_name}'.", 5000) self.prediction_finished.emit(input_source_identifier) self._is_running = False return log.debug(f"DEBUG: Calling classify_files with file_list: {original_input_paths}") # DEBUG LOG # --- Perform Classification --- try: classified_assets = classify_files(original_input_paths, config) except Exception as e: log.exception(f"Error during file classification for source '{input_source_identifier}': {e}") self.status_message.emit(f"Error classifying files: {e}", 5000) self.prediction_finished.emit(input_source_identifier) self._is_running = False return if not classified_assets: log.warning(f"Classification yielded no assets for source '{input_source_identifier}'.") self.status_message.emit("No assets identified from files.", 3000) self.rule_hierarchy_ready.emit([]) # Emit empty list self.prediction_finished.emit(input_source_identifier) self._is_running = False return # --- Build the Hierarchy --- source_rules_list = [] try: # Determine SourceRule level overrides/defaults # Get supplier name from the config property supplier_identifier = config.supplier_name # Use the property # Create the single SourceRule for this input source source_rule = SourceRule( input_path=input_source_identifier, # Use the identifier provided supplier_identifier=supplier_identifier, # Set overridable field preset_name=preset_name # Pass the selected preset name ) log.debug(f"Created SourceRule for identifier: {input_source_identifier} with supplier: {supplier_identifier}") asset_rules = 
[] # Get allowed asset types from config's internal core settings asset_type_definitions = config._core_settings.get('ASSET_TYPE_DEFINITIONS', {}) log.debug(f"Loaded AssetType Definitions from config: {list(asset_type_definitions.keys())}") for asset_name, files_info in classified_assets.items(): if not files_info: continue # Skip empty asset groups # Determine AssetRule level overrides/defaults item_types_in_asset = {f_info['item_type'] for f_info in files_info} predicted_asset_type = "Surface" # Default to "Surface" string material_indicators = {"MAP_COL", "MAP_NRM", "MAP_ROUGH", "MAP_METAL", "MAP_AO", "MAP_DISP", "COL", "NRM", "ROUGH", "METAL", "AO", "DISP"} # Added base types too if any(it in material_indicators for it in item_types_in_asset if it not in ["EXTRA", "FILE_IGNORE"]): # Exclude non-maps predicted_asset_type = "Surface" # Predict as "Surface" string # Ensure the predicted type is allowed, fallback if necessary if asset_type_definitions and predicted_asset_type not in asset_type_definitions: log.warning(f"Predicted AssetType '{predicted_asset_type}' for asset '{asset_name}' is not in ASSET_TYPE_DEFINITIONS from config. 
Falling back.") default_type = config.default_asset_category if default_type in asset_type_definitions: predicted_asset_type = default_type elif asset_type_definitions: predicted_asset_type = list(asset_type_definitions.keys())[0] else: pass # Keep the original prediction if definitions are empty asset_rule = AssetRule( asset_name=asset_name, asset_type=predicted_asset_type, ) log.debug(f"Created AssetRule for asset: {asset_name} with type: {predicted_asset_type}") file_rules = [] file_type_definitions = config._core_settings.get('FILE_TYPE_DEFINITIONS', {}) log.debug(f"Loaded FileType Definitions (ItemTypes) from config: {list(file_type_definitions.keys())}") for file_info in files_info: base_item_type = file_info['item_type'] target_asset_name_override = file_info['asset_name'] # Determine the final item_type string (prefix maps, check if allowed) final_item_type = base_item_type if not base_item_type.startswith("MAP_") and base_item_type not in ["FILE_IGNORE", "EXTRA", "MODEL"]: final_item_type = f"MAP_{base_item_type}" # Check if the final type is allowed if file_type_definitions and final_item_type not in file_type_definitions and base_item_type not in ["FILE_IGNORE", "EXTRA"]: log.warning(f"Predicted ItemType '{base_item_type}' (checked as '{final_item_type}') for file '{file_info['file_path']}' is not in FILE_TYPE_DEFINITIONS. 
Setting to FILE_IGNORE.") final_item_type = "FILE_IGNORE" # Retrieve the standard_type standard_map_type = None file_type_details = file_type_definitions.get(final_item_type) if file_type_details: standard_map_type = file_type_details.get('standard_type') log.debug(f" Found standard_type '{standard_map_type}' for final_item_type '{final_item_type}'") else: file_type_details_alias = file_type_definitions.get(base_item_type) if file_type_details_alias: standard_map_type = file_type_details_alias.get('standard_type') log.debug(f" Found standard_type '{standard_map_type}' via alias lookup for base_item_type '{base_item_type}'") elif base_item_type in file_type_definitions: standard_map_type = base_item_type log.debug(f" Using base_item_type '{base_item_type}' itself as standard_map_type.") else: log.debug(f" Could not determine standard_map_type for base '{base_item_type}' / final '{final_item_type}'. Setting to None.") output_format_override = None item_type_override = None log.debug(f" Creating FileRule for: {file_info['file_path']}") log.debug(f" Base Item Type (from classification): {base_item_type}") log.debug(f" Final Item Type (for model): {final_item_type}") log.debug(f" Target Asset Name Override: {target_asset_name_override}") log.debug(f" Determined Standard Map Type: {standard_map_type}") is_gloss_source_value = file_info.get('is_gloss_source', 'MISSING') log.debug(f" Value for 'is_gloss_source' from file_info: {is_gloss_source_value}") file_rule = FileRule( file_path=file_info['file_path'], item_type=final_item_type, item_type_override=final_item_type, target_asset_name_override=target_asset_name_override, output_format_override=output_format_override, is_gloss_source=is_gloss_source_value if isinstance(is_gloss_source_value, bool) else False, standard_map_type=standard_map_type, resolution_override=None, channel_merge_instructions={}, ) file_rules.append(file_rule) asset_rule.files = file_rules asset_rules.append(asset_rule) source_rule.assets = 
asset_rules log.debug(f"Built SourceRule '{source_rule.input_path}' with {len(asset_rules)} AssetRule(s).") source_rules_list.append(source_rule) except Exception as e: log.exception(f"Error building rule hierarchy for source '{input_source_identifier}': {e}") self.status_message.emit(f"Error building rules: {e}", 5000) self.prediction_finished.emit(input_source_identifier) self._is_running = False return # --- Emit Results --- log.info(f"VERIFY: Emitting rule_hierarchy_ready with {len(source_rules_list)} SourceRule(s).") for i, rule in enumerate(source_rules_list): log.debug(f" VERIFY Rule {i}: Input='{rule.input_path}', Assets={len(rule.assets)}") log.info(f"[{time.time():.4f}][T:{thread_id}] Prediction run finished. Emitting hierarchy for '{input_source_identifier}'.") self.rule_hierarchy_ready.emit(source_rules_list) log.info(f"[{time.time():.4f}][T:{thread_id}] Emitted rule_hierarchy_ready signal.") self.status_message.emit(f"Analysis complete for '{input_source_identifier}'.", 3000) self.prediction_finished.emit(input_source_identifier) self._is_running = False log.info(f"[{time.time():.4f}][T:{thread_id}] <-- Exiting PredictionHandler.run_prediction.")