Major comment and codebase cleanup

2025-05-06 22:47:26 +02:00
parent ddb5a43a21
commit 932b39fd01
109 changed files with 622 additions and 10137 deletions

View File

@@ -3,7 +3,6 @@ import logging
from pathlib import Path
from typing import Optional
- # Configure logging
logger = logging.getLogger(__name__)
def calculate_sha256(file_path: Path) -> Optional[str]:
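Only the signature of `calculate_sha256` survives in this hunk. For orientation, a minimal chunked-read implementation consistent with that signature might look like the following; the 8 KiB chunk size and the error handling are assumptions, not the file's actual body:

```python
import hashlib
import logging
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

def calculate_sha256(file_path: Path) -> Optional[str]:
    """Return the hex SHA-256 digest of file_path, or None if it cannot be read."""
    sha256 = hashlib.sha256()
    try:
        with file_path.open("rb") as f:
            # Hash in fixed-size chunks so large files never load fully into memory.
            for chunk in iter(lambda: f.read(8192), b""):
                sha256.update(chunk)
    except OSError as e:
        logger.error(f"Failed to hash {file_path}: {e}")
        return None
    return sha256.hexdigest()
```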

View File

@@ -98,7 +98,6 @@ def generate_path_from_pattern(pattern_string: str, token_data: dict) -> str:
return output_path
def get_next_incrementing_value(output_base_path: Path, output_directory_pattern: str) -> str:
"""Determines the next incrementing value based on existing directories."""
- # Implementation as detailed in the previous plan revision...
logger.debug(f"Calculating next increment value for pattern '{output_directory_pattern}' in '{output_base_path}'")
match = re.match(r"(.*?)(\[IncrementingValue\]|(#+))(.*)", output_directory_pattern)
if not match:
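The hunk cuts off at the no-match branch. Given the regex, the elided body plausibly scans the output directory for names matching the literal prefix/suffix around the placeholder and returns the next number, zero-padded to the `#` count. A sketch under those assumptions (the fallback width of 3 and the single-path-segment restriction are guesses):

```python
import re
from pathlib import Path

def next_increment(output_base_path: Path, output_directory_pattern: str) -> str:
    match = re.match(r"(.*?)(\[IncrementingValue\]|(#+))(.*)", output_directory_pattern)
    if not match:
        # Assumed error handling; the original branch is truncated above.
        raise ValueError(f"No incrementing placeholder in: {output_directory_pattern}")
    prefix, _placeholder, hashes, suffix = match.groups()
    width = len(hashes) if hashes else 3  # '###' -> 3 digits; 3 is an assumed default
    # Find the highest existing number among directories named <prefix><digits><suffix>.
    name_re = re.compile(re.escape(prefix) + r"(\d+)" + re.escape(suffix) + r"$")
    highest = 0
    if output_base_path.is_dir():
        for entry in output_base_path.iterdir():
            m = name_re.match(entry.name)
            if entry.is_dir() and m:
                highest = max(highest, int(m.group(1)))
    return str(highest + 1).zfill(width)
```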

View File

@@ -1,16 +1,11 @@
- # utils/prediction_utils.py
import logging
import re
from pathlib import Path
from typing import Optional, Dict, Any
- # Assuming these imports based on project structure and task description
from rule_structure import SourceRule, RuleSet, MapRule, AssetRule
- from configuration import load_preset # Assuming preset loading is handled here or similar
- # If RuleBasedPredictionHandler exists and is the intended mechanism:
- # from gui.rule_based_prediction_handler import RuleBasedPredictionHandler
- # Or, if we need to replicate its core logic:
+ from configuration import load_preset
from utils.structure_analyzer import analyze_archive_structure # Hypothetical utility
log = logging.getLogger(__name__)
@@ -46,7 +41,6 @@ def generate_source_rule_from_archive(archive_path: Path, config: Dict[str, Any]
log.debug(f"Generating SourceRule for archive: {archive_path.name}")
# --- 1. Extract Preset Name ---
match = PRESET_FILENAME_REGEX.match(archive_path.name)
if not match:
raise PredictionError(f"Filename '{archive_path.name}' does not match expected format '[preset]_filename.ext'. Cannot determine preset.")
@@ -54,10 +48,6 @@ def generate_source_rule_from_archive(archive_path: Path, config: Dict[str, Any]
preset_name = match.group(1)
log.info(f"Extracted preset name: '{preset_name}' from {archive_path.name}")
# --- 2. Load Preset Rules ---
# Option A: Presets are pre-loaded in config (e.g., under 'presets' key)
# preset_rules_dict = config.get('presets', {}).get(preset_name)
# Option B: Load preset dynamically using a utility
try:
# Assuming load_preset takes the name and maybe the base config/path
# Adjust based on the actual signature of load_preset
@@ -68,10 +58,9 @@ def generate_source_rule_from_archive(archive_path: Path, config: Dict[str, Any]
# or needs parsing into RuleSet. Let's assume it needs parsing.
# This part is highly dependent on how presets are stored and loaded.
# For now, let's assume preset_config IS the RuleSet dictionary.
- if not isinstance(preset_config.get('rules'), dict): # Basic validation
+ if not isinstance(preset_config.get('rules'), dict):
raise PredictionError(f"Preset '{preset_name}' does not contain a valid 'rules' dictionary.")
rule_set_dict = preset_config['rules']
# We need to deserialize this dict into RuleSet object
# Assuming RuleSet has a class method or similar for this
rule_set = RuleSet.from_dict(rule_set_dict) # Placeholder for actual deserialization
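`RuleSet.from_dict` is explicitly a placeholder here. A minimal deserializer consistent with the call site (and with the mock in the test block removed further down) might look like this; the field names are illustrative, since the real classes live in `rule_structure.py`:

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List

@dataclass
class MapRule:
    source_pattern: str = ""   # illustrative field
    target_map: str = ""       # illustrative field

@dataclass
class AssetRule:
    asset_type: str = ""       # illustrative field
    target_path: str = ""      # illustrative field

@dataclass
class RuleSet:
    map_rules: List[MapRule] = field(default_factory=list)
    asset_rules: List[AssetRule] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "RuleSet":
        # Same shape as the removed test mock: build rule objects from
        # keyword dictionaries under 'map_rules' / 'asset_rules'.
        return cls(
            map_rules=[MapRule(**mr) for mr in data.get("map_rules", [])],
            asset_rules=[AssetRule(**ar) for ar in data.get("asset_rules", [])],
        )
```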
@@ -86,26 +75,22 @@ def generate_source_rule_from_archive(archive_path: Path, config: Dict[str, Any]
log.debug(f"Successfully loaded RuleSet for preset: {preset_name}")
# --- 3. Generate SourceRule (Simplified Rule-Based Approach) ---
# This simulates what a RuleBasedPredictionHandler might do, but without
# needing the actual extracted files for *this* step. The rules themselves
# define the expected structure. The ProcessingEngine will later use this
# rule against the actual extracted files.
# Create the root SourceRule based on the archive name and the loaded RuleSet
# The actual structure (AssetRules, MapRules) comes directly from the RuleSet.
# We might need to adapt the archive name slightly (e.g., remove preset prefix)
# for the root node name, depending on desired output structure.
- root_name = archive_path.stem # Or further processing if needed
+ root_name = archive_path.stem
source_rule = SourceRule(name=root_name, rule_set=rule_set)
# Potentially add logic here if basic archive structure analysis *is* needed
# for rule generation (e.g., using utils.structure_analyzer if it exists)
# analyze_archive_structure(archive_path, source_rule) # Example
log.info(f"Generated initial SourceRule for '{archive_path.name}' based on preset '{preset_name}'.")
# --- 4. Return SourceRule ---
# No temporary workspace needed/created in this function based on current plan.
# Cleanup is not required here.
return source_rule
@@ -115,11 +100,9 @@ if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
log.info("Testing prediction_utils...")
- # Create dummy files/config for testing
dummy_archive = Path("./[TestPreset]_MyAsset.zip")
dummy_archive.touch()
- # Need a dummy preset file `Presets/TestPreset.json`
preset_dir = Path(__file__).parent.parent / "Presets"
preset_dir.mkdir(exist_ok=True)
dummy_preset_path = preset_dir / "TestPreset.json"
@@ -139,59 +122,11 @@ if __name__ == '__main__':
"settings": {}
}
"""
- # Need RuleSet.from_dict implementation for this to work
- # try:
- # with open(dummy_preset_path, 'w') as f:
- # f.write(dummy_preset_content)
- # log.info(f"Created dummy preset: {dummy_preset_path}")
- # # Dummy config - structure depends on actual implementation
- # dummy_config = {
- # 'paths': {'presets': str(preset_dir)},
- # # 'presets': { 'TestPreset': json.loads(dummy_preset_content) } # Alt if pre-loaded
- # }
- # # Mock load_preset if it's complex
- # original_load_preset = load_preset
- # def mock_load_preset(name):
- # if name == "TestPreset":
- # import json
- # return json.loads(dummy_preset_content)
- # else:
- # raise FileNotFoundError
- # load_preset = mock_load_preset # Monkey patch
- # # Mock RuleSet.from_dict
- # original_from_dict = RuleSet.from_dict
- # def mock_from_dict(data):
- # # Basic mock - replace with actual logic
- # mock_rule_set = RuleSet()
- # mock_rule_set.map_rules = [MapRule(**mr) for mr in data.get('map_rules', [])]
- # mock_rule_set.asset_rules = [AssetRule(**ar) for ar in data.get('asset_rules', [])]
- # return mock_rule_set
- # RuleSet.from_dict = mock_from_dict # Monkey patch
- # try:
- # generated_rule = generate_source_rule_from_archive(dummy_archive, dummy_config)
- # log.info(f"Successfully generated SourceRule: {generated_rule.name}")
- # log.info(f" RuleSet Map Rules: {len(generated_rule.rule_set.map_rules)}")
- # log.info(f" RuleSet Asset Rules: {len(generated_rule.rule_set.asset_rules)}")
- # # Add more detailed checks if needed
- # except (PredictionError, FileNotFoundError) as e:
- # log.error(f"Test failed: {e}")
- # except Exception as e:
- # log.exception("Unexpected error during test")
- # finally:
- # # Clean up dummy files
- # if dummy_archive.exists():
- # dummy_archive.unlink()
- # if dummy_preset_path.exists():
- # dummy_preset_path.unlink()
- # # Restore mocked functions
- # load_preset = original_load_preset
- # RuleSet.from_dict = original_from_dict
- # log.info("Test cleanup complete.")
log.warning("Note: Main execution block is commented out as it requires specific implementations of load_preset and RuleSet.from_dict.")

View File

@@ -1,4 +1,3 @@
- # utils/workspace_utils.py
import tempfile
import shutil
@@ -7,11 +6,10 @@ import logging
from pathlib import Path
from typing import Union
- # Get a logger for this module
log = logging.getLogger(__name__)
- # Define supported archive extensions (add more as needed, e.g., '.rar', '.7z')
- # Requires additional libraries like patoolib for non-zip formats.
+ # Add more archive extensions as needed (e.g., '.rar', '.7z').
+ # Non-zip formats may require additional libraries like patoolib.
SUPPORTED_ARCHIVES = {'.zip'}
def prepare_processing_workspace(input_path_str: Union[str, Path]) -> Path:
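The comments above point at patoolib for non-zip formats, and the hunk below removes a commented-out version of exactly that branch. A self-contained sketch of how the extraction dispatch could grow, assuming `pip install patool`:

```python
import zipfile
from pathlib import Path

def extract_archive_to(input_path: Path, workspace: Path) -> None:
    """Dispatch extraction by suffix; a sketch, not the module's actual code."""
    suffix = input_path.suffix.lower()
    if suffix == ".zip":
        with zipfile.ZipFile(input_path, "r") as zf:
            zf.extractall(workspace)
    elif suffix in {".rar", ".7z"}:
        import patoolib  # third-party; requires `pip install patool`
        patoolib.extract_archive(str(input_path), outdir=str(workspace))
    else:
        raise ValueError(f"Unsupported archive type: {suffix}")
```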
@@ -41,20 +39,17 @@ def prepare_processing_workspace(input_path_str: Union[str, Path]) -> Path:
if not input_path.exists():
raise FileNotFoundError(f"Input path does not exist: {input_path}")
# Create a secure temporary directory
try:
temp_workspace_dir = tempfile.mkdtemp(prefix="asset_proc_")
prepared_workspace_path = Path(temp_workspace_dir)
log.info(f"Created temporary workspace: {prepared_workspace_path}")
except OSError as e:
log.error(f"Failed to create temporary directory: {e}")
- raise # Re-raise the exception
+ raise
try:
# Check if input is directory or a supported archive file
if input_path.is_dir():
log.info(f"Input is a directory, copying contents to workspace: {input_path}")
# Copy directory contents into the temp workspace
shutil.copytree(input_path, prepared_workspace_path, dirs_exist_ok=True)
elif input_path.is_file() and input_path.suffix.lower() in SUPPORTED_ARCHIVES:
log.info(f"Input is a supported archive ({input_path.suffix}), extracting to workspace: {input_path}")
@@ -62,21 +57,16 @@ def prepare_processing_workspace(input_path_str: Union[str, Path]) -> Path:
with zipfile.ZipFile(input_path, 'r') as zip_ref:
zip_ref.extractall(prepared_workspace_path)
# Add elif blocks here for other archive types (e.g., using patoolib)
- # elif input_path.suffix.lower() in ['.rar', '.7z']:
- # import patoolib
- # patoolib.extract_archive(str(input_path), outdir=str(prepared_workspace_path))
else:
# This case should ideally not be reached if SUPPORTED_ARCHIVES is correct
raise ValueError(f"Archive type {input_path.suffix} marked as supported but no extraction logic defined.")
else:
# Handle unsupported input types
raise ValueError(f"Unsupported input type: {input_path}. Must be a directory or a supported archive ({', '.join(SUPPORTED_ARCHIVES)}).")
log.debug(f"Workspace preparation successful for: {input_path}")
return prepared_workspace_path
except (FileNotFoundError, ValueError, zipfile.BadZipFile, OSError, ImportError) as e:
# Clean up the created temp directory if preparation fails mid-way
log.error(f"Error during workspace preparation for {input_path}: {e}. Cleaning up workspace.")
if prepared_workspace_path.exists():
try:
@@ -84,4 +74,4 @@ def prepare_processing_workspace(input_path_str: Union[str, Path]) -> Path:
log.info(f"Cleaned up failed workspace: {prepared_workspace_path}")
except OSError as cleanup_error:
log.error(f"Failed to cleanup workspace {prepared_workspace_path} after error: {cleanup_error}")
- raise # Re-raise the original exception
+ raise
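The error path above deletes the workspace only when preparation fails, which implies the caller owns cleanup on success. A typical call site under that assumption (the input path and processing step are hypothetical):

```python
import shutil

workspace = prepare_processing_workspace("input/MyAsset.zip")  # hypothetical input
try:
    ...  # process the extracted files here
finally:
    # Caller-side cleanup; prepare_processing_workspace only removes the
    # directory itself when preparation fails.
    shutil.rmtree(workspace, ignore_errors=True)
```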