From 01c8f68ea018cfd8c584378d31eead10f42612c2 Mon Sep 17 00:00:00 2001 From: Rusfort Date: Sun, 4 May 2025 12:56:16 +0200 Subject: [PATCH] LLM Restructure - UNTESTED! --- .../02_Developer_Guide/03_Key_Components.md | 5 +- .../12_LLM_Predictor_Integration.md | 100 +++- config/app_settings.json | 523 ++++++++---------- configuration.py | 2 +- gui/llm_prediction_handler.py | 231 +++++--- requirements.txt | 3 +- 6 files changed, 467 insertions(+), 397 deletions(-) diff --git a/Documentation/02_Developer_Guide/03_Key_Components.md b/Documentation/02_Developer_Guide/03_Key_Components.md index d547ecf..79cbaad 100644 --- a/Documentation/02_Developer_Guide/03_Key_Components.md +++ b/Documentation/02_Developer_Guide/03_Key_Components.md @@ -136,8 +136,9 @@ An experimental predictor (inheriting from `BasePredictionHandler`) that uses a * Takes an input source identifier, file list, and `Configuration` object. * Interacts with the `LLMInteractionHandler` to send data to the LLM and receive predictions. -* Parses the LLM response to construct a `SourceRule` hierarchy. -* Emits the `prediction_signal` with the generated `SourceRule` object. +* **Parses the LLM's JSON response**: It expects a specific two-part JSON structure (see `12_LLM_Predictor_Integration.md`). It first sanitizes the response (removing comments/markdown) and then parses the JSON. +* **Constructs `SourceRule`**: It groups files based on the `proposed_asset_group_name` from the JSON, assigns the final `asset_type` using the `asset_group_classifications` map, and builds the complete `SourceRule` hierarchy. +* Emits the `prediction_signal` with the generated `SourceRule` object or `error_signal` on failure. 
### `LLMInteractionHandler` (`gui/llm_interaction_handler.py`) diff --git a/Documentation/02_Developer_Guide/12_LLM_Predictor_Integration.md b/Documentation/02_Developer_Guide/12_LLM_Predictor_Integration.md index f5cd0cd..d6d2650 100644 --- a/Documentation/02_Developer_Guide/12_LLM_Predictor_Integration.md +++ b/Documentation/02_Developer_Guide/12_LLM_Predictor_Integration.md @@ -18,46 +18,94 @@ The LLM Predictor is configured via new settings in the `config/app_settings.jso The prompt structure is crucial for effective classification. It should clearly instruct the LLM on the task and the expected output format. Placeholders within the prompt template (e.g., `{asset_name}`) are dynamically replaced with relevant data before the request is sent. -## `LLMPredictionHandler` +## Expected LLM Output Format (Refactored) -The `gui/llm_prediction_handler.py` module contains the `LLMPredictionHandler` class, which is responsible for interacting with the LLM API. It operates in a separate thread to avoid blocking the GUI during potentially long API calls. +The LLM is now expected to return a JSON object containing two distinct parts. This structure helps the LLM maintain context across multiple files belonging to the same conceptual asset and allows for a more robust grouping mechanism. -Key methods: +**Rationale:** The previous implicit format made it difficult for the LLM to consistently group related files (e.g., different texture maps for the same material) under a single asset, especially in complex archives. The new two-part structure explicitly separates file-level analysis from asset-level classification, improving accuracy and consistency. -- `run()`: The main method executed when the thread starts. It processes prediction requests from a queue. -- `_prepare_prompt(asset_name)`: Constructs the final prompt string by loading the template from settings, including examples, and replacing placeholders like `{asset_name}`. 
-- `_call_llm(prompt)`: Sends the prepared prompt to the configured LLM API endpoint using the `requests` library and handles the HTTP communication. -- `_parse_llm_response(response)`: Parses the response received from the LLM API to extract the predicted classification. +**Structure:** + +```json +{ + "individual_file_analysis": [ + { + "relative_file_path": "Textures/Wood_Floor_01/Wood_Floor_01_BaseColor.png", + "classified_file_type": "BaseColor", + "proposed_asset_group_name": "Wood_Floor_01" + }, + { + "relative_file_path": "Textures/Wood_Floor_01/Wood_Floor_01_Roughness.png", + "classified_file_type": "Roughness", + "proposed_asset_group_name": "Wood_Floor_01" + }, + { + "relative_file_path": "Textures/Metal_Plate_03/Metal_Plate_03_Metallic.jpg", + "classified_file_type": "Metallic", + "proposed_asset_group_name": "Metal_Plate_03" + } + ], + "asset_group_classifications": { + "Wood_Floor_01": "PBR Material", + "Metal_Plate_03": "PBR Material" + } +} +``` + +- **`individual_file_analysis`**: A list where each object represents a single file within the source. + - `relative_file_path`: The path of the file relative to the source root. + - `classified_file_type`: The LLM's prediction for the *type* of this specific file (e.g., "BaseColor", "Normal", "Model"). This corresponds to the `item_type` in the `FileRule`. + - `proposed_asset_group_name`: A name suggested by the LLM to group this file with others belonging to the same conceptual asset. This is used internally by the parser. +- **`asset_group_classifications`**: A dictionary mapping the `proposed_asset_group_name` values from the list above to a final `asset_type` (e.g., "PBR Material", "HDR Environment"). + +## `LLMPredictionHandler` (Refactored Parsing) + +The `gui/llm_prediction_handler.py` module contains the `LLMPredictionHandler` class (inheriting from `BasePredictionHandler`), which orchestrates the LLM prediction process. It runs in a background thread managed by the `MainWindow`'s `QThreadPool`. 
+ +Key Responsibilities & Methods: + +- **Initialization**: Takes the source identifier, file list, and `Configuration` object. +- **`run()`**: The main method executed by the thread pool. It prepares the prompt, calls the LLM (via `LLMInteractionHandler`), parses the response, and emits the result or error. +- **Interaction**: Uses `LLMInteractionHandler` to handle the actual prompt construction and API communication (details in `03_Key_Components.md` and `llm_interaction_handler.py`). +- **`_parse_llm_response(response_text)`**: This method contains the **new parsing logic**: + 1. **Sanitization**: Removes common non-JSON elements like comments (`//`, `/* */`) and markdown code fences (```json ... ```) from the raw `response_text` to increase the likelihood of successful JSON parsing. + 2. **JSON Parsing**: Parses the sanitized string into a Python dictionary. + 3. **Structure Validation**: Checks if the parsed dictionary contains the required top-level keys: `individual_file_analysis` (list) and `asset_group_classifications` (dict). + 4. **Grouping**: Iterates through the `individual_file_analysis` list. For each file: + * Retrieves the `proposed_asset_group_name`. + * Uses the `asset_group_classifications` dictionary to find the corresponding final `asset_type` for that group. + * Creates or updates an `AssetRule` for the group name, assigning the determined `asset_type`. + * Creates a `FileRule` for the specific file, assigning its `classified_file_type` as the `item_type`. + 5. **Hierarchy Construction**: Organizes the created `AssetRule` and `FileRule` objects into a single `SourceRule` object representing the entire source. + 6. **Validation**: Ensures all files from the input list were accounted for in the LLM response. Signals: -- `prediction_ready(asset_name, prediction_result)`: Emitted when a prediction is successfully received and parsed for a given asset. 
-- `prediction_error(asset_name, error_message)`: Emitted if an error occurs during the prediction process (e.g., API call failure, parsing error). - -The handler uses the `requests` library to make HTTP POST requests to the LLM endpoint, including the API key in the headers for authentication. +- `prediction_signal(source_id, source_rule)`: Emitted when a prediction is successfully parsed and the `SourceRule` hierarchy is constructed. The `source_rule` argument contains the complete hierarchy. +- `error_signal(source_id, error_message)`: Emitted if an error occurs during any stage (API call, sanitization, parsing, validation). ## GUI Integration -The `gui/main_window.py` module integrates the LLM Predictor feature into the main application window. +Integration remains largely the same at the `MainWindow` level: -Integration points: +- The LLM predictor is selected via the preset dropdown or triggered explicitly. +- `MainWindow` manages the `QThreadPool` and starts the `LLMPredictionHandler` task. +- Slots in `MainWindow` connect to the `prediction_signal` and `error_signal` of the handler. -- **Preset Dropdown Option:** A new option is added to the preset dropdown to enable LLM prediction as the classification method. -- **Re-interpret Button:** The "Re-interpret" button's functionality is extended to trigger LLM prediction when the LLM method is selected. -- `llm_processing_queue`: A queue (`Queue` object) is used to hold asset names that require LLM prediction. The `LLMPredictionHandler` thread consumes items from this queue. -- `_start_llm_prediction(asset_name)`: A method to add an asset name to the `llm_processing_queue` and ensure the `LLMPredictionHandler` thread is running. -- `_process_next_llm_item()`: A slot connected to the `prediction_ready` and `prediction_error` signals. It processes the results received from the `LLMPredictionHandler` and updates the GUI accordingly. 
-- **Signal Handling:** Connections are established between the `LLMPredictionHandler`'s signals (`prediction_ready`, `prediction_error`) and slots in `main_window.py` to handle prediction results and errors asynchronously. +## Model Integration (Refactored) -## Model Integration +The `gui/unified_view_model.py` module's `update_rules_for_sources` method still incorporates the results. -The `gui/unified_view_model.py` module, specifically the `update_rules_for_sources` method, is responsible for incorporating the prediction results into the application's data model. When a prediction is received via the `prediction_ready` signal, the `update_rules_for_sources` method is called to update the classification rules for the corresponding asset source based on the LLM's output. +- When the `prediction_signal` is received from `LLMPredictionHandler`, the accompanying `SourceRule` object (which has already been constructed based on the new two-part JSON parsing logic) is passed to `update_rules_for_sources`. +- This method then merges the new `SourceRule` hierarchy into the existing model data, preserving user overrides where applicable. The internal structure of the received `SourceRule` now directly reflects the groupings and classifications determined by the LLM and the new parser. -## Error Handling +## Error Handling (Updated) -Error handling for the LLM Predictor includes: +Error handling now covers additional scenarios: -- **LLM API Errors:** The `_call_llm` method in `LLMPredictionHandler` catches exceptions during the HTTP request and emits the `prediction_error` signal with a relevant error message. -- **Parsing Errors:** The `_parse_llm_response` method handles potential errors during the parsing of the LLM's response, emitting `prediction_error` if the response format is unexpected or invalid. +- **LLM API Errors:** Handled by `LLMInteractionHandler` and propagated via the `error_signal`. 
+- **Sanitization/Parsing Errors:** The `_parse_llm_response` method catches errors during comment/markdown removal and `json.loads()`. +- **Structure Errors:** Explicit checks for the presence and types of `individual_file_analysis` and `asset_group_classifications` keys in the parsed JSON. +- **Data Consistency Errors:** Validation errors if the parsed data doesn't align (e.g., a `proposed_asset_group_name` missing from `asset_group_classifications`, or files missing from the analysis). -These errors are then handled in `main_window.py` by the slot connected to the `prediction_error` signal, typically by displaying an error message to the user. \ No newline at end of file +These errors trigger the `error_signal`, allowing `MainWindow` to inform the user. \ No newline at end of file diff --git a/config/app_settings.json b/config/app_settings.json index 4287481..67760a4 100644 --- a/config/app_settings.json +++ b/config/app_settings.json @@ -268,310 +268,263 @@ { "input": "MessyTextures/Concrete_Damage_Set/concrete_col.png\nMessyTextures/Concrete_Damage_Set/concrete_N.png\nMessyTextures/Concrete_Damage_Set/concrete_rough.jpg\nMessyTextures/Concrete_Damage_Set/height_map_concrete.tif\nMessyTextures/Concrete_Damage_Set/Thumbs.db\nMessyTextures/Fabric_Pattern/pattern_01_diffuse.tga\nMessyTextures/Fabric_Pattern/pattern_01_ao.png\nMessyTextures/Fabric_Pattern/pattern_01_normal.png\nMessyTextures/Fabric_Pattern/notes.txt\nMessyTextures/Fabric_Pattern/variant_blue_diffuse.tga\nMessyTextures/Fabric_Pattern/fabric_flat.jpg", "output": { - "predicted_assets": [ - { - "suggested_asset_name": "Concrete_Damage_01", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "MessyTextures/Concrete_Damage_Set/concrete_col.png", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "MessyTextures/Concrete_Damage_Set/concrete_N.png", - "predicted_file_type": "MAP_NRM" - }, - { - "file_path": "MessyTextures/Concrete_Damage_Set/concrete_rough.jpg", - 
"predicted_file_type": "MAP_ROUGH" - }, - { - "file_path": "MessyTextures/Concrete_Damage_Set/height_map_concrete.tif", - "predicted_file_type": "MAP_DISP" - }, - { - "file_path": "MessyTextures/Concrete_Damage_Set/Thumbs.db", - "predicted_file_type": "FILE_IGNORE" - } - ] - }, - { - "suggested_asset_name": "Fabric_Pattern_01", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "MessyTextures/Fabric_Pattern/pattern_01_diffuse.tga", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "MessyTextures/Fabric_Pattern/pattern_01_ao.png", - "predicted_file_type": "MAP_AO" - }, - { - "file_path": "MessyTextures/Fabric_Pattern/pattern_01_normal.png", - "predicted_file_type": "MAP_NRM" - }, - { - "file_path": "MessyTextures/Fabric_Pattern/variant_blue_diffuse.tga", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "MessyTextures/Fabric_Pattern/fabric_flat.jpg", - "predicted_file_type": "EXTRA" - }, - { - "file_path": "MessyTextures/Fabric_Pattern/notes.txt", - "predicted_file_type": "EXTRA" - } - ] - } - ] + "individual_file_analysis": [ + { + "relative_file_path": "MessyTextures/Concrete_Damage_Set/concrete_col.png", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Concrete_Damage_Set" + }, + { + "relative_file_path": "MessyTextures/Concrete_Damage_Set/concrete_N.png", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "Concrete_Damage_Set" + }, + { + "relative_file_path": "MessyTextures/Concrete_Damage_Set/concrete_rough.jpg", + "classified_file_type": "MAP_ROUGH", + "proposed_asset_group_name": "Concrete_Damage_Set" + }, + { + "relative_file_path": "MessyTextures/Concrete_Damage_Set/height_map_concrete.tif", + "classified_file_type": "MAP_DISP", + "proposed_asset_group_name": "Concrete_Damage_Set" + }, + { + "relative_file_path": "MessyTextures/Concrete_Damage_Set/Thumbs.db", + "classified_file_type": "FILE_IGNORE", + "proposed_asset_group_name": null + }, + { + "relative_file_path": 
"MessyTextures/Fabric_Pattern/pattern_01_diffuse.tga", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Fabric_Pattern_01" + }, + { + "relative_file_path": "MessyTextures/Fabric_Pattern/pattern_01_ao.png", + "classified_file_type": "MAP_AO", + "proposed_asset_group_name": "Fabric_Pattern_01" + }, + { + "relative_file_path": "MessyTextures/Fabric_Pattern/pattern_01_normal.png", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "Fabric_Pattern_01" + }, + { + "relative_file_path": "MessyTextures/Fabric_Pattern/notes.txt", + "classified_file_type": "EXTRA", + "proposed_asset_group_name": "Fabric_Pattern_01" + }, + { + "relative_file_path": "MessyTextures/Fabric_Pattern/variant_blue_diffuse.tga", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Fabric_Pattern_01" + }, + { + "relative_file_path": "MessyTextures/Fabric_Pattern/fabric_flat.jpg", + "classified_file_type": "EXTRA", + "proposed_asset_group_name": "Fabric_Pattern_01" + } + ], + "asset_group_classifications": { + "Concrete_Damage_Set": "Surface", + "Fabric_Pattern_01": "Surface" + } } }, { "input": "SciFi_Drone/Drone_Model.fbx\nSciFi_Drone/Textures/Drone_BaseColor.png\nSciFi_Drone/Textures/Drone_Metallic.png\nSciFi_Drone/Textures/Drone_Roughness.png\nSciFi_Drone/Textures/Drone_Normal.png\nSciFi_Drone/Textures/Drone_Emissive.jpg\nSciFi_Drone/ReferenceImages/concept.jpg", "output": { - "predicted_assets": [ - { - "suggested_asset_name": "SciFi_Drone", - "predicted_asset_type": "Model", - "files": [ - { - "file_path": "SciFi_Drone/Drone_Model.fbx", - "predicted_file_type": "MODEL" - }, - { - "file_path": "SciFi_Drone/Textures/Drone_BaseColor.png", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "SciFi_Drone/Textures/Drone_Metallic.png", - "predicted_file_type": "MAP_METAL" - }, - { - "file_path": "SciFi_Drone/Textures/Drone_Roughness.png", - "predicted_file_type": "MAP_ROUGH" - }, - { - "file_path": "SciFi_Drone/Textures/Drone_Normal.png", - 
"predicted_file_type": "MAP_NRM" - }, - { - "file_path": "SciFi_Drone/Textures/Drone_Emissive.jpg", - "predicted_file_type": "EXTRA" - }, - { - "file_path": "SciFi_Drone/ReferenceImages/concept.jpg", - "predicted_file_type": "EXTRA" - } - ] - } - ] + "individual_file_analysis": [ + { + "relative_file_path": "SciFi_Drone/Drone_Model.fbx", + "classified_file_type": "MODEL", + "proposed_asset_group_name": "SciFi_Drone" + }, + { + "relative_file_path": "SciFi_Drone/Textures/Drone_BaseColor.png", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "SciFi_Drone" + }, + { + "relative_file_path": "SciFi_Drone/Textures/Drone_Metallic.png", + "classified_file_type": "MAP_METAL", + "proposed_asset_group_name": "SciFi_Drone" + }, + { + "relative_file_path": "SciFi_Drone/Textures/Drone_Roughness.png", + "classified_file_type": "MAP_ROUGH", + "proposed_asset_group_name": "SciFi_Drone" + }, + { + "relative_file_path": "SciFi_Drone/Textures/Drone_Normal.png", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "SciFi_Drone" + }, + { + "relative_file_path": "SciFi_Drone/Textures/Drone_Emissive.jpg", + "classified_file_type": "EXTRA", + "proposed_asset_group_name": "SciFi_Drone" + }, + { + "relative_file_path": "SciFi_Drone/ReferenceImages/concept.jpg", + "classified_file_type": "EXTRA", + "proposed_asset_group_name": "SciFi_Drone" + } + ], + "asset_group_classifications": { + "SciFi_Drone": "Model" + } } }, { "input": "21_hairs_deposits.tif\n22_hairs_fabric.tif\n23_hairs_fibres.tif\n24_hairs_fibres.tif\n25_bonus_isolatedFingerprints.tif\n26_bonus_isolatedPalmprint.tif\n27_metal_aluminum.tif\n28_metal_castIron.tif\n29_scratcehes_deposits_shapes.tif\n30_scratches_deposits.tif", "output": { - "predicted_assets": [ - { - "suggested_asset_name": "21-Hairs-Deposits", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "21_hairs_deposits.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": 
"22-Hairs-Fabric", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "22_hairs_fabric.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "23-Hairs-Deposits", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "23_hairs_fibres.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "24-Hairs-Fibres", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "24_hairs_fibres.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "27-MetalAluminium", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "27_metal_aluminum.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "28-MetalCastiron", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "28_metal_castIron.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "29-Scratches-Deposits-Shapes", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "29_scratcehes_deposits_shapes.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "30-Scrathes-Deposits", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "30_scratches_deposits.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "Bonus-IsolatedFingerprints", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "25_bonus_isolatedFingerprints.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - }, - { - "suggested_asset_name": "Bonus-IsolatedPalmprint", - "predicted_asset_type": "UtilityMap", - "files": [ - { - "file_path": "26_bonus_isolatedPalmprint.tif", - "predicted_file_type": "MAP_IMPERFECTION" - } - ] - } - ] + "individual_file_analysis": [ + { + "relative_file_path": "21_hairs_deposits.tif", + "classified_file_type": 
"MAP_IMPERFECTION", + "proposed_asset_group_name": "Hairs_Deposits_21" + }, + { + "relative_file_path": "22_hairs_fabric.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Hairs_Fabric_22" + }, + { + "relative_file_path": "23_hairs_fibres.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Hairs_Fibres_23" + }, + { + "relative_file_path": "24_hairs_fibres.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Hairs_Fibres_24" + }, + { + "relative_file_path": "25_bonus_isolatedFingerprints.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Bonus_IsolatedFingerprints_25" + }, + { + "relative_file_path": "26_bonus_isolatedPalmprint.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Bonus_IsolatedPalmprint_26" + }, + { + "relative_file_path": "27_metal_aluminum.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Metal_Aluminum_27" + }, + { + "relative_file_path": "28_metal_castIron.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Metal_CastIron_28" + }, + { + "relative_file_path": "29_scratcehes_deposits_shapes.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Scratches_Deposits_Shapes_29" + }, + { + "relative_file_path": "30_scratches_deposits.tif", + "classified_file_type": "MAP_IMPERFECTION", + "proposed_asset_group_name": "Scratches_Deposits_30" + } + ], + "asset_group_classifications": { + "Hairs_Deposits_21": "UtilityMap", + "Hairs_Fabric_22": "UtilityMap", + "Hairs_Fibres_23": "UtilityMap", + "Hairs_Fibres_24": "UtilityMap", + "Bonus_IsolatedFingerprints_25": "UtilityMap", + "Bonus_IsolatedPalmprint_26": "UtilityMap", + "Metal_Aluminum_27": "UtilityMap", + "Metal_CastIron_28": "UtilityMap", + "Scratches_Deposits_Shapes_29": "UtilityMap", + "Scratches_Deposits_30": "UtilityMap" + } } }, { "input": 
"Part1/TextureSupply_Boards001_A_28x300cm-Albedo.jpg\nPart1/TextureSupply_Boards001_A_28x300cm-Normal.jpg\nPart1/TextureSupply_Boards001_B_28x300cm-Albedo.jpg\nPart1/TextureSupply_Boards001_B_28x300cm-Normal.jpg\nPart1/TextureSupply_Boards001_C_28x300cm-Albedo.jpg\nPart1/TextureSupply_Boards001_C_28x300cm-Normal.jpg\nPart1/TextureSupply_Boards001_D_28x300cm-Albedo.jpg\nPart1/TextureSupply_Boards001_D_28x300cm-Normal.jpg\nPart1/TextureSupply_Boards001_E_28x300cm-Albedo.jpg\nPart1/TextureSupply_Boards001_E_28x300cm-Normal.jpg\nPart1/TextureSupply_Boards001_F_28x300cm-Albedo.jpg\nPart1/TextureSupply_Boards001_F_28x300cm-Normal.jpg", "output": { - "predicted_assets": [ - { - "suggested_asset_name": "Boards001_A", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "Part1/TextureSupply_Boards001_A_28x300cm-Albedo.jpg", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "Part1/TextureSupply_Boards001_A_28x300cm-Normal.jpg", - "predicted_file_type": "MAP_NRM" - } - ] - }, - { - "suggested_asset_name": "Boards001_B", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "Part1/TextureSupply_Boards001_B_28x300cm-Albedo.jpg", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "Part1/TextureSupply_Boards001_B_28x300cm-Normal.jpg", - "predicted_file_type": "MAP_NRM" - } - ] - }, - { - "suggested_asset_name": "Boards001_C", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "Part1/TextureSupply_Boards001_C_28x300cm-Albedo.jpg", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "Part1/TextureSupply_Boards001_C_28x300cm-Normal.jpg", - "predicted_file_type": "MAP_NRM" - } - ] - }, - { - "suggested_asset_name": "Boards001_D", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "Part1/TextureSupply_Boards001_D_28x300cm-Albedo.jpg", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "Part1/TextureSupply_Boards001_D_28x300cm-Normal.jpg", - "predicted_file_type": "MAP_NRM" - } - ] - }, - 
{ - "suggested_asset_name": "Boards001_E", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "Part1/TextureSupply_Boards001_E_28x300cm-Albedo.jpg", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "Part1/TextureSupply_Boards001_E_28x300cm-Normal.jpg", - "predicted_file_type": "MAP_NRM" - } - ] - }, - { - "suggested_asset_name": "Boards001_F", - "predicted_asset_type": "Surface", - "files": [ - { - "file_path": "Part1/TextureSupply_Boards001_F_28x300cm-Albedo.jpg", - "predicted_file_type": "MAP_COL" - }, - { - "file_path": "Part1/TextureSupply_Boards001_F_28x300cm-Normal.jpg", - "predicted_file_type": "MAP_NRM" - } - ] - } - ] + "individual_file_analysis": [ + { + "relative_file_path": "Part1/TextureSupply_Boards001_A_28x300cm-Albedo.jpg", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Boards001_A" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_A_28x300cm-Normal.jpg", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "Boards001_A" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_B_28x300cm-Albedo.jpg", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Boards001_B" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_B_28x300cm-Normal.jpg", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "Boards001_B" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_C_28x300cm-Albedo.jpg", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Boards001_C" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_C_28x300cm-Normal.jpg", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "Boards001_C" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_D_28x300cm-Albedo.jpg", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Boards001_D" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_D_28x300cm-Normal.jpg", + "classified_file_type": 
"MAP_NRM", + "proposed_asset_group_name": "Boards001_D" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_E_28x300cm-Albedo.jpg", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Boards001_E" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_E_28x300cm-Normal.jpg", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "Boards001_E" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_F_28x300cm-Albedo.jpg", + "classified_file_type": "MAP_COL", + "proposed_asset_group_name": "Boards001_F" + }, + { + "relative_file_path": "Part1/TextureSupply_Boards001_F_28x300cm-Normal.jpg", + "classified_file_type": "MAP_NRM", + "proposed_asset_group_name": "Boards001_F" + } + ], + "asset_group_classifications": { + "Boards001_A": "Surface", + "Boards001_B": "Surface", + "Boards001_C": "Surface", + "Boards001_D": "Surface", + "Boards001_E": "Surface", + "Boards001_F": "Surface" + } } } ], - "llm_endpoint_url": "http://100.65.14.122:1234/v1/chat/completions", + "llm_endpoint_url": "https://api.llm.gestaltservers.com/v1/chat/completions", "llm_api_key": "", - "llm_model_name": "", + "llm_model_name": "qwen2.5-coder:3b", "llm_temperature": 0.5, "llm_request_timeout": 120, - "llm_predictor_prompt": "You are an expert asset classification system. Your task is to analyze a list of file paths from a directory, identify patterns based on directory structure and filenames, and then group related files into logical assets. For each grouped asset, you must suggest a concise asset name, determine the overall asset type, and for each file within that asset, assign its specific file type.\n\nDefinitions:\n\nAsset Types: These define the overall category of an asset. Use one of the following keys for predicted_asset_type:\njson\n{ASSET_TYPE_DEFINITIONS}\n\n\nFile Types: These define the specific purpose of each file. 
Use one of the following keys for predicted_file_type:\njson\n{FILE_TYPE_DEFINITIONS}\n\n\nCore Task & Grouping Logic:\n\n1. Analyze Input: Examine the provided FILE_LIST. Pay close attention to directory paths and filenames (including prefixes, suffixes, separators like underscores or hyphens, and file extensions).\n2. Identify Potential Assets: Look for patterns that indicate files belong together:\n - Common Base Name: Files sharing a significant common prefix before map-type identifiers (e.g., Concrete_Damage_Set/concrete_ followed by col.png, N.png, rough.jpg).\n - Directory Grouping: Files located within the same immediate directory are often related, especially if their names follow a pattern (e.g., all files directly under SciFi_Drone/Textures/).\n - Model Association: If a MODEL file type (like .fbx, .obj) is present, group it with texture files that share its base name or are located in a plausible associated directory (like Textures/).\n - Single-File Assets (Utility Maps): Files whose names strongly suggest a UtilityMap type (e.g., scratches.tif, FlowMap.png, 21_hairs_deposits.tif) should typically form their own asset, unless they clearly belong to a larger PBR set based on naming conventions. Remember UtilityMap assets usually contain only one file as per their definition.\n - Variations: Files indicating variations (e.g., _A, _B or _variant_blue) should be grouped logically.\n - If variations represent complete, distinct sets (like Boards001_A and Boards001_B in the examples), create separate assets for each variation.\n - If variations seem like alternative maps or supplementary files for a single core asset (like pattern_01_diffuse.tga and variant_blue_diffuse.tga in the examples), group them under one asset. Use the base name (e.g., Fabric_Pattern_01) for the asset.\n3. Group Files: Based on the identified patterns, group the file paths into logical predicted_assets.\n4. 
Determine Asset Type: For each asset group, determine the most appropriate predicted_asset_type by considering the types of files it contains (e.g., presence of a .fbx suggests Model; multiple PBR maps like MAP_COL, MAP_NRM, MAP_ROUGH suggest Surface; a single imperfection map suggests UtilityMap). Refer to the ASSET_TYPE_DEFINITIONS.\n5. Suggest Asset Name: For each asset, generate a suggested_asset_name. This should be concise and derived from the common base filename or the immediate parent directory name. Clean up the name (e.g., use CamelCase or underscores consistently, remove redundant info like dimensions if not essential).\n6. Assign File Types: For each file_path within an asset, determine the most appropriate predicted_file_type based on its name, extension, and context within the asset. Use the keys from FILE_TYPE_DEFINITIONS.\n - Use FILE_IGNORE for files that should be ignored (e.g., Thumbs.db, .DS_Store).\n - Use EXTRA for files that belong to the asset but don't fit a standard map type (e.g., previews, text files, non-standard maps like Emissive unless you add a specific type for it).\n\nInput File List:\n\ntext\n{FILE_LIST}\n\n\nOutput Format:\n\nYour response MUST be ONLY a single, perfectly valid JSON object adhering strictly to the structure below. Do NOT include any text, explanations, or introductory phrases before or after the JSON object. Ensure all strings are correctly quoted and escaped, and there are NO trailing commas or comments (//, /* */).\n\nCRITICAL: The output must be strictly valid JSON parsable by standard libraries.\n\njson\n{\n \"predicted_assets\": [\n {\n \"suggested_asset_name\": \"string\", // Concise asset name derived from common file parts or directory\n \"predicted_asset_type\": \"string\", // Key from Asset Types definitions\n \"files\": [\n {\n \"file_path\": \"string\", // Exact relative path from the input list\n \"predicted_file_type\": \"string\" // Key from File Types definitions\n },\n // ... 
more files\n ]\n },\n // ... more assets\n ]\n}\n\n\nExamples:\n\nHere are examples of input file lists and the desired JSON output, illustrating the grouping logic:\n\njson\n[\n {EXAMPLE_INPUT_OUTPUT_PAIRS}\n]\n\n\nNow, process the provided FILE_LIST and generate ONLY the JSON output according to these instructions." + "llm_predictor_prompt": "You are an expert asset classification system. Your task is to analyze a list of file paths, understand their relationships based on naming and directory structure, and output a structured JSON object that classifies each file individually and then classifies the logical asset groups they belong to.\n\nDefinitions:\n\nAsset Types: These define the overall category of a logical asset group. Use one of the following keys when classifying asset groups:\njson\n{ASSET_TYPE_DEFINITIONS}\n\n\nFile Types: These define the specific purpose of each individual file. Use one of the following keys when classifying individual files:\njson\n{FILE_TYPE_DEFINITIONS}\n\n\nCore Task & Logic:\n\n1. **Individual File Analysis:**\n * Examine each `relative_file_path` in the input `FILE_LIST`.\n * For EACH file, determine its most likely `classified_file_type` using the `FILE_TYPE_DEFINITIONS`. Pay attention to filename suffixes, keywords, and extensions. Use `FILE_IGNORE` for files like `Thumbs.db` or `.DS_Store`. Use `EXTRA` for previews, metadata, or unidentifiable maps.\n * For EACH file, propose a logical `proposed_asset_group_name` (string). 
This name should represent the asset the file likely belongs to, based on common base names (e.g., `WoodFloor01` from `WoodFloor01_col.png`, `WoodFloor01_nrm.png`) or directory structure (e.g., `SciFi_Drone` for files within that folder).\n * Files that seem to be standalone utility maps (like `scratches.png`, `FlowMap.tif`) should get a unique group name derived from their filename (e.g., `Scratches`, `FlowMap`).\n * If a file doesn't seem to belong to any logical group (e.g., a stray readme file in the root), you can propose `null` or a generic name like `Miscellaneous`.\n * Be consistent with the proposed names for files belonging to the same logical asset.\n * Populate the `individual_file_analysis` array with one object for *every* file in the input list, containing `relative_file_path`, `classified_file_type`, and `proposed_asset_group_name`.\n\n2. **Asset Group Classification:**\n * Collect all unique, non-null `proposed_asset_group_name` values generated in the previous step.\n * For EACH unique group name, determine the overall `asset_type` (using `ASSET_TYPE_DEFINITIONS`) based on the types of files assigned to that group name in the `individual_file_analysis`.\n * Example: If files proposed as `AssetGroup1` include `MAP_COL`, `MAP_NRM`, `MAP_ROUGH`, classify `AssetGroup1` as `Surface`.\n * Example: If files proposed as `AssetGroup2` include `MODEL` and texture maps, classify `AssetGroup2` as `Model`.\n * Example: If `AssetGroup3` only has one file classified as `MAP_IMPERFECTION`, classify `AssetGroup3` as `UtilityMap`.\n * Populate the `asset_group_classifications` dictionary, mapping each unique `proposed_asset_group_name` to its determined `asset_type`.\n\nInput File List:\n\ntext\n{FILE_LIST}\n\n\nOutput Format:\n\nYour response MUST be ONLY a single JSON object. You MAY include comments (using // or /* */) within the JSON structure for clarification if needed, but the core structure must be valid JSON. 
Do NOT include any text, explanations, or introductory phrases before or after the JSON object itself. Ensure all strings are correctly quoted and escaped.\n\nCRITICAL: The output JSON structure must strictly adhere to the following format:\n\n```json\n{\n \"individual_file_analysis\": [\n {\n // Optional comment about this file\n \"relative_file_path\": \"string\", // Exact relative path from the input list\n \"classified_file_type\": \"string\", // Key from FILE_TYPE_DEFINITIONS\n \"proposed_asset_group_name\": \"string_or_null\" // Your suggested group name for this file\n }\n // ... one object for EVERY file in the input list\n ],\n \"asset_group_classifications\": {\n // Dictionary mapping unique proposed group names to asset types\n \"ProposedGroupName1\": \"string\", // Key: proposed_asset_group_name, Value: Key from ASSET_TYPE_DEFINITIONS\n \"ProposedGroupName2\": \"string\"\n // ... one entry for each unique, non-null proposed_asset_group_name\n }\n}\n```\n\nExamples:\n\nHere are examples of input file lists and the desired JSON output, illustrating the two-part structure:\n\njson\n[\n {EXAMPLE_INPUT_OUTPUT_PAIRS}\n]\n\n\nNow, process the provided FILE_LIST and generate ONLY the JSON output according to these instructions. Remember to include an entry in `individual_file_analysis` for every single input file path." 
} \ No newline at end of file diff --git a/configuration.py b/configuration.py index daa83f0..bb39f11 100644 --- a/configuration.py +++ b/configuration.py @@ -13,7 +13,7 @@ log = logging.getLogger(__name__) # Use logger defined in main.py # Assumes config/ and presets/ are relative to this file's location BASE_DIR = Path(__file__).parent APP_SETTINGS_PATH = BASE_DIR / "config" / "app_settings.json" -PRESETS_DIR = BASE_DIR / "presets" +PRESETS_DIR = BASE_DIR / "Presets" # --- Custom Exception --- class ConfigurationError(Exception): diff --git a/gui/llm_prediction_handler.py b/gui/llm_prediction_handler.py index 0582237..8c7b459 100644 --- a/gui/llm_prediction_handler.py +++ b/gui/llm_prediction_handler.py @@ -236,124 +236,191 @@ class LLMPredictionHandler(BasePredictionHandler): def _parse_llm_response(self, llm_response_json_str: str) -> List[SourceRule]: """ - Parses the LLM's JSON response string into a list of SourceRule objects. + Parses the LLM's JSON response string (new two-part format) into a + list containing a single SourceRule object. + Includes sanitization for comments and markdown fences. """ # Note: Exceptions (JSONDecodeError, ValueError) raised here # will be caught by the _perform_prediction method's handler. - # Strip potential markdown code fences before parsing + # --- Sanitize Input String --- clean_json_str = llm_response_json_str.strip() + + # 1. Remove multi-line /* */ comments + clean_json_str = re.sub(r'/\*.*?\*/', '', clean_json_str, flags=re.DOTALL) + + # 2. Remove single-line // comments (handle potential URLs carefully) + # Only remove // if it's likely a comment (e.g., whitespace before it, + # or at the start of a line after stripping leading whitespace). 
+ lines = clean_json_str.splitlines() + cleaned_lines = [] + for line in lines: + stripped_line = line.strip() + # Find the first // that isn't preceded by a : (to avoid breaking URLs like http://) + comment_index = -1 + search_start = 0 + while True: + idx = stripped_line.find('//', search_start) + if idx == -1: + break # No more // found + if idx == 0 or stripped_line[idx-1] != ':': + # Found a potential comment marker + # Check if it's inside quotes + in_quotes = False + quote_char = '' + for i in range(idx): + char = stripped_line[i] + if char in ('"', "'") and (i == 0 or stripped_line[i-1] != '\\'): # Handle escaped quotes + if not in_quotes: + in_quotes = True + quote_char = char + elif char == quote_char: + in_quotes = False + quote_char = '' + if not in_quotes: + comment_index = idx + break # Found valid comment marker + else: + # // is inside quotes, continue searching after it + search_start = idx + 2 + else: + # Found ://, likely a URL, continue searching after it + search_start = idx + 2 + + if comment_index != -1: + # Find the original position in the non-stripped line + original_comment_start = line.find(stripped_line[comment_index:]) + cleaned_lines.append(line[:original_comment_start].rstrip()) + else: + cleaned_lines.append(line) + clean_json_str = "\n".join(cleaned_lines) + + + # 3. Remove markdown code fences + clean_json_str = clean_json_str.strip() if clean_json_str.startswith("```json"): clean_json_str = clean_json_str[7:] # Remove ```json\n if clean_json_str.endswith("```"): clean_json_str = clean_json_str[:-3] # Remove ``` clean_json_str = clean_json_str.strip() # Remove any extra whitespace - # --- ADDED: Remove tags --- + # 4. 
Remove <think> tags (just in case) clean_json_str = re.sub(r'<think>.*?</think>', '', clean_json_str, flags=re.DOTALL | re.IGNORECASE) - clean_json_str = clean_json_str.strip() # Strip again after potential removal - # --------------------------------- + clean_json_str = clean_json_str.strip() + # --- Parse Sanitized JSON --- try: response_data = json.loads(clean_json_str) except json.JSONDecodeError as e: - # Log the full cleaned string that caused the error for better debugging - error_detail = f"Failed to decode LLM JSON response: {e}\nFull Cleaned Response:\n{clean_json_str}" - log.error(f"ERROR: {error_detail}") # Log full error detail to console - raise ValueError(error_detail) # Raise the error with full detail + error_detail = f"Failed to decode LLM JSON response after sanitization: {e}\nSanitized Response Attempted:\n{clean_json_str}" + log.error(f"ERROR: {error_detail}") + raise ValueError(error_detail) - if "predicted_assets" not in response_data or not isinstance(response_data["predicted_assets"], list): - raise ValueError("Invalid LLM response format: 'predicted_assets' key missing or not a list.") + # --- Validate Top-Level Structure --- + if not isinstance(response_data, dict): + raise ValueError("Invalid LLM response: Root element is not a JSON object.") - source_rules = [] - # We assume one SourceRule per input source processed by this handler instance - # Use self.input_source_identifier from the base class + if "individual_file_analysis" not in response_data or not isinstance(response_data["individual_file_analysis"], list): + raise ValueError("Invalid LLM response format: 'individual_file_analysis' key missing or not a list.") + + if "asset_group_classifications" not in response_data or not isinstance(response_data["asset_group_classifications"], dict): + raise ValueError("Invalid LLM response format: 'asset_group_classifications' key missing or not a dictionary.") + + # --- Prepare for Rule Creation --- source_rule = 
SourceRule(input_path=self.input_source_identifier) - - # Access valid types from the settings dictionary valid_asset_types = list(self.llm_settings.get('asset_types', {}).keys()) valid_file_types = list(self.llm_settings.get('file_types', {}).keys()) + asset_rules_map: Dict[str, AssetRule] = {} # Maps group_name to AssetRule - for asset_data in response_data["predicted_assets"]: + # --- Process Individual Files and Build Rules --- + for file_data in response_data["individual_file_analysis"]: # Check for cancellation within the loop if self._is_cancelled: - log.info("LLM prediction cancelled during response parsing (assets).") + log.info("LLM prediction cancelled during response parsing (files).") return [] - if not isinstance(asset_data, dict): - log.warning(f"Skipping invalid asset data (not a dict): {asset_data}") + if not isinstance(file_data, dict): + log.warning(f"Skipping invalid file data entry (not a dict): {file_data}") continue - asset_name = asset_data.get("suggested_asset_name", "Unnamed_Asset") - asset_type = asset_data.get("predicted_asset_type") + file_path_rel = file_data.get("relative_file_path") + file_type = file_data.get("classified_file_type") + group_name = file_data.get("proposed_asset_group_name") # Can be string or null + + # --- Validate File Data --- + if not file_path_rel or not isinstance(file_path_rel, str): + log.warning(f"Missing or invalid 'relative_file_path' in file data: {file_data}. Skipping file.") + continue + + if not file_type or not isinstance(file_type, str): + log.warning(f"Missing or invalid 'classified_file_type' for file '{file_path_rel}'. Skipping file.") + continue + + # Handle FILE_IGNORE explicitly + if file_type == "FILE_IGNORE": + log.debug(f"Ignoring file as per LLM prediction: {file_path_rel}") + continue # Skip creating a rule for this file + + # Validate file_type against definitions + if file_type not in valid_file_types: + log.warning(f"Invalid predicted_file_type '{file_type}' for file '{file_path_rel}'. 
Defaulting to EXTRA.") + file_type = "EXTRA" + + # --- Handle Grouping and Asset Type --- + if not group_name or not isinstance(group_name, str): + log.warning(f"File '{file_path_rel}' has missing, null, or invalid 'proposed_asset_group_name' ({group_name}). Cannot assign to an asset. Skipping file.") + continue # Skip files that cannot be grouped + + asset_type = response_data["asset_group_classifications"].get(group_name) + + if not asset_type: + log.warning(f"No classification found in 'asset_group_classifications' for group '{group_name}' (proposed for file '{file_path_rel}'). Skipping file.") + continue # Skip files belonging to unclassified groups if asset_type not in valid_asset_types: - log.warning(f"Invalid predicted_asset_type '{asset_type}' for asset '{asset_name}'. Skipping asset.") - continue # Skip this asset + log.warning(f"Invalid asset_type '{asset_type}' found in 'asset_group_classifications' for group '{group_name}'. Skipping file '{file_path_rel}'.") + continue # Skip files belonging to groups with invalid types - asset_rule = AssetRule(asset_name=asset_name, asset_type=asset_type) - source_rule.assets.append(asset_rule) - - if "files" not in asset_data or not isinstance(asset_data["files"], list): - log.warning(f"'files' key missing or not a list in asset '{asset_name}'. Skipping files for this asset.") + # --- Construct Absolute Path --- + try: + base_path = Path(self.input_source_identifier) + if base_path.is_file(): + base_path = base_path.parent + clean_rel_path = Path(file_path_rel.strip().replace('\\', '/')) + file_path_abs = str(base_path / clean_rel_path) + except Exception as path_e: + log.warning(f"Error constructing absolute path for '{file_path_rel}' relative to '{self.input_source_identifier}': {path_e}. 
Skipping file.") continue - for file_data in asset_data["files"]: - # Check for cancellation within the inner loop - if self._is_cancelled: - log.info("LLM prediction cancelled during response parsing (files).") - return [] + # --- Get or Create Asset Rule --- + asset_rule = asset_rules_map.get(group_name) + if not asset_rule: + # Create new AssetRule if this is the first file for this group + log.debug(f"Creating new AssetRule for group '{group_name}' with type '{asset_type}'.") + asset_rule = AssetRule(asset_name=group_name, asset_type=asset_type) + source_rule.assets.append(asset_rule) + asset_rules_map[group_name] = asset_rule + # else: use existing asset_rule - if not isinstance(file_data, dict): - log.warning(f"Skipping invalid file data (not a dict) in asset '{asset_name}': {file_data}") - continue - - file_path_rel = file_data.get("file_path") # LLM provides relative path - file_type = file_data.get("predicted_file_type") - - if not file_path_rel: - log.warning(f"Missing 'file_path' in file data for asset '{asset_name}'. Skipping file.") - continue - - # Convert relative path from LLM (using '/') back to absolute OS-specific path - # We need the original input path (directory or archive) to make it absolute - # Use self.input_source_identifier which holds the original path - # IMPORTANT: Ensure the LLM is actually providing paths relative to the *root* of the input source. - try: - # Use Pathlib for safer joining, assuming input_source_identifier is the parent dir/archive path - # If input_source_identifier is an archive file, this logic might need adjustment - # depending on where files were extracted. For now, assume it's the base path. 
- base_path = Path(self.input_source_identifier) - # If the input was a file (like a zip), use its parent directory as the base for joining relative paths - if base_path.is_file(): - base_path = base_path.parent - # Clean the relative path potentially coming from LLM - clean_rel_path = Path(file_path_rel.strip().replace('\\', '/')) - file_path_abs = str(base_path / clean_rel_path) - except Exception as path_e: - log.warning(f"Error constructing absolute path for '{file_path_rel}' relative to '{self.input_source_identifier}': {path_e}. Skipping file.") - continue + # --- Create and Add File Rule --- + file_rule = FileRule( + file_path=file_path_abs, + item_type=file_type, + item_type_override=file_type, # Initial override based on LLM + target_asset_name_override=group_name, # Use the group name + output_format_override=None, + is_gloss_source=False, + resolution_override=None, + channel_merge_instructions={} + ) + asset_rule.files.append(file_rule) + log.debug(f"Added file '{file_path_rel}' (type: {file_type}) to asset '{group_name}'.") - if file_type not in valid_file_types: - log.warning(f"Invalid predicted_file_type '{file_type}' for file '{file_path_rel}'. 
Defaulting to EXTRA.") - file_type = "EXTRA" # Default to EXTRA if invalid type from LLM + # Log if no assets were created + if not source_rule.assets: + log.warning(f"LLM prediction for '{self.input_source_identifier}' resulted in zero valid assets after parsing.") - # Create the FileRule instance - # Add default values for fields not provided by LLM - file_rule = FileRule( - file_path=file_path_abs, - item_type=file_type, - item_type_override=file_type, # Initial override - target_asset_name_override=asset_name, # Default to asset name - output_format_override=None, - is_gloss_source=False, # LLM doesn't predict this - resolution_override=None, - channel_merge_instructions={} - ) - asset_rule.files.append(file_rule) - - source_rules.append(source_rule) - return source_rules + return [source_rule] # Return list containing the single SourceRule # Removed conceptual example usage comments \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f711dfe..90541f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ numpy openexr PySide6 py7zr -rarfile \ No newline at end of file +rarfile +requests \ No newline at end of file