"""Collect component names ('meta.name' values) from the JSON files of a repository."""

import os
import json
from pathlib import Path

# Dictionary keys (besides 'children') whose values are searched for nested
# component definitions. All other keys are deliberately not descended into.
_NESTED_KEYS = ('root', 'props', 'custom')

# Maximum number of items inspected in a plain list; items past this index are
# skipped. This bounds the work per list, it does not limit nesting depth.
# The cap is NOT applied to 'children' lists, which are always walked in full.
_MAX_LIST_ITEMS = 1000


def extract_names_recursive(obj, names_list, visited=None):
    """
    Append every non-empty string found at ``meta.name`` in a nested JSON value.

    The structure is walked depth-first in document order: a dict contributes
    its own ``meta.name`` first, then the entries of its ``children`` list,
    then the values of its ``root``, ``props`` and ``custom`` keys. Lists
    contribute their first ``_MAX_LIST_ITEMS`` items. Any other dictionary key
    is ignored.

    The walk uses an explicit stack instead of Python recursion, so deeply
    nested documents cannot raise ``RecursionError``.

    Args:
        obj: The decoded JSON value (dict, list or scalar) to process.
            Scalars are ignored.
        names_list: List that found names are appended to (mutated in place).
        visited: Optional set of ``id()`` values of containers that were
            already processed. It is updated in place and lets callers share
            the bookkeeping between calls. Containers whose id is in the set
            are skipped, which also protects against cyclic structures.

    Returns:
        None. Results are delivered through ``names_list``.
    """
    if visited is None:
        visited = set()

    # Items are pushed in reverse so that popping yields them in document order.
    stack = [obj]
    while stack:
        node = stack.pop()

        # Only containers can hold names; skip scalars and repeated containers.
        if not isinstance(node, (dict, list)):
            continue
        node_id = id(node)
        if node_id in visited:
            continue
        visited.add(node_id)

        if isinstance(node, list):
            stack.extend(reversed(node[:_MAX_LIST_ITEMS]))
            continue

        # A name only counts when 'meta' is a dict and 'name' is a non-empty string.
        meta = node.get('meta')
        if isinstance(meta, dict):
            name = meta.get('name')
            if name and isinstance(name, str):
                names_list.append(name)

        # Queue the nested values in the order they must be visited.
        pending = []
        children = node.get('children')
        if isinstance(children, list):
            pending.extend(children)
        for key in _NESTED_KEYS:
            if key in node:
                pending.append(node[key])
        stack.extend(reversed(pending))


def load_scada_names(repo_path):
    """
    Find all JSON files below a directory and extract their component names.

    Names are the ``meta.name`` values located by ``extract_names_recursive``,
    both at the top level of each document and in nested children. Progress
    and per-file problems are reported with ``print``; a file that cannot be
    read or parsed is skipped and does not abort the scan.

    Args:
        repo_path (str): Path to the repository directory.

    Returns:
        list: The extracted names, grouped by file in sorted path order and in
        document order within each file. Duplicates are kept.
    """
    names = []
    repo_dir = Path(repo_path)

    # The glob can also match directories called '*.json', so keep regular
    # files only. Sorting makes the result independent of filesystem order.
    json_files = sorted(
        path for path in repo_dir.glob('**/*.json') if path.is_file()
    )
    print(f"Found {len(json_files)} JSON files")

    for json_file in json_files:
        try:
            # JSON text is UTF-8; 'utf-8-sig' additionally tolerates a leading
            # byte order mark, which json.load would otherwise reject.
            with open(json_file, 'r', encoding='utf-8-sig') as f:
                data = json.load(f)

            count_before = len(names)
            extract_names_recursive(data, names)

            found = len(names) - count_before
            if found > 0:
                print(f"Found {found} names in {json_file}")
        except json.JSONDecodeError:
            print(f"Invalid JSON in {json_file}")
        except Exception as e:
            # Best effort by design: one unreadable file (permissions, bad
            # encoding, ...) must not stop the scan of the remaining files.
            print(f"Error processing {json_file}: {e}")

    return names


# Test with the specified path
if __name__ == "__main__":
    repo_path = "project/clones/9820af836d9854563e0e495ca1541de48aefd95e"
    names = load_scada_names(repo_path)

    print(f"\nTotal names found: {len(names)}")

    # Print some example names if any were found
    if names:
        print("\nExample names:")
        for name in sorted(names)[:20]:  # First 20 names alphabetically
            print(f"- {name}")