Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """ | |
| NeMo dependency structure definition. | |
| This module analyzes the codebase to determine internal dependencies between NeMo collections and core components. | |
| """ | |
| import ast | |
| import json | |
| import os | |
| from typing import Dict, List, Set, Union | |
| def find_python_files(directory: str) -> List[str]: | |
| """Find all Python files in the given directory and its subdirectories.""" | |
| python_files = [] | |
| # Look in nemo directory and other relevant directories | |
| relevant_dirs = ['nemo', 'scripts', 'examples', 'tests'] | |
| for dir_name in relevant_dirs: | |
| dir_path = os.path.join(directory, dir_name) | |
| if os.path.exists(dir_path): | |
| for root, _, files in os.walk(dir_path): | |
| for file in files: | |
| if file.endswith('.py'): | |
| python_files.append(os.path.join(root, file)) | |
| return python_files | |
| def analyze_imports(file_path: str) -> Set[str]: | |
| """Analyze a Python file and return its NeMo package dependencies using AST parsing.""" | |
| imports = set() | |
| try: | |
| with open(file_path, 'r', encoding='utf-8') as f: | |
| tree = ast.parse(f.read(), filename=file_path) | |
| # Walk through the AST to find import statements | |
| for node in ast.walk(tree): | |
| if isinstance(node, ast.ImportFrom) and node.module and node.module.startswith('nemo.'): | |
| # Split the module path | |
| parts = node.module.split('.') | |
| if len(parts) >= 2: | |
| module_type = parts[1] # collections, core, utils, or automodel | |
| if module_type == 'collections' and len(parts) >= 3: | |
| imported_package = f"nemo.collections.{parts[2]}" | |
| imports.add(imported_package) | |
| elif module_type in ('core', 'utils', 'export', 'deploy', 'lightning', 'automodel'): | |
| imported_package = f"nemo.{module_type}" | |
| imports.add(imported_package) | |
| except Exception as e: | |
| print(f"Error analyzing {file_path}: {e}") | |
| return imports | |
| def find_top_level_packages(nemo_root: str) -> List[str]: | |
| """Find all top-level packages under nemo directory.""" | |
| packages: List[str] = [] | |
| nemo_dir = os.path.join(nemo_root, 'nemo') | |
| if not os.path.exists(nemo_dir): | |
| print(f"Warning: nemo directory not found at {nemo_dir}") | |
| return packages | |
| for item in os.listdir(nemo_dir): | |
| item_path = os.path.join(nemo_dir, item) | |
| if os.path.isdir(item_path) and not item.startswith('__'): | |
| packages.append(item) | |
| return sorted(packages) | |
| def find_collection_modules(nemo_root: str) -> Dict[str, List[str]]: | |
| """Find all modules within collections.""" | |
| collection_modules: Dict[str, List[str]] = {} | |
| collections_dir = os.path.join(nemo_root, 'nemo', 'collections') | |
| if not os.path.exists(collections_dir): | |
| print(f"Warning: collections directory not found at {collections_dir}") | |
| return collection_modules | |
| for collection in os.listdir(collections_dir): | |
| collection_path = os.path.join(collections_dir, collection) | |
| if os.path.isdir(collection_path) and not collection.startswith('__'): | |
| modules = [] | |
| for root, _, files in os.walk(collection_path): | |
| for file in files: | |
| if file.endswith('.py') and not file.startswith('__'): | |
| rel_path = os.path.relpath(os.path.join(root, file), collections_dir) | |
| module = rel_path.replace(os.sep, '.').replace('.py', '') | |
| if module: | |
| modules.append(f"nemo.collections.{collection}.{module}") | |
| collection_modules[f"nemo.collections.{collection}"] = sorted(modules) | |
| return collection_modules | |
| def build_dependency_graph(nemo_root: str) -> Dict[str, Union[List[str], Dict[str, List[str]]]]: | |
| """Build a dependency graph by analyzing all Python files.""" | |
| # Find all top-level packages | |
| top_level_packages = find_top_level_packages(nemo_root) | |
| print(f"Found top-level packages: {top_level_packages}") | |
| # Initialize reverse dependency sets for each package | |
| reverse_deps: Dict[str, Set[str]] = {} | |
| # Find all Python files | |
| python_files = find_python_files(nemo_root) | |
| # First pass: collect all packages | |
| for file_path in python_files: | |
| relative_path = os.path.relpath(file_path, nemo_root) | |
| parts = relative_path.split(os.sep) | |
| if len(parts) < 2: | |
| continue | |
| # Determine which package this file belongs to | |
| if parts[0] == 'nemo': | |
| if parts[1] == 'collections' and len(parts) >= 3: | |
| current_package = f"nemo.collections.{parts[2]}" | |
| elif parts[1] in top_level_packages: | |
| current_package = f"nemo.{parts[1]}" | |
| else: | |
| continue | |
| # Initialize reverse dependency set for this package if not exists | |
| if current_package not in reverse_deps: | |
| reverse_deps[current_package] = set() | |
| # Second pass: analyze imports and build reverse dependencies | |
| for file_path in python_files: | |
| relative_path = os.path.relpath(file_path, nemo_root) | |
| parts = relative_path.split(os.sep) | |
| if len(parts) < 2: | |
| continue | |
| # Determine which package this file belongs to | |
| if parts[0] == 'nemo': | |
| if parts[1] == 'collections' and len(parts) >= 3: | |
| current_package = f"nemo.collections.{parts[2]}" | |
| elif parts[1] in top_level_packages: | |
| current_package = f"nemo.{parts[1]}" | |
| else: | |
| continue | |
| # Analyze imports in this file | |
| imports = analyze_imports(file_path) | |
| # Add current package as a reverse dependency to each imported package | |
| for imported_pkg in imports: | |
| if imported_pkg in reverse_deps and imported_pkg != current_package: | |
| reverse_deps[imported_pkg].add(current_package) | |
| # Convert sets to sorted lists and group collections | |
| dependencies: Dict[str, Union[List[str], Dict[str, List[str]]]] = {} | |
| # Add collections group | |
| collections = {} | |
| for pkg, deps in reverse_deps.items(): | |
| if pkg.startswith('nemo.collections.') and not pkg.endswith('__init__.py'): | |
| collections[pkg] = sorted(list(deps)) | |
| if collections: | |
| dependencies['nemo.collections'] = collections | |
| # Add other packages | |
| for pkg, deps in reverse_deps.items(): | |
| if not pkg.startswith('nemo.collections.'): | |
| dependencies[pkg] = sorted(list(deps)) | |
| return dependencies | |
| def main(): | |
| """Main function to analyze dependencies and output JSON.""" | |
| # Get the root directory of the NeMo project | |
| nemo_root = os.path.dirname(os.path.abspath(__file__)) | |
| # Build dependency graph | |
| dependencies = build_dependency_graph(nemo_root) | |
| # Output as JSON | |
| print(json.dumps(dependencies, indent=4)) | |
| if __name__ == "__main__": | |
| main() | |