""" Evaluation Module This module provides comprehensive model evaluation and benchmarking tools. Includes benchmark building, certification tests, gap analysis, and training recommendations. """ from .metrics import Metrics, calculate_perplexity, calculate_bleu from .evaluator import ModelEvaluator, EvaluationConfig from .benchmark import Benchmark, BenchmarkSuite from .benchmark_builder import Benchmark as BenchmarkBuilder, BenchmarkSuite as BenchmarkBuilderSuite from .certification_tests import ( CertificationTestBuilder, FinancialCertificationTests, MedicalCertificationTests, LegalCertificationTests, EducationCertificationTests, get_certification_tests_for_domain ) from .gap_analyzer import GapAnalyzer from .training_recommender import TrainingRecommender __all__ = [ # Original exports 'Metrics', 'calculate_perplexity', 'calculate_bleu', 'ModelEvaluator', 'EvaluationConfig', 'Benchmark', 'BenchmarkSuite', # New exports 'BenchmarkBuilder', 'BenchmarkBuilderSuite', 'CertificationTestBuilder', 'FinancialCertificationTests', 'MedicalCertificationTests', 'LegalCertificationTests', 'EducationCertificationTests', 'get_certification_tests_for_domain', 'GapAnalyzer', 'TrainingRecommender' ]