Spaces:

victor
/

inference-metrics

Running

App Files Files Community

inference-metrics / get-metrics.ts

victor HF Staff

Upload folder using huggingface_hub

15a5288 verified 5 months ago

raw

history blame contribute delete

19.9 kB

	// get-metrics.ts - Enriches Hugging Face Router models with data fetched directly from provider APIs
	import * as fs from "node:fs";
	import { parseArgs } from "util";
	import { ProviderAggregator } from "./providers";
	import type { ProviderEntry } from "./providers";
	import { extractHFRouterData } from "./providers/huggingface-router";

	/* -------------------------------------------------------------------------- */
	/* CONSTANTS */
	/* -------------------------------------------------------------------------- */

	// Hugging Face Router endpoint fetched by fetchHuggingfaceModels(); its JSON
	// payload is read as `{ data: HFModel[] }`.
	const HUGGINGFACE_API = "https://router.huggingface.co/v1/models";
	// Hugging Face Router chat-completions endpoint; testModelProvider() POSTs
	// its probe request here.
	const HUGGINGFACE_ROUTER_API =
	"https://router.huggingface.co/v1/chat/completions";

	/* -------------------------------------------------------------------------- */
	/* TYPE DEFINITIONS */
	/* -------------------------------------------------------------------------- */

	/**
	 * One model record from the `data` array of the HF Router models response.
	 * Only `id` and `providers` are typed; every other field passes through untyped.
	 */
	interface HFModel {
	/** Model identifier; used for provider matching and as the `model` prefix in performance probes. */
	id: string;
	/** Any additional fields of the record are kept as-is. */
	[key: string]: any;
	/** Per-provider entries; enrichment creates an empty array when the field is absent. */
	providers?: ProviderEntry[];
	}

	/** Counters collected by enrichHuggingfaceModels() and printed by printStatistics(). */
	interface Statistics {
	/** Number of models received from the HF Router. */
	total_models: number;
	/** Models for which at least one provider entry had, or gained, pricing data. */
	models_enriched: number;
	/** Provider entries that had pricing in the router data or gained it from a provider API. */
	providers_enriched: number;
	/** Count of extra `supports_*` fields copied from provider APIs onto router entries. */
	new_capabilities_added: number;
	/** Number of entries returned by each provider API, keyed by provider name. */
	providers_fetched: Record<string, number>;
	}

	/** Summary returned by testAllProviders(). */
	interface PerformanceTestResult {
	/** Number of model-provider pairs that were probed. */
	total_tested: number;
	/** Probed pairs without a `performance_error` (total_tested - errors). */
	successful: number;
	/** Probed pairs whose entry carries a `performance_error`. */
	errors: number;
	/** Pair counts per status; the keys used are `live`, `offline` and `not_tested`. */
	status_distribution: Record<string, number>;
	}

	/* -------------------------------------------------------------------------- */
	/* FETCH HELPERS */
	/* -------------------------------------------------------------------------- */

	/**
	 * Downloads the model list from the Hugging Face Router.
	 *
	 * @returns The `data` array of the response payload.
	 * @throws Error when the endpoint answers with a non-2xx status, or when the
	 *   payload does not contain a `data` array (previously both cases surfaced
	 *   later as an opaque "undefined is not iterable"-style failure).
	 */
	async function fetchHuggingfaceModels(): Promise<HFModel[]> {
	  const resp = await fetch(HUGGINGFACE_API);
	  if (!resp.ok) {
	    throw new Error(
	      `Failed to fetch HuggingFace models: HTTP ${resp.status} ${resp.statusText}`
	    );
	  }

	  // The payload is external input: check its shape before trusting it.
	  const payload = (await resp.json()) as { data?: unknown } | null;
	  if (!Array.isArray(payload?.data)) {
	    throw new Error(
	      "Failed to fetch HuggingFace models: response has no `data` array"
	    );
	  }
	  return payload.data as HFModel[];
	}

	/* -------------------------------------------------------------------------- */
	/* PROVIDER ENRICHMENT */
	/* -------------------------------------------------------------------------- */

	/**
	 * Reduces a model ID to a lower-cased key used for fuzzy matching.
	 *
	 * A handful of well-known organization prefixes are stripped by rule (the
	 * first rule that matches wins); any other ID is reduced to the text after
	 * its last "/".
	 *
	 * @param modelId Model identifier, typically of the form "org/name".
	 * @returns The lower-cased matching key.
	 */
	function normalizeModelId(modelId: string): string {
	  const prefixRules: RegExp[] = [
	    /^meta-llama\/Meta-Llama-(.+)$/,
	    /^meta-llama\/Llama-(.+)$/,
	    /^mistralai\/(.+)$/,
	    /^google\/(.+)$/,
	    /^anthropic\/(.+)$/,
	  ];

	  for (const rule of prefixRules) {
	    // Every rule has exactly one mandatory capture group.
	    const captured = rule.exec(modelId)?.[1];
	    if (captured !== undefined) {
	      return captured.toLowerCase();
	    }
	  }

	  // No rule applied: keep whatever follows the final slash (the whole
	  // string when there is no slash, because lastIndexOf yields -1).
	  const tailStart = modelId.lastIndexOf("/") + 1;
	  return modelId.slice(tailStart).toLowerCase();
	}

	/**
	 * Finds, per provider, the provider-API entries that appear to describe the
	 * given Hugging Face model.
	 *
	 * Matching is a plain substring test in either direction between the
	 * normalized IDs (see normalizeModelId); provider-specific matching rules are
	 * not implemented.
	 *
	 * Entries without a usable string ID are skipped, and an HF ID that
	 * normalizes to the empty string matches nothing: `"abc".includes("")` is
	 * always true, so an empty key on either side would otherwise match
	 * everything.
	 *
	 * @param hfModelId Hugging Face model ID.
	 * @param providerEntries Provider name -> entries returned by that provider.
	 * @returns Provider name -> matching entries; providers without a match are omitted.
	 */
	function matchProviderModel(
	  hfModelId: string,
	  providerEntries: Map<string, ProviderEntry[]>
	): Map<string, ProviderEntry[]> {
	  const matches = new Map<string, ProviderEntry[]>();

	  const normalizedHfId = normalizeModelId(hfModelId);
	  if (normalizedHfId === "") {
	    return matches;
	  }

	  for (const [provider, entries] of providerEntries) {
	    const matchingEntries = entries.filter((entry) => {
	      // Entries expose their model ID as either `id` or `model_id`.
	      const { id, model_id: modelId } = entry as {
	        id?: unknown;
	        model_id?: unknown;
	      };
	      const rawId = id || modelId;
	      if (typeof rawId !== "string" || rawId === "") {
	        return false;
	      }

	      const normalizedEntryId = normalizeModelId(rawId);
	      if (normalizedEntryId === "") {
	        return false;
	      }

	      return (
	        normalizedEntryId.includes(normalizedHfId) ||
	        normalizedHfId.includes(normalizedEntryId)
	      );
	    });

	    if (matchingEntries.length > 0) {
	      matches.set(provider, matchingEntries);
	    }
	  }

	  return matches;
	}

	/**
	 * Enriches the HF Router model list with data fetched from provider APIs.
	 *
	 * For every model (deep-cloned, the input is never mutated):
	 *  1. Router-supplied provider data is recorded first; it always has priority.
	 *  2. For providers already listed on the model, fields missing from the
	 *     router entry (pricing, context length, `supports_*` flags) are filled
	 *     in from the matching provider-API entries.
	 * Providers that the router does not list for a model are never added.
	 *
	 * Provider names are compared through normalizeProviderName() everywhere, so
	 * aliased names (e.g. "fireworks-ai" vs "fireworks") resolve to the same
	 * provider both for lookups and for de-duplicating `matchedProviderData`.
	 *
	 * @param hfModels Models as returned by the HF Router.
	 * @param aggregator Source of the provider-API data.
	 * @returns The enriched clones, enrichment counters, and one flat record per
	 *   model/provider combination that was matched.
	 */
	async function enrichHuggingfaceModels(
	  hfModels: HFModel[],
	  aggregator: ProviderAggregator
	): Promise<{
	  enriched: HFModel[];
	  stats: Statistics;
	  matchedProviderData: any[];
	}> {
	  console.log("\nFetching data from all providers...");
	  const providerData = await aggregator.fetchAllProviders();

	  const stats: Statistics = {
	    total_models: hfModels.length,
	    models_enriched: 0,
	    providers_enriched: 0,
	    new_capabilities_added: 0,
	    providers_fetched: {},
	  };

	  // Count models per provider
	  for (const [provider, entries] of providerData) {
	    stats.providers_fetched[provider] = entries.length;
	  }

	  const enrichedModels: HFModel[] = [];
	  const matchedProviderData: any[] = [];
	  // "<hf model id>:<normalized provider>" combinations already recorded.
	  const matchedProviderKeys = new Set<string>();
	  // Provider-API entries that were merged into at least one model. Tracked by
	  // identity because provider-side model IDs generally differ from HF IDs,
	  // so string keys cannot tell which entries went unused.
	  const mergedEntries = new Set<ProviderEntry>();

	  console.log(
	    `\nProcessing ${hfModels.length} models from HuggingFace Router API...`
	  );

	  for (const hfModel of hfModels) {
	    const enrichedModel = structuredClone(hfModel);

	    // Extract HF router data first (this is already in the model)
	    const hfRouterData = extractHFRouterData(enrichedModel);

	    // Find matches from provider APIs
	    const matches = matchProviderModel(hfModel.id, providerData);

	    // Ensure providers array exists
	    const providers: ProviderEntry[] = enrichedModel.providers ?? [];
	    enrichedModel.providers = providers;

	    const findListedProvider = (normalizedName: string) =>
	      providers.find(
	        (p) => normalizeProviderName(p.provider) === normalizedName
	      );

	    let modelEnriched = false;

	    // Pass 1: HF router data (prioritized).
	    for (const [providerName, hfProviderData] of hfRouterData) {
	      const normalizedProvider = normalizeProviderName(providerName);
	      if (!findListedProvider(normalizedProvider)) {
	        continue;
	      }

	      // HF router data is already on the model, just count it
	      if (hfProviderData.pricing) {
	        stats.providers_enriched++;
	        modelEnriched = true;
	      }

	      const matchKey = `${hfModel.id}:${normalizedProvider}`;
	      if (!matchedProviderKeys.has(matchKey)) {
	        matchedProviderKeys.add(matchKey);
	        matchedProviderData.push({
	          ...hfProviderData,
	          provider: providerName,
	          id: hfModel.id,
	        });
	      }
	    }

	    // Pass 2: fill the gaps with provider API data.
	    for (const [provider, providerEntries] of matches) {
	      const normalizedProvider = normalizeProviderName(provider);
	      const existingProvider = findListedProvider(normalizedProvider);

	      // No HF router data for this provider: skip, we only want providers
	      // that are listed in HF Router.
	      if (!existingProvider) {
	        continue;
	      }

	      for (const providerEntry of providerEntries) {
	        mergedEntries.add(providerEntry);

	        // Re-evaluated per entry: a field filled in by an earlier entry
	        // must not be overwritten (or counted again) by a later one.
	        const hadPricing = !!existingProvider.pricing;
	        const hadTools = existingProvider.supports_tools !== undefined;
	        const hadStructured =
	          existingProvider.supports_structured_output !== undefined;
	        const hadContext = !!existingProvider.context_length;

	        // Only provider API data for fields the router entry lacks.
	        const mergedData: Record<string, unknown> = {};

	        if (!hadPricing && providerEntry.pricing) {
	          mergedData.pricing = providerEntry.pricing;
	          stats.providers_enriched++;
	          modelEnriched = true;
	        }

	        if (!hadContext && providerEntry.context_length) {
	          mergedData.context_length = providerEntry.context_length;
	        }

	        if (!hadTools && providerEntry.supports_tools !== undefined) {
	          mergedData.supports_tools = providerEntry.supports_tools;
	        }

	        if (
	          !hadStructured &&
	          providerEntry.supports_structured_output !== undefined
	        ) {
	          mergedData.supports_structured_output =
	            providerEntry.supports_structured_output;
	        }

	        // Any other `supports_*` capability the router entry does not have yet.
	        for (const key of Object.keys(providerEntry)) {
	          const isExtraCapability =
	            key.startsWith("supports_") &&
	            key !== "supports_tools" &&
	            key !== "supports_structured_output";
	          if (isExtraCapability && !(key in existingProvider)) {
	            mergedData[key] = (providerEntry as any)[key];
	            stats.new_capabilities_added++;
	          }
	        }

	        Object.assign(existingProvider, mergedData);

	        // Track the enriched data (avoid duplicates)
	        const matchKey = `${hfModel.id}:${normalizedProvider}`;
	        if (!matchedProviderKeys.has(matchKey)) {
	          matchedProviderKeys.add(matchKey);
	          matchedProviderData.push({
	            ...existingProvider,
	            provider,
	            id: hfModel.id,
	          });
	        }
	      }
	    }

	    if (modelEnriched) {
	      stats.models_enriched++;
	    }

	    enrichedModels.push(enrichedModel);
	  }

	  // Report provider-API entries (with an ID) that were never merged into a model.
	  let unmatchedCount = 0;
	  for (const [, entries] of providerData) {
	    for (const entry of entries) {
	      const modelId = (entry as any).model_id || (entry as any).id || "";
	      if (modelId && !mergedEntries.has(entry)) {
	        unmatchedCount++;
	      }
	    }
	  }

	  if (unmatchedCount > 0) {
	    console.log(
	      `\nNote: ${unmatchedCount} models from provider APIs were not included (not in HF Router).`
	    );
	  }

	  return { enriched: enrichedModels, stats, matchedProviderData };
	}

	/**
	 * Normalizes a provider name for comparison: lower-cases it and collapses
	 * known aliases onto one canonical name.
	 *
	 * The name is lower-cased BEFORE the alias lookup so that e.g. "Fireworks-AI"
	 * and "fireworks-ai" both resolve to "fireworks". A Map is used instead of an
	 * object literal so that names such as "constructor" cannot hit inherited
	 * Object.prototype members.
	 *
	 * @param providerName Provider name as reported by the router or a provider API.
	 * @returns The canonical, lower-cased provider name.
	 */
	function normalizeProviderName(providerName: string): string {
	  const aliases = new Map<string, string>([
	    ["featherless-ai", "featherless"],
	    ["fireworks-ai", "fireworks"],
	    ["hf-inference", "huggingface"],
	  ]);

	  const lowered = providerName.toLowerCase();
	  return aliases.get(lowered) ?? lowered;
	}

	/* -------------------------------------------------------------------------- */
	/* PERFORMANCE TESTING */
	/* -------------------------------------------------------------------------- */

	/**
	 * Probes one model/provider combination through the HF Router with a small
	 * chat-completion request and reports latency and throughput.
	 *
	 * Never rejects for request failures: any HTTP error, network error or
	 * timeout is reported as `{ status: "offline", performance_error }`.
	 *
	 * @param modelId Hugging Face model ID.
	 * @param providerName Provider to route to (sent as `<modelId>:<providerName>`).
	 * @param hfToken Hugging Face access token used as bearer token.
	 * @returns On success `latency_s`, `throughput_tps` and `status: "live"`;
	 *   otherwise `performance_error` and `status: "offline"`.
	 */
	async function testModelProvider(
	  modelId: string,
	  providerName: string,
	  hfToken: string
	): Promise<Partial<ProviderEntry>> {
	  // A per-request nonce is appended to the prompt so that no two probes send
	  // byte-identical prompts.
	  const nonce = crypto.randomUUID().slice(0, 8);
	  const prompt = `What is the capital of France?\n<!-- nonce:${nonce} -->`;

	  const payload = {
	    model: `${modelId}:${providerName}`,
	    messages: [{ role: "user", content: prompt }],
	    stream: false,
	    temperature: 0.7,
	  };

	  const headers = {
	    Authorization: `Bearer ${hfToken}`,
	    "Content-Type": "application/json",
	  };

	  const controller = new AbortController();
	  const timeoutId = setTimeout(() => controller.abort(), 30_000);
	  const start = performance.now();

	  try {
	    const resp = await fetch(HUGGINGFACE_ROUTER_API, {
	      method: "POST",
	      headers,
	      body: JSON.stringify(payload),
	      signal: controller.signal,
	    });

	    // Latency is measured up to the arrival of the response, before the body is parsed.
	    const latency = (performance.now() - start) / 1000;

	    if (resp.ok) {
	      const data = await resp.json();
	      const usage = data?.usage ?? {};
	      const totalTokens =
	        usage.total_tokens ??
	        (usage.prompt_tokens ?? 0) + (usage.completion_tokens ?? 0);
	      const tps = totalTokens && latency > 0 ? totalTokens / latency : 0;

	      return {
	        latency_s: Number(latency.toFixed(2)),
	        throughput_tps: Number(tps.toFixed(2)),
	        status: "live",
	      };
	    }

	    // Error bodies are not guaranteed to be JSON; fall back to the status line.
	    const data = await resp.json().catch(() => ({}));
	    const msg =
	      data?.error?.message ?? `HTTP ${resp.status} ${resp.statusText}`;
	    return { performance_error: msg, status: "offline" };
	  } catch (err: unknown) {
	    const failure = err as { name?: unknown; message?: unknown } | null | undefined;
	    const msg =
	      failure?.name === "AbortError"
	        ? "Request timeout"
	        : typeof failure?.message === "string"
	        ? failure.message
	        : String(err);
	    return { performance_error: msg, status: "offline" };
	  } finally {
	    // Always release the timer: previously it stayed armed for the full 30 s
	    // whenever fetch itself failed, and it was disarmed before the body was
	    // read, so a stalled body could hang indefinitely.
	    clearTimeout(timeoutId);
	  }
	}

	/**
	 * Probes every model/provider triplet of a batch concurrently and writes the
	 * outcome back onto the triplet's provider entry.
	 *
	 * Each entry is mutated in place: it receives the fields returned by
	 * testModelProvider() plus a `performance_tested_at` ISO timestamp taken when
	 * that individual probe finished.
	 *
	 * @param triplets `[modelId, providerName, providerEntry]` tuples to probe.
	 * @param hfToken Hugging Face access token forwarded to each probe.
	 */
	async function testProvidersBatch(
	  triplets: [string, string, ProviderEntry][],
	  hfToken: string
	): Promise<void> {
	  const probes: Promise<void>[] = [];

	  for (const [modelId, providerName, prov] of triplets) {
	    const probe = testModelProvider(modelId, providerName, hfToken).then(
	      (outcome) => {
	        const stamp = { performance_tested_at: new Date().toISOString() };
	        Object.assign(prov, outcome, stamp);
	      }
	    );
	    probes.push(probe);
	  }

	  await Promise.all(probes);
	}

	/**
	 * Runs performance probes for the model/provider combinations of `models`,
	 * in sequential batches with a one-second pause between batches.
	 *
	 * Provider entries are mutated in place by testProvidersBatch().
	 *
	 * @param models Models whose `providers` entries should be probed.
	 * @param hfToken Hugging Face access token used for the probes.
	 * @param limit Probe only the first `limit` models. `undefined`, or any value
	 *   that is not a finite number >= 0, means "no limit" (a warning is logged
	 *   for invalid values; NaN used to select no models silently).
	 * @param batchSize Number of concurrent probes per batch. Must be a positive
	 *   integer; anything else falls back to 20 with a warning (0 used to loop
	 *   forever, NaN produced empty batches).
	 * @param filter When given, only providers whose name is in this list are probed.
	 * @returns Totals and a status histogram over the probed pairs.
	 */
	async function testAllProviders(
	  models: HFModel[],
	  hfToken: string,
	  limit: number | undefined,
	  batchSize: number,
	  filter: string[] | undefined
	): Promise<PerformanceTestResult> {
	  const FALLBACK_BATCH_SIZE = 20;
	  const effectiveBatchSize =
	    Number.isInteger(batchSize) && batchSize > 0
	      ? batchSize
	      : FALLBACK_BATCH_SIZE;
	  if (effectiveBatchSize !== batchSize) {
	    console.warn(
	      `Invalid batch size (${batchSize}); using ${FALLBACK_BATCH_SIZE} instead.`
	    );
	  }

	  const limitIsValid =
	    typeof limit === "number" && Number.isFinite(limit) && limit >= 0;
	  if (limit !== undefined && !limitIsValid) {
	    console.warn(`Invalid test limit (${limit}); testing all models.`);
	  }
	  const subset = limitIsValid ? models.slice(0, limit) : models;

	  const allowedProviders = filter ? new Set(filter) : undefined;

	  const allPairs: [string, string, ProviderEntry][] = [];
	  for (const m of subset) {
	    for (const p of m.providers ?? []) {
	      if (allowedProviders && !allowedProviders.has(p.provider)) continue;
	      allPairs.push([m.id, p.provider, p]);
	    }
	  }

	  console.log(
	    `\nTesting performance for ${allPairs.length} model-provider combinations...`
	  );

	  let tested = 0;
	  let errors = 0;
	  const statusDist = {
	    live: 0,
	    offline: 0,
	    not_tested: 0,
	  };

	  const totalBatches = Math.ceil(allPairs.length / effectiveBatchSize);

	  for (let i = 0; i < allPairs.length; i += effectiveBatchSize) {
	    const batch = allPairs.slice(i, i + effectiveBatchSize);
	    console.log(
	      `Testing batch ${i / effectiveBatchSize + 1}/${totalBatches}...`
	    );
	    await testProvidersBatch(batch, hfToken);

	    for (const [, , prov] of batch) {
	      tested += 1;
	      if (prov.performance_error) errors += 1;
	      switch (prov.status) {
	        case "live":
	          statusDist.live += 1;
	          break;
	        case "offline":
	          statusDist.offline += 1;
	          break;
	        default:
	          statusDist.not_tested += 1;
	      }
	    }

	    // Pause between batches (not after the last one).
	    if (i + effectiveBatchSize < allPairs.length) {
	      await new Promise((resolve) => setTimeout(resolve, 1000));
	    }
	  }

	  return {
	    total_tested: tested,
	    successful: tested - errors,
	    errors,
	    status_distribution: statusDist,
	  };
	}

	/* -------------------------------------------------------------------------- */
	/* PRINT HELPERS */
	/* -------------------------------------------------------------------------- */

	/**
	 * Writes the enrichment counters to stdout: a banner, the four totals, and
	 * then the number of entries fetched per provider, sorted by provider name.
	 *
	 * @param s Counters produced by the enrichment step.
	 */
	function printStatistics(s: Statistics): void {
	  const rule = "=".repeat(60);

	  const summaryLines = [
	    "\n" + rule,
	    "ENRICHMENT STATISTICS",
	    rule,
	    `Total models processed: ${s.total_models}`,
	    `Models enriched with pricing: ${s.models_enriched}`,
	    `Provider entries enriched: ${s.providers_enriched}`,
	    `New capability fields added: ${s.new_capabilities_added}`,
	    "\nProvider data fetched:",
	  ];
	  for (const line of summaryLines) {
	    console.log(line);
	  }

	  const providerNames = Object.keys(s.providers_fetched);
	  providerNames.sort((left, right) => left.localeCompare(right));
	  for (const provider of providerNames) {
	    console.log(` ${provider}: ${s.providers_fetched[provider]} models`);
	  }
	}

	/* -------------------------------------------------------------------------- */
	/* CLI PARSER */
	/* -------------------------------------------------------------------------- */

	// Command-line flags. `strict: false` lets unknown flags through instead of throwing.
	const { values: opts } = parseArgs({
	  args: Bun.argv.slice(2),
	  options: {
	    "test-performance": { type: "boolean" },
	    "test-limit": { type: "string" },
	    "test-providers": { type: "string", multiple: true },
	    "batch-size": { type: "string" },
	    providers: { type: "string", multiple: true },
	    "skip-providers": { type: "string", multiple: true },
	  },
	  strict: false,
	});

	/**
	 * Parses an integer-valued CLI option.
	 *
	 * @param name Flag name, used only in the warning message.
	 * @param raw Raw option value as returned by parseArgs.
	 * @param min Smallest accepted value.
	 * @param fallback Value used when the flag is absent or invalid.
	 * @returns The parsed integer, or `fallback` when `raw` is missing, not a
	 *   number, or below `min` (a warning is printed in the invalid cases, so a
	 *   typo no longer turns into NaN further down).
	 */
	function parseIntOption<T extends number | undefined>(
	  name: string,
	  raw: unknown,
	  min: number,
	  fallback: T
	): number | T {
	  if (typeof raw !== "string" || raw === "") {
	    return fallback;
	  }
	  const parsed = Number.parseInt(raw, 10);
	  if (Number.isInteger(parsed) && parsed >= min) {
	    return parsed;
	  }
	  console.warn(
	    `Ignoring invalid --${name} value "${raw}" (expected an integer >= ${min}).`
	  );
	  return fallback;
	}

	// Number of models to performance-test; undefined means "all".
	const testLimit = parseIntOption("test-limit", opts["test-limit"], 0, undefined);
	// Number of concurrent probes per batch.
	const batchSize = parseIntOption("batch-size", opts["batch-size"], 1, 20);

	/* -------------------------------------------------------------------------- */
	/* MAIN */
	/* -------------------------------------------------------------------------- */

	// Entry point: fetch the router models, enrich them with provider data,
	// optionally run performance probes, then write both JSON output files.
	(async () => {
	  console.log("Fetching HuggingFace models...");
	  const hfModels = await fetchHuggingfaceModels();
	  console.log(`Found ${hfModels.length} HuggingFace models.`);

	  // Providers with a direct API integration. Each one reads its key from
	  // the environment variable `<NAME>_API_KEY` (e.g. GROQ_API_KEY).
	  const knownProviders = [
	    "novita",
	    "sambanova",
	    "groq",
	    "featherless",
	    "together",
	    "cohere",
	    "fireworks",
	    "nebius",
	    "hyperbolic",
	    "cerebras",
	    "nscale",
	  ];

	  // Only add API keys that are defined
	  const apiKeys: Record<string, string> = {};
	  for (const name of knownProviders) {
	    const key = process.env[`${name.toUpperCase()}_API_KEY`];
	    if (key) apiKeys[name] = key;
	  }

	  const requestedProviders = opts["providers"] as string[] | undefined;
	  const skipProviders = opts["skip-providers"] as string[] | undefined;

	  // --skip-providers removes providers from the explicit --providers list
	  // when one is given (previously it was ignored in that case), otherwise
	  // from the full list of known providers.
	  const config = {
	    providers: skipProviders
	      ? (requestedProviders ?? knownProviders).filter(
	          (p) => !skipProviders.includes(p)
	        )
	      : requestedProviders,
	    apiKeys,
	  };

	  const aggregator = new ProviderAggregator(config);

	  console.log("\nEnriching HuggingFace models with provider data...");
	  const { enriched, stats, matchedProviderData } =
	    await enrichHuggingfaceModels(hfModels, aggregator);

	  // Optional performance tests. The flag below records whether they actually
	  // ran, so the output metadata is not misleading when HF_TOKEN is missing.
	  let performanceTested = false;
	  if (opts["test-performance"]) {
	    const hfToken = process.env.HF_TOKEN;
	    if (!hfToken) {
	      console.error(
	        "ERROR: HF_TOKEN environment variable not set. Skipping performance tests."
	      );
	    } else {
	      console.log("\n" + "=".repeat(60));
	      console.log("PERFORMANCE TESTING");
	      console.log("=".repeat(60));
	      const perfStats = await testAllProviders(
	        enriched,
	        hfToken,
	        testLimit,
	        batchSize,
	        opts["test-providers"] as string[] | undefined
	      );
	      performanceTested = true;
	      console.log("\nPerformance testing complete:");
	      console.log(` Total tested: ${perfStats.total_tested}`);
	      console.log(` Successful: ${perfStats.successful}`);
	      console.log(` Errors: ${perfStats.errors}`);
	      console.log("\nProvider status distribution:");
	      Object.entries(perfStats.status_distribution)
	        .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
	        .forEach(([k, v]) => console.log(` ${k}: ${v}`));
	    }
	  }

	  // Save enriched data
	  const outFile = "enriched_models_enhanced.json";
	  fs.writeFileSync(
	    outFile,
	    JSON.stringify(
	      {
	        data: enriched,
	        generated_at: new Date().toISOString(),
	        metadata: {
	          total_models: enriched.length,
	          models_enriched: stats.models_enriched,
	          providers_enriched: stats.providers_enriched,
	          performance_tested: performanceTested,
	          providers_fetched: stats.providers_fetched,
	        },
	      },
	      null,
	      2
	    )
	  );
	  console.log(`\nEnriched data saved → ${outFile}`);

	  // Save only matched provider data (models that exist in HF Router)
	  fs.writeFileSync(
	    "provider_models_raw.json",
	    JSON.stringify({ data: matchedProviderData }, null, 2)
	  );
	  console.log(
	    `Matched provider models saved → provider_models_raw.json (${matchedProviderData.length} entries)`
	  );

	  printStatistics(stats);
	})().catch((err: unknown) => {
	  // Without this handler a failed fetch or write surfaced only as an
	  // unhandled promise rejection.
	  console.error("Fatal error:", err);
	  process.exitCode = 1;
	});