* Smartest Model for Your Budget with Artificial Analysis
* Artificial Analysis maintains an intelligence index that composites
* scores across standardized benchmarks (MMLU, GPQA, HumanEval, etc.)
* into a single quality number per model. Unlike arena-style human
* preference scores, this measures objective task performance.
* This example uses the intelligence index to find the highest-quality
* models under a cost ceiling -- useful when you're building a
* high-volume pipeline where cost matters but you don't want to
* sacrifice intelligence.
* Requires an API key from artificialanalysis.ai (set ARTIFICIAL_ANALYSIS_API_KEY).
* See lmarena-benchmarks.ts for a free alternative using human preference data.
fromModelsDev, recommend,
minMaxCriterion, matchesModel,
// Read the Artificial Analysis API key from the environment.
const aaKey = process.env.ARTIFICIAL_ANALYSIS_API_KEY;
// Setup hint written to stderr.
// NOTE(review): the condition guarding this message is not on the lines shown
// here -- presumably a missing-key check; confirm.
console.error("Set ARTIFICIAL_ANALYSIS_API_KEY to run this example.");
// Load the candidate models that are enriched and ranked further down.
// NOTE(review): presumably the models.dev catalog, going by the helper's name -- confirm.
const models = await fromModelsDev();
// Fetch benchmark data from Artificial Analysis (requires API key)
const response = await fetch(
"https://artificialanalysis.ai/api/v2/data/llms/models",
// The key is sent in the "x-api-key" request header.
{ headers: { "x-api-key": aaKey } },
// A response that is not OK aborts with an error carrying the HTTP status code.
if (!response.ok) throw new Error(`AA fetch failed: ${response.status}`);
// Parse the body as JSON. It is not validated here; the code below reads its `data` field.
const aaData = await response.json();
// We're pulling just the intelligence index here, but the AA API also
// exposes individual benchmark scores (GPQA, IFBench, MMLU, etc.) you
// could use as separate criteria.
const benchmarks = aaData.data
// Drop entries that have no truthy `evaluations` field.
.filter((m: Record<string, unknown>) => m.evaluations)
.map((m: Record<string, unknown>) => {
// `evaluations` is treated as a map from metric name to number-or-null (unchecked cast).
const evals = m.evaluations as Record<string, number | null>;
// A null or missing intelligence index is normalized to undefined.
quality: evals.artificial_analysis_intelligence_index ?? undefined,
// Enrich models with quality scores so they flow through to results.
// ScoredModel is generic -- recommend() preserves any extra fields you add.
type BenchmarkedModel = Model & { quality?: number };
// For each model, take the first benchmark entry for which
// matchesModel(entry slug, model id) is truthy. `find` rescans `benchmarks`
// once per model.
const benchmarkedModels: BenchmarkedModel[] = models.map((m) => {
const match = benchmarks.find((b: { slug: string }) =>
matchesModel(b.slug, m.id),
// Models without a matching entry end up with `quality` undefined.
return { ...m, quality: match?.quality };
// AA Intelligence Index: composite quality score across multiple benchmarks
// The criterion reads each model's `quality` field, which may be undefined.
// NOTE(review): how minMaxCriterion treats undefined values is not visible here -- confirm.
const qualityScore = minMaxCriterion((model: BenchmarkedModel) => model.quality);
// Rank by intelligence index with cost as tiebreaker
// Quality is weighted 5 and cost efficiency 2.
// NOTE(review): how these weights are combined is decided by the library, not
// shown here -- confirm that "tiebreaker" describes the actual behavior.
{ criterion: qualityScore, weight: 5 },
{ criterion: costEfficiency, weight: 2 },
// Top 10 under $3/M from direct providers, diverse across providers and families
// NOTE(review): the limit of 10 is not set on the lines shown here -- confirm where it is applied.
// Candidates are restricted to DIRECT_PROVIDERS, with maxCostInput set to 3.
filter: { providers: [...DIRECT_PROVIDERS], maxCostInput: 3 },
// Diversity constraints.
// NOTE(review): presumably at most 2 results per provider and 1 per model family -- confirm.
constraints: [perProvider(2), perFamily(1)],
// Rank the enriched models using qualityProfile and selection.
const results = recommend(benchmarkedModels, qualityProfile, selection);
// Print one table row per result.
console.table(results.map((m) => ({
// Round to three decimals; the unary plus turns the toFixed() string back into a number.
Score: +m.score.toFixed(3),
// Show "n/a" when the model has no quality value (null or undefined).
Quality: m.quality ?? "n/a",
// Input cost rendered as "$<value>/M", or "n/a" when it is null or undefined.
Cost: m.cost?.input != null ? `$${m.cost.input}/M` : "n/a",