# Compare two models on the same dataset (-m is given once per model).
aevyra-verdict run data.jsonl -m openai/gpt-5.4-nano -m qwen/qwen3.5-9b

# Use a config file (models.yaml) rather than inline -m flags.
aevyra-verdict run data.jsonl --config models.yaml

# Score with ROUGE plus an LLM judge, and save the results to results.json.
aevyra-verdict run data.jsonl -m openai/gpt-5.4-nano \
  --metric rouge \
  --judge openai/gpt-5.4 \
  -o results.json

# Use a custom judge prompt (prompt.md) together with a custom scoring
# function (my_metrics.py:brevity_score).
aevyra-verdict run data.jsonl -m openai/gpt-5.4-nano \
  --judge openai/gpt-5.4 \
  --judge-prompt prompt.md \
  --custom-metric my_metrics.py:brevity_score

# Reduce concurrency (here --max-workers 3) if you are hitting rate limits.
aevyra-verdict run data.jsonl --config models.yaml --max-workers 3