aviary.labbench
Installation
pip install 'fhaviary[labbench]'

Usage
import os
from ldp.agent import SimpleAgent
from ldp.alg import Evaluator, EvaluatorConfig, MeanMetricsCallback
from paperqa import Settings
from aviary.env import TaskDataset
async def evaluate(folder_of_litqa_v2_papers: str | os.PathLike) -> None:
settings = Settings(paper_directory=folder_of_litqa_v2_papers)
dataset = TaskDataset.from_name("litqa2", settings=settings)
metrics_callback = MeanMetricsCallback(eval_dataset=dataset)
evaluator = Evaluator(
config=EvaluatorConfig(batch_size=3),
agent=SimpleAgent(),
dataset=dataset,
callbacks=[metrics_callback],
)
await evaluator.evaluate()
print(metrics_callback.eval_means)Image Question-Answer
References
Last updated

