Is your feature request related to a problem? Please describe.
I have been using AutoRAG by running the parse, chunk, and evaluate steps separately and then reviewing the data stored in the benchmark one by one. However, clicking through each step is time-consuming, and it is difficult to compare the results. Is there a feature that would let me view the optimal settings and detailed statistics across all configuration values at once?
# Build one QA dataset (and its corpus) for every parsed-raw / chunk pair that
# belongs together, and write both parquet files under <current_dir>/qa.
qa_dir = os.path.join(current_dir, "qa")

for parsed_raw in parsed_raw_files:
    # The index is the file name up to its first dot. It depends only on the
    # parsed file, so it is computed once per outer iteration.
    # os.path.basename honours the platform's path separator instead of a
    # hard-coded "/".
    parsed_raw_index = os.path.basename(parsed_raw).split(".")[0]

    for chunk in chunked_file_list:
        # A chunk file is paired with a parsed file when the chunk's parent
        # directory is named after the parsed file's index.
        if chunk.parent.name != parsed_raw_index:
            continue
        chunk_index = chunk.name.split(".")[0]

        # NOTE(review): the raw parquet is re-read for every matching chunk.
        # It could be loaded once per parsed file if Raw/Corpus never mutate
        # the shared frame -- confirm before hoisting.
        initial_raw = Raw(pd.read_parquet(parsed_raw, engine="pyarrow"))
        initial_corpus = Corpus(pd.read_parquet(chunk, engine="pyarrow"), initial_raw)

        qa = (
            initial_corpus.sample(
                random_single_hop,
                n=len(initial_corpus.data),
                # NOTE(review): a fresh seed on every run makes the output
                # non-reproducible -- confirm this is intended.
                random_state=random.randint(1, 100),
            )
            .map(lambda df: df.reset_index(drop=True))
            .make_retrieval_gt_contents()
            .batch_apply(
                multiple_queries_gen,  # query generation
                llm=llm,
                lang="ko",
                n=10,
            )
            .batch_apply(
                make_basic_gen_gt,  # answer generation (basic)
                llm=llm,
                lang="ko",
            )
            .batch_apply(
                make_concise_gen_gt,  # answer generation (concise)
                llm=llm,
                lang="ko",
            )
            .filter(
                dontknow_filter_rule_based,  # filter unanswerable questions
                lang="ko",
            )
        )

        # exist_ok=True replaces the separate exists() check, which is
        # redundant and can race with another process creating the directory.
        os.makedirs(qa_dir, exist_ok=True)
        output_path = os.path.join(
            qa_dir, f"parsed_{parsed_raw_index}_chunk_{chunk_index}_qa.parquet"
        )
        corpus_output_path = os.path.join(
            qa_dir, f"parsed_{parsed_raw_index}_chunk_{chunk_index}_corpus.parquet"
        )
        qa.to_parquet(output_path, corpus_output_path)
# Run one evaluation per (parsed index, chunk index) pair, pointing the
# evaluator at the QA/corpus parquet files stored under <current_dir>/qa.
# NOTE(review): the 10 x 4 grid is hard-coded; presumably it mirrors the number
# of parsed and chunked outputs -- confirm against the files actually produced.
config_path = os.path.join(current_dir, "config", "evaluate_config.yaml")
benchmark_dir = os.path.join(current_dir, "benchmark")

for parsed_idx in range(10):
    for chunk_idx in range(4):
        qa_file = "parsed_{}_chunk_{}_qa.parquet".format(parsed_idx, chunk_idx)
        corpus_file = "parsed_{}_chunk_{}_corpus.parquet".format(parsed_idx, chunk_idx)

        opt["config"] = config_path
        opt["qa_data_path"] = os.path.join(current_dir, "qa", qa_file)
        opt["corpus_data_path"] = os.path.join(current_dir, "qa", corpus_file)
        opt["project_dir"] = benchmark_dir
        evaluate(**opt)
Pay now to fund the work behind this issue.
Get updates on progress being made.
Maintainer is rewarded once the issue is completed.
You're funding impactful open source efforts
You want to contribute to this effort
You want to get funding like this too