import braintrust
import fastcore.all as fc
from dotenv import load_dotenv
from tqdm import tqdm
from wattbot import retriever, eda, evaluate, utils, generator
from langchain_text_splitters import MarkdownTextSplitter

# Submissions
## Including neighbour chunks
# Load variables from a local .env file into the process environment.
# (The notebook recorded `True` as the output of this call.)
load_dotenv()

# Model identifiers: one for embedding the chunks, one for answer generation.
embedding_model = 'accounts/fireworks/models/qwen3-embedding-8b'
llm_model = 'accounts/fireworks/models/kimi-k2p5'

# Hybrid Search
# Chunk lengths are measured with the cl100k_base tiktoken encoding; the
# overlap (350) is deliberately close to the chunk size (375) in this run.
chunk_size, chunk_overlap = 375, 350
md_splitter = MarkdownTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base",
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
# NOTE(review): `chunk_markdown` is not part of LangChain's stock
# MarkdownTextSplitter API; presumably wattbot patches it onto the class --
# confirm against wattbot.retriever.
all_chunks = retriever.chunk_all(md_splitter.chunk_markdown)

# Build a lexical and a semantic index over the same chunks, then combine
# them into a hybrid retriever with neighbour-chunk inclusion switched on.
ls = retriever.LexicalSearch(all_chunks)
ss = retriever.SemanticSearch(all_chunks, model=embedding_model)
hs = retriever.HybridSearch(ls, ss, neighbour_chunks=True)
# Wrap the hybrid retriever in the RAG generator; the second argument is
# whatever `utils.fw()` returns (presumably an LLM API client -- confirm).
rag = generator.RAG(hs, utils.fw(), model=llm_model)

# Settings describing this run, passed to the evaluation helpers below.
experiment_metadata = {
    'pdf_extraction': 'markdown',
    'chunking': 'token_level',
    'chunk_size': chunk_size,
    'chunk_overlap': chunk_overlap,
    'retrieval': 'hybrid_search',
    'neighbour_chunks': True,
}

# Evaluate on the training questions with n_rc=15.
evaluate.evaluate_train(rag, experiment_metadata, n_rc=15)
# Recorded notebook output:
#   Processing Rows: 100%|██████████| 41/41 [03:27<00:00, 5.05s/it]
#   32.75
# Add the submission file name to the run metadata (presumably the path
# `create_submission` writes to -- confirm), then answer the full question set.
experiment_metadata['output_path'] = 'submission_v15.csv'
evaluate.create_submission(rag, experiment_metadata, n_rc=15)
# NOTE(review): the FutureWarnings recorded below come from
# wattbot/evaluate.py lines 103-109, where string values are assigned into
# float64 columns. pandas says this will become an error in a future version,
# so those columns should be cast to a compatible dtype (e.g. object/str)
# before the assignment loop. The fix belongs in evaluate.py, not here.
#
# Recorded notebook output (stderr):
#   Answering question: 0%| | 0/282 [00:00<?, ?it/s]
#   /Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:103: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The average increase in U.S. data center electricity consumption between 2010 and 2014 was about 4%' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
#     df.loc[i, 'answer'] = str(answer['answer'])
#   /Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:104: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
#     df.loc[i, 'answer_value'] = str(answer['answer_value'])
#   /Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:106: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['wu2021b']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
#     df.loc[i, 'ref_id'] = str(answer['ref_id'])
#   /Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:107: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['https://arxiv.org/pdf/2108.06738']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
#     df.loc[i, 'ref_url'] = str(answer['ref_url'])
#   /Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:108: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '"The total energy consumption of the US data centers increased by about 4% from 2010-2014, compared with the estimated 24% increase from 2005-10 and nearly 90% increase from 2000-05 [Masanet et al., 2020]"' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
#     df.loc[i, 'supporting_materials'] = str(answer['supporting_materials'])
#   /Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:109: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The text explicitly states that U.S. data center energy consumption increased by about 4% from 2010-2014, citing Masanet et al., 2020. This represents the average increase over that 4-year period.' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
#     df.loc[i, 'explanation'] = str(answer['explanation'])
#   Answering question: 100%|██████████| 282/282 [24:55<00:00, 5.30s/it]