Submissions

import braintrust
import fastcore.all as fc
from dotenv import load_dotenv
from tqdm import tqdm
from wattbot import retriever, eda, evaluate, utils, generator
from wattbot.retriever import chunk_markdown
from langchain_text_splitters import MarkdownTextSplitter

# Load variables from a local .env file into the process environment (python-dotenv).
# NOTE(review): presumably supplies the API credentials needed by utils.fw() and braintrust — confirm.
load_dotenv()

True

# Fireworks model identifiers shared by every experiment in this notebook.
embedding_model = 'accounts/fireworks/models/qwen3-embedding-8b'
llm_model = 'accounts/fireworks/models/kimi-k2p5'

# Build the chunk collection once; all three retrievers below are constructed from it.
chunk_fn = retriever.chunk_doc
all_chunks = retriever.chunk_all(chunk_fn)

With Lexical Search

# Lexical pipeline: keyword retriever over all chunks, wired into the RAG generator.
ls = retriever.LexicalSearch(all_chunks)
fw_client = utils.fw()  # presumably the Fireworks API client — confirm in wattbot.utils
rag = generator.RAG(ls, fw_client, model=llm_model)

# Settings recorded alongside this run's results.
experiment_metadata = dict(
    pdf_extraction='pypdf',
    chunking='character_level',
    chunk_size=1500,
    chunk_step=1400,
    retrieval='lexical_search',
)

# Score the pipeline on the training questions; the returned score is shown as the cell output.
evaluate.evaluate_train(rag, experiment_metadata, n_rc=20)

Processing Rows:   2%|███▉                                                                                                                                                            | 1/41 [00:03<02:12,  3.32s/it]Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist
Processing Rows: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [01:46<00:00,  2.60s/it]

33.199999999999996

Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist
Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist
Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist
Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist
Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist

# Write the lexical-search submission file.
# n_rc is passed explicitly so the submission is generated with the same setting
# (n_rc=20) that this configuration was scored with in evaluate_train; omitting it
# would fall back to create_submission's default, which may differ from the
# evaluated pipeline. NOTE(review): n_rc is presumably the number of retrieved
# chunks handed to the generator — confirm in wattbot.evaluate.
experiment_metadata['output_path'] = 'submission_v5.csv'
evaluate.create_submission(rag, experiment_metadata, n_rc=20)

Answering question:   0%|                                                                                                                                                                    | 0/282 [00:00<?, ?it/s]/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:109: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The total energy consumption of US data centers increased by about 4% from 2010-2014' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'answer'] = str(answer['answer'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:110: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'answer_value'] = str(answer['answer_value'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:112: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['wu2021b']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'ref_id'] = str(answer['ref_id'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:113: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['https://arxiv.org/pdf/2108.06738']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'ref_url'] = str(answer['ref_url'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:114: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The total energy consumption of the US data centers increased by about 4% from 2010-2014' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'supporting_materials'] = str(answer['supporting_materials'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:115: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Chunk 9 from the wu2021b document explicitly states the 4% increase in US data center energy consumption between 2010-2014, which directly answers the question about electricity consumption increase.' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'explanation'] = str(answer['explanation'])
Answering question: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [10:00<00:00,  2.13s/it]

With Semantic Search

# Semantic pipeline: embedding-based retriever over the same chunks, same generator LLM.
ss = retriever.SemanticSearch(all_chunks, model=embedding_model)
fw_client = utils.fw()  # presumably the Fireworks API client — confirm in wattbot.utils
rag = generator.RAG(ss, fw_client, model=llm_model)

# Sanity check: shape of the last entry in the retriever's stored chunk embeddings.
rag.r.chunks_embeddings[-1].shape

(1927, 4096)

# Settings recorded alongside the semantic-search run; only `retrieval` differs from the lexical run.
experiment_metadata = dict(
    pdf_extraction='pypdf',
    chunking='character_level',
    chunk_size=1500,
    chunk_step=1400,
    retrieval='semantic_search',
)

# Score the pipeline on the training questions; the returned score is shown as the cell output.
evaluate.evaluate_train(rag, experiment_metadata, n_rc=20)

Processing Rows: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [02:30<00:00,  3.67s/it]

32.574999999999996

# Write the semantic-search submission, using the same n_rc as the evaluation run.
experiment_metadata.update(output_path='submission_v6.csv')
evaluate.create_submission(rag, experiment_metadata, n_rc=20)

Answering question:   0%|                                                                                                                                                                    | 0/282 [00:00<?, ?it/s]/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:109: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The total energy consumption of U.S. data centers increased by about 4% between 2010 and 2014' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'answer'] = str(answer['answer'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:110: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'answer_value'] = str(answer['answer_value'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:112: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['wu2021b']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'ref_id'] = str(answer['ref_id'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:113: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['https://arxiv.org/pdf/2108.06738']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'ref_url'] = str(answer['ref_url'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:114: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The total energy consumption of the US data centers increased by about 4% from 2010-2014' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'supporting_materials'] = str(answer['supporting_materials'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:115: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Chunk 18 from wu2021b explicitly states that U.S. data center energy consumption increased by about 4% between 2010 and 2014, providing the exact percentage increase requested.' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'explanation'] = str(answer['explanation'])
Answering question: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [17:44<00:00,  3.77s/it]

With Hybrid Search

# Hybrid pipeline: a lexical and a semantic retriever combined by HybridSearch.
# Both retrievers are rebuilt here so this section can run without the earlier ones.
ls = retriever.LexicalSearch(all_chunks)
ss = retriever.SemanticSearch(all_chunks, model=embedding_model)
hs = retriever.HybridSearch(ls, ss)
fw_client = utils.fw()  # presumably the Fireworks API client — confirm in wattbot.utils
rag = generator.RAG(hs, fw_client, model=llm_model)

# Settings recorded alongside the hybrid run; only `retrieval` differs from the earlier runs.
experiment_metadata = dict(
    pdf_extraction='pypdf',
    chunking='character_level',
    chunk_size=1500,
    chunk_step=1400,
    retrieval='hybrid_search',
)

# Score on the training questions. Note this run uses n_rc=15, not the 20 used above.
evaluate.evaluate_train(rag, experiment_metadata, n_rc=15)

Processing Rows:   2%|███▉                                                                                                                                                            | 1/41 [00:03<02:37,  3.93s/it]Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist
Processing Rows: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [03:53<00:00,  5.69s/it]

32.349999999999994

Skipping git metadata. This is likely because the repository has not been published to a remote yet. Remote named 'origin' didn't exist
Retrying request after error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Sleeping for 0.5 seconds

# Write the hybrid-search submission, using the same n_rc as the evaluation run.
experiment_metadata.update(output_path='submission_v9.csv')
evaluate.create_submission(rag, experiment_metadata, n_rc=15)

Answering question:   0%|                                                                                                                                                                    | 0/282 [00:00<?, ?it/s]/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:109: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The average increase in U.S. data center electricity consumption between 2010 and 2014 was 4%.' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'answer'] = str(answer['answer'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:110: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'answer_value'] = str(answer['answer_value'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:112: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['wu2021b']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'ref_id'] = str(answer['ref_id'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:113: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '['https://arxiv.org/pdf/2108.06738']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'ref_url'] = str(answer['ref_url'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:114: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The total energy consumption of the US data centers increased by about 4% from 2010-2014, compared with the estimated 24% increase from 2005-10 and nearly 90% increase from 2000-05 [Masanet et al., 2020].' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'supporting_materials'] = str(answer['supporting_materials'])
/Users/anubhavmaity/projects/wattbot/wattbot/evaluate.py:115: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'The provided context explicitly states that U.S. data center electricity consumption increased by about 4% from 2010-2014, citing Masanet et al., 2020. This is the only relevant information found in the retrieved documents addressing the specific time period and geographic scope asked in the question.' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  df.loc[i, 'explanation'] = str(answer['explanation'])
Answering question: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 282/282 [25:47<00:00,  5.49s/it]