Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*.pyc
__pycache__
vectore_stores
sajith_vectorstore
32 changes: 32 additions & 0 deletions RAG/agents/answer_grader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_mistralai import ChatMistralAI
from dotenv import load_dotenv
import os

# SECURITY: never commit API keys to source control. The key is now read from
# the environment (a .env file is supported via python-dotenv), matching the
# pattern used by contextualize.py and extractor.py. The previously committed
# key must be revoked/rotated — it is exposed in the repository history.
load_dotenv()
mistral_api_key = os.getenv("MISTRAL_API_KEY")
if not mistral_api_key:
    raise ValueError("MISTRAL_API_KEY environment variable not set")


# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    # 'yes' when the generation resolves the user's question, otherwise 'no'
    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# LLM constrained to emit a GradeAnswer via structured (function-call) output
llm = ChatMistralAI(model="mistral-large-latest", api_key=mistral_api_key)
structured_llm_grader = llm.with_structured_output(GradeAnswer)

# Prompt
system = """You are a grader assessing whether an answer addresses / resolves a question \n
Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# Runnable: {question, generation} -> GradeAnswer
answer_grader = answer_prompt | structured_llm_grader
43 changes: 43 additions & 0 deletions RAG/agents/contextualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel, Field
from langchain_mistralai import ChatMistralAI
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv
import os


class ContextualizeQuestion(BaseModel):
    """Contextualize the question."""

    # The latest user turn rewritten to stand alone without the chat history
    contextualized_question: str = Field(
        ...,
        description="The contextualized question.",
    )


# System prompt: rewrite the latest user turn into a self-contained question
# (resolve pronouns/references, pull in needed context) without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question, "
    "which might reference context in the chat history, "
    "formulate a standalone question that can be understood "
    "without the chat history. Specifically:"
    "\n1. Replace pronouns (e.g., 'he', 'she', 'it', 'they') with their specific referents."
    "\n2. Expand references like 'that', 'this', 'those' to what they specifically refer to."
    "\n3. Include any relevant context from previous messages that's necessary to understand the question."
    "\n4. Ensure the reformulated question is clear, specific, and self-contained."
    "\nDo NOT answer the question, just reformulate it to be self-explanatory."
)

load_dotenv()
mistral_api_key = os.getenv("MISTRAL_API_KEY")
if not mistral_api_key:
    raise ValueError("MISTRAL_API_KEY environment variable not set")
# Use the already-validated key instead of re-reading the environment,
# so the validation above cannot be bypassed by a second lookup.
llm = ChatMistralAI(model="mistral-large-latest", api_key=mistral_api_key)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
structured_llm_router = llm.with_structured_output(ContextualizeQuestion)

# Runnable: {chat_history, input} -> ContextualizeQuestion
contextualizer = contextualize_q_prompt | structured_llm_router
66 changes: 66 additions & 0 deletions RAG/agents/extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
### Router
#
# Builds `question_extractor`, a runnable that takes a user question and emits
# an ExtractQuery containing one sub-query per datasource: three candidate
# manifesto vectorstores (Namal, Ranil, Sajith) plus a web search.

from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel , Field
from langchain_mistralai import ChatMistralAI
from dotenv import load_dotenv
import os
# Data model
class ExtractQuery(BaseModel):
    """Route a user query to the relevant datasources with subquestions."""

    # Sub-query aimed at Namal Rajapaksa's manifesto vectorstore
    namal_vector_search_query: str = Field(
        ...,
        description="The query to search the vector store of namal.",
    )
    # Sub-query aimed at Ranil Wickramasinghe's manifesto vectorstore
    ranil_vector_search_query: str = Field(
        ...,
        description="The query to search the vector store of ranil.",
    )
    # Sub-query aimed at Sajith Premadasa's manifesto vectorstore
    sajith_vector_search_query: str = Field(
        ...,
        description="The query to search the vector store of sajith.",
    )
    # Sub-query aimed at a general web search
    web_search_query: str = Field(
        ...,
        description="The query to search the web.",
    )

load_dotenv()
mistral_api_key = os.getenv("MISTRAL_API_KEY")

# Fail fast at import time if the key is missing, rather than at first call
if not mistral_api_key:
    raise ValueError("MISTRAL_API_KEY environment variable not set")

# Initialize the ChatMistralAI client with the API key
llm = ChatMistralAI(model="mistral-large-latest", api_key=mistral_api_key)
structured_llm_router = llm.with_structured_output(ExtractQuery)

# Prompt
system = """You are an expert at routing a user question to a vectorstore or web search.
There are three vectorstores. One contains documents related to Manifests of political candidate Sajith Premadasa.
Another contains documents related to Manifests of political candidate Namal Rajapaksa.
The third contains documents related to Manifests of political candidate Ranil Wickramasinghe.

for an example, what this candidate do for education sector, health sector etc is on the vectorstore.
And also their plans for the future of the country is on the vectorstore.

If the question involves something about a candidate's policies in a past year, then you will have to do a websearch.
And also if you feel like a web search will be usefull. Do a web search.

After deciding,
Output the 'namal_vector_search_query': The query that needs to be searched from the vector store of namal.
And the 'ranil_vector_search_query': The query that needs to be searched from the vector store of ranil.
And the 'sajith_vector_search_query': The query that needs to be searched from the vector store of sajith.
And the 'web_search_query': The query that needs to be searched from the web.
"""
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Runnable: {question} -> ExtractQuery (one sub-query per datasource)
question_extractor = route_prompt | structured_llm_router
43 changes: 43 additions & 0 deletions RAG/agents/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Builds the answer-generation prompt and LLM for the RAG pipeline. The prompt
# interleaves web results and per-candidate manifesto context with the question.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel , Field
from langchain_mistralai import ChatMistralAI
import os
template = """You are a very vigilant and helpful journalist. Use the following pieces of
context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Be concise and helpful.

________________________________________________________________________________
Here are the web results regarding the question:
{web_context}

Here are the results from the manifesto of the candidates:
________________________________________________________________________________
namel rajapakse:
{namal_context}

________________________________________________________________________________
ranil wickramasinghe:
{ranil_context}

________________________________________________________________________________
sajith premadasa:
{sajith_context}

________________________________________________________________________________
Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

# LLM
# NOTE(review): api_key is None when MISTRAL_API_KEY is unset — other modules
# in this package validate the key at import time; consider doing so here too.
llm = ChatMistralAI(model="mistral-large-latest", api_key=os.getenv("MISTRAL_API_KEY"))

# Post-processing
def format_docs(docs):
    """Concatenate the page_content of each document, blank-line separated."""
    texts = [d.page_content for d in docs]
    return "\n\n".join(texts)


# Full RAG chain: fill the prompt with context -> Mistral LLM -> plain string
rag_chain = custom_rag_prompt | llm | StrOutputParser()
29 changes: 29 additions & 0 deletions RAG/agents/grader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Builds `retrieval_grader`, a lenient yes/no relevance grader used to filter
# retrieved documents before answer generation.
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel , Field
from langchain_mistralai import ChatMistralAI
import os

class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # 'yes' when the document is relevant to the question, otherwise 'no'
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# NOTE(review): api_key is None when MISTRAL_API_KEY is unset — other modules
# in this package validate the key at import time; consider doing so here too.
llm = ChatMistralAI(model="mistral-large-latest", api_key=os.getenv("MISTRAL_API_KEY"))
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Runnable: {document, question} -> GradeDocuments
retrieval_grader = grade_prompt | structured_llm_grader
28 changes: 28 additions & 0 deletions RAG/agents/hallucinate_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Builds `hallucination_grader`, a yes/no grader that checks whether a
# generation is grounded in the retrieved facts.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_mistralai import ChatMistralAI
import os  # moved to the top-of-file import block (PEP 8)


class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    # 'yes' when the generation is supported by the supplied facts, else 'no'
    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# LLM with function call
# NOTE(review): api_key is None when MISTRAL_API_KEY is unset — other modules
# in this package validate the key at import time; consider doing so here too.
llm = ChatMistralAI(model="mistral-large-latest", api_key=os.getenv("MISTRAL_API_KEY"))
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

# Runnable: {documents, generation} -> GradeHallucinations
hallucination_grader = hallucination_prompt | structured_llm_grader
22 changes: 22 additions & 0 deletions RAG/agents/question_rewriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Builds `question_rewriter`, which rewrites a question to be better suited
# for vectorstore retrieval (used after a failed retrieval round).
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel , Field
from langchain_mistralai import ChatMistralAI
import os
# NOTE(review): api_key is None when MISTRAL_API_KEY is unset — other modules
# in this package validate the key at import time; consider doing so here too.
llm = ChatMistralAI(model="mistral-large-latest", api_key=os.getenv("MISTRAL_API_KEY"))

# Prompt
system = """You a question re-writer that converts an input question to a better version that is optimized \n
for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

# Runnable: {question} -> improved question as a plain string
question_rewriter = re_write_prompt | llm | StrOutputParser()
26 changes: 26 additions & 0 deletions RAG/edges/decide_to_generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
def decide_to_generate(state):
    """
    Determines whether to generate an answer, or re-generate a question.

    Args:
        state (dict): The current graph state. Reads state["documents"],
            the list of documents that survived relevance grading.

    Returns:
        str: Binary decision for next node to call — "transform_query" when
        no relevant documents remain, otherwise "generate".
    """

    print("---ASSESS GRADED DOCUMENTS---")
    # (The original also evaluated state["question"] as a bare expression —
    # a no-op leftover, removed.)
    filtered_documents = state["documents"]

    if not filtered_documents:
        # All documents were filtered out by the relevance check:
        # re-phrase the question and try retrieval again.
        print(
            "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
        )
        return "transform_query"
    else:
        # At least one relevant document remains: proceed to generation.
        print("---DECISION: GENERATE---")
        return "generate"
44 changes: 44 additions & 0 deletions RAG/edges/generation_grader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from RAG.agents.answer_grader import answer_grader
from RAG.agents.hallucinate_checker import hallucination_grader

def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state. Reads "question", "generation",
            the four per-source document lists, and optionally
            "generated_count" (number of generation attempts so far).

    Returns:
        str: Decision for next node to call — "useful", "not useful", or
        "not supported".
    """

    print("---CHECK HALLUCINATIONS---")

    # Give up re-trying after more than one failed regeneration. This is
    # checked FIRST so we do not spend a grader LLM call whose verdict would
    # be discarded anyway (previously the hallucination grader was invoked
    # and its score ignored on this path).
    if state.get("generated_count", 0) > 1:
        print("---DECISION: ALREADY TRIED AND FAILED. CONTINUING ANYWAYS---")
        return "useful"

    question = state["question"]
    # Pool every retrieved document, regardless of source, as the fact set.
    documents = (
        state["namal_vector_search_documents"]
        + state["ranil_vector_search_documents"]
        + state["sajith_vector_search_documents"]
        + state["web_search_documents"]
    )
    generation = state["generation"]

    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )
    grade = score.binary_score

    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question, "generation": generation})
        grade = score.binary_score
        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"
Loading