agentic-rag
SKILL.md
Agentic RAG
Build RAG systems that reason, plan, and adaptively retrieve information.
When to Use
- Questions require multiple retrieval steps
- Need to combine information from different sources
- Query needs decomposition into sub-queries
- Results need validation or refinement
- Complex reasoning over retrieved documents
Simple RAG vs Agentic RAG
Simple RAG:
Query → Retrieve → Generate → Answer
Agentic RAG:
Query → Plan → [Retrieve → Analyze → Decide]*n → Synthesize → Answer
Core Architecture
┌─────────────────────────────────────────────────────────┐
│ User Question │
└─────────────────────────┬───────────────────────────────┘
│
▼
┌───────────────────┐
│ Query Analyzer │
│ (Decompose?) │
└─────────┬─────────┘
│
┌────────────────┼────────────────┐
│ │ │
▼ ▼ ▼
┌──────────┐ ┌──────────┐ ┌──────────┐
│ Sub-Q 1 │ │ Sub-Q 2 │ │ Sub-Q 3 │
└────┬─────┘ └────┬─────┘ └────┬─────┘
│ │ │
▼ ▼ ▼
┌──────────┐ ┌──────────┐ ┌──────────┐
│ Retrieve │ │ Retrieve │ │ Retrieve │
└────┬─────┘ └────┬─────┘ └────┬─────┘
│ │ │
└───────────────┼───────────────┘
│
▼
┌───────────────────┐
│ Synthesizer │
│ (Combine & Cite) │
└─────────┬─────────┘
│
▼
┌───────────────────┐
│ Final Answer │
└───────────────────┘
Implementation with LangGraph
from langgraph.graph import StateGraph, END
from langchain_openai import ChatOpenAI
from typing import TypedDict, List, Annotated
import operator
class AgentState(TypedDict):
    """Shared state threaded through every node of the RAG agent graph."""
    # The user's original question, unchanged for the whole run.
    question: str
    # Sub-questions produced by the analyzer (may be just the original question).
    sub_questions: List[str]
    # Documents gathered so far. The operator.add annotation tells LangGraph to
    # APPEND each node's returned docs to this list instead of overwriting it.
    retrieved_docs: Annotated[List, operator.add]
    # Index of the next sub-question to retrieve for.
    current_step: int
    # Synthesized, cited answer produced by the final node.
    final_answer: str
# Nodes
def analyze_query(state: AgentState) -> AgentState:
    """Ask the LLM to decompose the user question into sub-questions."""
    model = ChatOpenAI(model="gpt-4")
    decomposition_prompt = f"""Analyze this question and break it into sub-questions if needed.
Question: {state['question']}
Return a JSON list of sub-questions, or just the original if simple."""
    reply = model.invoke(decomposition_prompt)
    # Reset the step counter so retrieval starts at the first sub-question.
    return {
        "sub_questions": parse_questions(reply.content),
        "current_step": 0,
    }
def retrieve_for_subquery(state: AgentState) -> AgentState:
    """Fetch documents for the sub-question at the current step index."""
    step = state["current_step"]
    subquery = state["sub_questions"][step]
    # retrieved_docs is an operator.add channel, so returning docs appends
    # them to the accumulated list rather than replacing it.
    return {
        "retrieved_docs": retriever.invoke(subquery),
        "current_step": step + 1,
    }
def should_continue(state: AgentState) -> str:
    """Routing function: keep retrieving while sub-questions remain."""
    answered = state["current_step"]
    total = len(state["sub_questions"])
    return "retrieve" if answered < total else "synthesize"
def synthesize_answer(state: AgentState) -> AgentState:
    """Merge every retrieved document into a single cited answer."""
    model = ChatOpenAI(model="gpt-4")
    # Join all accumulated documents into one context window for the LLM.
    context = "\n\n".join(doc.page_content for doc in state["retrieved_docs"])
    prompt = f"""Based on the following context, answer the question.
Cite sources using [1], [2], etc.
Question: {state['question']}
Context:
{context}
"""
    reply = model.invoke(prompt)
    return {"final_answer": reply.content}
# Build graph: analyze once, then loop retrieve until every sub-question is
# answered, then synthesize.
workflow = StateGraph(AgentState)
workflow.add_node("analyze", analyze_query)
workflow.add_node("retrieve", retrieve_for_subquery)
workflow.add_node("synthesize", synthesize_answer)
workflow.set_entry_point("analyze")
workflow.add_edge("analyze", "retrieve")
# should_continue returns "retrieve" while sub-questions remain, so this edge
# loops retrieve back into itself before falling through to synthesis.
workflow.add_conditional_edges("retrieve", should_continue, {
    "retrieve": "retrieve",
    "synthesize": "synthesize"
})
workflow.add_edge("synthesize", END)
agent = workflow.compile()
# Run
result = agent.invoke({"question": "Compare AWS and GCP pricing for ML workloads"})
Self-RAG: Retrieve When Needed
def _is_retrieve_verdict(verdict: str) -> bool:
    """Parse the model's RETRIEVE / NO_RETRIEVE verdict into a bool.

    The naive check `"RETRIEVE" in text and "NO" not in text` misfires whenever
    the model appends an explanation containing "no"/"not" (e.g. "RETRIEVE -
    this is not in my training data"), silently disabling retrieval. Test for
    the explicit NO_RETRIEVE token instead.
    """
    normalized = verdict.strip().upper()
    if "NO_RETRIEVE" in normalized or "NO RETRIEVE" in normalized:
        return False
    return "RETRIEVE" in normalized

def self_rag_node(state: AgentState) -> AgentState:
    """Decide whether external retrieval is needed for this question.

    Returns a partial state update with `needs_retrieval` set to True when the
    LLM answers RETRIEVE, False on NO_RETRIEVE (or anything unparseable).
    """
    llm = ChatOpenAI(model="gpt-4")
    prompt = f"""Given this question, do you need to retrieve external information?
Question: {state['question']}
Consider:
- Is this factual or requires current data? → RETRIEVE
- Is this reasoning/math/coding? → NO RETRIEVE
- Do you have high confidence? → NO RETRIEVE
Answer: RETRIEVE or NO_RETRIEVE"""
    response = llm.invoke(prompt)
    return {"needs_retrieval": _is_retrieve_verdict(response.content)}
Tool-Using RAG Agent
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.tools import Tool

# Define retrieval tools — one per data source, so the agent can pick which
# corpus to query for each step of its plan.
tools = [
    Tool(
        name="search_docs",
        func=lambda q: retriever.invoke(q),
        description="Search internal documentation"
    ),
    Tool(
        name="search_code",
        func=lambda q: code_retriever.invoke(q),
        description="Search codebase for examples"
    ),
    Tool(
        name="search_tickets",
        func=lambda q: jira_retriever.invoke(q),
        description="Search JIRA tickets and issues"
    ),
    Tool(
        name="calculator",
        # SECURITY: eval() executes arbitrary Python, and the agent feeds it
        # model-generated text. Replace with ast.literal_eval or a dedicated
        # math-expression parser before using this outside a demo.
        func=lambda x: eval(x),
        description="Perform calculations"
    )
]

# Create agent
llm = ChatOpenAI(model="gpt-4")
# NOTE(review): `prompt` is not defined in this snippet — a ChatPromptTemplate
# with an agent_scratchpad placeholder must be supplied by the caller.
agent = create_openai_tools_agent(llm, tools, prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Agent decides which tools to use
result = executor.invoke({
    "input": "What's the average response time mentioned in our API docs, and how does it compare to ticket #1234?"
})
Adaptive Retrieval
def adaptive_retrieve(query: str, min_score: float = 0.7) -> list:
    """Retrieve with a quality check, expanding the query when results are weak.

    Args:
        query: The search query.
        min_score: Minimum acceptable top relevance score before falling back
            to query expansion.

    Returns:
        Up to 5 documents; expanded, deduplicated, and reranked when the
        initial retrieval scored below `min_score` (or returned nothing).
    """
    # Initial retrieval
    results = retriever.invoke(query)
    # max(..., default=0) avoids the ValueError the original max(scores) raised
    # on an empty result set, and treats score-less docs as zero relevance.
    best_score = max(
        (doc.metadata.get("score", 0) for doc in results),
        default=0,
    )
    # Check quality: weak (or empty) hit — try alternative phrasings.
    if best_score < min_score:
        for alt_query in expand_query(query):
            results.extend(retriever.invoke(alt_query))
        # Expansion can return overlapping docs, so dedupe before reranking.
        results = deduplicate(results)
        results = rerank(query, results)
    return results[:5]
def expand_query(query: str) -> list:
    """Ask the LLM for alternative phrasings of the given query."""
    model = ChatOpenAI(model="gpt-4")
    rephrase_request = f"Generate 3 alternative phrasings for: {query}"
    reply = model.invoke(rephrase_request)
    return parse_alternatives(reply.content)
Patterns Summary
| Pattern | When to Use | Complexity |
|---|---|---|
| Query Decomposition | Multi-part questions | Medium |
| Self-RAG | Uncertain if retrieval needed | Low |
| Tool-Using Agent | Multiple data sources | High |
| Adaptive Retrieval | Variable quality needs | Medium |
| Iterative Refinement | Research tasks | High |
Best Practices
- Start simple - add agency only when needed
- Limit iterations - set max steps to prevent loops
- Log decisions - track when/why agent retrieves
- Validate outputs - agent can hallucinate tool usage
- Cost awareness - more steps = more LLM calls
Weekly Installs
18
Repository
latestaiagents/…t-skills
GitHub Stars
2
First Seen
Feb 5, 2026
Security Audits
Installed on
gemini-cli18
opencode17
github-copilot17
codex17
amp16
cline16