cohere-langchain
Cohere LangChain Integration Reference
Official Resources
- Docs & Cookbooks: https://github.com/cohere-ai/cohere-developer-experience
- API Reference: https://docs.cohere.com/reference/about
Model Compatibility: Command A Reasoning and Command A Vision are not supported in LangChain. Use the native Cohere SDK for these models.
Installation
pip install langchain-cohere langchain langchain-core
Import Map (v0.5+)
from langchain_cohere import (
ChatCohere,
CohereEmbeddings,
CohereRerank,
CohereRagRetriever,
create_cohere_react_agent
)
# NOT from langchain_community (deprecated)
ChatCohere
Basic Usage
from langchain_cohere import ChatCohere
from langchain_core.messages import HumanMessage, SystemMessage
llm = ChatCohere(model="command-a-03-2025")
response = llm.invoke([
HumanMessage(content="What is machine learning?")
])
print(response.content)
Streaming
for chunk in llm.stream([HumanMessage(content="Write a poem")]):
print(chunk.content, end="", flush=True)
For Agents (Recommended Settings)
llm = ChatCohere(
model="command-a-03-2025",
temperature=0.3, # Critical for reliable tool calling
max_tokens=4096
)
With Prompt Templates
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
prompt = ChatPromptTemplate.from_messages([
("system", "You are a {role}."),
("human", "{input}")
])
chain = prompt | llm | StrOutputParser()
result = chain.invoke({"role": "helpful assistant", "input": "What is Python?"})
CohereEmbeddings
from langchain_cohere import CohereEmbeddings
embeddings = CohereEmbeddings(model="embed-english-v3.0")
query_vector = embeddings.embed_query("What is AI?")
doc_vectors = embeddings.embed_documents(["First document", "Second document"])
With Vector Store
from langchain_community.vectorstores import FAISS
vectorstore = FAISS.from_texts(texts, embeddings)
results = vectorstore.similarity_search("query", k=5)
CohereRerank
from langchain_cohere import CohereRerank
from langchain_core.documents import Document
reranker = CohereRerank(model="rerank-v3.5", top_n=3)
docs = [
Document(page_content="ML is a subset of AI..."),
Document(page_content="Weather is sunny..."),
]
reranked = reranker.compress_documents(docs, query="What is ML?")
With Contextual Compression Retriever
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
base_retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
reranker = CohereRerank(model="rerank-v3.5", top_n=5)
retriever = ContextualCompressionRetriever(
base_compressor=reranker,
base_retriever=base_retriever
)
results = retriever.invoke("Your query")
Tool Calling
from langchain_core.tools import tool
@tool
def get_weather(location: str) -> str:
"""Get weather for a location."""
return f"Weather in {location}: 20°C, sunny"
llm = ChatCohere(model="command-a-03-2025")
llm_with_tools = llm.bind_tools([get_weather])
response = llm_with_tools.invoke("What's the weather in Toronto?")
if response.tool_calls:
for tc in response.tool_calls:
print(f"Tool: {tc['name']}, Args: {tc['args']}")
Structured Output
from pydantic import BaseModel, Field
class Person(BaseModel):
name: str = Field(description="Person's name")
age: int = Field(description="Person's age")
llm = ChatCohere(model="command-a-03-2025")
structured_llm = llm.with_structured_output(Person)
result = structured_llm.invoke("John is 30 years old")
print(result) # Person(name='John', age=30)
Full RAG Chain Example
from langchain_cohere import ChatCohere, CohereEmbeddings, CohereRerank
from langchain_community.vectorstores import FAISS
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# Setup
embeddings = CohereEmbeddings(model="embed-english-v3.0")
vectorstore = FAISS.from_texts(your_texts, embeddings)
base_retriever = vectorstore.as_retriever(search_kwargs={"k": 20})
reranker = CohereRerank(model="rerank-v3.5", top_n=5)
retriever = ContextualCompressionRetriever(
base_compressor=reranker,
base_retriever=base_retriever
)
llm = ChatCohere(model="command-a-03-2025")
prompt = ChatPromptTemplate.from_template("""
Answer based on context:
Context: {context}
Question: {question}
""")
def format_docs(docs):
return "\n\n".join(d.page_content for d in docs)
chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)
answer = chain.invoke("Your question here")
More from rshvr/unofficial-cohere-best-practices
unofficial-cohere-best-practices
Unofficial best practices guide for Cohere's AI APIs. Use when working with Cohere models for chat/text generation (Command A, Command R+, Command R), embeddings (Embed v4, v3), reranking (Rerank v4, v3.5), streaming, structured outputs, RAG, tool use/function calling, or agents. Supports Python, TypeScript, Java, and Go SDKs, plus LangChain/LangGraph integrations. Triggers on mentions of Cohere API, Command models, CohereEmbeddings, ChatCohere, CohereRerank, cohere-ai, or any Cohere-related development task.
cohere-typescript-sdk
Cohere TypeScript/JavaScript SDK reference for chat, streaming, embeddings, reranking, and tool use. Use when building Node.js or browser applications with Cohere APIs.
cohere-best-practices
Production best practices for Cohere AI APIs. Covers model selection, API configuration, error handling, cost optimization, and architectural patterns for chat, RAG, and agentic applications.
cohere-python-sdk
Cohere Python SDK reference for chat, streaming, tool use, structured outputs, and RAG. Use when building Python applications with Cohere's Command models, embeddings, or reranking APIs.