# cohere-python-sdk

Cohere Native Python SDK Reference
## Official Resources
- Docs & Cookbooks: https://github.com/cohere-ai/cohere-developer-experience
- API Reference: https://docs.cohere.com/reference/about
## Table of Contents

- Client Setup
- Chat API
- Streaming
- Tool Use / Function Calling
- Multi-step Tool Use (Agents)
- Structured Outputs
- RAG with Documents
- Safety Modes
- Error Handling
## Client Setup

### Basic Setup

```python
import cohere

# Option 1: Auto-read from CO_API_KEY env var
co = cohere.ClientV2()

# Option 2: Explicit API key
co = cohere.ClientV2(api_key="your-api-key")

# Option 3: Custom endpoint (private deployment)
co = cohere.ClientV2(
    api_key="your-api-key",
    base_url="https://your-deployment.com",
)
```
### Async Client

```python
import asyncio

import cohere

async_co = cohere.AsyncClientV2()

async def main():
    response = await async_co.chat(
        model="command-a-03-2025",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.message.content[0].text)

asyncio.run(main())
```
## Chat API

### Basic Chat

```python
response = co.chat(
    model="command-a-03-2025",
    messages=[
        {"role": "user", "content": "What is machine learning?"}
    ]
)
print(response.message.content[0].text)
```
### With System Message

```python
response = co.chat(
    model="command-a-03-2025",
    messages=[
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python hello world"}
    ]
)
```
### Multi-turn Conversation

```python
messages = [
    {"role": "user", "content": "My name is Veer"},
    {"role": "assistant", "content": "Nice to meet you, Veer!"},
    {"role": "user", "content": "What's my name?"}
]
response = co.chat(model="command-a-03-2025", messages=messages)
```
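The history bookkeeping above can be wrapped in a small helper so each turn automatically extends `messages`. A minimal sketch; `ask` is an illustrative name, not an SDK function:

```python
def ask(co, messages, user_text, model="command-a-03-2025"):
    """Append a user turn, call chat, and record the assistant reply in history."""
    messages.append({"role": "user", "content": user_text})
    response = co.chat(model=model, messages=messages)
    reply = response.message.content[0].text
    messages.append({"role": "assistant", "content": reply})
    return reply
```

For example, `ask(co, history, "My name is Veer")` followed by `ask(co, history, "What's my name?")` keeps both turns (and both replies) in `history`.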
### Parameters

```python
response = co.chat(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": "Write a story"}],
    temperature=0.7,         # 0.0-1.0, higher = more creative
    max_tokens=500,          # Max response length
    p=0.9,                   # Top-p sampling
    k=50,                    # Top-k sampling
    seed=42,                 # For reproducibility
    stop_sequences=["END"],  # Stop generation at these
)
```
### Reasoning Model (Command A Reasoning)

The `command-a-reasoning-2025` model includes extended thinking capabilities with controllable token budgets.

#### Basic Usage

```python
response = co.chat(
    model="command-a-reasoning-2025",
    messages=[{"role": "user", "content": "Solve this step by step: What is 15% of 340?"}],
    thinking={
        "type": "enabled",
        "budget_tokens": 5000  # Max tokens for internal reasoning
    }
)
print(response.message.content[0].text)
```
#### Disable Reasoning (Lower Latency)

```python
response = co.chat(
    model="command-a-reasoning-2025",
    messages=[{"role": "user", "content": "Quick question: capital of France?"}],
    thinking={"type": "disabled"}  # Skip reasoning for simple queries
)
```
## Streaming

### Basic Streaming

```python
response = co.chat_stream(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": "Write a poem about AI"}]
)

for event in response:
    if event.type == "content-delta":
        print(event.delta.message.content.text, end="", flush=True)
```
### Streaming Event Types

```python
for event in co.chat_stream(model="command-a-03-2025", messages=messages):
    match event.type:
        case "message-start":
            print("Generation started")
        case "content-delta":
            print(event.delta.message.content.text, end="")
        case "message-end":
            print("Generation complete")
        case "tool-plan-delta":
            print(f"Tool plan: {event.delta.message.tool_plan}")
        case "tool-call-start":
            print(f"Tool call started: {event.delta.message.tool_calls}")
```
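When only the final text matters, the `content-delta` events can be accumulated into a single string. A minimal sketch (`collect_stream` is a hypothetical helper, not part of the SDK):

```python
def collect_stream(events):
    """Join the text of all content-delta events into the full response."""
    chunks = []
    for event in events:
        if event.type == "content-delta":
            chunks.append(event.delta.message.content.text)
    return "".join(chunks)
```

Usage: `text = collect_stream(co.chat_stream(model="command-a-03-2025", messages=messages))`.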
## Tool Use / Function Calling

### Step 1: Define Tools

```python
def get_weather(location: str) -> dict:
    # Stub implementation; replace with a real weather lookup
    return {"temperature": "20°C", "condition": "sunny"}

functions_map = {"get_weather": get_weather}

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name, e.g., 'Toronto'"
                    }
                },
                "required": ["location"]
            }
        }
    }
]
```
### Step 2: Generate and Execute Tool Calls

```python
import json

messages = [{"role": "user", "content": "What's the weather in Toronto?"}]
response = co.chat(model="command-a-03-2025", messages=messages, tools=tools)

if response.message.tool_calls:
    messages.append({
        "role": "assistant",
        "tool_plan": response.message.tool_plan,
        "tool_calls": response.message.tool_calls
    })
    for tc in response.message.tool_calls:
        args = json.loads(tc.function.arguments)
        result = functions_map[tc.function.name](**args)
        messages.append({
            "role": "tool",
            "tool_call_id": tc.id,
            "content": [{"type": "document", "document": {"data": json.dumps(result)}}]
        })

final_response = co.chat(model="command-a-03-2025", messages=messages, tools=tools)
print(final_response.message.content[0].text)
```
### Controlling Tool Behavior

```python
response = co.chat(
    model="command-a-03-2025",
    messages=messages,
    tools=tools,
    tool_choice="REQUIRED"  # Force a tool call; "NONE" prevents tool calls; omit to let the model decide
)
```
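## Multi-step Tool Use (Agents)

For agent-style workflows, the single round trip above generalizes into a loop: call the model, execute any tools it requests, append the results, and repeat until it answers in plain text. A minimal sketch, assuming `functions_map` is defined as in Step 1; `run_tool_loop` and `chat_fn` are illustrative names, not SDK APIs:

```python
import json

def run_tool_loop(chat_fn, messages, functions_map, max_steps=5):
    """Run the model/tool cycle until the model stops requesting tools.

    chat_fn(messages) -> response, e.g.
    lambda msgs: co.chat(model="command-a-03-2025", messages=msgs, tools=tools)
    """
    response = chat_fn(messages)
    for _ in range(max_steps):
        if not response.message.tool_calls:
            return response  # Plain-text answer: we're done
        # Record the model's tool plan and calls in the history
        messages.append({
            "role": "assistant",
            "tool_plan": response.message.tool_plan,
            "tool_calls": response.message.tool_calls,
        })
        # Execute each requested tool and append its result
        for tc in response.message.tool_calls:
            args = json.loads(tc.function.arguments)
            result = functions_map[tc.function.name](**args)
            messages.append({
                "role": "tool",
                "tool_call_id": tc.id,
                "content": [{"type": "document",
                             "document": {"data": json.dumps(result)}}],
            })
        response = chat_fn(messages)
    return response
```

The `max_steps` cap is a common safeguard so a misbehaving loop cannot call tools indefinitely.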
## Structured Outputs

### JSON Mode

```python
response = co.chat(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": "List 3 fruits as JSON"}],
    response_format={"type": "json_object"}
)
```
### JSON Schema

```python
response = co.chat(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": "Extract person info from: John is 30"}],
    response_format={
        "type": "json_object",
        "json_schema": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"}
            },
            "required": ["name", "age"]
        }
    }
)
```
### Strict Tool Parameters

```python
response = co.chat(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": "..."}],
    tools=tools,
    strict_tools=True  # Eliminates tool name/param hallucinations
)
```
## RAG with Documents

```python
documents = [
    {"id": "doc1", "data": {"title": "Report", "text": "Q3 revenue was $10M"}},
    {"id": "doc2", "data": {"title": "Summary", "text": "Growth rate: 15%"}}
]

response = co.chat(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": "What was Q3 revenue?"}],
    documents=documents
)

for citation in response.message.citations or []:
    print(f"'{citation.text}' cited from {citation.sources}")
```
## Safety Modes

```python
response = co.chat(
    model="command-a-03-2025",
    messages=[{"role": "user", "content": "..."}],
    safety_mode="CONTEXTUAL"  # Default; or "STRICT" or "OFF"
)
```
## Error Handling

```python
from cohere.core import ApiError

try:
    response = co.chat(model="command-a-03-2025", messages=messages)
except ApiError as e:
    print(f"API Error: {e.status_code} - {e.body}")
except Exception as e:
    print(f"Error: {e}")
```
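Transient failures such as rate limits or timeouts are commonly handled with exponential-backoff retries. A minimal sketch; `chat_with_retry` is a hypothetical helper, not an SDK feature, and in production you would catch `ApiError` and retry only on retryable status codes (e.g. 429 or 5xx):

```python
import time

def chat_with_retry(call, max_retries=3, base_delay=1.0):
    """Retry a zero-argument callable with exponential backoff.

    call: e.g. lambda: co.chat(model="command-a-03-2025", messages=messages)
    """
    for attempt in range(max_retries):
        try:
            return call()
        except Exception:
            # Last attempt: re-raise instead of sleeping again
            if attempt == max_retries - 1:
                raise
            time.sleep(base_delay * (2 ** attempt))
```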