DSPy

Use Tensoras.ai as a language model provider in DSPy, the framework for programming (not prompting) language models.

Installation

pip install dspy tensoras

Authentication

export TENSORAS_API_KEY="tns_your_key_here"

Configuration

Register Tensoras as a DSPy language model:

import dspy
 
lm = dspy.Tensoras(
    model="llama-3.3-70b",
    api_key="tns_your_key_here",  # or set TENSORAS_API_KEY
    temperature=0.7,
    max_tokens=512,
)
 
dspy.configure(lm=lm)

Basic Usage

Simple Prediction

import dspy
 
lm = dspy.Tensoras(model="llama-3.3-70b")
dspy.configure(lm=lm)
 
# Define a simple signature
predict = dspy.Predict("question -> answer")
 
result = predict(question="What is retrieval-augmented generation?")
print(result.answer)

Typed Signatures

Define structured input/output signatures as classes:

import dspy
 
class QA(dspy.Signature):
    """Answer the question based on the given context."""
    context: str = dspy.InputField(desc="Relevant context for the question")
    question: str = dspy.InputField(desc="The question to answer")
    answer: str = dspy.OutputField(desc="A concise answer")
 
lm = dspy.Tensoras(model="llama-3.3-70b")
dspy.configure(lm=lm)
 
qa = dspy.Predict(QA)
 
result = qa(
    context="Tensoras provides serverless AI inference with built-in RAG using hybrid search.",
    question="What search method does Tensoras use for RAG?",
)
 
print(result.answer)

Chain of Thought

Use dspy.ChainOfThought for step-by-step reasoning:

import dspy
 
class MathProblem(dspy.Signature):
    """Solve the math problem step by step."""
    problem: str = dspy.InputField()
    answer: float = dspy.OutputField()
 
lm = dspy.Tensoras(model="llama-3.3-70b")
dspy.configure(lm=lm)
 
solver = dspy.ChainOfThought(MathProblem)
 
result = solver(problem="If a train travels 120 miles in 2 hours, what is its speed in km/h? (1 mile = 1.609 km)")
print(f"Answer: {result.answer}")

RAG Module

Build a RAG module that retrieves from a Tensoras Knowledge Base:

import dspy
from tensoras import Tensoras
 
class TensorasRM(dspy.Retrieve):
    """Retrieve from a Tensoras Knowledge Base."""
 
    def __init__(self, knowledge_base_id: str, top_k: int = 5):
        super().__init__(k=top_k)
        self.client = Tensoras()
        self.knowledge_base_id = knowledge_base_id
 
    def forward(self, query: str, k: int | None = None) -> list[str]:
        k = k or self.k
        response = self.client.chat.completions.create(
            model="llama-3.3-70b",
            messages=[{"role": "user", "content": query}],
            knowledge_bases=[self.knowledge_base_id],
        )
        passages = [c.text for c in response.citations[:k]]
        return passages
 
class RAG(dspy.Module):
    def __init__(self, knowledge_base_id: str):
        super().__init__()
        self.retriever = TensorasRM(knowledge_base_id)
        self.qa = dspy.ChainOfThought("context, question -> answer")
 
    def forward(self, question: str):
        passages = self.retriever(question)
        context = "\n\n".join(passages)
        return self.qa(context=context, question=question)
 
lm = dspy.Tensoras(model="llama-3.3-70b")
dspy.configure(lm=lm)
 
rag = RAG(knowledge_base_id="kb_a1b2c3d4")
result = rag(question="How do I configure SSO?")
print(result.answer)

Optimization

DSPy can optimize prompts and few-shot examples. Use Tensoras as the LM during optimization:

import dspy
 
class Summarize(dspy.Signature):
    """Summarize the document in 2-3 sentences."""
    document: str = dspy.InputField()
    summary: str = dspy.OutputField()
 
lm = dspy.Tensoras(model="llama-3.3-70b")
dspy.configure(lm=lm)
 
summarizer = dspy.ChainOfThought(Summarize)
 
# Define a training set
trainset = [
    dspy.Example(
        document="Tensoras provides serverless AI inference...",
        summary="Tensoras offers serverless AI with built-in RAG.",
    ).with_inputs("document"),
    # ... more examples
]
 
# Optimize with BootstrapFewShot. Supply a metric with the signature
# your_metric(example, prediction, trace=None) -> bool | float
optimizer = dspy.BootstrapFewShot(metric=your_metric, max_bootstrapped_demos=3)
optimized_summarizer = optimizer.compile(summarizer, trainset=trainset)
 
result = optimized_summarizer(document="Your document here...")
print(result.summary)

Using Multiple Models

You can use different Tensoras models for different modules:

import dspy
 
# Fast model for retrieval/classification
fast_lm = dspy.Tensoras(model="llama-3.3-70b")
 
# Reasoning model for complex tasks
reasoning_lm = dspy.Tensoras(model="deepseek-r1")
 
dspy.configure(lm=fast_lm)
 
# Override the LM for specific modules
# (`solver` is the ChainOfThought module from the "Chain of Thought" example above)
with dspy.context(lm=reasoning_lm):
    result = solver(problem="Complex math problem...")

Next Steps