Examples
End-to-end snippets that show how Fastpaca fits into common workflows.
Streaming chat route (Next.js + ai-sdk)
app/api/chat/route.ts
import { createClient } from 'fastpaca';
import { streamText } from 'ai';
import { openai } from '@ai-sdk/openai';

export async function POST(req: Request) {
  const { contextId, message } = await req.json();

  const fastpaca = createClient({ baseUrl: process.env.FASTPACA_URL || 'http://localhost:4000/v1' });
  const ctx = await fastpaca.context(contextId, { budget: 1_000_000 });

  // Record the user's turn before generating a reply.
  await ctx.append({
    role: 'user',
    parts: [{ type: 'text', text: message }]
  });

  // Hand the current (budget-aware) history to the model and stream the
  // result back as the HTTP response.
  return ctx.stream((messages) =>
    streamText({ model: openai('gpt-4o-mini'), messages })
  );
}
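If you are not using a client helper such as ai-sdk's useChat hook, the route can be consumed with plain fetch. A minimal sketch of the browser side; the contextId value is illustrative, and the exact chunk framing depends on what ctx.stream emits:

// Call the streaming route and log chunks as they arrive.
const res = await fetch('/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ contextId: 'chat_123', message: 'Hello!' })
});

const reader = res.body!.getReader();
const decoder = new TextDecoder();
for (;;) {
  const { done, value } = await reader.read();
  if (done) break;
  console.log(decoder.decode(value, { stream: true }));
}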
Non-streaming response (Anthropic)
import { createClient } from 'fastpaca';
import { generateText } from 'ai';
import { anthropic } from '@ai-sdk/anthropic';

const fastpaca = createClient({ baseUrl: process.env.FASTPACA_URL || 'http://localhost:4000/v1' });
const ctx = await fastpaca.context('chat_non_stream', { budget: 1_000_000 });

await ctx.append({
  role: 'user',
  parts: [{ type: 'text', text: 'Summarise the release notes.' }]
});

// Pull the current history and generate a one-shot reply.
const { messages } = await ctx.context();
const { text } = await generateText({
  model: anthropic('claude-3-opus-20240229'),
  messages
});

// Persist the assistant's reply so the next turn sees it.
await ctx.append({
  role: 'assistant',
  parts: [{ type: 'text', text }]
});
Manual compaction with LLM-generated summary
const { needsCompaction, messages } = await ctx.context();

if (needsCompaction) {
  // summarise() is your own helper; see the sketch below.
  const { summary, remainingMessages } = await summarise(messages);

  // Replace the compacted history with a system summary plus the messages worth keeping.
  await ctx.compact([
    { role: 'system', parts: [{ type: 'text', text: summary }] },
    ...remainingMessages
  ]);
}
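summarise is your own code, not part of the Fastpaca SDK. A minimal sketch, assuming you keep the most recent turns verbatim and have a cheap model compress the rest; the split point and prompt are illustrative:

import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';

// Hypothetical helper: summarise everything except the last KEEP messages.
async function summarise(messages: unknown[]) {
  const KEEP = 10; // recent messages to keep verbatim; tune for your app
  if (messages.length <= KEEP) {
    return { summary: '', remainingMessages: messages };
  }

  const { text: summary } = await generateText({
    model: openai('gpt-4o-mini'),
    system: 'Summarise this conversation so an assistant can pick it up later. Keep names, decisions, and open questions.',
    prompt: JSON.stringify(messages.slice(0, -KEEP))
  });

  return { summary, remainingMessages: messages.slice(-KEEP) };
}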
Switching providers mid-context
import { createClient } from 'fastpaca';
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';
import { anthropic } from '@ai-sdk/anthropic';

const fastpaca = createClient({ baseUrl: process.env.FASTPACA_URL || 'http://localhost:4000/v1' });
const ctx = await fastpaca.context('mixed-sources', { budget: 1_000_000 });

await ctx.append({
  role: 'user',
  parts: [{ type: 'text', text: 'Explain vector clocks.' }]
});

// First reply from OpenAI. generateText is enough here, since only the
// final text gets appended.
const first = await ctx.context();
const { text: openaiText } = await generateText({
  model: openai('gpt-4o'),
  messages: first.messages
});
await ctx.append({
  role: 'assistant',
  parts: [{ type: 'text', text: openaiText }]
});

await ctx.append({
  role: 'user',
  parts: [{ type: 'text', text: 'Now explain like I’m five.' }]
});

// Second reply from Anthropic; the shared context carries over as-is.
const second = await ctx.context();
const { text: anthropicText } = await generateText({
  model: anthropic('claude-3-haiku-20240307'),
  messages: second.messages
});
await ctx.append({
  role: 'assistant',
  parts: [{ type: 'text', text: anthropicText }]
});
Raw REST calls with curl
# Append tool-call output
curl -X POST http://localhost:4000/v1/contexts/support/messages \
  -H "Content-Type: application/json" \
  -d '{
    "message": {
      "role": "assistant",
      "parts": [
        { "type": "text", "text": "Fetching the latest logs..." },
        { "type": "tool_call", "name": "fetch-logs", "payload": { "tail": 200 } }
      ]
    }
  }'
# Get the last 50 messages
curl "http://localhost:4000/v1/contexts/support/tail?limit=50"
These examples mirror the SDK helpers, the REST API, and the WebSocket stream described elsewhere in the docs. Mix and match them to suit how your application is structured.