AI Embeddings
intermediate
Vector embeddings for semantic search and RAG applications using Upstash Vector.
ai, embeddings, vector, rag, semantic-search, upstash
Tested on ⬢ 20 · ▲ 16 · ⚛ 19 · TS 5.9
$ bunx sinew add ai/ai-embeddings
Interactive demo coming soon
1. The Problem
Building search and RAG (Retrieval-Augmented Generation) requires:
- Converting text to vector embeddings
- Storing and querying vectors efficiently
- Handling long documents with chunking
- Semantic similarity search
2. The Solution
Use OpenAI's text-embedding-3-small model with Upstash Vector for serverless vector storage. Includes chunking utilities for long documents.
3. Files
lib/ai/embeddings.ts
lib/ai/embeddings.ts (TypeScript)
import { embed, embedMany } from "ai";
import { openai } from "@ai-sdk/openai";
// Embedding model handle passed to both `embed` and `embedMany` in this module.
const embeddingModel = openai.embedding("text-embedding-3-small");
/**
 * Embed a single piece of text with the module's embedding model.
 *
 * @param text - The text to embed.
 * @returns The embedding vector that `embed` produced for `text`.
 */
export async function generateEmbedding(text: string): Promise<number[]> {
  const result = await embed({ value: text, model: embeddingModel });
  return result.embedding;
}
/**
 * Embed several texts in one `embedMany` call with the module's embedding model.
 *
 * @param texts - The texts to embed.
 * @returns The embedding vectors that `embedMany` produced for `texts`.
 */
export async function generateEmbeddings(texts: string[]): Promise<number[][]> {
  const result = await embedMany({ values: texts, model: embeddingModel });
  return result.embeddings;
}
/**
 * Split text into chunks that are small enough to embed.
 *
 * Paragraphs (separated by blank lines) are packed into chunks and joined with
 * "\n\n". A paragraph longer than `maxChunkSize` is broken into sentences,
 * which are joined with a single space; a sentence that is still too long is
 * cut at the last whitespace that fits, or mid-word when it has none. When a
 * chunk is full, the next chunk starts with the tail of the previous one so
 * neighbouring chunks share some context.
 *
 * Sizes are measured in characters (UTF-16 code units), not tokens.
 *
 * @param text - The document text to split.
 * @param options - Chunking options.
 *   `maxChunkSize` is the upper bound on the length of every returned chunk
 *   (default 1000; fractional values are rounded down).
 *   `overlap` is the maximum number of trailing characters of a chunk that are
 *   repeated at the start of the next chunk (default 200). The overlap is
 *   shortened, possibly to nothing, when the full amount would push the next
 *   chunk past `maxChunkSize`. Zero or a negative value disables overlap.
 * @returns The chunks in document order; an empty array when `text` has no
 *   non-whitespace content.
 * @throws RangeError if `maxChunkSize` is NaN or less than 1.
 */
export function chunkText(
  text: string,
  options: { maxChunkSize?: number; overlap?: number } = {}
): string[] {
  const { maxChunkSize = 1000, overlap = 200 } = options;
  if (Number.isNaN(maxChunkSize) || maxChunkSize < 1) {
    throw new RangeError("maxChunkSize must be a number greater than or equal to 1");
  }
  const limit = Math.floor(maxChunkSize);
  const maxOverlap = overlap > 0 ? Math.floor(overlap) : 0;

  // Hoisted so the loops below do not rebuild them on every iteration.
  const paragraphBreak = /(?:\r?\n){2,}/;
  const firstWhitespace = /\s/;
  const lastWhitespace = /\s\S*$/;

  const chunks: string[] = [];
  let currentChunk = "";

  // Tail of `chunk` to repeat at the start of the next chunk, at most `room`
  // characters long and snapped forward to the next word start when possible.
  const overlapTail = (chunk: string, room: number): string => {
    const size = Math.min(maxOverlap, room);
    if (size <= 0) return "";
    const tail = chunk.slice(-size);
    const firstBreak = tail.search(firstWhitespace);
    return firstBreak === -1 ? tail : tail.slice(firstBreak + 1).trimStart();
  };

  // Cut `unit` into pieces of at most `limit` characters, preferring to break
  // at whitespace so words stay intact.
  const splitToFit = (unit: string): string[] => {
    const pieces: string[] = [];
    let rest = unit;
    while (rest.length > limit) {
      let cut = rest.slice(0, limit + 1).search(lastWhitespace);
      if (cut <= 0) {
        cut = limit;
        // Avoid separating a surrogate pair when forced to cut mid-word.
        const last = rest.charCodeAt(cut - 1);
        if (cut > 1 && last >= 0xd800 && last <= 0xdbff) cut -= 1;
      }
      pieces.push(rest.slice(0, cut).trimEnd());
      rest = rest.slice(cut).trimStart();
    }
    if (rest) pieces.push(rest);
    return pieces;
  };

  // Add `unit` (never longer than `limit`) to the current chunk, starting a
  // new chunk when it would not fit.
  const append = (unit: string, separator: string): void => {
    const candidate = currentChunk ? currentChunk + separator + unit : unit;
    if (candidate.length <= limit) {
      currentChunk = candidate;
      return;
    }
    chunks.push(currentChunk);
    // Only carry over as much overlap as still leaves room for the unit.
    const seed = overlapTail(currentChunk, limit - unit.length - separator.length);
    currentChunk = seed ? seed + separator + unit : unit;
  };

  for (const paragraph of text.split(paragraphBreak)) {
    if (paragraph.trim() === "") continue;
    if (paragraph.length <= limit) {
      append(paragraph, "\n\n");
      continue;
    }
    // Each match is a sentence with its terminators; a final run of text
    // without terminal punctuation is matched too, so nothing is dropped.
    const sentences = paragraph.match(/[.!?]*[^.!?]+[.!?]*/g) ?? [paragraph];
    // The first piece of a paragraph keeps the paragraph break.
    let separator = "\n\n";
    for (const sentence of sentences) {
      for (const piece of splitToFit(sentence.trim())) {
        append(piece, separator);
        separator = " ";
      }
    }
  }

  if (currentChunk) chunks.push(currentChunk);
  return chunks;
}
// lib/ai/vector-store.ts
lib/ai/vector-store.ts (TypeScript)
import { Index } from "@upstash/vector";
import { generateEmbedding, generateEmbeddings } from "./embeddings";
// Upstash Vector client used by upsertDocument and search, configured from
// the UPSTASH_VECTOR_REST_URL and UPSTASH_VECTOR_REST_TOKEN environment variables.
// NOTE(review): the `!` assertions only satisfy the type checker; if a variable
// is unset, `undefined` is passed to the client and nothing is reported here.
const index = new Index({
url: process.env.UPSTASH_VECTOR_REST_URL!,
token: process.env.UPSTASH_VECTOR_REST_TOKEN!,
});
/** A piece of text to be embedded and stored in the vector index. */
export interface VectorDocument {
/** Identifier the vector is stored under. */
id: string;
/** Text that is embedded; it is also saved with the vector as metadata. */
content: string;
/** Optional extra fields stored alongside the vector. */
metadata?: Record<string, unknown>;
}
/**
 * Embed a document's content and store it in the vector index under `doc.id`.
 *
 * The text itself is saved in the `content` metadata field, which is the field
 * `search` reads back when returning results.
 *
 * @param doc - The document to embed and store.
 */
export async function upsertDocument(doc: VectorDocument): Promise<void> {
  const embedding = await generateEmbedding(doc.content);
  await index.upsert({
    id: doc.id,
    vector: embedding,
    // Caller metadata is spread first so a stray `content` key in it cannot
    // overwrite the stored text.
    metadata: { ...doc.metadata, content: doc.content },
  });
}
/**
 * Find the stored documents whose embeddings are closest to the query's.
 *
 * @param query - Natural-language search text; it is embedded before querying.
 * @param options - `topK` is the number of results requested from the index
 *   (default 5).
 * @returns One entry per match with its id, similarity score, and the text
 *   saved in the `content` metadata field (an empty string when that field is
 *   missing or not a string).
 */
export async function search(
  query: string,
  options: { topK?: number } = {}
): Promise<{ id: string; score: number; content: string }[]> {
  const { topK = 5 } = options;
  const queryEmbedding = await generateEmbedding(query);
  const results = await index.query({
    vector: queryEmbedding,
    topK,
    includeMetadata: true,
  });
  return results.map((r) => {
    // Metadata is untyped at this point, so check instead of asserting.
    const content: unknown = r.metadata?.content;
    return {
      // Convert rather than assert, so the value really is a string even if
      // the index holds a non-string id.
      id: String(r.id),
      score: r.score,
      content: typeof content === "string" ? content : "",
    };
  });
}
lib/ai/rag.ts
lib/ai/rag.ts (TypeScript)
import { generateText } from "ai";
import { openai } from "@ai-sdk/openai";
import { search } from "./vector-store";
// Text-generation model passed to generateText in ragGenerate.
// Uses OpenAI by default. Swap this for your own provider/model selection.
const model = openai("gpt-4o");
/**
 * Answer a question using passages retrieved from the vector store.
 *
 * The passages returned by `search` are numbered from 1 and placed in the
 * system prompt; the question itself is sent as the user prompt.
 *
 * @param query - The question to answer.
 * @param options - Forwarded unchanged to `search` (`topK`).
 * @returns The generated answer and the search results used as context.
 */
export async function ragGenerate(query: string, options: { topK?: number } = {}) {
  const sources = await search(query, options);

  // "[1] first passage", "[2] second passage", ... separated by blank lines.
  const numbered = sources.map((source, position) => `[${position + 1}] ${source.content}`);
  const system = [
    "Use the following context to answer the question.",
    "If the context doesn't contain relevant information, say so.",
    "Context:",
    numbered.join("\n\n"),
  ].join("\n");

  const generation = await generateText({ model, system, prompt: query });
  return { answer: generation.text, sources };
}
4. Dependencies
$ bun add ai @ai-sdk/openai @upstash/vector
5. Configuration
Environment Variables
| Variable | Description | Required |
| --------------------------- | ----------------------------- | -------- |
| OPENAI_API_KEY | OpenAI API key for embeddings and text generation | Yes |
| UPSTASH_VECTOR_REST_URL | Upstash Vector URL | Yes |
| UPSTASH_VECTOR_REST_TOKEN | Upstash Vector token | Yes |
6. Usage
Index Documents
import { upsertDocument } from "@/lib/ai/vector-store";
import { chunkText } from "@/lib/ai/embeddings";
// Fetch a document, split its text into chunks, and index every chunk under
// an id derived from the document id and the chunk's position.
const document = await fetchDocument();
const chunks = chunkText(document.content);

// Chunks are upserted one at a time, in order.
for (const [position, chunk] of chunks.entries()) {
  await upsertDocument({
    id: `${document.id}-chunk-${position}`,
    content: chunk,
    metadata: { documentId: document.id },
  });
}
TypeScript
Semantic Search
import { search } from "@/lib/ai/vector-store";
// Request the top five matches for a natural-language query.
const searchQuery = "How do I configure the database?";
const results = await search(searchQuery, { topK: 5 });
TypeScript
RAG Query
import { ragGenerate } from "@/lib/ai/rag";
// Ask a question; the result holds the generated answer and the passages used as context.
const question = "What are the best practices?";
const { answer, sources } = await ragGenerate(question);
TypeScript
7. Alternatives
- Pinecone - Industry-leading vector database
- Weaviate - Open-source with GraphQL API
- Chroma - Open-source, local-first
- pgvector - Vector search for PostgreSQL