AI Embeddings

intermediate

Vector embeddings for semantic search and RAG applications using Upstash Vector.

Tags: ai · embeddings · vector · rag · semantic-search · upstash

Tested on: ⬢ Node.js 20 · ▲ Next.js 16 · ⚛ React 19 · TypeScript 5.9

$ bunx sinew add ai/ai-embeddings

Interactive demo coming soon

1. The Problem

Building search and RAG (Retrieval-Augmented Generation) requires:

- Converting text to vector embeddings
- Storing and querying vectors efficiently
- Handling long documents with chunking
- Semantic similarity search

2. The Solution

Use OpenAI's text-embedding-3-small model with Upstash Vector for serverless vector storage. Includes chunking utilities for long documents.

3. Files

lib/ai/embeddings.ts

lib/ai/embeddings.ts (TypeScript)

import { embed, embedMany } from "ai";
import { openai } from "@ai-sdk/openai";

// Embedding model handle shared by generateEmbedding and generateEmbeddings.
const embeddingModel = openai.embedding("text-embedding-3-small");

/**
 * Embeds a single piece of text with the module's embedding model.
 *
 * @param text - Text to embed.
 * @returns The embedding vector produced for `text`.
 */
export async function generateEmbedding(text: string): Promise<number[]> {
  const result = await embed({ model: embeddingModel, value: text });
  return result.embedding;
}

/**
 * Embeds several texts in one `embedMany` call.
 *
 * @param texts - Texts to embed.
 * @returns The `embeddings` array reported by `embedMany`; an empty array
 *   when `texts` is empty.
 */
export async function generateEmbeddings(texts: string[]): Promise<number[][]> {
  // Nothing to embed: return immediately instead of invoking the model.
  if (texts.length === 0) return [];

  const { embeddings } = await embedMany({
    model: embeddingModel,
    values: texts,
  });
  return embeddings;
}

/**
 * Splits text into chunks of at most `maxChunkSize` characters for embedding.
 *
 * Paragraphs (separated by blank lines) are packed together while they fit.
 * A paragraph longer than the limit is split into sentences, and a sentence
 * that is still too long is broken at the last space inside the limit (or
 * mid-word when it contains no space). Every chunk after the first starts
 * with up to `overlap` trailing characters of the previous chunk, aligned to
 * a word boundary when the tail contains a space and shrunk when needed so
 * the chunk never exceeds `maxChunkSize`.
 *
 * Sizes are measured in UTF-16 code units (`string.length`).
 *
 * @param text - Source text to split.
 * @param options - `maxChunkSize` is the upper bound on chunk length
 *   (default 1000). `overlap` is the number of characters carried over
 *   between consecutive chunks (default 200); it is clamped to
 *   `[0, maxChunkSize - 1]`, and `0` disables overlap.
 * @returns The chunks in document order; an empty array for empty text.
 * @throws RangeError if `maxChunkSize` is not a number greater than or equal to 1.
 */
export function chunkText(
  text: string,
  options: { maxChunkSize?: number; overlap?: number } = {}
): string[] {
  const { maxChunkSize = 1000, overlap = 200 } = options;
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(maxChunkSize >= 1)) {
    throw new RangeError("maxChunkSize must be a number greater than or equal to 1");
  }
  const limit = Math.floor(maxChunkSize);
  // The overlap must leave room for at least one new character per chunk,
  // otherwise chunks would consist of nothing but repeated text.
  const overlapSize = Math.min(Math.max(0, Math.floor(overlap) || 0), limit - 1);

  // A sentence ends at terminal punctuation or at the end of the paragraph,
  // so trailing text without punctuation is kept rather than dropped.
  const sentencePattern = /[^.!?]+(?:[.!?]+|$)/g;

  const chunks: string[] = [];
  let currentChunk = "";

  // Returns at most `maxLength` trailing characters of `chunk`, skipping the
  // partial word at the cut when the tail contains a space.
  const tailOf = (chunk: string, maxLength: number): string => {
    if (maxLength <= 0) return "";
    if (chunk.length <= maxLength) return chunk;
    const tail = chunk.slice(-maxLength);
    const firstSpace = tail.indexOf(" ");
    return firstSpace === -1 ? tail : tail.slice(firstSpace + 1);
  };

  // Breaks a unit that exceeds the limit into pieces that each fit.
  const splitOversized = (unit: string): string[] => {
    const pieces: string[] = [];
    let rest = unit;
    while (rest.length > limit) {
      const lastSpace = rest.lastIndexOf(" ", limit);
      const cut = lastSpace > 0 ? lastSpace : limit;
      pieces.push(rest.slice(0, cut));
      rest = rest.slice(cut).trimStart();
    }
    if (rest) pieces.push(rest);
    return pieces;
  };

  // Adds a unit (already no longer than the limit) to the current chunk,
  // starting a new chunk when it does not fit.
  const append = (unit: string, separator: string): void => {
    if (!unit) return;
    const candidate = currentChunk ? currentChunk + separator + unit : unit;
    if (candidate.length <= limit) {
      currentChunk = candidate;
      return;
    }
    if (currentChunk) chunks.push(currentChunk);
    // Carry over only as much of the previous chunk as still fits in front
    // of the new unit.
    const room = limit - unit.length - separator.length;
    const carry = tailOf(currentChunk, Math.min(overlapSize, room));
    currentChunk = carry ? carry + separator + unit : unit;
  };

  for (const paragraph of text.split(/\n\n+/)) {
    if (paragraph.length <= limit) {
      append(paragraph, "\n\n");
      continue;
    }
    const sentences = paragraph.match(sentencePattern) ?? [paragraph];
    for (const sentence of sentences) {
      for (const piece of splitOversized(sentence.trim())) {
        append(piece, " ");
      }
    }
  }

  if (currentChunk) chunks.push(currentChunk);
  return chunks;
}

lib/ai/vector-store.ts

lib/ai/vector-store.ts (TypeScript)

import { Index } from "@upstash/vector";
import { generateEmbedding, generateEmbeddings } from "./embeddings";

// Upstash Vector client configured from the REST URL and token environment variables.
// NOTE(review): the `!` assertions only satisfy the type checker; nothing here
// verifies at runtime that either variable is actually set.
const index = new Index({
  url: process.env.UPSTASH_VECTOR_REST_URL!,
  token: process.env.UPSTASH_VECTOR_REST_TOKEN!,
});

/** A piece of text to index, identified by a caller-chosen id. */
export interface VectorDocument {
  /** Identifier used as the record id when the document is upserted. */
  id: string;
  /** Text that is embedded and stored in the record's metadata. */
  content: string;
  /** Optional extra fields stored in the record's metadata next to the content. */
  metadata?: Record<string, unknown>;
}

/**
 * Embeds a document's content and upserts it into the vector index.
 *
 * The content is stored under the `content` metadata key, which is the key
 * `search` reads back when building its results.
 *
 * @param doc - Document to index; `doc.metadata` is stored alongside the content.
 */
export async function upsertDocument(doc: VectorDocument): Promise<void> {
  const embedding = await generateEmbedding(doc.content);
  await index.upsert({
    id: doc.id,
    vector: embedding,
    // Caller metadata is spread first so that a caller-supplied `content` key
    // cannot replace the text that was actually embedded.
    metadata: { ...doc.metadata, content: doc.content },
  });
}

/**
 * Embeds the query and returns the closest matches from the vector index.
 *
 * @param query - Natural-language query to embed.
 * @param options - `topK` is passed to the index query (default 5).
 * @returns One entry per match with its id, score, and the text stored under
 *   the `content` metadata key; `content` is an empty string when that key is
 *   missing or does not hold a string.
 */
export async function search(
  query: string,
  options: { topK?: number } = {}
): Promise<{ id: string; score: number; content: string }[]> {
  const { topK = 5 } = options;
  const queryEmbedding = await generateEmbedding(query);

  const results = await index.query({
    vector: queryEmbedding,
    topK,
    includeMetadata: true,
  });

  return results.map((r) => {
    const content = r.metadata?.content;
    return {
      // Convert instead of asserting so the declared `string` type holds at
      // runtime even for records whose id is not a string.
      id: String(r.id),
      score: r.score,
      // Narrow instead of asserting: metadata values are not guaranteed to be strings.
      content: typeof content === "string" ? content : "",
    };
  });
}

lib/ai/rag.ts

lib/ai/rag.ts (TypeScript)

import { generateText } from "ai";
import { openai } from "@ai-sdk/openai";
import { search } from "./vector-store";

// Language model that ragGenerate uses to write answers. Uses OpenAI's gpt-4o
// by default; swap this for your own provider/model selection.
const model = openai("gpt-4o");

/**
 * Answers a question from indexed content: searches the vector store for the
 * query, places the matches in the system prompt as numbered context, and
 * asks the model to answer.
 *
 * @param query - The user's question; used both for retrieval and as the prompt.
 * @param options - Forwarded unchanged to `search`.
 * @returns The generated answer together with the search results used as context.
 */
export async function ragGenerate(query: string, options: { topK?: number } = {}) {
  const sources = await search(query, options);

  // Number the passages from 1 and separate them with blank lines.
  const context = sources
    .map((source, position) => `[${position + 1}] ${source.content}`)
    .join("\n\n");

  const generation = await generateText({
    model,
    system: `Use the following context to answer the question.
If the context doesn't contain relevant information, say so.

Context:
${context}`,
    prompt: query,
  });

  return { answer: generation.text, sources };
}

4. Dependencies

$ bun add ai @ai-sdk/openai @upstash/vector

5. Configuration

Environment Variables

| Variable                    | Description                                  | Required |
| --------------------------- | -------------------------------------------- | -------- |
| `OPENAI_API_KEY`            | OpenAI API key for embeddings and generation | Yes      |
| `UPSTASH_VECTOR_REST_URL`   | Upstash Vector URL                           | Yes      |
| `UPSTASH_VECTOR_REST_TOKEN` | Upstash Vector token                         | Yes      |

6. Usage

Index Documents

import { upsertDocument } from "@/lib/ai/vector-store";
import { chunkText } from "@/lib/ai/embeddings";

// Load the source document and split its text into chunks.
const document = await fetchDocument();
const chunks = chunkText(document.content);

// Index the chunks one after another; each chunk's position becomes part of
// its id, and the parent document id is kept in the metadata.
for (const [position, chunk] of chunks.entries()) {
  await upsertDocument({
    id: `${document.id}-chunk-${position}`,
    content: chunk,
    metadata: { documentId: document.id },
  });
}

TypeScript

Semantic Search

import { search } from "@/lib/ai/vector-store";

// Each result carries the match's id, score, and stored content.
const results = await search("How do I configure the database?", { topK: 5 });

TypeScript

RAG Query

import { ragGenerate } from "@/lib/ai/rag";

// `answer` is the generated text; `sources` are the search results that were used as context.
const { answer, sources } = await ragGenerate("What are the best practices?");

TypeScript

7. Alternatives

- Pinecone - Industry-leading vector database
- Weaviate - Open-source with GraphQL API
- Chroma - Open-source, local-first
- pgvector - Vector search for PostgreSQL

Related patterns

AI Chat · AI Tool Calling · AI Rate Limits