AI Rate Limits
Intermediate · Token-aware rate limiting for AI endpoints with per-user quotas and cost tracking.
Tags: ai, rate-limiting, tokens, cost, quotas, upstash
Tested on: ⬢ Node.js 20 · ▲ Next.js 16 · ⚛ React 19 · TS 5.9
$ bunx sinew add ai/ai-rate-limits
Interactive demo coming soon
1. The Problem
AI endpoints need specialized rate limiting:
- Token-based quotas (not just request counts)
- Per-user daily limits
- Cost tracking and analytics
- Tier-based access control
2. The Solution
Use Upstash Redis for sliding-window request limiting and per-user daily token quotas, with usage tracking. Includes middleware for automatic rate-limit checks, plus helpers for cost calculation.
3. Files
lib/ai/rate-limits.ts
lib/ai/rate-limits.ts · TypeScript
import { Ratelimit } from "@upstash/ratelimit";
import { Redis } from "@upstash/redis";
// Shared Upstash Redis client for this module, built by `Redis.fromEnv()`.
// The Configuration section lists UPSTASH_REDIS_REST_URL and
// UPSTASH_REDIS_REST_TOKEN as the required environment variables.
const redis = Redis.fromEnv();
/**
 * Quota table keyed by tier name.
 *
 * - `tokensPerDay` is the ceiling checkTokenLimit compares the daily counter to.
 * - `requestsPerMinute` sizes the one-minute sliding window of the request limiter.
 * - `maxTokensPerRequest` is declared here but not read by any code in this module.
 */
export const tierLimits = {
  free: { tokensPerDay: 10_000, requestsPerMinute: 10, maxTokensPerRequest: 4_000 },
  pro: { tokensPerDay: 100_000, requestsPerMinute: 60, maxTokensPerRequest: 8_000 },
  enterprise: { tokensPerDay: 1_000_000, requestsPerMinute: 200, maxTokensPerRequest: 32_000 },
} as const;

/** Name of a tier defined in `tierLimits`. */
export type Tier = keyof typeof tierLimits;

/** Length of the daily quota window, in seconds (24 hours). */
const DAY_SECONDS = 24 * 60 * 60;
// Cache of request limiters, one per tier, filled lazily. Reusing the instance
// (instead of calling `new Ratelimit` per request) keeps the SDK's in-memory
// cache and analytics.
const requestLimiters = new Map<Tier, Ratelimit>();

/**
 * Returns the limiter for `tier`, constructing and caching it on first use.
 * Each limiter is a one-minute sliding window sized by the tier's
 * `requestsPerMinute`, stored under the `ai:requests:{tier}` prefix.
 */
function getRequestLimiter(tier: Tier): Ratelimit {
  const cached = requestLimiters.get(tier);
  if (cached) {
    return cached;
  }

  const { requestsPerMinute } = tierLimits[tier];
  const created = new Ratelimit({
    redis,
    limiter: Ratelimit.slidingWindow(requestsPerMinute, "1 m"),
    prefix: `ai:requests:${tier}`,
    analytics: true,
  });
  requestLimiters.set(tier, created);
  return created;
}
/**
 * Counts one request for `userId` against the per-minute limiter of `tier`.
 *
 * @param userId - Identifier passed to the limiter.
 * @param tier - Tier whose limiter is used. Defaults to "free".
 * @returns The limiter's `success`, `remaining` and `reset` fields, unchanged.
 */
export async function checkRequestLimit(userId: string, tier: Tier = "free") {
  const limiter = getRequestLimiter(tier);
  const { success, remaining, reset } = await limiter.limit(userId);
  return { success, remaining, reset };
}
/**
 * Reserves `requestedTokens` against the user's daily token quota.
 *
 * The estimate is added first and checked afterwards, so two concurrent
 * requests cannot both pass a read-then-write check. The increment, the
 * first-write TTL and the TTL read travel as one MULTI/EXEC transaction: a
 * single round trip, and a failure cannot separate the increment from its
 * expiry. Once the real token count is known, correct the reservation with
 * reconcileTokenUsage.
 *
 * @param userId - Quota owner; the counter lives at `ai:tokens:{userId}`.
 * @param requestedTokens - Estimated tokens to reserve; a non-negative integer.
 * @param tier - Tier whose `tokensPerDay` applies. Defaults to "free".
 * @returns `success`, the tokens left in the current window, and `resetAt`
 *   (epoch milliseconds at which the counter expires).
 * @throws RangeError if `requestedTokens` is negative or not an integer.
 */
export async function checkTokenLimit(
  userId: string,
  requestedTokens: number,
  tier: Tier = "free"
) {
  // A negative amount would credit quota; a fractional one is rejected by INCRBY.
  if (!Number.isSafeInteger(requestedTokens) || requestedTokens < 0) {
    throw new RangeError(
      `requestedTokens must be a non-negative integer, got ${requestedTokens}`
    );
  }

  const key = `ai:tokens:${userId}`;
  const dailyLimit = tierLimits[tier].tokensPerDay;

  const [newUsage, , ttl] = await redis
    .multi()
    .incrby(key, requestedTokens)
    // "NX": set the TTL on first write only; never extend the window later.
    .expire(key, DAY_SECONDS, "NX")
    .ttl(key)
    .exec<[number, number, number]>();

  // A non-positive TTL means "unknown"; report a full window in that case.
  const resetAt = Date.now() + (ttl > 0 ? ttl : DAY_SECONDS) * 1000;

  if (newUsage > dailyLimit) {
    await redis.incrby(key, -requestedTokens); // refund the rejected reservation
    return {
      success: false,
      remainingTokens: Math.max(0, dailyLimit - (newUsage - requestedTokens)),
      resetAt,
    };
  }

  return { success: true, remainingTokens: dailyLimit - newUsage, resetAt };
}
/**
 * Corrects a reservation made by checkTokenLimit once the real token count is
 * known. Only the difference (`actualTokens - estimatedTokens`) is applied, so
 * a request is never counted twice.
 *
 * If the counter expired between reservation and reconciliation, a negative
 * delta would land on a brand-new key and drive it below zero, handing the
 * user extra quota for the next window. The total is therefore clamped at zero.
 *
 * @param userId - Quota owner; the counter lives at `ai:tokens:{userId}`.
 * @param estimatedTokens - Amount previously reserved.
 * @param actualTokens - Amount really consumed.
 * @throws RangeError if the resulting delta is not an integer (INCRBY accepts
 *   integers only).
 */
export async function reconcileTokenUsage(
  userId: string,
  estimatedTokens: number,
  actualTokens: number
): Promise<void> {
  const delta = actualTokens - estimatedTokens;
  if (delta === 0) return;
  if (!Number.isSafeInteger(delta)) {
    throw new RangeError(
      `actualTokens - estimatedTokens must be an integer, got ${delta}`
    );
  }

  const key = `ai:tokens:${userId}`;
  const [total] = await redis
    .multi()
    .incrby(key, delta)
    // "NX": only give the key a TTL if it has none; never extend the window.
    .expire(key, DAY_SECONDS, "NX")
    .exec<[number, number]>();

  if (total < 0) {
    // Add back exactly the observed deficit so the counter rests at zero.
    await redis.incrby(key, -total);
  }
}

lib/ai/usage-tracking.ts
lib/ai/usage-tracking.ts · TypeScript
import { Redis } from "@upstash/redis";
// Upstash Redis client used for usage accounting, built by `Redis.fromEnv()`.
// The Configuration section lists UPSTASH_REDIS_REST_URL and
// UPSTASH_REDIS_REST_TOKEN as the required environment variables.
const redis = Redis.fromEnv();
/**
 * Price table per model. Rates are per 1,000 tokens, split into `input` and
 * `output`; calculateCost divides token counts by 1000 before applying them.
 * NOTE(review): the currency is not stated here — presumably USD; confirm.
 */
export const modelPricing = {
  "gpt-4o": { input: 0.0025, output: 0.01 },
  "gpt-4o-mini": { input: 0.00015, output: 0.0006 },
  "claude-sonnet-4-20250514": { input: 0.003, output: 0.015 },
  "claude-3-5-haiku-20241022": { input: 0.0008, output: 0.004 },
} as const;

/**
 * Prices one completion from its token counts.
 *
 * @param model - A key of `modelPricing`.
 * @param inputTokens - Prompt tokens consumed.
 * @param outputTokens - Completion tokens produced.
 * @returns Input cost plus output cost, in the unit used by `modelPricing`.
 */
export function calculateCost(
  model: keyof typeof modelPricing,
  inputTokens: number,
  outputTokens: number
): number {
  const { input: inputRate, output: outputRate } = modelPricing[model];
  const inputCost = (inputTokens / 1000) * inputRate;
  const outputCost = (outputTokens / 1000) * outputRate;
  return inputCost + outputCost;
}
/** How long a daily usage hash is kept: 30 days, in seconds. */
const USAGE_RETENTION_SECONDS = 60 * 60 * 24 * 30;

/**
 * Adds one completed request to the user's totals for the current UTC day.
 *
 * The three counters and the expiry are written as one MULTI/EXEC transaction,
 * so a request is never partially recorded and the hash is never left without
 * a TTL. The expiry is re-armed on every write.
 *
 * @param userId - Owner of the `usage:daily:{userId}:{YYYY-MM-DD}` hash.
 * @param record - Token counts and cost of the request. `record.model` is
 *   accepted but not persisted.
 */
export async function recordUsage(
  userId: string,
  record: {
    model: string;
    inputTokens: number;
    outputTokens: number;
    cost: number;
  }
): Promise<void> {
  // toISOString() is always UTC, so the day bucket rolls over at 00:00 UTC.
  const day = new Date().toISOString().split("T")[0];
  const key = `usage:daily:${userId}:${day}`;

  await redis
    .multi()
    .hincrby(key, "totalTokens", record.inputTokens + record.outputTokens)
    .hincrbyfloat(key, "totalCost", record.cost)
    .hincrby(key, "requests", 1)
    .expire(key, USAGE_RETENTION_SECONDS)
    .exec();
}

/**
 * Reads back the totals written by recordUsage for one day.
 *
 * @param userId - Owner of the usage hash.
 * @param day - Day bucket in `YYYY-MM-DD` form, as produced by
 *   `new Date().toISOString().split("T")[0]`.
 * @returns `totalTokens`, `totalCost` and `requests`; each is 0 when nothing
 *   was recorded for that day.
 */
export async function getDailySummary(userId: string, day: string) {
  const stored = await redis.hgetall<Record<string, unknown>>(
    `usage:daily:${userId}:${day}`
  );

  // Fields may come back as numbers or numeric strings; normalise both.
  const readNumber = (field: string): number => {
    const value = Number(stored?.[field] ?? 0);
    return Number.isFinite(value) ? value : 0;
  };

  return {
    totalTokens: readNumber("totalTokens"),
    totalCost: readNumber("totalCost"),
    requests: readNumber("requests"),
  };
}

lib/middleware/ai-rate-limit.ts
lib/middleware/ai-rate-limit.ts · TypeScript
import { NextRequest, NextResponse } from "next/server";
import { checkRequestLimit, checkTokenLimit, type Tier } from "@/lib/ai/rate-limits";
/** Tokens reserved per request when no `estimateTokens` option is supplied. */
const DEFAULT_ESTIMATED_TOKENS = 1000;

type MaybePromise<T> = T | Promise<T>;

/**
 * Formats a `Retry-After` value (whole seconds) from an epoch-millisecond
 * reset time. Clamped to at least 1 so a reset that has just passed never
 * yields a zero or negative header.
 */
function retryAfterSeconds(resetAtMs: number): string {
  return Math.max(1, Math.ceil((resetAtMs - Date.now()) / 1000)).toString();
}

/**
 * Wraps a route handler with per-user request and token limits.
 *
 * Order of checks: identify the user (401 if absent), count the request
 * against the tier's per-minute limit (429), then reserve an estimated number
 * of tokens against the daily quota (429). Both 429 responses carry a
 * `Retry-After` header. The reservation is not adjusted afterwards; handlers
 * that learn the real usage should call reconcileTokenUsage with the same
 * estimate.
 *
 * @param handler - Route handler invoked once every check passes.
 * @param options.getUserId - Resolves the caller's id, or `null` when
 *   unauthenticated. May be async.
 * @param options.getUserTier - Resolves the caller's tier; "free" when omitted
 *   or when it resolves to null/undefined. May be async.
 * @param options.estimateTokens - Tokens to reserve for this request; defaults
 *   to 1000.
 * @returns A handler with the same `(req) => Promise<Response>` shape.
 */
export function withAIRateLimit(
  handler: (req: NextRequest) => Promise<Response>,
  options: {
    getUserId: (req: NextRequest) => MaybePromise<string | null>;
    getUserTier?: (req: NextRequest) => MaybePromise<Tier>;
    estimateTokens?: (req: NextRequest) => number;
  }
) {
  return async (req: NextRequest): Promise<Response> => {
    const userId = await options.getUserId(req);
    if (!userId) {
      return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
    }

    const tier = (await options.getUserTier?.(req)) ?? "free";

    const requestCheck = await checkRequestLimit(userId, tier);
    if (!requestCheck.success) {
      return NextResponse.json(
        { error: "Rate limit exceeded" },
        {
          status: 429,
          headers: { "Retry-After": retryAfterSeconds(requestCheck.reset) },
        }
      );
    }

    const estimatedTokens = options.estimateTokens?.(req) ?? DEFAULT_ESTIMATED_TOKENS;
    const tokenCheck = await checkTokenLimit(userId, estimatedTokens, tier);
    if (!tokenCheck.success) {
      return NextResponse.json(
        { error: "Daily token limit exceeded" },
        {
          status: 429,
          headers: { "Retry-After": retryAfterSeconds(tokenCheck.resetAt) },
        }
      );
    }

    return handler(req);
  };
}

4. Dependencies
$ bun add @upstash/ratelimit @upstash/redis
5. Configuration
Environment Variables
| Variable | Description | Required |
| -------------------------- | ------------------- | -------- |
| UPSTASH_REDIS_REST_URL | Upstash Redis URL | Yes |
| UPSTASH_REDIS_REST_TOKEN | Upstash Redis token | Yes |
6. Usage
Apply Rate Limiting
import type { NextRequest } from "next/server";
import { tierLimits, type Tier } from "@/lib/ai/rate-limits";
import { withAIRateLimit } from "@/lib/middleware/ai-rate-limit";

async function chatHandler(req: NextRequest): Promise<Response> {
  // Your chat logic
  return new Response("OK");
}

// A header is an arbitrary string: accept it only if it names a real tier,
// otherwise fall back to "free". An unknown name would make the limiter
// lookup throw.
function parseTier(value: string | null): Tier {
  return value !== null && Object.prototype.hasOwnProperty.call(tierLimits, value)
    ? (value as Tier)
    : "free";
}

// NOTE(review): `x-user-id` / `x-user-tier` must be set by trusted auth
// middleware upstream. If clients can send them directly, anyone can claim
// another user's id or a higher tier.
export const POST = withAIRateLimit(chatHandler, {
  getUserId: (req) => req.headers.get("x-user-id"),
  getUserTier: (req) => parseTier(req.headers.get("x-user-tier")),
});
TypeScript
Track Usage
import { recordUsage, calculateCost } from "@/lib/ai/usage-tracking";

// Price the completion first, then add it to the user's daily totals.
// `userId`, `inputTokens` and `outputTokens` come from the surrounding handler.
const model = "gpt-4o";
const cost = calculateCost(model, inputTokens, outputTokens);
await recordUsage(userId, { model, inputTokens, outputTokens, cost });
TypeScript
Get Usage Stats
import { getDailySummary } from "@/lib/ai/usage-tracking";

// Usage is bucketed per UTC day, keyed by the date part of the ISO timestamp.
const [today] = new Date().toISOString().split("T");
const stats = await getDailySummary(userId, today);
// { totalTokens: 5000, totalCost: 0.15, requests: 25 }
TypeScript