diff --git a/.env.sample b/.env.sample index 79b1dc55..e7c58b60 100644 --- a/.env.sample +++ b/.env.sample @@ -30,6 +30,20 @@ INTERN_KEY= # 在 https://open.bigmodel.cn/ 注册后获取 ZHIPU_API_KEY= +# Upstash Redis(Serverless Redis over HTTP)—— 给 AI 接口做 per-IP rate limit +# 免费模型 GLM-4.6V-Flash 并发极低(≈5),不限流单用户就能打爆。 +# +# 获取方式(任选其一): +# 1. 在 https://console.upstash.com/ 手动建 Redis 库,直接复制 REST URL / Token +# 2. Vercel Project → Integrations → Upstash → 一键绑定(env 会自动注入项目) +# +# 代码会自动识别以下三种命名(按优先级): +# a. UPSTASH_REDIS_REST_URL / _TOKEN (手动建推荐) +# b. UPSTASH_REDIS_REST_KV_REST_API_URL / _TOKEN (Vercel 集成 + 自定义 prefix) +# c. KV_REST_API_URL / _TOKEN (Vercel 集成 + 默认无 prefix) +# 未配置时限流会自动降级为放行 + 一次 warn,不会阻塞接口。 +UPSTASH_REDIS_REST_URL= +UPSTASH_REDIS_REST_TOKEN= # Sentry 错误监控(Developer plan 免费 5K errors / 10K perf units / 月) # NEXT_PUBLIC_SENTRY_DSN 是浏览器端需要的公开 DSN,暴露在前端 bundle 里属于设计, # SENTRY_AUTH_TOKEN 仅用于 next build 时上传 source map,私密。 diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index af6e1bc3..b2fc5927 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -3,6 +3,7 @@ import { streamText, UIMessage, convertToModelMessages } from "ai"; import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models"; import { buildSystemMessage } from "@/lib/ai/prompt"; import { source } from "@/lib/source"; +import { limitChat, rateLimitResponse } from "@/lib/rate-limit"; import fs from "fs/promises"; import path from "path"; @@ -29,6 +30,22 @@ interface ChatRequest { import { resolveUserId } from "@/lib/server-auth"; export async function POST(req: Request) { + // 0. Rate limit:免费模型 GLM-4.6V-Flash 并发极低(≈ 5), + // 单用户开几个 tab 就能打爆。per-IP 滑动窗口限流先挡一层。 + // (L2 防护;如果 Upstash env 漏配会自动降级为放行+warn) + // + // 预读 body 判断是否带图(hasImage=true 会触发更严的 5 req/60s 窗口)。 + // 为此多克一次请求,后续 proxyReq/req.json() 仍可独立读(Copilot CR #4)。 + let hasImage = false; + try { + const body = (await req.clone().json()) as Partial; + hasImage = messagesHaveImage(body.messages); + } catch { + // body 不是合法 JSON:按无图处理,继续让下游的 req.json() 去报真正的错 + } + const rl = await limitChat(req, hasImage); + if (!rl.success) return rateLimitResponse(rl); + // 1. 克隆请求,因为如果代理失败,后面的代码还需要读取 req.json() const proxyReq = req.clone(); @@ -234,6 +251,19 @@ export async function POST(req: Request) { return Response.json({ error: error.message }, { status: 400 }); } + // 识别上游(智谱 GLM)限流/欠费/鉴权错误,给出结构化 code 让前端友好提示。 + // 智谱业务码参考: + // 1302 - 接口请求并发超额(与 HTTP 429 对应) + // 1113 - 账户余额不足 / 免费额度耗尽 + // 1001/1002/1003 - 鉴权失败 + const mapped = mapUpstreamError(error); + if (mapped) { + return Response.json( + { error: mapped.message, code: mapped.code }, + { status: mapped.status }, + ); + } + return Response.json( { error: "Failed to process chat request" }, { status: 500 }, @@ -241,6 +271,94 @@ export async function POST(req: Request) { } } +/** + * 判断一组 UIMessage 里是否含图片 part。支持 AI SDK v5 的多种图片表达: + * `type === "image"` / `type === "image_url"` / `type === "file"` 且 mediaType 起头 image。 + * 任何异常结构都当作无图,宁可放过也不误杀。 + */ +function messagesHaveImage(messages: unknown): boolean { + if (!Array.isArray(messages)) return false; + return messages.some((msg) => { + if (!msg || typeof msg !== "object") return false; + const parts = (msg as { parts?: unknown }).parts; + if (!Array.isArray(parts)) return false; + return parts.some((part) => { + if (!part || typeof part !== "object") return false; + const type = (part as { type?: unknown }).type; + if (type === "image" || type === "image_url") return true; + if (type === "file") { + const mediaType = (part as { mediaType?: unknown }).mediaType; + return typeof mediaType === "string" && mediaType.startsWith("image/"); + } + return false; + }); + }); +} + +interface MappedUpstreamError { + status: number; + code: "rate_limited" | "quota_exhausted" | "upstream_auth" | "upstream_down"; + message: string; +} + +function mapUpstreamError(err: unknown): MappedUpstreamError | null { + if (!err) return null; + + // 仅使用 message / response payload,**不要拼 stack** —— stack 里带行号 + // 形如 `:429:` / `:1302:` 会误匹配业务码正则(Copilot CR #5)。 + // JSON.stringify 对循环引用会抛错,用 try/catch 兜底(Copilot CR #6)。 + let raw: string; + if (err instanceof Error) { + raw = err.message; + } else if (typeof err === "string") { + raw = err; + } else { + try { + raw = JSON.stringify(err); + } catch { + raw = String(err); + } + } + + // 业务码正则:全部用 `[^\s]{0,N}?` 代替 `.*`,限死回溯深度避免 ReDoS + // (CodeQL polynomial regex 告警)。关键词语义够短,10~20 字符窗口足够。 + const hasStatus429 = /\b429\b|rate[-_ ]?limit|too many requests/i.test(raw); + const has1302 = /\b1302\b|并发超额|速率限制|控制请求频率/.test(raw); + const has1113 = + /\b1113\b|余额不足|额度[^\s]{0,10}?耗尽|quota[^\s]{0,10}?exhaust/i.test( + raw, + ); + const hasAuth = + /\b1001\b|\b1002\b|\b1003\b|\b401\b|unauthorized|invalid[^\s]{0,10}?api[^\s]{0,10}?key/i.test( + raw, + ); + + if (has1302 || hasStatus429) { + return { + status: 429, + code: "rate_limited", + message: "AI 服务被挤爆了,排队中,请 30 秒后再试。(上游并发限流)", + }; + } + if (has1113) { + return { + status: 503, + code: "quota_exhausted", + message: + "免费模型今日额度已用完,请明天再来,或在设置里切到你自己的 OpenAI/Gemini。", + }; + } + if (hasAuth) { + return { + status: 502, + code: "upstream_auth", + message: + "AI 服务密钥配置异常,站点管理员已收到通知。请稍后重试或切换到自有 API Key。", + }; + } + return null; +} + // 提取纯文本内容,过滤掉 MDX 语法 function extractTextFromMDX(content: string): string { let text = content diff --git a/app/api/suggestions/route.ts b/app/api/suggestions/route.ts index 7a6c11ea..a07f1c2f 100644 --- a/app/api/suggestions/route.ts +++ b/app/api/suggestions/route.ts @@ -2,6 +2,7 @@ import { generateText } from "ai"; import { unstable_cache } from "next/cache"; import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models"; import { createGlmFlashModel } from "@/lib/ai/providers/glm"; +import { limitChat, rateLimitResponse } from "@/lib/rate-limit"; // 允许流式响应最长30秒 export const maxDuration = 30; @@ -20,6 +21,10 @@ interface SuggestionsRequest { } export async function POST(req: Request) { + // Rate limit:suggestions 也打 LLM,共用同一 IP 额度池 + const rl = await limitChat(req, false); + if (!rl.success) return rateLimitResponse(rl); + try { const { messages, diff --git a/app/components/DocsAssistant.tsx b/app/components/DocsAssistant.tsx index 49d07a92..d6f4e4ab 100644 --- a/app/components/DocsAssistant.tsx +++ b/app/components/DocsAssistant.tsx @@ -465,11 +465,15 @@ function deriveAssistantError( ? message : `The ${providerLabel} API key looks incorrect. Update it in settings and try again.`; } else if (statusCode === 429) { + // 优先用服务端返回的中文友好提示(rate_limited / quota_exhausted), + // 只在服务端没给消息时才兜底到默认英文文案 friendlyMessage = - "The provider is rate limiting requests. Please wait and try again."; + message && message.length > 0 ? message : "请求太频繁,请稍等片刻再试。"; } else if (statusCode && statusCode >= 500) { friendlyMessage = - "The AI provider is currently unavailable. Please try again soon."; + message && message.length > 0 + ? message + : "AI 服务暂时不可用,请稍后再试。"; } return { diff --git a/app/components/assistant-ui/thread.tsx b/app/components/assistant-ui/thread.tsx index 9dac98d9..c84d0d7e 100644 --- a/app/components/assistant-ui/thread.tsx +++ b/app/components/assistant-ui/thread.tsx @@ -395,6 +395,8 @@ const Composer: FC = ({ autoFocus aria-label="Message input" disabled={!hasActiveKey} + // 单条消息硬上限 4000 字符:防 token bomb,保护免费模型额度 + maxLength={4000} /> ip.trim()) + .filter(Boolean); + if (parts.length > 0) return parts[parts.length - 1]; + } + + return "anonymous"; +} + +export interface RateLimitResult { + success: boolean; + limit: number; + remaining: number; + /** Unix ms timestamp when the window resets */ + reset: number; + /** 当 Upstash 未配置时,此字段为 true,调用方应跳过限流 */ + skipped?: boolean; +} + +/** + * 对聊天请求做两层限流:per-minute + per-day,任一维度不过就算失败。 + * @param req Next.js Request + * @param hasImage 消息是否携带图片(影响每分钟窗口严格度) + */ +export async function limitChat( + req: Request, + hasImage = false, +): Promise { + const minuteLimiter = hasImage ? getChatImageLimiter() : getChatLimiter(); + const dayLimiter = getDailyLimiter(); + + // Upstash 未配置:本地开发或生产漏配。不阻塞请求,只打一次 warn 提示运维。 + // 不再按 NODE_ENV 区分(dev 也提示,免得开发期"没限流却不知道"), + // 用 module 级 flag 避免每请求刷爆日志(Copilot CR #3)。 + if (!minuteLimiter || !dayLimiter) { + if (!hasWarnedMissingUpstash) { + hasWarnedMissingUpstash = true; + console.warn( + "[rate-limit] UPSTASH_REDIS_REST_URL / UPSTASH_REDIS_REST_TOKEN 未配置," + + "聊天接口暂无限流保护(本实例生命周期内不会再次提示)。" + + "生产环境请在 Vercel Env 中补齐。", + ); + } + return { + success: true, + limit: Infinity, + remaining: Infinity, + reset: 0, + skipped: true, + }; + } + + const ip = getClientIp(req); + const [minuteRes, dayRes] = await Promise.all([ + minuteLimiter.limit(ip), + dayLimiter.limit(ip), + ]); + + if (!minuteRes.success) { + return { + success: false, + limit: minuteRes.limit, + remaining: minuteRes.remaining, + reset: minuteRes.reset, + }; + } + if (!dayRes.success) { + return { + success: false, + limit: dayRes.limit, + remaining: dayRes.remaining, + reset: dayRes.reset, + }; + } + // 取剩余额度较紧的一档回给调用方 + const tighter = minuteRes.remaining <= dayRes.remaining ? minuteRes : dayRes; + return { + success: true, + limit: tighter.limit, + remaining: tighter.remaining, + reset: tighter.reset, + }; +} + +/** 生成 429 响应,带标准 Retry-After 和 X-RateLimit-* 头 */ +export function rateLimitResponse(result: RateLimitResult): Response { + const retryAfterSec = Math.max( + 1, + Math.ceil((result.reset - Date.now()) / 1000), + ); + return new Response( + JSON.stringify({ + error: "请求太频繁了,喘口气再来。", + code: "rate_limited", + retryAfter: retryAfterSec, + }), + { + status: 429, + headers: { + "Content-Type": "application/json", + "Retry-After": String(retryAfterSec), + "X-RateLimit-Limit": String(result.limit), + "X-RateLimit-Remaining": String(result.remaining), + "X-RateLimit-Reset": String(result.reset), + }, + }, + ); +} diff --git a/package.json b/package.json index 1db3c9be..a33981cf 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,8 @@ "@sentry/nextjs": "^10.49.0", "@types/mdx": "^2.0.13", "@types/pg": "^8.16.0", + "@upstash/ratelimit": "^2.0.8", + "@upstash/redis": "^1.37.0", "@vercel/speed-insights": "^1.2.0", "ai": "^6.0.148", "antd": "^5.27.4", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4201ce7d..c9ea7816 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -93,6 +93,12 @@ importers: '@types/pg': specifier: ^8.16.0 version: 8.20.0 + '@upstash/ratelimit': + specifier: ^2.0.8 + version: 2.0.8(@upstash/redis@1.37.0) + '@upstash/redis': + specifier: ^1.37.0 + version: 1.37.0 '@vercel/speed-insights': specifier: ^1.2.0 version: 1.3.1(next@16.2.3(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(react@19.2.3)(vue@3.5.32(typescript@5.9.3)) @@ -4024,6 +4030,18 @@ packages: cpu: [x64] os: [win32] + '@upstash/core-analytics@0.0.10': + resolution: {integrity: sha512-7qJHGxpQgQr9/vmeS1PktEwvNAF7TI4iJDi8Pu2CFZ9YUGHZH4fOP5TfYlZ4aVxfopnELiE4BS4FBjyK7V1/xQ==} + engines: {node: '>=16.0.0'} + + '@upstash/ratelimit@2.0.8': + resolution: {integrity: sha512-YSTMBJ1YIxsoPkUMX/P4DDks/xV5YYCswWMamU8ZIfK9ly6ppjRnVOyBhMDXBmzjODm4UQKcxsJPvaeFAijp5w==} + peerDependencies: + '@upstash/redis': ^1.34.3 + + '@upstash/redis@1.37.0': + resolution: {integrity: sha512-LqOJ3+XWPLSZ2rGSed5DYG3ixybxb8EhZu3yQqF7MdZX1wLBG/FRcI6xcUZXHy/SS7mmXWyadrud0HJHkOc+uw==} + '@vercel/backends@0.0.14': resolution: {integrity: sha512-4a4LQueJCvwqJhz+B9DBlEOZOdyl+BrIMkC1LZC3++YGbEA9KLhcBwS10WF7hndQR1jizpf7klMQbcU2FwaN/g==} @@ -7855,6 +7873,9 @@ packages: resolution: {integrity: sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==} engines: {node: '>= 0.4'} + uncrypto@0.1.3: + resolution: {integrity: sha512-Ql87qFHB3s/De2ClA9e0gsnS6zXG27SkTiSJwjCc9MebbfapQfuPzumMIUMi38ezPZVNFcHI9sUIepeQfw8J8Q==} + undici-types@7.19.2: resolution: {integrity: sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==} @@ -12684,6 +12705,19 @@ snapshots: '@unrs/resolver-binding-win32-x64-msvc@1.11.1': optional: true + '@upstash/core-analytics@0.0.10': + dependencies: + '@upstash/redis': 1.37.0 + + '@upstash/ratelimit@2.0.8(@upstash/redis@1.37.0)': + dependencies: + '@upstash/core-analytics': 0.0.10 + '@upstash/redis': 1.37.0 + + '@upstash/redis@1.37.0': + dependencies: + uncrypto: 0.1.3 + '@vercel/backends@0.0.14(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2)(rollup@4.60.1)(typescript@5.9.3)': dependencies: '@vercel/cervel': 0.0.6(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2)(typescript@5.9.3) @@ -17643,6 +17677,8 @@ snapshots: has-symbols: 1.1.0 which-boxed-primitive: 1.1.1 + uncrypto@0.1.3: {} + undici-types@7.19.2: {} undici@5.28.4: