From 1eaf523b289c62ea3d677d325f975f625690bc16 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:27:01 +0000
Subject: [PATCH 01/19] fix(api/docs/history): accept fumadocs' docs-relative
 path params
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fumadocs' page.file.path returns a path relative to app/docs/ (e.g.
ai/xxx/index.mdx) rather than a repo-root path, so frontend calls to
/api/docs/history failed with a 400.
Prepend the app/docs/ prefix in normalizeDocsPath for compatibility.
---
 app/api/docs/history/route.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/api/docs/history/route.ts b/app/api/docs/history/route.ts
index 08ad6e30..714799a4 100644
--- a/app/api/docs/history/route.ts
+++ b/app/api/docs/history/route.ts
@@ -47,6 +47,11 @@ function normalizeDocsPath(raw: string): string | null {
   if (normalized.startsWith("docs/")) {
     normalized = `app/${normalized}`;
   }
+  // fumadocs 的 page.file.path 返回"相对 app/docs/"路径(如 ai/xxx/index.mdx)
+  // 而不是仓库根。这里补上前缀,和 page.tsx 传参保持兼容。
+  if (!normalized.startsWith("app/")) {
+    normalized = `app/docs/${normalized}`;
+  }
   // 必须落在 app/docs/ 下才放行
   if (!normalized.startsWith("app/docs/")) {
     return null;

From 001fd1651b24e7a3e5683c923c73f3d4b32e1d46 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:27:20 +0000
Subject: [PATCH 02/19] style(ui): align with the editorial design system; fix
 6 visual-consistency issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- DocShareButton: drop rounded-md / h-11; switch to font-mono uppercase +
  border + inverted-color hover
- EditOnGithub: same style for consistency; icon h-8 → h-4
- DocHistoryPanel: drop rounded-full from the skeleton and avatars to keep
  the square-cornered newspaper look
- ContributorRow: swap the Dialog's hard-coded #111111/#F9F9F7 for CSS
  variables; hide the POWER LEVEL label on mobile
- HotDocsTab: add a 5-minute sessionStorage cache to cut repeated backend
  hits
- SettingsForm: change the AI provider from a native select to a segmented
  button group; drop uppercase tracking-widest from Chinese labels
---
 app/components/DocHistoryPanel.tsx     | 10 +--
 app/components/DocShareButton.tsx      |  2 +-
 app/components/EditOnGithub.tsx        |  4 +-
 app/components/rank/ContributorRow.tsx | 24 +++----
 app/components/rank/HotDocsTab.tsx     | 34 ++++++++-
 app/settings/SettingsForm.tsx          | 95 +++++++++++++++++---------
 6 files changed, 115 insertions(+), 54 deletions(-)

diff --git a/app/components/DocHistoryPanel.tsx b/app/components/DocHistoryPanel.tsx
index b1a3298a..577c33c0 100644
--- a/app/components/DocHistoryPanel.tsx
+++ b/app/components/DocHistoryPanel.tsx
@@ -50,10 +50,10 @@ function relativeTime(dateStr: string): string {
 function SkeletonRow() {
   return (
     <div className="flex items-start gap-3 animate-pulse">
-
+
-
-
+
+
); @@ -129,7 +129,7 @@ export function DocHistoryPanel({ path }: DocHistoryPanelProps) { href={item.htmlUrl} target="_blank" rel="noopener noreferrer" - className="flex items-start gap-3 py-2.5 group hover:bg-neutral-50 dark:hover:bg-neutral-900 rounded transition-colors px-1 -mx-1" + className="flex items-start gap-3 py-2.5 group hover:bg-neutral-50 dark:hover:bg-neutral-900 transition-colors px-1 -mx-1" > {/* 头像 */} {item.authorLogin} diff --git a/app/components/DocShareButton.tsx b/app/components/DocShareButton.tsx index f71fdeaa..9d0b5659 100644 --- a/app/components/DocShareButton.tsx +++ b/app/components/DocShareButton.tsx @@ -38,7 +38,7 @@ export function DocShareButton() {
       {/* 提交按钮 */}

From 967b07b41dedc5741c789b0ba0326068a2c961f3 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:27:41 +0000
Subject: [PATCH 03/19] fix(settings): fix theme-toggle conflict between the
 two providers + two-way sync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problems:
- fumadocs' RootProvider bundles next-themes, which wrote light/dark at the
  same time as our own ThemeProvider, so opening /settings flashed from
  dark to light
- after switching the theme with the Header's ThemeToggle, the selected
  state in the /settings form did not update
- the language saved from Settings was never written anywhere the docs
  pages could read it

Fixes:
- add theme={{ enabled: false }} to RootProvider to disable fumadocs'
  built-in theming
- SettingsForm reads the current theme via useTheme() as its initial value
- watch currentTheme changes and sync them into the form's selected state
- the theme buttons call setTheme immediately (what you see is what you
  get, no need to wait for save)
- on a successful save, handleSave writes language to a cookie for the
  /docs Server Component to read
---
 app/layout.tsx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/layout.tsx b/app/layout.tsx
index c28ec0f2..c98c631a 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -206,6 +206,9 @@ export default async function RootLayout({
           写 light/dark 导致闪烁和状态不同步
+          theme={{ enabled: false }}
           search={{
             SearchDialog: CustomSearchDialog,
             // 使用静态索引,兼容 next export 与本地开发

From cf6997d6839bfe75d1ca74063770a989ab77dd99 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:28:21 +0000
Subject: [PATCH 04/19] fix(nav): clicking the Header from any non-home page
 returns to the home page
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Wrap BrandMark in a Link so clicking the logo goes back to the home page
- Change the Header's three anchors from #features to absolute paths such
  as /#features, so nav clicks from /rank or any subpage jump back to the
  matching section on the home page

Option F (the minimal fix): cheaper than introducing a site-wide Header
layout or a new component.
---
 app/components/BrandMark.tsx |  5 +++--
 app/components/Header.tsx    | 15 ++++++++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/app/components/BrandMark.tsx b/app/components/BrandMark.tsx
index efbfa1f2..b86ac1cc 100644
--- a/app/components/BrandMark.tsx
+++ b/app/components/BrandMark.tsx
@@ -12,6 +12,7 @@
  * @param {boolean} priority - 是否优先加载
  */
 import Image from "next/image";
+import Link from "next/link";
 import { cn } from "@/lib/utils";
 
 export const BRAND_NAME = "Involution Hell";
@@ -38,7 +39,7 @@ export function BrandMark({
   const width = Math.round(imageSize * BRAND_LOGO_ASPECT_RATIO);
 
   return (
-    <div
+
{BRAND_NAME} -
+ ); } diff --git a/app/components/Header.tsx b/app/components/Header.tsx index 2a75f041..a3184096 100644 --- a/app/components/Header.tsx +++ b/app/components/Header.tsx @@ -31,7 +31,16 @@ export function Header() {
-
- {(() => { - const rawData = leaderboardData as { - id: string; - name: string; - points: number; - avatarUrl: string; - }[]; - const filteredData = rawData.filter( - (user) => !MAINTAINERS.includes(user.name), - ); - const top3 = filteredData.slice(0, 3); - const maxPoints = top3.length > 0 ? top3[0].points : 100; +
+
+ {(() => { + const rawData = leaderboardData as { + id: string; + name: string; + points: number; + avatarUrl: string; + }[]; + const filteredData = rawData.filter( + (user) => !MAINTAINERS.includes(user.name), + ); + const top3 = filteredData.slice(0, 3); + const maxPoints = top3.length > 0 ? top3[0].points : 100; - return top3.map((user, idx) => ( -
-
- #{idx + 1} -
-
- {user.name} -
-
- {user.name} -
-
- {user.points.toLocaleString()} PTS + return top3.map((user, idx) => ( +
+
+ #{idx + 1} +
+
+ {user.name} +
+
+ {user.name} +
+
+ {user.points.toLocaleString()} PTS +
+
- - {/* Visual bar chart representing points using motion */} - -
- )); - })()} + )); + })()} +
+
+ +
diff --git a/app/components/HotDocsPreview.tsx b/app/components/HotDocsPreview.tsx new file mode 100644 index 00000000..37260593 --- /dev/null +++ b/app/components/HotDocsPreview.tsx @@ -0,0 +1,82 @@ +import Link from "next/link"; + +interface TopDocDto { + path: string; + title: string; + views: number; +} + +async function fetchTopDocs(): Promise { + const backendUrl = process.env.BACKEND_URL ?? "http://localhost:8081"; + try { + const res = await fetch( + `${backendUrl}/analytics/top-docs?window=7d&limit=5`, + { next: { revalidate: 300 } }, + ); + if (!res.ok) return []; + const json = await res.json(); + // 后端用 ApiResponse> 包裹,data 字段存实际数据 + return json.data ?? json; + } catch { + return []; + } +} + +export async function HotDocsPreview() { + const docs = await fetchTopDocs(); + + return ( +
+
+
+
+ Hot This Week +
+
+ 本周最热 +
+
+ + MORE + + → + + +
+
+      {docs.length === 0 ? (
+        <p>暂无数据</p>
+      ) : (
+        <ol>
+          {docs.map((doc, idx) => (
+            <li key={doc.path}>
+              <span>
+                {String(idx + 1).padStart(2, "0")}
+              </span>
+              <Link href={`/${doc.path}`}>
+                {doc.title}
+              </Link>
+              <span>
+                {doc.views.toLocaleString()} views
+              </span>
+            </li>
+          ))}
+        </ol>
+      )}
+    </section>
+  );
+}

From 93c1648c84eee75f8110079ef7936bccd79da070 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:29:17 +0000
Subject: [PATCH 07/19] feat(scripts): skip translated docs in the
 contributors script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Translated files carry a translatedFrom field in their frontmatter; they
are produced by AI and should not pollute the contributor statistics.

Add an isTranslation field in parseDocFrontmatter; when the main loop sees
a translation it logs a skip message and continues; it neither pulls the
commit history nor writes to the doc_contributors table.

Impact: generate-leaderboard.mjs needs no changes (it aggregates from the
DB, which is already filtered at the source), and the DB schema is
unchanged.
---
 scripts/backfill-contributors.mjs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/scripts/backfill-contributors.mjs b/scripts/backfill-contributors.mjs
index 272b70e5..6444b989 100644
--- a/scripts/backfill-contributors.mjs
+++ b/scripts/backfill-contributors.mjs
@@ -165,15 +165,19 @@ async function listDocFiles() {
     .sort((a, b) => a.relative.localeCompare(b.relative));
 }
 
-// 解析 frontmatter,取 docId / title
+// 解析 frontmatter,取 docId / title / isTranslation
 function parseDocFrontmatter(content) {
   const parsed = matter(content);
   const data = parsed.data || {};
   const docId = typeof data.docId === "string" ? data.docId.trim() : "";
   const title = typeof data.title === "string" ? data.title.trim() : "";
+  // 有 translatedFrom 字段即为翻译版,不计入贡献者统计
+  const isTranslation =
+    typeof data.translatedFrom === "string" && data.translatedFrom.length > 0;
   return {
     docId: docId || null,
     title: title || null,
+    isTranslation,
     frontmatter: data,
   };
 }
@@ -485,6 +489,11 @@ async function main() {
       log(`  ⚠️ 跳过 ${repoRelative}:缺少 docId`);
       continue;
     }
+    // 翻译版(frontmatter 有 translatedFrom)不统计贡献者
+    if (meta.isTranslation) {
+      log(`  ⏭ 跳过翻译版:${repoRelative}`);
+      continue;
+    }
     const set = currentDocIdPaths.get(meta.docId) ??
new Set();
      set.add(repoRelative);
      currentDocIdPaths.set(meta.docId, set);

From 32031b75e1b5bc735ee8c12dc09099bbdaabedd1 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:29:37 +0000
Subject: [PATCH 08/19] feat(docs): i18n MVP: translate 5 representative docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MVP samples covering different scenarios, used to probe the translation
agent's capability boundaries:
- ai/reinforcement-learning-overview (zh→en): dense AI terminology
- ai/compute-platforms-handbook (zh→en): code blocks preserved
- computer-science/01-singly-linked-list (en→zh): long doc + ASCII art
- jobs/bq (zh→en): cultural context / STAR / BQ jargon
- CommunityShare/Geek/cloudflare-r2-sharex (zh→en): tables + Chinese URL
  encoding

Each translated copy:
- inherits the source docId (versions of one shared document)
- carries translatedFrom/translatedAt markers in its frontmatter, so the
  contributors script skips it
- keeps code blocks / math / URLs / MDX attribute names verbatim
- translates only the title, description, body text, and MDX text content

The 5 MVP docs total ~29.4k tokens, averaging 5.9k per doc.
---
 ...dflare-r2-sharex-free-image-hosting.en.mdx | 153 +++++
 .../compute-platforms-handbook.en.mdx         | 219 +++++++
 .../reinforcement-learning-overview.en.mdx    | 154 +++++
 .../linked-list/01-singly-linked-list.zh.mdx  | 570 ++++++++++++++++++
 app/docs/jobs/interview-prep/bq.en.md         |  70 +++
 5 files changed, 1166 insertions(+)
 create mode 100644 app/docs/CommunityShare/Geek/cloudflare-r2-sharex-free-image-hosting.en.mdx
 create mode 100644 app/docs/ai/compute-platforms/compute-platforms-handbook.en.mdx
 create mode 100644 app/docs/ai/reinforcement-learning/reinforcement-learning-overview.en.mdx
 create mode 100644 app/docs/computer-science/data-structures/linked-list/01-singly-linked-list.zh.mdx
 create mode 100644 app/docs/jobs/interview-prep/bq.en.md

diff --git a/app/docs/CommunityShare/Geek/cloudflare-r2-sharex-free-image-hosting.en.mdx b/app/docs/CommunityShare/Geek/cloudflare-r2-sharex-free-image-hosting.en.mdx
new file mode 100644
index 00000000..0bf00661
--- /dev/null
+++ b/app/docs/CommunityShare/Geek/cloudflare-r2-sharex-free-image-hosting.en.mdx
@@ -0,0 +1,153 @@
---
title: Building a Personal/Team "Permanent" Image Host with Cloudflare R2 + ShareX
description: ""
date: "2025-09-27"
tags:
  - tag-one
docId: gj4bn01un0s0841berfvwrn5
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T00:00:00Z
translatorAgent: claude-opus-4-6
---

# Building a Personal/Team "Permanent" Image Host with Cloudflare R2 + ShareX

This guide walks you from zero to a working image host — high-performance, reliable, near-zero cost, and fully under your control — built on Cloudflare R2's free tier and ShareX's workflow automation.

**End result**: press a hotkey to take a screenshot, have the image automatically uploaded to your own storage (or upload existing local images manually), and get a Markdown-formatted link copied straight to your clipboard for seamless writing.

---

## Contents

1. [Part 1: Configure Cloudflare R2 (cloud storage)](#part-1-configure-cloudflare-r2-cloud-storage)
2. [Part 2: Configure ShareX (desktop client)](#part-2-configure-sharex-desktop-client)
3. [Part 3: Optimize the ShareX workflow](#part-3-optimize-the-sharex-workflow)
4. [F.A.Q. and troubleshooting](#faq-and-troubleshooting)

---

## Part 1: Configure Cloudflare R2 (cloud storage)

First, enable R2 storage in Cloudflare.
![](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/%E5%9B%BE%E7%89%87_20250927143514.png)

### 1.1 Create an R2 bucket

A bucket is the container that holds all your images.

1. 
Sign in to the Cloudflare dashboard and open **R2** from the left-hand menu. +2. Click **Create bucket**. +3. **Bucket name**: enter a globally unique bucket name (for example, `your-org-images-2025`). +4. **Location**: leave the default **Automatic**. +5. Click **Create bucket**. + +![](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/xrRAVMB2Sz.png) + +### 1.2 Enable public access on the bucket + +To let uploaded images be served externally, turn on public access. + +1. Open the bucket you just created and click the **Settings** tab at the top. + ![1r0AU3aWkD.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/1r0AU3aWkD.png) +2. In the **Public Development URL** section below, click **Enable** on the right. +3. Type the confirmation text. + ![fgc4amk7S7.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/fgc4amk7S7.png) +4. **Note down** the `https://pub-....r2.dev` URL shown here — this is your public-access domain. + ![RmQwxSxpLi.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/RmQwxSxpLi.png) + +### 1.3 Create an API token for uploads + +The API token is the "key" that lets ShareX upload files to R2. + +1. Return to the R2 overview page and click **Manage API Tokens** in the top-right corner. + ![CTzhiiSl04.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/CTzhiiSl04.png) +1. Click **Create Account API token**. + ![FBEzXXohz7.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/FBEzXXohz7.png) +1. **Permissions**: **you must select `Object Read & Write`**. This is the critical step — read-only permissions will cause uploads to fail. + ![xB4pYQOeEI.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/xB4pYQOeEI.png) +1. Click **Create API token**. +1. **⚠️ Copy and save immediately!** The page displays your `Access Key ID` and `Secret Access Key`. **These two secrets are shown only once** — copy them immediately and paste them somewhere safe. The `Default endpoints` link at the bottom also needs to be saved. + ![kg9E8tEozI.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/kg9E8tEozI.png) + +--- + +## Part 2: Configure ShareX (desktop client) + +### 2.1 Download and install ShareX + +Grab the latest version from the official site: [https://getsharex.com/](https://getsharex.com/) + +### 2.2 Configure the S3 upload destination + +This is the heart of the setup. + +1. Open ShareX and, from the main window, click `Destination settings...`. +2. In the popup, select `Amazon S3` on the left and fill in your configuration on the right. +3. Fill in your R2 details exactly as shown in the table below: + +| ShareX field | What to enter | Notes | +| --------------------- | --------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- | +| **Access key ID** | Paste the `Access Key ID` you saved | | +| **Secret access key** | Paste the `Secret Access Key` you saved | | +| **Region** | Leave blank | | +| **Endpoints** | Leave blank | +| **Endpoint** | `https://.r2.cloudflarestorage.com` | If you saved this earlier, paste it as-is. Make sure there's no trailing `/`. | +| **Bucket name** | Your R2 bucket name (for example, `your-org-images-2025`) | | +| **Upload path** | `img/%y/%mo/%d/` | Organizes images as `img/year/month/day/`, which helps with management. ShareX uses `%` for variables. (Customize as you like.) 
| +| **Use custom domain** | **Check this box** | | +| (Custom domain field) | `https://` | **Note**: only paste the Public Development URL you saved earlier — don't append `$key$`. Modern ShareX handles that automatically. | + +#### 2.2.1 Critical Advanced settings + +At the bottom of the S3 configuration window, find the **Advanced** section and apply these settings: + +- `Set public-read ACL on file`: **must be unchecked**. R2 doesn't support this; leaving it checked will produce `403 Forbidden` errors. +- `Use path style request`: **must be checked**. R2 requires this request URL style. + +### 2.3 Set S3 as the default image uploader + +1. Return to the ShareX main window. +2. Click `Destinations` -> `Image uploader` -> `File uploader` -> and pick `Amazon S3`. + ![Code_nqStY1UhqR.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/Code_nqStY1UhqR.png) + +--- + +## Part 3: Optimize the ShareX workflow + +### 3.1 Auto-copy a Markdown link + +1. In the ShareX main window, click `Task settings...`. +2. In the popup, select **`Advanced`** from the lower part of the left sidebar. + ![ShareX_ZWFVlZZu0W.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/ShareX_ZWFVlZZu0W.png) +3. Click `ClipboardContentFormat` under `After upload`. +4. Replace the contents with the Markdown format `![$filename]($result)`. + +### 3.2 Enable auto-copy to clipboard + +1. From the main window, under `After upload tasks`, tick `Copy URL to clipboard` on the right. + ![ShareX_zf6qftjnu6.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/ShareX_zf6qftjnu6.png) + +### 3.3 Change the hotkey + +You can change the hotkey under `Hotkey settings` on the main window, giving you a one-shot flow: screenshot → upload → Markdown on your clipboard. + +--- + +## F.A.Q. and Troubleshooting + +**Q1: I get a `(403) Forbidden` error when uploading — what now?** +**A1:** This is the most common issue. Check the following two S3 Advanced settings: + +1. Make sure **`Set public-read ACL on file`** is **unchecked**. +2. Make sure **`Use path style request`** is **checked**. +3. If it still fails, regenerate an **API token with `Object Read & Write` permissions** and update it in ShareX. + +**Q2: I don't want to upload screenshots — I want to upload existing local images.** +**A2:** +**Use the Upload tab on the left**: pick the type of content you want to upload from inside that tab. +![ShareX_34TAgHco1T.png](https://pub-85d4dcece16844bf8290aa4b33608ccd.r2.dev/ShareX/2025/09/ShareX_34TAgHco1T.png) + +**Q3: Why can't I use PicGo?** +**A3:** In my testing, PicGo's S3 plugin has compatibility issues with Cloudflare R2 — filenames and paths aren't handled correctly. ShareX's S3 implementation is more standard and currently the more reliable choice. diff --git a/app/docs/ai/compute-platforms/compute-platforms-handbook.en.mdx b/app/docs/ai/compute-platforms/compute-platforms-handbook.en.mdx new file mode 100644 index 00000000..281d5777 --- /dev/null +++ b/app/docs/ai/compute-platforms/compute-platforms-handbook.en.mdx @@ -0,0 +1,219 @@ +--- +title: Compute Platforms +description: A guide to AI development on cloud compute platforms such as AutoDL and InternStudio +date: "2025-01-27" +tags: + - compute-platforms + - autodl + - internstudio + - gpu-cloud + - development-environment +docId: d73h3kyjnzytk1y2nizulyr6 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T00:00:00Z +translatorAgent: claude-opus-4-6 +--- + +Training and inference on AI models requires substantial compute. 
This section walks through the mainstream compute platforms and cloud services to help developers pick the right resources. + +## AutoDL + +### Overview + +- **Website**: [https://www.autodl.com/home](https://www.autodl.com/home) +- **Positioning**: A dedicated GPU cloud service +- **Strengths**: Affordable pricing and a simple workflow — well suited for individual developers and small teams + +### Documentation + +- **Full docs**: [AutoDL official docs](https://www.autodl.com/docs/) +- **Coverage**: + - Instance creation and management + - Environment setup + - Data upload and download + - Billing and cost + +### Step-by-Step: Connecting PyCharm Professional to AutoDL + +**Configuration steps**: + +1. **Create an AutoDL instance**: Pick a suitable GPU configuration +2. **Collect connection info**: Note the IP address, port, and username +3. **Configure PyCharm**: Set up the remote interpreter +4. **File sync**: Configure automatic upload and download +5. **Debug and run**: Remote debugging and code execution + +**Network configuration**: + +- SSH connection settings +- Port forwarding +- File transfer tuning +- Ensuring a stable connection + +**Development workflow**: + +- Write code locally +- Sync code to the remote machine +- Schedule GPU resources +- Download result files + +## InternStudio + +### Platform Introduction + +- **Website**: [https://studio.intern-ai.org.cn/](https://studio.intern-ai.org.cn/user/account) +- **Highlights**: A free compute platform provided by the Shanghai AI Laboratory +- **Use cases**: Learning, research, and small-scale project development + +### Connection and Usage + +**SSH connection setup**: + +- [SSH connection and port forwarding tutorial](https://juejin.cn/post/7446234916049829939) +- Supports remote development workflows +- Provides a JupyterLab interface + +**Camp 4 training resources**: + +- **GitHub**: [Tutorial (Camp 4)](https://github.com/InternLM/Tutorial/tree/camp4) +- **Linux basics**: [InternStudio basic commands](https://aicarrier.feishu.cn/wiki/XZChwwDsciyFyHk5mGTc1EKinkc) + +### Open-Source Community Project Applications + +**Compute grants**: + +- 🔥 [Intern LLM open-source community project application](https://openxlab.org.cn/apps) 🔥 +- Available for open-source projects and academic research +- Offers long-term, stable compute support + +## Platform Comparison + +### When to Choose AutoDL + +**Strengths**: + +- Hourly billing keeps costs predictable +- Rich set of preinstalled environments +- Good Chinese-language support +- Stable network connectivity + +**Ideal users**: + +- Individual developers +- Beginners and students +- Short-term project needs +- Budget-constrained teams + +### When to Choose InternStudio + +**Strengths**: + +- Free usage quota +- Academic-friendly +- Integrated with the InternLM ecosystem +- Rich educational resources + +**Ideal users**: + +- Students and researchers +- InternLM model users +- Teaching and training +- Open-source project development + +### Other Cloud Options + +#### International Platforms + +- **Google Colab**: Free GPU — good for learning and lightweight work +- **AWS EC2**: Enterprise-grade service with broad features but higher cost +- **Microsoft Azure**: Integrates well with the Windows ecosystem +- **Lambda Labs**: Specialized GPU cloud provider + +#### China-Based Platforms + +- **Alibaba Cloud**: Enterprise-grade with a mature ecosystem +- **Tencent Cloud**: Tuned for gaming and social workloads +- **Baidu Cloud**: AI platform built around the PaddlePaddle ecosystem +- **Huawei 
Cloud**: Support for Ascend AI processors + +## Tips and Best Practices + +### Cost Optimization + +1. **On-demand usage**: Shut down idle instances promptly +2. **Preinstalled images**: Pick an image that matches your stack +3. **Data management**: Plan storage usage up front +4. **Alerts**: Set budget and resource-usage alerts + +### Developer Productivity + +1. **Environment management**: Use Docker or conda +2. **Code sync**: Set up Git or a file-sync tool +3. **Debugging**: Master remote debugging workflows +4. **Resource monitoring**: Watch GPU and memory usage in real time + +### Data Security + +1. **Regular backups**: Back up critical data across multiple locations +2. **Version control**: Manage code with Git +3. **Access control**: Use strong SSH keys +4. **Compliance**: Follow relevant data-handling regulations + +## Environment Setup Guide + +### Deep Learning Environment + +**Core components**: + +- CUDA/cuDNN +- Python 3.8+ +- PyTorch/TensorFlow +- Jupyter Notebook + +**Common libraries**: + +```bash +# PyTorch ecosystem +pip install torch torchvision transformers datasets + +# Scientific computing +pip install numpy pandas matplotlib seaborn + +# Machine learning +pip install scikit-learn xgboost lightgbm + +# Deep learning utilities +pip install wandb tensorboard +``` + +### Development Tool Setup + +- **IDEs**: PyCharm Professional, VS Code +- **Debugging**: pdb, ipdb +- **Profiling**: nvidia-smi, htop +- **Version control**: Git, DVC + +## Troubleshooting + +### Common Issues + +1. **Connection timeouts**: Check network and firewall settings +2. **GPU unavailable**: Verify CUDA installation and driver version +3. **Out of memory**: Reduce batch size or shrink model parameters +4. **Out of disk space**: Clean up temporary files and logs + +### Performance Tuning + +1. **GPU utilization**: Monitor and optimize GPU usage +2. **I/O optimization**: Speed up data loading and preprocessing +3. **Memory management**: Tune caching and batch size appropriately +4. **Parallelism**: Leverage multi-GPU and distributed training + +## Learning Suggestions + +1. **Know one platform deeply**: Develop expertise on at least one major platform +2. **Stay cost-aware**: Learn to plan and control compute spend +3. **Environment management**: Master configuration and dependency management +4. **Monitor and tune**: Track resource usage and optimize performance +5. **Security practices**: Take data security and access control seriously diff --git a/app/docs/ai/reinforcement-learning/reinforcement-learning-overview.en.mdx b/app/docs/ai/reinforcement-learning/reinforcement-learning-overview.en.mdx new file mode 100644 index 00000000..a188f695 --- /dev/null +++ b/app/docs/ai/reinforcement-learning/reinforcement-learning-overview.en.mdx @@ -0,0 +1,154 @@ +--- +title: Reinforcement Learning +description: Fundamentals of reinforcement learning, Chain-of-Thought (CoT), GRPO, and their applications in large language models +date: "2025-01-27" +tags: + - reinforcement-learning + - rlhf + - cot + - grpo + - ppo +docId: s4fuhmdf6hj49jx1l7k87d4p +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T00:00:00Z +translatorAgent: claude-opus-4-6 +--- + +Reinforcement learning plays a vital role in the era of large language models, particularly in reasoning and alignment. From RLHF to Chain-of-Thought reasoning, RL provides key technical foundations for enhancing LLM capabilities. + +## Recommended Learning Resources + +### Prof. 
Shiyu Zhao's RL Course (Westlake University) + +**Highlights**: The mathematical foundations of reinforcement learning, from scratch to deep understanding + +**Resources**: + +- **Book & Slides**: [GitHub repository](https://github.com/MathFoundationRL/Book-Mathmatical-Foundation-of-Reinforcement-Learning) +- **Video lectures**: [Full course on Bilibili](https://www.bilibili.com/video/BV1sd4y167NS/) +- **Why this course**: Rigorous math derivations with a solid theoretical foundation + +### RethinkFun RL Series + +**Recommended creator**: [@RethinkFun](https://space.bilibili.com/18235884/) + +**Core videos**: + +- [RL fundamentals explained](https://www.bilibili.com/video/BV1rooaYVEk8/) — plain-language explanations with illustrated principles and formula derivations +- [PPO and GRPO algorithm walkthroughs](https://www.bilibili.com/video/BV15cZYYvEhz/) — illustrated algorithm internals + +### Beginner Tutorials + +- **Minimal Introduction to Reinforcement Learning** — an intuitive walkthrough of MDP, DP/MC/TD, Q-learning, policy gradients, and PPO + +## RL Papers and Projects (Pending Review) + +> 📊 **Paper shortlist**: Click to view the full list of RL papers pending review +> +> Status categories: Not Started, Evaluating, Completed, Not Recommended, Not Open-Sourced + +## GRPO Reproduction References + +### TRL Framework + +- **Project**: [TRL (Transformer Reinforcement Learning)](https://github.com/huggingface/trl) +- **Highlights**: HuggingFace's official RL framework +- **Supported algorithms**: GRPO, PPO, DPO, and more + +## Chain-of-Thought (CoT) + +### Core Concept + +Chain-of-Thought reasoning is a key technique for exposing an LLM's reasoning process, improving both interpretability and reasoning capability. + +### Notable Papers and Projects + +#### CoT-Valve: Length-Compressible Chain-of-Thought Tuning + +- **Highlights**: A technique for tuning Chain-of-Thought reasoning with compressible length +- **Source**: [HuggingFace Daily Papers](https://huggingface.co/papers/date/2025-07-25) + +#### MCoT (Multi-Chain-of-Thought) + +- **Project**: [Awesome-MCoT](https://github.com/yaotingwangofficial/Awesome-MCoT) +- **Highlights**: Multi-chain reasoning that improves performance on complex reasoning tasks + +#### Latent CoT + +- **Project**: [Awesome-Latent-CoT](https://github.com/awesome-latent-cot) +- **Core idea**: Move reasoning from linguistic symbols into the latent space to capture richer and more complex thought processes + +### Multimodal CoT + +Chain-of-Thought reasoning that combines visual and textual information — showing strong capability on multimodal tasks. + +### Survey on Latent-Space Reasoning + +- **Key survey**: [HIT's first survey on latent-space reasoning](https://zhuanlan.zhihu.com/p/1930639357087298531) +- **Core argument**: Reshapes the boundaries of LLM reasoning by exploring reasoning mechanisms in the latent space + +## DeepSeek-R1 Deep Dive + +DeepSeek-R1, as a model with standout reasoning capabilities, has technical details worth in-depth study: + +- Reasoning mechanism design +- Training strategy analysis +- Performance evaluation methodology + +## Suggested Learning Path + +### Foundations + +1. **Math prerequisites**: probability, dynamic programming, optimization theory +2. **Core concepts**: MDP, value functions, policies, returns +3. **Classical algorithms**: Q-learning, policy gradients, Actor-Critic + +### Intermediate + +1. **Modern algorithms**: PPO, TRPO, SAC, TD3 +2. **LLM applications**: RLHF, Constitutional AI +3. 
**Chain-of-Thought techniques**: CoT, MCoT, Latent CoT + +### Practice + +1. **Frameworks**: OpenAI Gym, Stable Baselines3, TRL +2. **Hands-on projects**: game AI, dialogue system optimization +3. **Paper reproduction**: reproducing and improving upon key algorithms + +## Application Areas + +### LLM Alignment + +- **RLHF**: Learning from human feedback to improve output quality +- **Constitutional AI**: Principle-based approach to AI alignment +- **DPO**: Direct Preference Optimization — a simplified alternative to RLHF + +### Reasoning Capability + +- **Chain-of-Thought reasoning**: Improves performance on complex reasoning tasks +- **Tool use**: Training models to invoke external tools +- **Code generation**: Improves programming capability + +### Multi-Agent Systems + +- **Cooperative learning**: Multiple agents solve problems together +- **Competitive learning**: Individual capabilities improve through competition +- **Social learning**: Agents learn from the behavior of other agents + +## Frontier Trends + +1. **Offline RL**: Learning policies from static datasets +2. **Meta-learning**: Algorithms that adapt quickly to new tasks +3. **Safe RL**: Ensuring the safety of both the learning process and the learned policy +4. **Explainable RL**: Improving the interpretability of decision-making + +## Additional Notes + +- Chain-of-Thought / Multi-step CoT (MCoT) / Latent CoT + +- GRPO learning resources: + - Bilibili playlist: https://space.bilibili.com/18235884/search?keyword=GRPO + - PPO/GRPO algorithm explainer: https://www.bilibili.com/video/BV15cZYYvEhz/ + - Paper and resource collection: https://github.com/yaotingwangofficial/Awesome-MCoT +- RL math foundations textbook: GitHub https://github.com/MathFoundationRL/Book-Mathmatical-Foundation-of-Reinforcement-Learning diff --git a/app/docs/computer-science/data-structures/linked-list/01-singly-linked-list.zh.mdx b/app/docs/computer-science/data-structures/linked-list/01-singly-linked-list.zh.mdx new file mode 100644 index 00000000..b50e08b7 --- /dev/null +++ b/app/docs/computer-science/data-structures/linked-list/01-singly-linked-list.zh.mdx @@ -0,0 +1,570 @@ +--- +title: 单链表 +description: "单链表的实现、操作与应用" +date: "2024-01-07" +tags: + - singly-linked-list + - pointer-operations + - basic-data-structure +docId: gkjk6stzpb44n9lv8u2ij7xx +lang: zh +translatedFrom: en +translatedAt: 2026-04-15T00:00:00Z +translatorAgent: claude-opus-4-6 +--- + +# 单链表 + +单链表是链表最基本的形式,每个节点包含数据和指向下一个节点的指针。它支持动态内存分配,并能高效地完成插入和删除操作。 + +## 节点结构 + +```javascript +class ListNode { + constructor(data) { + this.data = data; // 数据域 + this.next = null; // 指针域,指向下一个节点 + } +} +``` + +## 完整实现 + +```javascript +class SinglyLinkedList { + constructor() { + this.head = null; // 头指针 + this.size = 0; // 链表长度 + } + + // 头部插入节点 + prepend(data) { + const newNode = new ListNode(data); + newNode.next = this.head; + this.head = newNode; + this.size++; + } + + // 尾部插入节点 + append(data) { + const newNode = new ListNode(data); + + // 若链表为空,新节点成为头节点 + if (!this.head) { + this.head = newNode; + this.size++; + return; + } + + // 找到最后一个节点 + let current = this.head; + while (current.next) { + current = current.next; + } + + // 链接新节点 + current.next = newNode; + this.size++; + } + + // 在指定位置插入节点 + insert(index, data) { + if (index < 0 || index > this.size) { + throw new Error("Index out of bounds"); + } + + // 头部插入 + if (index === 0) { + this.prepend(data); + return; + } + + const newNode = new ListNode(data); + let current = this.head; + + // 找到插入位置的前一个节点 + for (let i = 0; i < index - 1; i++) 
{ + current = current.next; + } + + // 插入新节点 + newNode.next = current.next; + current.next = newNode; + this.size++; + } + + // 删除头节点 + removeFirst() { + if (!this.head) { + return null; + } + + const removedData = this.head.data; + this.head = this.head.next; + this.size--; + return removedData; + } + + // 删除尾节点 + removeLast() { + if (!this.head) { + return null; + } + + // 只有一个节点 + if (!this.head.next) { + const removedData = this.head.data; + this.head = null; + this.size--; + return removedData; + } + + // 找到倒数第二个节点 + let current = this.head; + while (current.next.next) { + current = current.next; + } + + const removedData = current.next.data; + current.next = null; + this.size--; + return removedData; + } + + // 删除指定位置的节点 + remove(index) { + if (index < 0 || index >= this.size) { + throw new Error("Index out of bounds"); + } + + // 删除头节点 + if (index === 0) { + return this.removeFirst(); + } + + let current = this.head; + + // 找到被删节点的前一个节点 + for (let i = 0; i < index - 1; i++) { + current = current.next; + } + + const removedData = current.next.data; + current.next = current.next.next; + this.size--; + return removedData; + } + + // 删除指定值的第一个节点 + removeByValue(data) { + if (!this.head) { + return false; + } + + // 如果头节点就是要删除的节点 + if (this.head.data === data) { + this.head = this.head.next; + this.size--; + return true; + } + + let current = this.head; + while (current.next && current.next.data !== data) { + current = current.next; + } + + // 找到了要删除的节点 + if (current.next) { + current.next = current.next.next; + this.size--; + return true; + } + + return false; // 未找到 + } + + // 查找元素 + indexOf(data) { + let current = this.head; + let index = 0; + + while (current) { + if (current.data === data) { + return index; + } + current = current.next; + index++; + } + + return -1; // 未找到 + } + + // 获取指定位置的元素 + get(index) { + if (index < 0 || index >= this.size) { + throw new Error("Index out of bounds"); + } + + let current = this.head; + for (let i = 0; i < index; i++) { + current = current.next; + } + + return current.data; + } + + // 检查链表是否包含指定元素 + contains(data) { + return this.indexOf(data) !== -1; + } + + // 获取链表长度 + length() { + return this.size; + } + + // 检查链表是否为空 + isEmpty() { + return this.size === 0; + } + + // 清空链表 + clear() { + this.head = null; + this.size = 0; + } + + // 转换为数组 + toArray() { + const result = []; + let current = this.head; + + while (current) { + result.push(current.data); + current = current.next; + } + + return result; + } + + // 遍历链表 + forEach(callback) { + let current = this.head; + let index = 0; + + while (current) { + callback(current.data, index); + current = current.next; + index++; + } + } + + // 反转链表 + reverse() { + let prev = null; + let current = this.head; + let next = null; + + while (current) { + next = current.next; // 保存下一个节点 + current.next = prev; // 反转指针 + prev = current; // 移动 prev + current = next; // 移动 current + } + + this.head = prev; + } + + // 打印链表 + toString() { + if (!this.head) { + return "Empty list"; + } + + const elements = []; + let current = this.head; + + while (current) { + elements.push(current.data); + current = current.next; + } + + return elements.join(" -> "); + } +} +``` + +## 使用示例 + +```javascript +// 创建链表 +const list = new SinglyLinkedList(); + +// 添加元素 +list.append(1); +list.append(2); +list.append(3); +list.prepend(0); +console.log(list.toString()); // "0 -> 1 -> 2 -> 3" + +// 插入元素 +list.insert(2, 1.5); +console.log(list.toString()); // "0 -> 1 -> 1.5 -> 2 -> 3" + +// 查找元素 +console.log(list.indexOf(2)); // 3 
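console.log(list.indexOf(99)); // -1(链表中不存在该值时返回 -1)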
+console.log(list.get(1)); // 1 +console.log(list.contains(3)); // true + +// 删除元素 +list.remove(0); // 删除索引 0 的元素 +list.removeByValue(1.5); // 删除值为 1.5 的元素 +console.log(list.toString()); // "1 -> 2 -> 3" + +// 反转链表 +list.reverse(); +console.log(list.toString()); // "3 -> 2 -> 1" + +// 遍历链表 +list.forEach((data, index) => { + console.log(`Index ${index}: ${data}`); +}); +``` + +## 关键算法细节 + +### 1. 插入操作中的指针变化 + +``` +插入前:A -> B -> C +在 A 与 B 之间插入 X: + +步骤 1:newNode.next = A.next + A -> B -> C + X ----↗ + +步骤 2:A.next = newNode + A -> X -> B -> C +``` + +### 2. 删除操作中的指针变化 + +``` +删除前:A -> B -> C -> D +删除 B: + +步骤 1:找到 B 的前驱节点 A +步骤 2:A.next = B.next + A -----> C -> D + (B 被跳过,等待垃圾回收) +``` + +### 3. 反转算法细节 + +```javascript +// 反转过程示意 +// 初始:1 -> 2 -> 3 -> null +// 目标:null <- 1 <- 2 <- 3 + +function reverse(head) { + let prev = null; + let current = head; + + while (current !== null) { + let next = current.next; // 保存下一个节点 + current.next = prev; // 反转当前节点的指针 + prev = current; // prev 前移 + current = next; // current 前移 + } + + return prev; // 此时 prev 指向新的头节点 +} +``` + +## 常见面试题 + +### 1. 检测链表中的环 + +```javascript +function hasCycle(head) { + if (!head || !head.next) return false; + + let slow = head; + let fast = head; + + while (fast && fast.next) { + slow = slow.next; + fast = fast.next.next; + + if (slow === fast) { + return true; // 发现环 + } + } + + return false; +} +``` + +### 2. 找到链表的中点 + +```javascript +function findMiddle(head) { + if (!head) return null; + + let slow = head; + let fast = head; + + while (fast.next && fast.next.next) { + slow = slow.next; + fast = fast.next.next; + } + + return slow; +} +``` + +### 3. 合并两个有序链表 + +```javascript +function mergeTwoLists(l1, l2) { + const dummy = new ListNode(0); + let current = dummy; + + while (l1 && l2) { + if (l1.data <= l2.data) { + current.next = l1; + l1 = l1.next; + } else { + current.next = l2; + l2 = l2.next; + } + current = current.next; + } + + // 连接剩余的节点 + current.next = l1 || l2; + + return dummy.next; +} +``` + +## 性能优化技巧 + +### 1. 尾指针优化 + +```javascript +class OptimizedSinglyLinkedList { + constructor() { + this.head = null; + this.tail = null; // 维护尾指针 + this.size = 0; + } + + append(data) { + const newNode = new ListNode(data); + + if (!this.head) { + this.head = this.tail = newNode; + } else { + this.tail.next = newNode; + this.tail = newNode; + } + + this.size++; + } + + // 现在 append 操作是 O(1) 而不是 O(n) +} +``` + +### 2. 哨兵节点优化 + +```javascript +class SentinelLinkedList { + constructor() { + this.sentinel = new ListNode(null); // 哨兵节点 + this.sentinel.next = null; + this.size = 0; + } + + // 有了哨兵节点后,许多操作的边界处理会更简单 + prepend(data) { + const newNode = new ListNode(data); + newNode.next = this.sentinel.next; + this.sentinel.next = newNode; + this.size++; + } +} +``` + +## 实际应用场景 + +### 1. 实现栈 + +```javascript +class Stack { + constructor() { + this.list = new SinglyLinkedList(); + } + + push(item) { + this.list.prepend(item); + } + + pop() { + return this.list.removeFirst(); + } + + peek() { + return this.list.isEmpty() ? null : this.list.get(0); + } + + isEmpty() { + return this.list.isEmpty(); + } +} +``` + +### 2. 
实现队列 + +```javascript +class Queue { + constructor() { + this.head = null; + this.tail = null; + } + + enqueue(item) { + const newNode = new ListNode(item); + if (!this.tail) { + this.head = this.tail = newNode; + } else { + this.tail.next = newNode; + this.tail = newNode; + } + } + + dequeue() { + if (!this.head) return null; + + const item = this.head.data; + this.head = this.head.next; + if (!this.head) this.tail = null; + + return item; + } +} +``` + +## 注意事项 + +1. **空指针检查**:总是先判断节点是否为空 +2. **边界情况**:特别留意空链表和单节点的情况 +3. **内存管理**:在需要手动管理内存的语言中,记得释放已删除的节点 +4. **指针操作顺序**:插入和删除时注意指针修改的先后顺序 + +## 小结 + +单链表是理解链表这一数据结构的基础。虽然它在随机访问上不如数组高效,但在动态的插入与删除操作中表现出色。掌握单链表的实现与操作,是学习更复杂数据结构的必经之路。 + +下一节我们会学习双向链表,看看它如何通过增加反向指针带来更大的灵活性。 diff --git a/app/docs/jobs/interview-prep/bq.en.md b/app/docs/jobs/interview-prep/bq.en.md new file mode 100644 index 00000000..625ab3d1 --- /dev/null +++ b/app/docs/jobs/interview-prep/bq.en.md @@ -0,0 +1,70 @@ +--- +title: Behavioral Interview +description: First page +date: "2025-09-11" +tags: + - intro +docId: u68pjetu592c9zvs3f5xa82j +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T00:00:00Z +translatorAgent: claude-opus-4-6 +--- + +# Ten Universal Angles for Behavioral Interviews + +After sitting through a stretch of interviews, I realized "behavioral" questions — the BQ ones — may look endlessly varied, but they really boil down to ten directions. Prepare one solid story for each and you'll be able to handle almost any variant. + +## 1. Collaboration & Communication + +This category shows up constantly. Common questions: + +**Teamwork** +Don't just say "I'm a team player." Show how you work with colleagues and managers from different backgrounds to hit a shared goal. + +**Conflict** +Conflict questions are unavoidable. Don't shy away from describing a disagreement — the point is how you kept calm, communicated, and found a way forward. + +**Client service** +Often overlooked. Plenty of JDs are really probing how you understand client needs, solve their problems, and sometimes deliver above expectations. + +## 2. Leadership & Organization + +Not just for management roles. Common questions: + +**Leadership** +The focus isn't "how I directed people." It's how you took ownership and drove momentum inside a team or project. + +**Planning** +The interviewer wants to hear how you break work down, stage it, and handle surprises along the way. + +**Multitasking** +How do you juggle parallel work streams? Be clear about how you prioritize and still deliver on time. + +## 3. Learning & Growth + +Roles evolve fast, so learning agility is always on the rubric. Common questions: + +**Learn new skills** +Use a specific example to show how you picked up a new skill or approach on your own and applied it at work right away. + +**Motivation** +The classic "three whys": why the industry, why the company, why the role. Answer by tying the company's positioning to your personal drivers — that's what lands. + +## 4. Challenges & Response + +Shows up in almost every interview. Common questions: + +**Challenge** +A major challenge you faced at work — don't drift into life anecdotes. Focus on how you decomposed the problem and pushed through. + +**Failure** +A failure story is a plus, not a minus. Be honest about what went wrong, and — more importantly — how you adjusted and grew afterward. + +## Summary + +Teamwork, conflict, client; leadership, planning, multitasking; learning, motivation; challenge, failure. +Prep these ten and any reworded question becomes just another variation. 
Remember: use the **STAR framework** (Situation, Task, Action, Result)
to package your answer as a short story you can pull out on demand.

From db1a25cedf82c242bcf0463020c4641ab4e34070 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:43:53 +0000
Subject: [PATCH 09/19] feat(docs): i18n: all of CommunityShare translated
 (13 docs)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Produced by translator-community. Translations in both directions:
- Geek: git101 / picturecdn / swanlab / raspberry-guide /
  CommonUsedMarkdown / Katex ×2 (all zh→en); leworldmodel (en→zh)
- RAG: context_engineering_intro / embedding / rag (all zh→en)
- Amazing-AI-Tools: perplexity-comet (zh→en),
  prompt-repetition-improves-non-reasoning-llms (en→zh)

Skipped: 3 index pages and 1 file already translated in the MVP.

Every translated file inherits the source docId in its frontmatter and
carries a translatedFrom marker so the contributors script skips it when
counting.
---
 .../Amazing-AI-Tools/perplexity-comet.en.md   | 104 ++++++
 ...petition-improves-non-reasoning-llms.zh.md |  20 ++
 .../Geek/CommonUsedMarkdown.en.md             |  81 +++++
 .../CommunityShare/Geek/Katex/Seb1.en.mdx     |  84 +++++
 .../CommunityShare/Geek/Katex/Seb2.en.mdx     | 113 +++++++
 app/docs/CommunityShare/Geek/git101.en.mdx    |  49 +++
 .../CommunityShare/Geek/leworldmodel.zh.md    |  18 +
 .../CommunityShare/Geek/picturecdn.en.mdx     | 164 +++++++++
 .../CommunityShare/Geek/raspberry-guide.en.md | 313 ++++++++++++++++++
 app/docs/CommunityShare/Geek/swanlab.en.mdx   |  99 ++++++
 .../RAG/context_engineering_intro.en.md       |  72 ++++
 app/docs/CommunityShare/RAG/embedding.en.mdx  |  67 ++++
 app/docs/CommunityShare/RAG/rag.en.mdx        | 107 ++++++
 13 files changed, 1291 insertions(+)
 create mode 100644 app/docs/CommunityShare/Amazing-AI-Tools/perplexity-comet.en.md
 create mode 100644 app/docs/CommunityShare/Amazing-AI-Tools/prompt-repetition-improves-non-reasoning-llms.zh.md
 create mode 100644 app/docs/CommunityShare/Geek/CommonUsedMarkdown.en.md
 create mode 100644 app/docs/CommunityShare/Geek/Katex/Seb1.en.mdx
 create mode 100644 app/docs/CommunityShare/Geek/Katex/Seb2.en.mdx
 create mode 100644 app/docs/CommunityShare/Geek/git101.en.mdx
 create mode 100644 app/docs/CommunityShare/Geek/leworldmodel.zh.md
 create mode 100644 app/docs/CommunityShare/Geek/picturecdn.en.mdx
 create mode 100644 app/docs/CommunityShare/Geek/raspberry-guide.en.md
 create mode 100644 app/docs/CommunityShare/Geek/swanlab.en.mdx
 create mode 100644 app/docs/CommunityShare/RAG/context_engineering_intro.en.md
 create mode 100644 app/docs/CommunityShare/RAG/embedding.en.mdx
 create mode 100644 app/docs/CommunityShare/RAG/rag.en.mdx

diff --git a/app/docs/CommunityShare/Amazing-AI-Tools/perplexity-comet.en.md b/app/docs/CommunityShare/Amazing-AI-Tools/perplexity-comet.en.md
new file mode 100644
index 00000000..4f104631
--- /dev/null
+++ b/app/docs/CommunityShare/Amazing-AI-Tools/perplexity-comet.en.md
@@ -0,0 +1,104 @@
---
title: "Perplexity Comet: The AI Browser That Acts Like a Personal Assistant"
description: ""
date: "2025-10-03"
tags:
  - perplexity
  - comet
  - productivity
docId: eej2awin6irhbdgcy8vvs3xb
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T12:00:00Z
translatorAgent: claude-sonnet-4-6
---

## Key Info

**Free for students — claim here: [Get Comet with free Perplexity Pro. The AI browser built for students.](https://www.perplexity.ai/students)**
Follow the link to get a free year of Perplexity Pro membership. 
+Then you can download the Comet browser here: +[Comet Browser: a Personal AI Assistant](https://www.perplexity.ai/comet/) + +## What Is Comet? + +This is the first post in the series, and I'm starting with Perplexity Comet — an AI browser. +I picked it first because it's the tool I use most, even more than Cursor. It's a browser, after all — something you have open every single day. + +There are quite a few AI browsers on the market right now: Dia, Microsoft's Edge has leaned into AI, and Chrome is ramping up too. +But from what I've heard, Comet pulls ahead in a few specific ways — probably thanks to Perplexity's early head start. + +![ab3becf9-fe7f-40f3-92f3-8e861e68f1fc.png](https://img.coly.cc/obs-img/2025/10/1df9eb3648e7893e32c0139de7ad5a6d.png) + +What makes it stand out comes down to two things: + +1. It can operate the browser on my behalf — handling tasks I don't want to do myself. The speed and accuracy aren't always perfect, but the point is you can hand it something and go do something else. +2. It's replaced my to-do app. It acts like a personal scheduler: consolidating my emails, surfacing upcoming tasks, and sending me reminders. + +### Browser Automation + +Comet's browser automation works in two modes: + +1. Background mode — visits pages and executes tasks without you watching +2. Foreground mode — automates actions directly on your current page + +#### Background Mode: Use Cases + +Checking your email, browsing Moodle to look up grades, and similar tasks. + +If you're already logged into Moodle, you can type a request directly into Comet's input box: +![image.png](https://img.coly.cc/obs-img/2025/10/2979472a93976749b4f45b5960906c57.png) +![image.png](https://img.coly.cc/obs-img/2025/10/bff8830b849d3d64ad5ee40f4eb589d8.png) +You'll then see it navigating pages and executing steps in the background: +![image.png](https://img.coly.cc/obs-img/2025/10/5e31b513d633a1383ef637293e320c3e.png) +Until it comes back with a result. + +Similarly, you can ask it to check your inbox and delete spam. I won't go through every example — once you get the idea, you'll know what kinds of tasks this background AI can handle. + +One thing to note: Comet defaults to using Perplexity's own model, which is pretty mediocre. Remember to manually switch it to whichever model is currently the best: +![image.png](https://img.coly.cc/obs-img/2025/10/aeba741491cad07e3c105db3f49cdf1a.png) + +#### Foreground Mode: Use Cases + +Automating Coles — adding every item on your grocery list to the cart. +![8c574a67ac53577520eb8758b7e892a5.png](https://img.coly.cc/obs-img/2025/10/0759eb80fe077ad310c0f235bf8843c3.png) +The way to use it: open the Coles page while logged in, click the Assistant button in the top right, and give it an instruction like "add [item] to my cart." +You'll see a blue overlay appear on the page as it gets to work. +Even in foreground mode, you can switch to another tab and carry on — it keeps going in the background. +I'm honestly not entirely sure what the difference is between the two modes, but it seems like some pages are harder to operate in background mode and more reliable in foreground mode. 
+ +### Your Personal Planner: Email + Calendar Integration + +The first thing I do every morning is type my custom command into the input box: +![image.png](https://img.coly.cc/obs-img/2025/10/98605f02a120be2cb67a090c85a40b2b.png) + +Remind me when to bring laundry in (Sydney's rainy season has soaked me enough times), +tell me when the exchange rate is favorable, +surface anything coming up that I need to deal with, +and summarize my emails. + +First, go to Settings > Connectors and add your accounts. I mainly use Google services — if you use others, those can be connected too. +![image.png](https://img.coly.cc/obs-img/2025/10/ed069ee778622782832ed06b947c24d6.png) + +Then add a shortcut in settings so you don't have to type a long prompt every day — just a short command and it runs: +![image.png](https://img.coly.cc/obs-img/2025/10/feb661e4aa4c71b5cf2c68fd5284f294.png) + +Here's what it looks like in action: +![image.png](https://img.coly.cc/obs-img/2025/10/26eb7f9cb633787058b0efe5ecfa86e6.png) +![image.png](https://img.coly.cc/obs-img/2025/10/751ab79cce1abfd9c194d09666cb6061.png) +Pretty handy. + +My favorite feature is the calendar integration. Since I started using it, I haven't opened TickTick or Google Calendar once. Course schedules get added to calendar events automatically, and Comet reminds me. If I have something I want to remember, I just tell it — it logs the event in my calendar, and when my daily command runs, it surfaces any upcoming events and sends reminders via browser notification and email. Hard to forget things. + +## About Perplexity + +Beyond Comet, Perplexity is primarily an AI search engine company. Their core product is decent, but doesn't stand out dramatically compared to GPT or similar tools. What it does well is let you use models from multiple providers freely — so it's a solid AI chat option overall. +![image.png](https://img.coly.cc/obs-img/2025/10/544381e5bde62b83b952ce3ad96b3820.png) + +## Coming Up Next + +That's Perplexity Comet covered — the tool I use most. Next up is Cursor. The topic will probably be what a lot of people actually want to know: how a complete beginner can use it to ship projects quickly and build up their portfolio. + +Cursor is genuinely impressive — over the past year I've used it to knock out a bunch of solid projects in a fraction of the time it would've taken otherwise. +The downside is that heavy reliance on it weakens your raw coding skills over time. +So I'm actually on the fence about whether to write that post. diff --git a/app/docs/CommunityShare/Amazing-AI-Tools/prompt-repetition-improves-non-reasoning-llms.zh.md b/app/docs/CommunityShare/Amazing-AI-Tools/prompt-repetition-improves-non-reasoning-llms.zh.md new file mode 100644 index 00000000..9030b5c3 --- /dev/null +++ b/app/docs/CommunityShare/Amazing-AI-Tools/prompt-repetition-improves-non-reasoning-llms.zh.md @@ -0,0 +1,20 @@ +--- +title: Prompt Repetition Improves Non-Reasoning LLMs +description: 复读机或可提高大模型能力 +date: "2026-03-05" +tags: + - AI + - LLMs + - arXiv +docId: l6eepr5ctjgrhdgupy3twr1t +lang: zh +translatedFrom: en +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +<https://arxiv.org/pdf/2512.14982> + +在不使用推理模式的情况下,重复输入提示词能够提升主流模型(Gemini、GPT、Claude、DeepSeek)的表现,且不会增加生成 token 数量或推理延迟。 + +1. 
提示词重复:LLM 通常以因果语言模型的方式训练,即过去的 token 无法关注到未来的 token。因此,用户查询中 token 的排列顺序会影响预测性能。例如,"选项在前、问题在后"的查询形式与"问题在前、选项在后"的形式往往表现不同(见图 1)。我们提出重复提示词的方法:将输入从单次提示词转换为重复两次的形式。这使得每个提示词 token 都能关注到其他所有提示词 token,从而解决上述问题。在不使用推理模式时,提示词重复能够提升 LLM 的性能(图 1),且不会增加生成输出的长度或推理延迟。 diff --git a/app/docs/CommunityShare/Geek/CommonUsedMarkdown.en.md b/app/docs/CommunityShare/Geek/CommonUsedMarkdown.en.md new file mode 100644 index 00000000..8f9f10cf --- /dev/null +++ b/app/docs/CommunityShare/Geek/CommonUsedMarkdown.en.md @@ -0,0 +1,81 @@ +--- +title: Common Markdown Syntax +date: 2025-09-20T14:25:39.000Z +docId: xqz5iiv3p52h6d9g3c0w2baf +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Headings & Font Styles + +`# Heading 1` + +`## Heading 2` + +And so on. + +**Bold** `**Bold**` + +_Italic_ `*Italic*` + +**_Bold Italic_** `***Bold Italic***` + +~~Strikethrough~~ `~~Strikethrough~~` + +Highlight `Highlight` + +Half width space + +`Half width space` + +Full width space + +`Full width space` + +## Unordered & Ordered Lists + +- Unordered item 1 `* Unordered item 1` +- Unordered item 2 `* Unordered item 2` +- Unordered item 3 `* Unordered item 3` +- Unordered item 4 `* Unordered item 4` + +1. Ordered item 1 `1. Ordered item 1` +2. Ordered item 2 `2. Ordered item 2` +3. Ordered item 3 `3. Ordered item 3` +4. Ordered item 4 `4. Ordered item 4` + +## Tables + +| Header | Left-aligned | Centered | Right-aligned | +| ------ | :----------- | :------: | ------------: | +| Cell | Cell | Cell | Cell | +| Cell | Cell | Cell | Cell | + +``` +| Header | Left-aligned | Centered | Right-aligned | +| - | :- | :-: | -: | +| Cell | Cell | Cell | Cell | +| Cell | Cell | Cell | Cell | +``` + +## Links + +`[Link text](target URL)` + +**Example** + +[Download Anaconda here](https://www.anaconda.com/download) + +`[Download Anaconda here](https://www.anaconda.com/download)` + +## Images + +`![Alt text](./your-image.jpg "Custom hover title")` + +**Example** + +![Test image](./CommonUsedMarkdown.assets/testpic1.jpg "autumn") + +`![Test image](./CommonUsedMarkdown.assets/testpic1.jpg "autumn")` diff --git a/app/docs/CommunityShare/Geek/Katex/Seb1.en.mdx b/app/docs/CommunityShare/Geek/Katex/Seb1.en.mdx new file mode 100644 index 00000000..70d4352e --- /dev/null +++ b/app/docs/CommunityShare/Geek/Katex/Seb1.en.mdx @@ -0,0 +1,84 @@ +--- +title: Commonly Used Symbols +date: 2025-09-20T14:25:39.000Z +docId: r0inttjcby48tly602p410vo +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Greek Letters + +| Result | Command | Result | Command | +| :--------: | :--------: | :-------: | :-------: | +| $\alpha$ | `\alpha` | $\sigma$ | `\sigma` | +| $\beta$ | `\beta` | $\tau$ | `\tau` | +| $\gamma$ | `\gamma` | $\Gamma$ | `\Gamma` | +| $\delta$ | `\delta` | $\Delta$ | `\Delta` | +| $\epsilon$ | `\epsilon` | $\psi$ | `\psi` | +| $\zeta$ | `\zeta` | $\omega$ | `\omega` | +| $\eta$ | `\eta` | $\Omega$ | `\Omega` | +| $\theta$ | `\theta` | $\Theta$ | `\Theta` | +| $\lambda$ | `\lambda` | $\phi$ | `\phi` | +| $\mu$ | `\mu` | $\varphi$ | `\varphi` | +| $\pi$ | `\pi` | $\xi$ | `\xi` | +| $\rho$ | `\rho` | | | + +## Arrows + +| Result | Command | Result | Command | +| :---------------: | :---------------: | :------------------: | :------------------: | +| $\leftarrow$ | `\leftarrow` | $\Leftarrow$ | `\Leftarrow` | +| $\rightarrow$ | `\rightarrow` | $\Rightarrow$ | `\Rightarrow` | +| $\leftrightarrow$ | `\leftrightarrow` | $\Leftrightarrow$ | 
`\Leftrightarrow` | +| $\uparrow$ | `\uparrow` | $\mapsto$ | `\mapsto` | +| $\downarrow$ | `\downarrow` | $\rightleftharpoons$ | `\rightleftharpoons` | + +## Operators + +| Result | Command | Result | Command | +| :------: | :------: | :-------: | :-------: | +| $\times$ | `\times` | $\wedge$ | `\wedge` | +| $\div$ | `\div` | $\vee$ | `\vee` | +| $\cap$ | `\cap` | $\oplus$ | `\oplus` | +| $\cup$ | `\cup` | $\otimes$ | `\otimes` | +| $\cdot$ | `\cdot` | $\cdots$ | `\cdots` | + +## Relational Symbols + +| Result | Command | Result | Command | +| :-----: | :-----: | :---------: | :---------: | +| $\neq$ | `\neq` | $\subset$ | `\subset` | +| $\leq$ | `\leq` | $\subseteq$ | `\subseteq` | +| $\geq$ | `\geq` | $\approx$ | `\approx` | +| $\ll$ | `\ll` | $\equiv$ | `\equiv` | +| $\gg$ | `\gg` | $\cong$ | `\cong` | +| $\prec$ | `\prec` | $\parallel$ | `\parallel` | +| $\succ$ | `\succ` | $\perp$ | `\perp` | +| $\in$ | `\in` | $\notin$ | `\notin` | + +## Infinity, Partial Derivatives & Proof Symbols + +| Result | Command | Result | Command | +| :-----------: | :-----------: | :----------: | :----------: | +| $\infty$ | `\infty` | $\exists$ | `\exists` | +| $\partial$ | `\partial` | $\neg$ | `\neg` | +| $\varnothing$ | `\varnothing` | $\because$ | `\because` | +| | | $\therefore$ | `\therefore` | + +## Superscripts & Decorators + +| Result | Command | Result | Command | +| :-----------------------: | :-----------------------: | :------------------------: | :------------------------: | +| $\bar{a}$ | `\bar{a}` | | | +| $\hat{a}$ | `\hat{a}` | | | +| $\widehat{abc}$ | `\widehat{abc}` | | | +| $\tilde{a}$ | `\tilde{a}` | $\utilde{a}$ | `\utilde{a}` | +| $\widetilde{abc}$ | `\widetilde{abc}` | | | +| $\vec{A}$ | `\vec{A}` | | | +| $\overline{AB}$ | `\overline{AB}` | $\underline{AB}$ | `\underline{AB}` | +| $\overleftarrow{AB}$ | `\overleftarrow{AB}` | $\underleftarrow{AB}$ | `\underleftarrow{AB}` | +| $\overleftrightarrow{AB}$ | `\overleftrightarrow{AB}` | $\underleftrightarrow{AB}$ | `\underleftrightarrow{AB}` | +| $\overleftharpoon{AB}$ | `\overleftharpoon{AB}` | | | +| $\overgroup{AB}$ | `\overgroup{AB}` | $\undergroup{AB}$ | `\undergroup{AB}` | diff --git a/app/docs/CommunityShare/Geek/Katex/Seb2.en.mdx b/app/docs/CommunityShare/Geek/Katex/Seb2.en.mdx new file mode 100644 index 00000000..20518a41 --- /dev/null +++ b/app/docs/CommunityShare/Geek/Katex/Seb2.en.mdx @@ -0,0 +1,113 @@ +--- +title: Math Formula Syntax +date: 2025-09-20T14:25:39.000Z +docId: khcrztruqdku9fntd3dwzvwe +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Summation, Product & Binomial + +| Example | Command | +| :---------------: | :-------------: | +| $$\sum_{i=1}^n$$ | `\sum_{i=1}^n` | +| $$\prod_{i=1}^n$$ | `\prod_{i=1}^n` | +| $$\tbinom{a}{b}$$ | `\tbinom{a}{b}` | + +## Matrices & Determinants + +$$ +\left(\begin{matrix} +a & b \\ +c & d +\end{matrix}\right) +$$ + +```latex +$$ +\left(\begin{matrix} +a & b \\ +c & d +\end{matrix}\right) +$$ +``` + +$$ +\left|\begin{matrix} +a & b \\ +c & d +\end{matrix}\right| +$$ + +```latex +$$ +\left|\begin{matrix} +a & b \\ +c & d +\end{matrix}\right| +$$ +``` + +$$ +\left(\begin{matrix} +1 & 2 & \cdots & n \\ +2 & 3 & \cdots & n+1 \\ +\vdots & \vdots & \ddots & n^2 - 1\\ +n & n+1 & \cdots & n^2 +\end{matrix}\right) +$$ + +```latex +$$ +\left(\begin{matrix} +1 & 2 & \cdots & n \\ +2 & 3 & \cdots & n+1 \\ +\vdots & \vdots & \ddots & n^2 - 1\\ +n & n+1 & \cdots & n^2 +\end{matrix}\right) +$$ +``` + +## Aligned Equations + +$$ 
+\begin{aligned} + f(X|\theta) &= f(x_1,x_2,\cdots,x_n|\theta)\\ + &= f(x_1|\theta) \cdot f(x_2|\theta) \cdots f(x_n|\theta)\\ + &= \prod_{i=1}^n \frac{1}{x_i !} e^{-\theta}\theta^{x_i}\\ + &= \left(\prod_{i=1}^n \frac{1}{x_i !}\right) e^{-n \theta}\theta^{\sum_{i=1}^n x_i} +\end{aligned} +$$ + +```latex +$$ +\begin{aligned} + f(X|\theta) &= f(x_1,x_2,\cdots,x_n|\theta)\\ + &= f(x_1|\theta) \cdot f(x_2|\theta) \cdots f(x_n|\theta)\\ + &= \prod_{i=1}^n \frac{1}{x_i !} e^{-\theta}\theta^{x_i}\\ + &= \left(\prod_{i=1}^n \frac{1}{x_i !}\right) e^{-n \theta}\theta^{\sum_{i=1}^n x_i} +\end{aligned} +$$ +``` + +## Piecewise Functions (differs from standard LaTeX) + +Since `\equation` and the standard `\cases` syntax are not supported here, a workaround is needed: + +$$ +f(x,\theta) = \left\{ \begin{array}{ll} +\dfrac{1}{x\sqrt{2\pi \theta}} e^{-\frac{1}{2\theta} [\log(x)]^2} &,\text{ $x>0$} \\ +0 &,\text{ otherwise} +\end{array} \right. +$$ + +```latex +$$ +f(x,\theta) = \left\{ \begin{array}{ll} +\dfrac{1}{x\sqrt{2\pi \theta}} e^{-\frac{1}{2\theta} [\log(x)]^2} &,\text{ $x>0$} \\ +0 &,\text{ otherwise} +\end{array} \right. +$$ +``` diff --git a/app/docs/CommunityShare/Geek/git101.en.mdx b/app/docs/CommunityShare/Geek/git101.en.mdx new file mode 100644 index 00000000..1fd63e67 --- /dev/null +++ b/app/docs/CommunityShare/Geek/git101.en.mdx @@ -0,0 +1,49 @@ +--- +title: Git Getting Started Guide — Git Tips Every Developer Should Know +description: "" +date: "2025-09-19" +tags: + - tag-one +docId: tksz80mfqqyzwzzer5p3uxtg +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Most Common Git Commands + +| Basic | Branch & Remote | +| ------------------------------------------ | --------------------------------------------------- | +| `git init
` → Initialize a repo | `git branch` → List / create branches |
| `git clone <url>` → Clone a remote repo | `git checkout -b <branch>` → New branch + switch |
| `git add <file>` → Stage a file | `git merge <branch>` → Merge a branch |
| `git commit -m "msg"` → Commit changes | `git pull <remote>` → Fetch + merge remote |
| `git push <remote> <branch>` → Push branch | `git remote add <name> <url>` → Add remote |
| `git status` → Check file status | `git fetch <remote>` → Fetch remote branch |
| `git log --oneline` → One-line history | `git pull --rebase <remote>` → Fetch + rebase |
| `git diff` → View unstaged changes | `git push --tags` → Push all tags |

## Other Git Commands

| Undo / Reset / Log / Diff | Config / Advanced Push / Rebase |
| ----------------------------------------------------------------------- | ----------------------------------------------------------- |
| `git commit --amend` → Modify the last commit | `git config user.name <name>` → Set repo author name |
| `git revert <commit>` → Revert a commit | `git config --global user.name <name>` → Global author name |
| `git reset <file>` → Unstage a file | `git config --global user.email <email>` → Global email |
| `git reset --soft <commit>` → Roll back commit, keep staged & working | `git config --global alias.<alias> <command>` → Create alias |
| `git reset --mixed <commit>` → Roll back commit, keep working (default) | `git config --system core.editor <editor>` → Set editor |
| `git reset --hard <commit>` → Roll back commit, discard changes | `git config --global --edit` → Edit global config |
| `git clean -n` → Preview deletion of untracked files | `git push --force` → Force push (dangerous) |
| `git reflog` → View HEAD history | `git push --all` → Push all branches |
| `git log -<n>` → Limit number of commits | `git rebase <branch>` → Rebase a branch |
| `git log --stat` → File change stats | `git rebase -i <base>` → Interactive rebase |
| `git log -p` → Detailed commit diff | `git rebase --continue` → Continue rebase after conflict |
| `git log <since>..<until>` → History in a range | `git rebase --abort` → Abort rebase |
| `git log -- <file>` → History for a specific file | `git log --author="<pattern>"` → Search by author |
| `git diff HEAD` → Working tree vs. latest commit | `git log --grep="<pattern>"` → Search by message |
| `git diff --cached` → Staged vs. latest commit | `git log --graph --decorate` → Graphical history |

## Practice Exercises

Here's a recommended interactive sandbox for learning Git — great for beginners to get hands-on quickly: https://learngitbranching.js.org/
diff --git a/app/docs/CommunityShare/Geek/leworldmodel.zh.md b/app/docs/CommunityShare/Geek/leworldmodel.zh.md
new file mode 100644
index 00000000..eb05a472
--- /dev/null
+++ b/app/docs/CommunityShare/Geek/leworldmodel.zh.md
@@ -0,0 +1,18 @@
+---
title: LeWorldModel
description: 从像素端到端稳定训练的联合嵌入预测架构
date: "2026-04-08"
tags:
  - 世界模型
docId: boo70qqm8nos8b0q9h7zjrki
lang: zh
translatedFrom: en
translatedAt: 2026-04-15T12:00:00Z
translatorAgent: claude-sonnet-4-6
---

联合嵌入预测架构(JEPA)为在紧凑隐空间中学习世界模型提供了一个颇具吸引力的框架,但现有方法仍然较为脆弱,往往依赖复杂的多项损失、指数移动平均、预训练编码器或辅助监督来避免表征坍塌。在本研究中,我们提出 LeWorldModel(LeWM)——首个仅用两项损失函数(下一嵌入预测损失 + 强制隐嵌入服从高斯分布的正则项)就能从原始像素端到端稳定训练的 JEPA。与当前唯一存在的端到端替代方案相比,可调损失超参数从六个降至一个。LeWM 拥有 1500 万参数,可在单张 GPU 上数小时内完成训练,规划速度最高比基于基础模型的世界模型快 48 倍,同时在多种 2D 和 3D 控制任务上保持竞争力。除控制任务外,我们还通过对物理量的探针分析表明,LeWM 的隐空间编码了有意义的物理结构。惊讶度评估进一步确认,该模型能可靠地检测出物理上不合理的事件。 + 
+ +<https://arxiv.org/pdf/2603.19312> diff --git a/app/docs/CommunityShare/Geek/picturecdn.en.mdx b/app/docs/CommunityShare/Geek/picturecdn.en.mdx new file mode 100644 index 00000000..dee8ac23 --- /dev/null +++ b/app/docs/CommunityShare/Geek/picturecdn.en.mdx @@ -0,0 +1,164 @@ +--- +title: How to Deploy Your Own GitHub Image Hosting with PictureCDN +description: "" +date: "2025-09-27" +tags: + - tag-one +docId: e6udpzrorhvgeeda6xpy1e0s +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# 1. What Is an Image Host? + +- An image host is essentially a "bed" for storing images — an online service that lets you upload, store, and share images. Once you upload an image, you get a link you can share anywhere without worrying about storage capacity or bandwidth. + +## What Problems Does an Image Host Solve? + +- 1. Individual users and small websites often lack the server space and bandwidth to store and serve large numbers of images. An image host provides cheap or even free storage so you can keep as many images as you need without worry. +- 2. Image hosts usually include management features, making it easy to organize, search, and reuse your uploaded images. +- 3. Performance: Image hosts typically run on powerful servers with CDN (Content Delivery Network) infrastructure, enabling fast image delivery to users worldwide. This is critical for a good user experience. + +# 2. Creating an Image Hosting Repository on GitHub + +## 2.1 Create a Repository + +![Create repository](https://ravencaffeine.github.io/PictureCDN/images/20250927160645295.png) + +![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645296.png) + +## 2.2 Generate a Personal Access Token + +- Click your profile picture or avatar in the top-right corner. +- Select "Settings" from the dropdown menu. + ![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645297.png) + +### 2.2.1 In "Developer settings", click "Personal access tokens". + +- In the left sidebar, click "Developer settings". + +![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645298.png) + +![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645299.png) + +### 2.2.2 Generate new token (classic) + +![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645300.png) + +- In the "Note" field, enter a descriptive name so you remember what this token is for. +- Choose an "Expiration" date. You can set it to never expire or pick a specific date. +- Select the appropriate "Scopes" (permissions). If you only need repository access, choosing "repo" is sufficient. +- Scroll down and click "Generate token". + +![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645301.png) + +- Once generated, you'll see the token in plain text. **Copy and save it immediately** — this is the only time you'll see it. +- After leaving the page, you can only see whether the token exists, not its value. + +![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645302.png) + +Note: + +- Personal access tokens are highly sensitive — treat them like passwords. Never share them or hard-code them in your source code. +- If you suspect a token has been compromised, revoke it immediately in GitHub settings. +- Tokens can perform a wide range of actions on your GitHub account, so choose permissions carefully. + +# 3. PicGo + +PicGo is an open-source image upload tool that supports multiple image hosting services, including Qiniu Cloud, Upyun, SM.MS, and GitHub. 
It helps you upload local images to your chosen host and generates shareable links automatically.

## 3.1 What Problems Does PicGo Solve?

1. **Image upload**: PicGo uploads local images to your image host quickly, saving manual upload time.
2. **Image management**: PicGo supports batch uploads and makes it easy to find and reuse previously uploaded images.
3. **Link generation**: After uploading, PicGo automatically generates a shareable link you can use anywhere.
4. **Multi-host support**: PicGo works with multiple image hosting services and lets you switch between them at any time.

## 3.2 Download

- Shandong University mirror:
- https://mirrors.sdu.edu.cn/github-release/Molunerfinn_PicGo/v2.3.1/
- Original GitHub repository (releases tab):
- https://github.com/Molunerfinn/PicGo
- After installing, open it from the system tray icon in the bottom-left corner.
  ![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645303.png)

## 3.3 Configuring the GitHub Image Host

- Open Image Host Settings
- Select GitHub
  ![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645304.png)

- **Repository name**: Enter `<username>/<repository-name>` — the repo you created in step 2.1.
- **Branch**: The branch where your images will be stored. Usually `main` or `master`.
- **Token**: Paste the token you copied in step 2.2.
- **Storage path**: A folder within your repo. Setting it to `img/` is a common convention — it will be created automatically.
- **Custom domain**: Set this if you want to use GitHub Pages (see section 4).

![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645305.png)

![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645306.png)

# 4. Setting Up a Custom Domain with GitHub Pages

## 4.1 GitHub Pages Basics

GitHub Pages is a static website hosting service. There are three types:

#### 1. User / Organization Site:

- Each GitHub account can have exactly one user or organization site.
- The repository must be named `<username>.github.io` (or `<organization>.github.io` for organizations).
- The URL will be `https://<username>.github.io` or `https://<organization>.github.io`.
- Limit: one user/org site per account.

#### 2. Project Site:

- A project site is tied to a specific repository, with URL `https://<username>.github.io/<repository>`.
- Any repository can have its own project site.
- Limit: no explicit cap — one project site per repository.

#### 3. General GitHub Pages Limits:

- Recommended source repository size: under 1 GB.
- Published site size: no more than 1 GB.
- Monthly bandwidth: 100 GB (soft limit).
- Hourly build limit: 10 builds (soft limit, applies to GitHub Actions builds).

## 4.2 Enabling GitHub Pages

To get a cleaner image URL, enable GitHub Pages for your repository:

- Go to your repository on GitHub and click "Settings".
- Scroll down to the "Pages" section.
- Under "Source", select your `main` branch (or `master`, depending on your default) and click "Save".
- After a moment, GitHub will generate a Pages URL (e.g., `https://your-username.github.io/image-hosting`).
- Back in PicGo's GitHub image host settings, set "Custom Domain" to this Pages URL.

![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645308.png)

![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645309.png)

# 5. Uploading Images

- **Drag and drop**: Open PicGo and drag an image directly into the window — it uploads automatically.
- **Manual file selection**: Click the "Upload" button in PicGo and select a local image file. 
+

![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645310.png)

![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645311.png)

- The image will be automatically committed to your repository.
  ![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645312.png)

- You can now grab the image link.
  ![](https://ravencaffeine.github.io/PictureCDN/images/20250927160645313.png)

# References

- 1. Complete tutorial: "Set Up a Personal Image Host on GitHub in 2025 — Step-by-Step with Screenshots"
  https://www.cnblogs.com/ljbguanli/p/18928090
- 2. CSDN post by "三金C_C": PicGo Configuration — Free Image Hosting
- Original link: https://blog.csdn.net/QAZJOU/article/details/146449613
diff --git a/app/docs/CommunityShare/Geek/raspberry-guide.en.md b/app/docs/CommunityShare/Geek/raspberry-guide.en.md
new file mode 100644
index 00000000..d8227b09
--- /dev/null
+++ b/app/docs/CommunityShare/Geek/raspberry-guide.en.md
@@ -0,0 +1,313 @@
+---
title: Building a Minecraft Server on an Idle Raspberry Pi
date: 2025-08-05T18:53:40.000Z
tags: null
docId: i0xmpskau105p83vq35wnxls
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T12:00:00Z
translatorAgent: claude-sonnet-4-6
---

A walkthrough of setting up a Raspberry Pi Minecraft server from scratch and exposing it to the internet using FRP port forwarding.

# Hardware Prerequisites

One idle Raspberry Pi, and one VPS with a public IP address.

# Setting Up the Raspberry Pi

Flash the Raspberry Pi OS image, set up your username and password, then power it on.

## Assign a Static IP to the Raspberry Pi

Open your router's settings panel and find the DHCP static IP allocation section. Assign a fixed IP to your Raspberry Pi. If you've forgotten the current IP, you can check it with:

```shell
$ hostname -I # Example output: 192.168.2.102
```

## Enable VNC Remote Desktop

Log in via SSH and open the configuration panel:

```shell
$ sudo raspi-config
```

1. Select **Interface Options**
2. Select **VNC**
3. When asked **Would you like the VNC Server to be enabled?**, select **YES**

Open a VNC client on your computer and log in with your username and password to access the virtual desktop.

## Install Java

```sh
$ sudo -i # Temporarily gain admin privileges
$ cd /usr/local
```

Open the Raspberry Pi browser and download the [JDK](https://www.oracle.com/java/technologies/downloads/#java21). The file will be in `/home/<username>/Downloads/`.

```sh
$ mkdir java
$ mv /home/<username>/Downloads/* /usr/local/java/
$ cd java
$ tar -zxvf jdk-21_linux-aarch64_bin.tar.gz
# Some log output will appear
```

Configure environment variables:

```sh
$ nano /etc/profile
```

Add the following at the end of the file:

```sh
# Adjust the JDK version number as needed
export JAVA_HOME=/usr/local/java/jdk-21.0.8
export CLASSPATH=.:$JAVA_HOME/lib/
export PATH=.:$JAVA_HOME/bin:$PATH
# To exit: Ctrl+O, Enter, Ctrl+X
```

Reload the file:

```sh
$ source /etc/profile
```

Verify the installation:

```sh
$ java -version

# Success looks like this:
java version "21.0.8" 2025-07-15 LTS
Java(TM) SE Runtime Environment (build 21.0.8+12-LTS-250)
Java HotSpot(TM) 64-Bit Server VM (build 21.0.8+12-LTS-250, mixed mode, sharing)
```

## Download the Minecraft Server Jar

Open the browser and download the server jar. I'm using the [Fabric](https://fabricmc.net/use/server/) server loader.

```sh
$ cd .. 
# Should return to /usr/local/
$ mkdir minecraft
$ mv /home/<username>/Downloads/* /usr/local/minecraft
```

The first run will fail because you haven't agreed to the EULA yet:

```sh
# My Pi has 8 GB RAM, so I'm allocating 4 GB here
# Note: the jar name will differ depending on the version you downloaded
$ cd minecraft
$ java -Xmx4G -jar fabric-server-mc.1.21.1-loader.0.17.0-launcher.1.1.0.jar
```

Accept the EULA:

```sh
$ nano eula.txt # Change eula=false to eula=true
$ # To exit: Ctrl+O, Enter, Ctrl+X
```

## Configure FRP

The Raspberry Pi uses the ARM architecture. Download [frp](https://github.com/fatedier/frp/releases) — the latest version at the time of writing is 0.63.0.

```sh
$ cd .. # Should return to /usr/local/
$ wget https://github.com/fatedier/frp/releases/download/v0.63.0/frp_0.63.0_linux_arm64.tar.gz
# Some log output will appear
$ tar -zxvf frp_0.63.0_linux_arm64.tar.gz
# Some log output will appear
$ mv frp_0.63.0_linux_arm64 frp # Rename for convenience
```

Edit the configuration:

```sh
$ cd frp
$ nano frpc.toml # 'c' in frpc stands for client
```

Edit `frpc.toml`:

```toml
serverAddr = "Your VPS public IP"
serverPort = 7000 # Default value
auth.method = "token"
auth.token = "set a password here"

[[proxies]]
name = "choose a service name"
type = "tcp"
localIP = "192.168.2.102" # Raspberry Pi IP
localPort = 25565 # MC server default port
remotePort = 25565 # MC server default port

## To exit: Ctrl+O, Enter, Ctrl+X
```

## Install tmux

This guide uses tmux for session management. You could also use `screen`, but I prefer tmux.

```shell
$ apt install tmux
```

# Setting Up the VPS

Log in via SSH using the root credentials from your provider's welcome email.

## Configure FRP

Install [frp](https://github.com/fatedier/frp/releases) on the VPS:

```shell
$ cd /usr/local
$ wget https://github.com/fatedier/frp/releases/download/v0.63.0/frp_0.63.0_linux_amd64.tar.gz
$ # Some log output will appear
$ tar -zxvf frp_0.63.0_linux_amd64.tar.gz
$ # Some log output will appear
$ mv frp_0.63.0_linux_amd64 frp # Rename for convenience
```

Edit the configuration:

```sh
$ cd frp
$ nano frps.toml # 's' in frps stands for server
```

Edit `frps.toml`:

```toml
bindPort = 7000
auth.method = "token"
auth.token = "same password as on the Raspberry Pi"

[webServer] # Optional dashboard — remove if not needed
addr = "127.0.0.1" # Not exposed publicly; access via SSH tunnel. Use "0.0.0.0" to expose publicly
port = 7500
user = "?" 
+
password = "********"
```

## Run FRP in the Background

Same process as on the Raspberry Pi:

```sh
$ apt install tmux
```

Open the required ports:

```shell
$ ufw allow 7000/tcp
$ ufw allow 25565/tcp
```

Start frp in a tmux session:

```sh
$ tmux new -s frp
$ cd /usr/local/frp
$ ./frps -c frps.toml
# Press Ctrl+B then D to detach
```

Re-attach to the session later:

```shell
$ tmux attach -t frp
```

Verify everything is running:

```sh
$ tmux ls
# frp: 1 windows (created <date>)
$ ss -tlnp | grep 7000
# Output here means frp is working
```

# Starting the Server on the Raspberry Pi

## Write a Start Script

```sh
$ cd /usr/local/minecraft
$ nano start.sh
```

Set the contents of `start.sh` to:

```sh
#!/bin/bash
java -Xmx4G -jar fabric-server-mc.1.21.1-loader.0.17.0-launcher.1.1.0.jar nogui
```

Make it executable:

```shell
$ chmod +x start.sh
```

## Run FRP and the Server

### Run FRP

```sh
$ tmux new -s frp
$ cd /usr/local/frp
$ ./frpc -c frpc.toml
# Press Ctrl+B then D to detach
```

### Run the Server

```sh
$ tmux new -s mcserver
$ cd /usr/local/minecraft
$ ./start.sh
# Press Ctrl+B then D to detach
```

Verify both are running:

```sh
$ tmux ls
# frp: 1 windows (created <date>)
# mcserver: 1 windows (created <date>)
```

# (Optional) Configure a Domain Name

At this point you can already connect to the game using your VPS's public IP:

```
?.?.?.?:25565 # VPS public IP
```

If you have your own domain, log in to the [Cloudflare dashboard](https://dash.cloudflare.com/) and add a DNS record:

| | |
| :---------------------- | :-------------------- |
| Type | A |
| Name (required) | rasp |
| IPv4 address (required) | &lt;VPS public IP&gt; |
| Proxy status | DNS only |

Then you can connect using your domain:

```
rasp.<your-domain>:25565
```
diff --git a/app/docs/CommunityShare/Geek/swanlab.en.mdx b/app/docs/CommunityShare/Geek/swanlab.en.mdx
new file mode 100644
index 00000000..00764a2d
--- /dev/null
+++ b/app/docs/CommunityShare/Geek/swanlab.en.mdx
@@ -0,0 +1,99 @@
+---
title: SwanLab Quick Start Guide
description: ""
date: "2025-09-23"
tags:
  - tag-one
docId: mhyoknm6vj8jmp186oli5f5c
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T12:00:00Z
translatorAgent: claude-sonnet-4-6
---

I've been running a lot of experiments lately and things were getting messy fast — results scattered everywhere, and I was seriously considering just tracking everything in Excel.

Then I remembered a teammate from group 9444 mentioning SwanLab for experiment tracking and visualization. So I put together this quick start guide — it'll be part of my own experiment pipeline going forward.

If you have a better system for managing research experiments, feel free to share it.

Anyway, let's get into it. Here's the quick start workflow for experiment tracking with SwanLab.

## 1. Create an Account & Get Your API Key

First, open the SwanLab website: [https://swanlab.cn/](https://swanlab.cn/)
If you don't have an account yet, register on the site first.

Once you're logged in, create a project. You'll see a Quick Start guide — just follow it step by step and your API key will be right there. I've copied the key steps here for convenience.

## 2. Install the SwanLab Library

In any environment with Python 3, install the SwanLab client library via pip:

```bash
pip install swanlab
```

## 3. 
Log In to SwanLab + +Run the following command in your terminal: + +```bash +swanlab login +``` + +You'll see a prompt like this: + +``` +swanlab: Logging into swanlab cloud. +swanlab: You can find your API key at: https://swanlab.cn/settings +swanlab: Paste an API key from your profile and hit enter, or press 'CTRL-C' to quit: +``` + +Paste the API key you copied from your profile settings page and you're in. ([docs.swanlab.cn](https://docs.swanlab.cn/en/guide_cloud/general/quick-start.html)) + +## 4. Submit an Experiment + +```python +import swanlab +import random + +# Initialize a new SwanLab run to track this script +swanlab.init( + # Set the project this run belongs to + project="mteb-ailastatue", + workspace="mira", + # Track hyperparameters and run metadata + config={ + "learning_rate": 0.02, + "architecture": "CNN", + "dataset": "CIFAR-100", + "epochs": 10 + } +) + +# Simulate training +epochs = 10 +offset = random.random() / 5 +for epoch in range(2, epochs): + acc = 1 - 2 ** -epoch - random.random() / epoch - offset + loss = 2 ** -epoch + random.random() / epoch + offset + + # Log training metrics to SwanLab + swanlab.log({"acc": acc, "loss": loss}) + +# [Optional] Mark the run as finished — required in notebook environments +swanlab.finish() + + +``` + +--- + +## 5. View Your Results! + +After running the code, navigate to your newly created project to compare different experiments and their metrics. + +## References + +Official docs: [Quick Start Guide](https://docs.swanlab.cn/en/guide_cloud/general/quick-start.html) diff --git a/app/docs/CommunityShare/RAG/context_engineering_intro.en.md b/app/docs/CommunityShare/RAG/context_engineering_intro.en.md new file mode 100644 index 00000000..2291bcfb --- /dev/null +++ b/app/docs/CommunityShare/RAG/context_engineering_intro.en.md @@ -0,0 +1,72 @@ +--- +title: A Quick Introduction to Context Engineering +description: "" +date: "2025-10-03" +tags: + - tag-one +docId: wdqqrepoy43jiieyyjmaekk1 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# A Quick Introduction to Context Engineering + +## Key Concepts + +Problems it addresses: + +- Most models have a very limited context window +- Noisy, poorly organized inputs hurt model understanding +- More input = higher cost (tokens are expensive) + +**Context**: Everything fed to the model as input — the user's question, background information, reference material, available tools, tool execution results, conversation history, and so on. The model generates answers based on all of this. + +**Context window**: The maximum amount of input a model can process. Measured in tokens — for example, Gemini 2.5 Pro has a 1 million token context window, meaning it can handle up to 1 million tokens of input at once. + +**Context engineering**: The deliberate design of what goes into the model's input. The goal is to help the model understand more accurately, respond better, and spend fewer tokens — all within a limited context window. + +The common experience of LLMs "forgetting" earlier parts of a conversation happens precisely because of context window size limits. + +Context engineering is especially important when building agents. + +## Approaches + +### Saving Context + +A classic example is ChatGPT's long-term memory feature. + +Store the context you want the model to remember in a database or on disk, and retrieve it when needed. 
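

A minimal sketch of that save-and-retrieve loop, assuming a plain JSON file as the store (the `MemoryStore` class and its keyword matching are illustrative, not any particular product's API):

```python
import json
from pathlib import Path

class MemoryStore:
    """Toy long-term memory: persist notes to disk, reload them in later sessions."""

    def __init__(self, path: str = "memory.json"):
        self.path = Path(path)
        self.notes = json.loads(self.path.read_text()) if self.path.exists() else []

    def save(self, note: str) -> None:
        self.notes.append(note)
        self.path.write_text(json.dumps(self.notes, ensure_ascii=False))

    def recall(self, query: str) -> list[str]:
        # Naive keyword match stands in for real retrieval (embeddings, BM25, ...).
        return [n for n in self.notes if query.lower() in n.lower()]

store = MemoryStore()
store.save("User prefers concise answers with code examples.")
# Recalled notes get prepended to the model's input on the next turn.
relevant = store.recall("concise")
```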
+ +### Selecting Context + +Choose the most relevant information from a large pool of data to include in the model's input. + +**Static selection**: Content that always goes into the context — for example, a system prompt that guides the model's behavior and ensures safe, reliable outputs. + +**Dynamic selection**: Content chosen based on what's most relevant to the current query — for example, pulling entries from a long-term memory store in ChatGPT, or an agent selecting which tools to include based on the current task. + +RAG is one implementation of dynamic selection. + +### Compressing Context + +The two biggest space consumers in a context are model-generated text and tool execution results. + +One practice from Claude Code: when the context reaches a certain size, it runs auto-compact — discarding raw content and keeping only a summary of what was there. + +### Isolating Context + +This typically comes up in multi-agent scenarios. + +Anthropic's approach: + +![Anthropic's approach](https://img.coly.cc/obs-img/2025/10/7110909d5366ba7747f037ae9300f7bc.png) + +Different agents each have their own dedicated tools, independent execution histories, and separate memory systems. Their contexts remain isolated from one another. + +### Further Reading + +LangChain — Context Engineering: http://blog.langchain.com/context-engineering-for-agents/ + +Cognition: https://cognition.ai/blog/dont-build-multi-agents diff --git a/app/docs/CommunityShare/RAG/embedding.en.mdx b/app/docs/CommunityShare/RAG/embedding.en.mdx new file mode 100644 index 00000000..5107780f --- /dev/null +++ b/app/docs/CommunityShare/RAG/embedding.en.mdx @@ -0,0 +1,67 @@ +--- +title: A Beginner's Guide to Fine-Tuning Embedding Models +description: "" +date: "2025-09-22" +tags: + - RAG +docId: eyd32o3ebd5q69hfbb2enxqi +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +Embedding models are something many people haven't heard of, and a fair question to ask is: modern LLMs are already so powerful — do we really need a dedicated embedding model on top of that? + +Here's the thing. LLMs are impressive, but they're also large. Deploying them requires serious hardware, and when the barrier to deployment is high, your application is constrained. And if you're calling them via API, the ongoing cost adds up fast. Beyond that, a general-purpose LLM with strong generalization might actually underperform a smaller, task-specific fine-tuned embedding model in specialized domains. If you're applying a RAG system to financial compliance, an internal knowledge base, or technical documentation Q&A, you'll notice that even the best general-purpose embedding models often struggle with domain-specific terminology and context. + +That's why more and more people are turning to fine-tuning — to help embedding models better understand their own data. And if you can master a domain-adaptive fine-tuning approach for embedding models, you'll have something applicable across virtually every industry. Every company could fine-tune an embedding model for their own domain. The potential is real. + +## Why Fine-Tune? + +The value of fine-tuning shows up first in task-specific performance. In finance or healthcare, for example, general-purpose embeddings often fail to capture domain-specific terminology or context precisely. A fine-tuned model can significantly improve retrieval quality, and that improvement flows directly into the final Q&A output. + +That said, fine-tuning isn't a cure-all. 
Many performance issues come from elsewhere: sometimes the query fundamentally needs keyword matching, sometimes the chunking strategy is poor, sometimes the model's embedding dimension is simply too small. Fine-tuning is only worth the investment once you've ruled out these other issues and confirmed that the bottleneck is actually in semantic understanding. + +What's more interesting is that fine-tuning doesn't always mean "bigger and stronger." In practice, a small model with task-specific fine-tuning can often match or beat large commercial models, while delivering lower latency and more predictable costs. And don't overlook a fundamental truth: data is the real ceiling on embedding quality. Without data, or with low-quality data, even the best fine-tuning process won't produce a miracle. + +## Recent Innovations + +A few developments in recent blog posts and papers are worth paying attention to. The dual-LLM synthesis-and-evaluation chain is a particularly elegant approach: use one LLM to generate diverse queries from documents, then use a second LLM as a "judge" to filter out low-quality samples. This gets you high-quality training pairs with almost no manual labeling. + +Then there's the fine-tuning vs. re-ranking tradeoff. Fine-tuning improves relevance without adding latency, but it requires re-embedding your entire corpus. Re-ranking avoids that re-embedding step but adds API calls and latency. The best practice isn't to pick one — it's to combine them based on your situation: use fine-tuning for a stable core corpus, and rely on re-ranking for frequently updated portions. + +There's also an often-overlooked point about small models. With the right fine-tuning, a small model can compete directly with large ones. High-quality synthetic data plays a big role here — techniques like paraphrasing, diversity control, and hard negative mining can build training sets that are both larger in scale and broader in coverage, significantly boosting fine-tuning effectiveness. Researchers have noted that the performance ceiling for embedding models is typically constrained by the coverage and quality of training data, not model parameter count. This further underscores why high-quality synthetic datasets matter so much. Looking ahead, multi-task and instruction-based training are becoming the norm — integrating signals across different domains into a single small model to give it stronger cross-domain generalization. + +On the data processing side, it's worth keeping an eye on [OpenDCAI/DataFlow](https://github.com/OpenDCAI/DataFlow). DataFlow is a data-centric AI system built specifically for fine-tuning and RAG workflows. It automates parsing, generation, cleaning, and quality assessment from noisy raw data sources (PDFs, web pages, low-quality QA pairs) through modular pipelines, making it much easier to build high-quality training sets. It has been validated in healthcare, finance, and legal domains. + +## Practical Takeaways + +If you're actually planning to fine-tune in production, you need a clear decision path. First, diagnose — confirm the bottleneck really is in semantic understanding. Only if the answer is yes should you proceed to fine-tuning. Second, construct your data. The standard approach: generate diverse queries from domain documents, filter with an LLM quality check, and build positive-negative pairs — making sure to include hard negatives and deduplicating to avoid data leakage. 
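

As a concrete sketch, pair construction plus contrastive fine-tuning with the `sentence-transformers` library might look like this; the triplet contents, base model, and batch size are illustrative assumptions:

```python
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses

# (query, positive, hard negative) triplets from the generate-then-judge pipeline.
triplets = [
    ("How is initial margin set for FX forwards?",
     "Initial margin for FX forward contracts is calculated from ...",
     "Equity futures margin follows a different rulebook ..."),
]

train_examples = [InputExample(texts=list(t)) for t in triplets]
loader = DataLoader(train_examples, shuffle=True, batch_size=16)

model = SentenceTransformer("all-MiniLM-L6-v2")  # any small base model
# Uses the explicit hard negative plus all other in-batch positives as negatives,
# which is why reasonably large batches tend to help.
loss = losses.MultipleNegativesRankingLoss(model)
model.fit(train_objectives=[(loader, loss)], epochs=1, warmup_steps=100)
```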
+ +During training, contrastive learning is the standard choice: multiple negatives, triplet loss, or cosine embedding loss. Pair that with small-scale hyperparameter search to get learning rate, batch size, epoch count, and pooling strategy into a reasonable range. During evaluation, don't just look at Recall@k and MRR — also check end-to-end Q&A accuracy and grounding hit rate, and factor latency and cost into your decisions. When you deploy, start by applying fine-tuning to your stable core corpus to gain consistent high relevance, then use re-ranking or hybrid retrieval for high-churn data — that combination tends to be robust in practice. + +## Summary + +The value of fine-tuning an embedding model isn't about chasing a universal "optimal model." It's about making the model genuinely understand your data and your task. It's a systems engineering effort: diagnose first, confirm the bottleneck; then fine-tune with high-quality data and systematic experimentation; finally, close the loop with re-ranking for a stable, production-ready system. + +For most organizations, the most practical approach is: fine-tune on stable core data for low-latency, high-relevance retrieval; use re-ranking on high-churn data for flexibility; and expand the model's boundaries incrementally through synthetic data and multi-task training. The end goal isn't point-optimal performance — it's achieving a dynamic balance across performance, latency, and cost for the entire RAG system. + +## References + +ACM Digital Library. Exploring Parameter-Efficient Fine-Tuning Techniques for Code Models. +https://dl.acm.org/doi/10.1145/3714461 + +Databricks. Improving Retrieval and RAG with Embedding Model Finetuning. +https://www.databricks.com/blog/improving-retrieval-and-rag-embedding-model-finetuning + +NAACL 2025. Little Giants: Synthesizing High-Quality Embedding Data at Scale. +https://aclanthology.org/2025.naacl-long.64/ + +Q. Zhou et al. Embedding Technical Report. + +arXiv. Multi-task Retriever Fine-tuning for Domain-specific and General-purpose Tasks. +https://arxiv.org/abs/2501.04652 + +Weaviate. Why, When and How to Fine-Tune a Custom Embedding Model. +https://weaviate.io/blog/fine-tune-embedding-model diff --git a/app/docs/CommunityShare/RAG/rag.en.mdx b/app/docs/CommunityShare/RAG/rag.en.mdx new file mode 100644 index 00000000..9ca4be76 --- /dev/null +++ b/app/docs/CommunityShare/RAG/rag.en.mdx @@ -0,0 +1,107 @@ +--- +title: RAG +description: "" +date: "2025-09-19" +tags: + - tag-one +docId: zywri1bs64awfi9utfjy14ll +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# RAG + +This RAG toy demo is designed to help people unfamiliar with RAG understand the full pipeline in under 10 minutes. + +You can also pair it with "Mark's Tech Notes" for a deeper read. + +## RAG — Retrieval-Augmented Generation + +**Problem it solves:** Intelligent Q&A, building external knowledge bases, and specializing LLMs for specific domains. + +**Tools you need:** An LLM, an external knowledge base, and domain-specific data. + +**The core idea of RAG** is: + +1. Search the domain-specific files for relevant information +2. Use the retrieved information as supplementary context +3. Bundle it together with the user's query and send everything to the LLM to get an answer + +## RAG Pipeline + +Chunking → Embedding → Store in vector DB → User query → Retrieval → Re-ranking → Pack processed data + query → Feed to LLM → Get answer + +## Part 1: Building Your Own Minimal RAG + +### 1. 
Chunking + +Split your document into specific segments. + +**Example:** + +- Our document: "Today the weather is great" +- If we split by character: the entire document becomes "T", "o", "d", "a", "y", "t", "h", "e", "w", "e", "a", "t", "h", "e", "r", "i", "s", "g", "r", "e", "a", "t" — 22 separate pieces + +**Optimization direction:** In real-world engineering, chunking strategy is a key optimization lever for any RAG system. Individual characters carry very little semantic information in isolation. You could instead chunk by word, sentence, paragraph, page, or chapter — the right strategy can dramatically improve RAG performance. + +Chunks produced by this step are referred to as "slices" in the rest of this guide. + +### 2. Embedding the Slices + +Embedding converts the text in each slice into a vector representation that computers can process. + +There are many approaches. You'll need an embedding model — typically you can use a pre-trained model from Hugging Face via the `sentence-transformers` library. For very small datasets, one-hot encoding works too. Beginners don't need to understand the internals — just know that tools exist to convert text slices into vectors. This falls under NLP (Natural Language Processing). + +- For example, "Today" might become `[0.23, 0.34, 0.54, 0.23, 0.76]`. + +Because embedding can be computationally expensive and the underlying documents don't change, we don't need to re-embed every time. We embed everything once and store the results in a database. + +**Optimization direction:** Different embedding models produce different results. You can use open-source pre-trained models, or if you have high-quality domain data, train your own embedding model. + +### 3. Storing in a Vector Database + +A vector database is built for storing and querying vectors. + +To avoid re-embedding documents every time, we store both the vector and its corresponding slice text together in a vector database. + +Vector databases come in different forms: lightweight local embeddings (stored on your machine) and cloud-hosted deployments (queried over the network). For lightweight projects, local libraries like `faiss` or `MongoDB` work great. + +**Optimization direction:** The choice of vector database can also affect overall RAG system performance. + +### 4. The Base LLM + +The LLM can be deployed locally or accessed via API. + +For example, you could use Gemini, which has a free daily token quota. Search for how to get a Gemini API key. + +**Optimization direction:** The base LLM has a direct impact on RAG quality. Besides general-purpose models like GPT and Gemini, some practitioners fine-tune a general LLM or train a specialized model for specific domains — for example, a model focused on medical diagnosis or legal consultation. These specialized models may lag behind in general capability but excel within their target domain. + +## Part 2: Using Your RAG System + +Assuming you've already built a RAG system and have a ChatGPT-style interface to interact with, here's how a query flows through the system. + +### 5. User Query & Retrieval + +Say the user asks: "What's the weather like today?" + +We embed this query using the same embedding model used during setup, then compare the resulting vector against all vectors in our database. The matching process retrieves the slices most similar to the query — this is called retrieval. + +Retrieval prioritizes recall over precision: we'd rather include everything relevant and filter later than miss something important. + +### 6. 
Re-ranking + +Re-ranking is a further filtering step on the retrieved results. For example, if we retrieved 20 slices, we select the top 5 most relevant to the user's query to reduce noise. + +**Optimization direction:** Different indexing strategies and re-ranking algorithms can yield very different results. + +### 7. Calling the LLM and Getting an Answer + +Finally, we bundle the user query and the top 5 slices together, apply prompt engineering, and send everything to the LLM. The LLM uses both the query and the relevant context slices to generate an answer. + +**Optimization direction:** Investing in prompt engineering pays off here. + +**For example, the basic logic of CoT (Chain of Thought):** + +- "A is True then B is true" and "A is True" = "B is True" From 7a15e9bd5c79a4dbdc6b847ce97004ca9645b380 Mon Sep 17 00:00:00 2001 From: longsizhuo Date: Wed, 15 Apr 2026 17:44:52 +0000 Subject: [PATCH 10/19] =?UTF-8?q?feat(docs):=20i18n=20ai/=20=E5=89=8D?= =?UTF-8?q?=E5=8D=8A=E9=83=A8=E5=88=86=E5=85=A8=E9=83=A8=E7=BF=BB=E8=AF=91?= =?UTF-8?q?=E5=AE=8C=E6=88=90=20(24=20=E7=AF=87)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit translator-ai-1 产出。全部 zh→en。覆盖: - Introduction-of-Multi-agents-system (1) - MoE (2) - Multi-agents-system-on-Code-Translation (1) - agents-todo (2) - ai-math-basics (9) — 含 calculus / linear-algebra / probability 等子目录 - compute-platforms (1,handbook 已在 MVP 提交) - foundation-models (7) — lifecycle / datasets / training / finetune 等 - generative-todo (1) 所有翻译版 frontmatter 继承原文 docId,带 translatedFrom: zh 标记供 contributors 脚本跳过统计。 --- .../introduction_of_multi-agents_system.en.md | 159 +++++++ app/docs/ai/MoE/MOE-intro.en.md | 147 ++++++ app/docs/ai/MoE/moe-update.en.md | 231 +++++++++ .../code-translation-intro.en.mdx | 24 + .../ai/agents-todo/agent-ecosystem.en.mdx | 63 +++ .../ai/agents-todo/cs294-194-196/index.en.mdx | 15 + .../calculus-optimization/index.en.mdx | 49 ++ .../information-theory/index.en.mdx | 47 ++ .../linear-algebra/index.en.mdx | 54 +++ .../linear-algebra/resources/index.en.mdx | 20 + .../ai/ai-math-basics/math-foundations.en.mdx | 89 ++++ app/docs/ai/ai-math-basics/math_books.en.md | 123 +++++ .../numerical-analysis/index.en.mdx | 40 ++ .../probability-statistics/index.en.mdx | 58 +++ .../resources/index.en.mdx | 16 + .../model-compuational-resource-demand.en.md | 198 ++++++++ .../foundation-models/datasets/index.en.mdx | 327 +++++++++++++ .../deploy-infer/index.en.mdx | 443 ++++++++++++++++++ .../foundation-models/evaluation/index.en.mdx | 287 ++++++++++++ .../foundation-models/finetune/index.en.mdx | 348 ++++++++++++++ .../foundation-models-lifecycle.en.mdx | 119 +++++ .../qkv-interview/index.en.mdx | 110 +++++ .../foundation-models/training/index.en.mdx | 293 ++++++++++++ .../generative-models-plan.en.mdx | 12 + 24 files changed, 3272 insertions(+) create mode 100644 app/docs/ai/Introduction-of-Multi-agents-system/introduction_of_multi-agents_system.en.md create mode 100644 app/docs/ai/MoE/MOE-intro.en.md create mode 100644 app/docs/ai/MoE/moe-update.en.md create mode 100644 app/docs/ai/Multi-agents-system-on-Code-Translation/code-translation-intro.en.mdx create mode 100644 app/docs/ai/agents-todo/agent-ecosystem.en.mdx create mode 100644 app/docs/ai/agents-todo/cs294-194-196/index.en.mdx create mode 100644 app/docs/ai/ai-math-basics/calculus-optimization/index.en.mdx create mode 100644 app/docs/ai/ai-math-basics/information-theory/index.en.mdx create mode 100644 
app/docs/ai/ai-math-basics/linear-algebra/index.en.mdx create mode 100644 app/docs/ai/ai-math-basics/linear-algebra/resources/index.en.mdx create mode 100644 app/docs/ai/ai-math-basics/math-foundations.en.mdx create mode 100644 app/docs/ai/ai-math-basics/math_books.en.md create mode 100644 app/docs/ai/ai-math-basics/numerical-analysis/index.en.mdx create mode 100644 app/docs/ai/ai-math-basics/probability-statistics/index.en.mdx create mode 100644 app/docs/ai/ai-math-basics/probability-statistics/resources/index.en.mdx create mode 100644 app/docs/ai/compute-platforms/model-compuational-resource-demand.en.md create mode 100644 app/docs/ai/foundation-models/datasets/index.en.mdx create mode 100644 app/docs/ai/foundation-models/deploy-infer/index.en.mdx create mode 100644 app/docs/ai/foundation-models/evaluation/index.en.mdx create mode 100644 app/docs/ai/foundation-models/finetune/index.en.mdx create mode 100644 app/docs/ai/foundation-models/foundation-models-lifecycle.en.mdx create mode 100644 app/docs/ai/foundation-models/qkv-interview/index.en.mdx create mode 100644 app/docs/ai/foundation-models/training/index.en.mdx create mode 100644 app/docs/ai/generative-todo/generative-models-plan.en.mdx diff --git a/app/docs/ai/Introduction-of-Multi-agents-system/introduction_of_multi-agents_system.en.md b/app/docs/ai/Introduction-of-Multi-agents-system/introduction_of_multi-agents_system.en.md new file mode 100644 index 00000000..0c14307c --- /dev/null +++ b/app/docs/ai/Introduction-of-Multi-agents-system/introduction_of_multi-agents_system.en.md @@ -0,0 +1,159 @@ +--- +title: Introduction of Multi-Agent Systems (For Any Task You Want) +description: "" +date: "2025-09-29" +tags: + - tag-one +docId: h53uwefhlykt9ietsx9x0vtn +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Introduction of Multi-Agent Systems (For Any Task You Want) + +Overview of Multi-Agent Systems + +## 1. What Is a Multi-Agent System (MAS)? + +A Multi-Agent System (MAS) is a computational system composed of multiple relatively autonomous agents that interact, cooperate, or compete within a shared environment to achieve individual or collective goals. +It focuses not on the optimal behavior of a single agent, but on the organization, coordination, and emergent behavior at the group level. +Note: Emergent behavior refers to behaviors that arise from the interaction and collaboration of multiple agents that no single agent could accomplish alone. For example, flocks of birds follow simple rules to produce elegant formations that resist air currents — formations that were never explicitly designed. +Intuitive understanding: Think of LLMs as multiple "roles" that simulate team/department collaboration to complete tasks together. + +## 2. Typical Applications and Problem Types + +Real-world distributed problems: power grid scheduling, intelligent transportation, supply chains, disaster response — all naturally exhibit distributed, dynamic, and uncertain characteristics that monolithic systems struggle to handle with global optimality and robustness. + +Example research directions: generation, translation, repair, judge, etc. + +## 3. Core Concepts in Multi-Agent Systems + +### 3.1 Agent + +A computational entity that operates in an environment through a Perception — Deliberation/Policy — Action cycle. + +Typical properties: autonomy, reactivity, proactiveness (initiative), sociability (ability to interact). 
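

A toy sketch of that perceive-deliberate-act cycle (illustrative only, not any specific framework's API):

```python
class Agent:
    """Minimal agent skeleton: perceive -> deliberate -> act."""

    def __init__(self, name: str):
        self.name = name
        self.beliefs: dict = {}  # internal state accumulated from observations

    def perceive(self, observation: dict) -> None:
        self.beliefs.update(observation)  # reactivity: absorb what the world shows

    def deliberate(self) -> str:
        # Proactiveness: choose an action in service of the agent's own goal.
        return "recharge" if self.beliefs.get("battery", 1.0) < 0.2 else "work"

    def act(self, environment: dict) -> None:
        environment.setdefault("log", []).append((self.name, self.deliberate()))

# One tick of the loop over a shared environment dict.
env = {"battery": 0.15}
agent = Agent("worker-1")
agent.perceive(env)
agent.act(env)  # env["log"] == [("worker-1", "recharge")]
```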
+ +### 3.2 Environment + +The object that agents perceive and act upon; can be fully/partially observable, deterministic/stochastic, static/dynamic, discrete/continuous. + +| Dimension | Definition | Characteristics / Key Points | Typical Examples | Impact on Agent Design | +| ---------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Fully Observable vs Partially Observable | Whether the agent can perceive the full state of the environment at every moment | If fully observable, the agent can make decisions based on the current state directly; if partially observable, hidden information exists and the agent may need internal memory and uncertainty reasoning | Chess is fully observable; poker (where opponents' hands are hidden) is partially observable | In partially observable environments, agents typically need to maintain a **belief state** (probability distribution over true states) or an internal state model, making policies more complex | +| Deterministic vs Stochastic / Nondeterministic | Whether a given state + action leads to a unique next state/outcome, or multiple possible outcomes/probability distributions | Deterministic: action + current state uniquely determine the next state; stochastic/nondeterministic: multiple possible transitions with probability distributions | Board games (e.g., chess) are approximately deterministic; real-world robot manipulation and traffic systems often involve stochasticity | In stochastic environments, agent policies must account for expectations/distributions/risk, e.g., probabilistic policies, reinforcement learning, robust design | +| Static vs Dynamic | Whether the environment can change while the agent is deliberating/acting | Static: environment remains unchanged during the agent's decision-making; dynamic: the environment may evolve while the agent thinks/acts | In a turn-based board game, the environment is static during the current agent's turn; traffic systems are dynamic as other vehicles/pedestrians continuously change | In dynamic environments, agents need fast response, real-time planning, and future prediction capabilities — they cannot afford costly computation delays | +| Discrete vs Continuous | Whether the state, action, and time of the environment form a discrete/enumerable set or a continuous/real-valued domain | Discrete: states/actions/time are enumerable or discrete; continuous: these quantities vary over real-valued domains | Board games, grid worlds, turn-based games are discrete; robot positions/velocities/accelerations, drone control are continuous | In continuous environments, agents typically use function approximation (neural networks, control models), continuous policies, differential equations, or continuous action optimization; in discrete environments, enumeration, search, and 
discrete RL apply | + +### 3.3 Interaction + +Forms include communication, negotiation, competition, cooperation, game theory, etc. + +### 3.4 Organization + +The totality of roles, hierarchies, norms, protocols, and team structures. + +| Component | Meaning / Function | Common Design Approaches / Examples | Considerations / Trade-offs | +| :------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Roles | Each agent's functional position and behavioral responsibilities in the organization. Roles abstract behavioral interfaces and capability constraints | "Planner" role handles task decomposition; "Executor" handles execution; "Critic" handles evaluation; "Communicator" handles information relay | Responsibilities must be clear and not overly overlapping; avoid strong role coupling; capabilities and resource allocation must match | +| Hierarchy | Superior-subordinate relationships among roles/agents, directing control, supervision, and command flow | Manager/Worker architecture: high-level agents make strategic decisions, low-level agents execute; multi-level nesting (macro → meso → micro) | Hierarchy helps manage complexity and maintain clear command flow; but too many levels can cause communication bottlenecks, delays, and single points of failure | +| Norms / Normative Rules | Conventions or hard rules that constrain agent behavior, coordinate conflicts, and ensure safety | E.g., "cannot access the same resource simultaneously," "respond to urgent tasks first," "cannot act beyond assigned roles" | Too loose leads to chaos; too strict reduces flexibility; penalty mechanisms / compliance checks must be designed | +| Protocols / Interaction Protocols | Mechanisms and conventions for how agents communicate, negotiate, trade, synchronize, and deliberate | Auction, Contract Net, Negotiation Protocol, Consensus | Must consider performance (communication cost, latency), robustness (error handling, failure recovery), expressiveness (whether semantic interaction is sufficient) | +| Team Structure / Coalitions / Grouping | How agents are organized into sub-teams or collaborative groups, and how these groups cooperate | Static teams (fixed groupings), dynamic teams (task-triggered groupings), cross-team coalitions | Must adapt to task requirements and capability distribution; dynamic structures increase flexibility but incur reorganization costs and coordination overhead | + +### 3.5 Goals/Utility + +Individual goals and global social welfare may be aligned or conflicting, involving mechanism design. The ultimate aim should be toward task completion and utility maximization. + +## 4. System Composition and Typical Architectures + +### 4.1 Agent Internal Architecture + +**Reactive/Behavior-based**: e.g., subsumption architecture (layered behaviors) — fast response but weak planning. + +**BDI (Belief–Desire–Intention)**: models rational decision-making through beliefs/desires/intentions, suitable for interpretable planning scenarios. + +**Learning-based**: based on RL/supervised/self-supervised learning; in MARL, policies can be shared or trained independently. 
+
+**LLM-Agent**: uses a large language model as the core, combined with tool calling, memory, retrieval, reflection, and actuators; excels at complex reasoning and open-environment tasks.
+
+### 4.2 Multi-Agent Architectures
+
+**Centralized Orchestration (Orchestrator)**: central scheduling (e.g., Planner/Router) assigns tasks; provides a strong global view but has a single point of failure.
+
+**Distributed Cooperation (Peer-to-Peer)**: agents interact as equals; high elasticity but complex protocols.
+
+**Hierarchical/Hybrid**: upper-level planning, lower-level execution; balances global and local efficiency.
+
+**Blackboard / Shared Memory**: agents exchange hypotheses and partial solutions through a shared workspace.
+
+### 4.3 Communication and Coordination Mechanisms
+
+Communication languages/protocols: early examples include KQML and FIPA-ACL; in engineering practice, MQ/HTTP/gRPC and structured messages (JSON/Proto) are commonly used.
+
+### 4.4 Coordination Methods
+
+**Contract Net and Auction/Bidding**: suitable for task assignment and resource competition.
+
+**Negotiation/Voting/Consensus**: e.g., Paxos/Raft or multi-party voting strategies.
+
+**Formation/Grouping and Role Switching**: formation control, dynamic role assignment.
+
+**Mechanism Design**: uses incentive-compatible rules to guide individual rational behavior toward desired collective outcomes.
+
+**Organizational Structures**: Hierarchy, Holarchy, Team/Coalition, and Roles & Norms-based social organization.
+
+### 4.5 Key Points in Multi-Agent Reinforcement Learning (MARL)
+
+**Non-stationarity**: changes in the other agents' policies make the environment appear non-stationary to any individual agent, which makes training harder.
+
+**Training-execution paradigm**: Centralized Training with Decentralized Execution (CTDE) is the dominant approach.
+
+### 4.6 Method Families (Examples)
+
+**Value decomposition**: VDN and QMIX decompose the global value into per-agent values.
+
+**Actor-Critic**: e.g., MADDPG (centralized Critic, decentralized Actor).
+
+**Opponent modeling / Game learning**: Nash equilibrium, transferable policies, meta-learning.
+
+Key challenges: credit assignment, scalability, partial observability, exploration-exploitation balance, communication bandwidth and latency.
+
+## 5. LLM-Driven Multi-Agent Paradigm (Main Focus)
+
+### 5.1 Role Division
+
+- Planner
+- Researcher (retrieval/analysis)
+- Coder/Executor (tool execution)
+- Critic/Verifier (review and validation)
+- Refiner (repair)
+
+### 5.2 Collaboration Patterns
+
+**Debate/Deliberation**: mutual evaluation to improve reasoning robustness.
+
+**Reflection/Memory**: experience summarization, long-term memory stores, external knowledge retrieval.
+
+**Graph-of-Agents**: explicitly represents the task workflow as a DAG/state machine.
+
+### 5.3 Engineering Considerations
+
+- Prompt templating
+- Tool/database/code executor integration
+- Message routing and caching
+- Cost and latency control
+- Security (privilege escalation / data leakage / injection)
+
+## 6. Recommended Classic Papers / Works
+
+- AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation
+- CAMEL: Communicative Agents for "Mind" Exploration of LLM Society
+- Improving Factuality and Reasoning in Language Models through Multi-Agent Debate
+- Should We Be Going MAD? A Look at Multi-Agent Debate
+- Reflexion: Language Agents with Verbal Reinforcement Learning
+- Self-Refine: Iterative Refinement with Self-Feedback
+- Language Agents as Optimizable Graphs (GPTSwarm)
+- Graph of Thoughts: Solving Elaborate Problems with LLMs
diff --git a/app/docs/ai/MoE/MOE-intro.en.md b/app/docs/ai/MoE/MOE-intro.en.md
new file mode 100644
index 00000000..3493165e
--- /dev/null
+++ b/app/docs/ai/MoE/MOE-intro.en.md
@@ -0,0 +1,147 @@
+---
+title: A Brief Introduction to MoE
+description: "A brief introduction to the Mixture of Experts (MoE) architecture"
+tags:
+  - MoE
+  - AI
+docId: qftv72k0kzwiz8ddksbcl2aw
+lang: en
+translatedFrom: zh
+translatedAt: 2026-04-15T12:00:00Z
+translatorAgent: claude-sonnet-4-6
+---
+
+# Mixture of Experts (MoE) Architecture
+
+To effectively scale model parameters without significantly increasing computational demands, the MoE architecture has emerged as a viable solution. MoE leverages a set of specialized sub-models and a gating mechanism to dynamically select the appropriate "expert network" for a given input. This enables the model to allocate computational resources on demand — a concept known as **conditional computation**.
+
+MoE has been widely adopted in large language models (LLMs), allowing model capability to keep improving as parameter counts scale up sharply.
+For example, **Mixtral-8x7B** proposed by Mistral AI activates only 13 billion parameters yet **outperforms or matches Llama-2-70B and GPT-3.5 on multiple benchmarks**.
+
+---
+
+## Traditional MoE Architecture
+
+Since MoE was first introduced into the Transformer architecture, it has primarily served as a replacement module for the **Feed-Forward Network (FFN)**. Typically, each expert in an MoE layer directly replicates the FFN structure it replaces, with a Router trained to decide which expert should handle a given input.
+
+![](./MOE-intro.assets/img-20250920112106486.png)
+
+MoE is applied mainly to the FFN layer rather than the self-attention layer, for the following reasons:
+
+- **Attention layers**: lower sparsity, better suited for global interactions.
+- **FFN layers**: higher sparsity, more domain-specific.
+  DS-MoE found that when using Wikitext as the task, only **20%** of FFN experts were activated,
+  while the attention layer activation rate was as high as **80%**. This high utilization suggests that attention, as the model's core communication mechanism, is a poor fit for expert specialization. Conversely, FFN layers — with their sparse characteristics — have the full potential for multi-expert specialization.
+
+![](./MOE-intro.assets/img-20250920112106518.png)
+
+---
+
+## Routing Mechanisms: Dense MoE vs Sparse MoE
+
+![](./MOE-intro.assets/img-20250920112106554.png)
+
+- **Dense MoE**
+  - The gate uses a **softmax** routing mechanism for input tokens, passing a certain weight to each expert.
+  - Advantage: training is stable.
+  - Disadvantage: all experts must be computed every time, leading to high computational cost.
+
+- **Sparse MoE**
+  - Uses a **Top-K** routing mechanism, activating only the K experts with the highest weights.
+  - Advantage: drastically reduces computation — this is the strategy used by current mainstream models (e.g., GShard, Switch Transformer, Mixtral, DeepSeek-MoE).
+  - Disadvantage: Router training becomes more complex, prone to the issue where "popular experts are used too often while underutilized experts learn nothing" — known as **routing collapse**.
+ - Solution: additional **load balancing loss** must be introduced during training. + +--- + +## Choosing the Number of Experts + +**GLaM (Google, 2021)** explored combinations of different expert counts and gating strategies: +It found that **64 experts (per layer) + Top-2 gating** achieves the best balance between performance and computational efficiency. +Top-2 gating significantly improves results and is more stable than a single expert. The 64-expert configuration also performs well in **zero-shot, one-shot, and few-shot** settings. Consequently, many subsequent MoE works (e.g., Mixtral, DBRX, DeepSeekMoE) have adopted a scale of ≤64 experts, making this design a practical reference. + +--- + +## MoE and PEFT + +There remains substantial interest in PEFT (Parameter-Efficient Fine-Tuning). +The paper [_Pushing Mixture of Experts to the Limit: Extremely Parameter Efficient MoE for Instruction Tuning_](https://arxiv.org/abs/2309.05444) was the first to propose **combining LoRA-type PEFT methods with the MoE framework**. +The core idea is to apply LoRA not to the entire large model, but specifically within the MoE expert modules. Since each MoE expert is an FFN (MLP) — the key location for knowledge storage — only a small set of LoRA experts are updated at a time, greatly enhancing scalability. + +![](./MOE-intro.assets/img-20250920112106588.png) + +The core idea of this method is to use **low-rank approximate updates** to avoid high-cost fine-tuning. + +1. **Input (Input → Embedding)** + - Input tokens (characters or subwords) first pass through an Embedding layer. + - This part is the same as in a standard Transformer. + +2. **Multi-Head Attention** + - Input embeddings enter the multi-head attention module. + - Q, K, V remain fully intact and are not modified by LoRAMoE. + - The output goes through **Add & Norm**, and the result is passed to the FFN. + +3. **FFN → MoE (Expert Routing)** + - The standard Transformer FFN is replaced by a **LoRA + MoE expert network**. + - The Router selects a subset of experts based on the input; each expert is a **LoRA-adapted (low-rank) module**, not a fully trainable FFN. + - Frozen parts (❄️) are the pre-trained backbone of the large model. + - Fire (🔥) represents the LoRA Adapter (trainable parameters, low-rank matrices). + - The weighted combination output by the Router: + + $$ + y = \sum_i \alpha_i \cdot Expert_i(x) + $$ + + where $\alpha_i$ is the weight computed by the Router for the given input. + +4. **Output (Add & Norm → Residual)** + - The expert-mixed output from the Router, combined with the residual connection, enters Add & Norm and continues to the next layer. + +--- + +#### LoRA Breakdown + +The core idea of LoRA (Low-Rank Adaptation): + +For a large linear layer weight $W \in \mathbb{R}^{d_{out} \times d_{in}}$, instead of training the entire matrix, a low-rank approximate update is added: + +$$ +W' = W + \Delta W, \quad \Delta W = BA +$$ + +- $A \in \mathbb{R}^{r \times d_{in}}, B \in \mathbb{R}^{d_{out} \times r}$ +- Rank $r \ll d_{in}, d_{out}$, typically a single digit to a few tens +- $W$: frozen (❄️, pre-trained parameters) +- $A, B$: trainable (🔥, significantly fewer parameters) + +Thus, when an input vector $x$ passes through the LoRA linear layer: + +$$ +Wx + BAx +$$ + +equals **the original backbone output + a small low-rank correction**. 
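+
+To make the shapes concrete, here is a minimal PyTorch sketch of the low-rank update just described (the `LoRALinear` name and the `r`/`alpha` defaults are illustrative, not from the paper):
+
+```python
+import torch
+import torch.nn as nn
+
+class LoRALinear(nn.Module):
+    """y = W x + (alpha / r) * B A x, with W frozen and only A, B trainable."""
+
+    def __init__(self, d_in: int, d_out: int, r: int = 16, alpha: float = 32.0):
+        super().__init__()
+        self.base = nn.Linear(d_in, d_out, bias=False)
+        self.base.weight.requires_grad_(False)              # W: frozen pre-trained weight (the snowflake)
+        self.A = nn.Parameter(torch.randn(r, d_in) * 0.01)  # A: r x d_in, small random init
+        self.B = nn.Parameter(torch.zeros(d_out, r))        # B: d_out x r, zero init so BA = 0 at start
+        self.scale = alpha / r
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Wx + BAx: frozen backbone output plus the trainable low-rank correction (the flame)
+        return self.base(x) + self.scale * (x @ self.A.T) @ self.B.T
+```
+
+Zero-initializing $B$ makes $\Delta W = BA = 0$ before fine-tuning, so training starts exactly from the pre-trained backbone.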
+ +Returning to the diagram, each expert $Expert_i$ is not a brand new large FFN, but rather **a combination of LoRA adapters over an FFN**: + +$$ +Expert_i(x) = B_i A_i x +$$ + +- The Router computes a distribution $\alpha$ over the input hidden state, then applies a weighted combination: + +$$ +y = \sum_i \alpha_i \cdot Expert_i(x) +$$ + +- The final result is added to the backbone (frozen FFN weight output): + +$$ +y_{final} = W_{FFN}x + \sum_i \alpha_i \cdot B_i A_i x +$$ + +--- + +Author: **Yang Lewis** +Non-commercial reproduction must credit the source. +For commercial use, contact the author: **840691168ly@gmail.com** diff --git a/app/docs/ai/MoE/moe-update.en.md b/app/docs/ai/MoE/moe-update.en.md new file mode 100644 index 00000000..5f927c4c --- /dev/null +++ b/app/docs/ai/MoE/moe-update.en.md @@ -0,0 +1,231 @@ +--- +title: "Theory of MoE" +description: "" +date: "2025-10-05" +tags: + - tag-one +docId: db3qwg25h6l0bh8f2sdabdqc +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Theory of MoE + +## Basic Formula Definitions + +For a vector $w$, let $\|w\|_2$ and $\|w\|_\infty$ denote its $\ell_2$ norm and $\ell_\infty$ norm, respectively. + +Given positive constants $c_1, c_2$, we define: + +- $x = \Omega(y)$, if $x > c_2 |y|$; +- $x = \Theta(y)$, if $c_1 |y| \lt x \lt c_2 |y|$; +- $x = O(y)$, if $x \lt c_1 |y|$; +- $x = o(y)$, if $\dfrac{x}{y} \to 0$. + +Where: + +- $O(y)$: upper bound, meaning "grows no faster than $y$". +- $\Omega(y)$: lower bound, meaning "grows at least as fast as $y$". +- $\Theta(y)$: both upper and lower bounds are on the order of $y$, meaning "same order as $y$". +- $o(y)$: strictly much smaller than $y$, ultimately approaching $0$. + +## **Key Assumptions**: + +1. This paper aims only to derive closed-form forgetting formulas, so it simplifies directly to a linear model: $f(X)=X^{\top}w,\; w\in \mathbb{R}^d$ + +2. This paper only discusses task-wise routing methods. During data generation, each sample contains only one signal, with all other entries as Gaussian noise. This is again for model simplification. In practical engineering, tokens are implicitly routed to various experts rather than using manually specified routing. + +> ### Dataset Generation Rules +> +> At each training round $t \in [T]$, when a new task $n_t$ arrives, the dataset $\mathcal{D}_t = (X_t, y_t)$ is generated as follows: +> +> 1. **Sample the ground truth vector for the task** +> - Uniformly sample a ground truth vector $w_{n_t}$ from the task pool $\mathcal{W} = \{w_1, \dots, w_N\}$, and set $w_{n_t}$ as the ground truth for the current task. +> 2. **Generate scaling coefficient** +> - Independently sample a random variable $\beta_t \in (0, C)$, where $C = \mathcal{O}(1)$. +> 3. **Construct input feature matrix $X_t$** +> - Generate from $s_t$ samples: +> - **One sample** is defined as $\beta_t v_{n_t}$, where $v_{n_t}$ is the feature signal of task $n_t$. +> - The remaining $s_t - 1$ samples come from a Gaussian distribution: $\mathcal{N}(0, \sigma_t^2 I_d)$, where $\sigma_t \ge 0$ is the noise level. +> 4. **Generate output labels $y_t$** +> - Using linear regression: +> $$ +> y_t = X_t^\top w_{n_t} +> $$ +> +> **Result**: +> Dataset $\mathcal{D}_t = (X_t, y_t)$, corresponding to a linear regression task. + +3. This paper uses Top-1 expert selection only. 
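+
+Before moving on to the formulas, here is a minimal NumPy sketch of the dataset generation rules above (the function and argument names are illustrative):
+
+```python
+import numpy as np
+
+def make_task_dataset(W, V, s_t, sigma_t, C=1.0, seed=None):
+    """Generate D_t = (X_t, y_t) for one round; W and V are (N, d) arrays of
+    ground-truth vectors and task feature signals."""
+    rng = np.random.default_rng(seed)
+    N, d = W.shape
+    n_t = rng.integers(N)                     # 1. sample the task uniformly from the pool
+    beta_t = rng.uniform(0.0, C)              # 2. scaling coefficient beta_t in (0, C)
+    X_t = rng.normal(0.0, sigma_t, (d, s_t))  # 3. s_t - 1 noise samples from N(0, sigma^2 I_d)
+    X_t[:, 0] = beta_t * V[n_t]               #    ... and one sample carrying the task signal
+    y_t = X_t.T @ W[n_t]                      # 4. labels from the linear model y_t = X_t^T w_{n_t}
+    return X_t, y_t, n_t
+```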
+
+## Formula Theory:
+
+Expert parameter update:
+When the router selects a particular expert, all other experts remain unchanged; only the selected expert is updated, according to the following formula:
+
+$$
+w_t^{(m_t)} = w_{t-1}^{(m_t)} + X_t (X_t^\top X_t)^{-1}(y_t - X_t^\top w_{t-1}^{(m_t)})
+$$
+
+> ### Derivation of the Expert Parameter Update Formula
+>
+> **Objective**: At round $t$, expert $m_t$ must fit the task dataset $(X_t, y_t)$
+> $$ \min_{w}\ \|X_t^\top w - y_t\|_2^2 $$
+>
+> **Problem**: Under overparameterization ($s_t < d$), the solution is non-unique; directly computing the least-squares solution discards historical information.
+> Therefore, the paper reformulates it as a **constrained optimization**:
+>
+> $$
+> \min_w \ \|w - w_{t-1}^{(m_t)}\|_2^2 \quad
+> \text{s.t.}\ \ X_t^\top w = y_t
+> $$
+>
+> **Solution**: Using Lagrange multipliers or residual projection, the update is:
+>
+> $$
+> w_t^{(m_t)} = w_{t-1}^{(m_t)} + X_t (X_t^\top X_t)^{-1}\,(y_t - X_t^\top w_{t-1}^{(m_t)})
+> $$
+>
+> **Interpretation**:
+>
+> - $(y_t - X_t^\top w_{t-1})$ = **residual** = true output − old prediction
+> - $X_t (X_t^\top X_t)^{-1}$ = the correction term that projects the residual back into parameter space
+> - The entire expression = a least-squares correction near the old parameters
+>
+> **Properties**:
+>
+> - Guarantees $X_t^\top w_t = y_t$ → the new parameters perfectly fit the current task
+> - Stays as close as possible to $w_{t-1}$ → minimizes catastrophic forgetting
+
+Auxiliary loss (commonly referred to as the load-balancing loss):
+
+$$
+L_t^{\text{aux}}(\Theta_t, \mathcal{D}_t) = \alpha \cdot M \cdot \sum_{m \in [M]} f_t^{(m)} \cdot P_t^{(m)}
+$$
+
+> ### Auxiliary Loss
+>
+> **Parameter explanation**
+>
+> - $\alpha$: weighting coefficient, controls the proportion of auxiliary loss in the total loss
+> - $M$: number of experts
+> - $f_t^{(m)}$: frequency with which expert $m$ has been selected in the first $t$ rounds (historical usage)
+> - $P_t^{(m)}$: average routing probability assigned to expert $m$ by the router at round $t$
+>
+> **Purpose**
+>
+> - Penalizes experts that have been frequently used historically and are still assigned high probability in the current round
+> - Encourages the router to make greater use of underutilized experts
+> - Achieves **load balancing** to prevent experts from being over- or under-used
+> - The product $f_t^{(m)} \cdot P_t^{(m)}$ makes the intuition clear: when an expert $m$ has been used many times historically and is still assigned large logits in the current round, this loss term becomes very large, suppressing the router's preference for a few experts and thus preventing routing collapse
+
+Locality loss:
+
+$$
+L_t^{\text{loc}}(\Theta_t, \mathcal{D}_t) = \sum_{m \in [M]} \pi_m(X_t,\Theta_t)\, \|w_t^{(m)} - w_{t-1}^{(m)}\|_2
+$$
+
+> ### Locality Loss
+>
+> **Parameter explanation**
+>
+> - $\pi_m(X_t,\Theta_t)$: probability assigned to expert $m$ by the router (softmax output)
+> - $w_t^{(m)}$: parameters of expert $m$ under the current task
+> - $w_{t-1}^{(m)}$: parameters of expert $m$ from the previous round
+>
+> **Purpose**
+>
+> - Constrains expert parameter updates from deviating too far from historical values
+> - Encourages similar tasks to be routed to the same expert, thereby reducing loss
+> - Reduces forgetting (updates for new tasks do not completely overwrite old knowledge)
+> - Improves expert **specialization**: each expert gradually stabilizes on a particular type of task
+
+Training loss:
+
+$$
+L_t^{\text{tr}}(w_t^{(m_t)},
\mathcal{D}_t) = \frac{1}{s_t}\,\|X_t^\top w_t^{(m_t)} - y_t\|_2^2 +$$ + +> ### Training Loss +> +> **Parameter explanation** +> +> - $s_t$: number of data samples for the current task +> - $X_t$: feature matrix +> - $y_t$: output label vector +> - $w_t^{(m_t)}$: parameters of the expert selected at round $t$ +> +> **Purpose** +> +> - Essentially the mean squared error (MSE) of least-squares regression +> - Makes the selected expert fit the current task data +> - Ensures the expert can capture the true signal (ground truth) of the task + +Total loss: + +$$ +L_t^{\text{task}} = L_t^{\text{tr}} + L_t^{\text{loc}} + L_t^{\text{aux}} +$$ + +With the above total loss function, router parameter updates can be performed during training. + +Router update formula: + +$$ +\theta_{t+1}^{(m)} = \theta_t^{(m)} - \eta \cdot \nabla_{\theta^{(m)}} L_t^{\text{task}}(\Theta_t, w_t^{(m_t)}, \mathcal{D}_t), \quad \forall m \in [M] +$$ + +### Tricks: + +#### Early Termination + +In continual learning (CL) scenarios, if the gating network continues to update indefinitely, the allocation probabilities across different experts may gradually converge as more tasks arrive, eventually causing **expert differentiation to collapse** and **routing errors**. To address this, an **Early Termination** mechanism must be introduced. + +- **Core Idea** + After sufficient rounds of task exploration ($T_1$ rounds), the expert assignments in MoE should gradually converge. Continuing to train the gating network at this point no longer yields benefits and instead leads to overfitting and blurring of task boundaries. Therefore, at an appropriate time, **updates to the router parameters $\Theta_t$ should be terminated** to maintain the stability of expert assignments. + +- **Convergence Criterion** + Define a convergence indicator $I^{(m)}$ to measure whether expert $m$ has converged: + + $I^{(m)} = \big| h_m(X_t, \theta_t) - h_{m_t}(X_t, \theta_t) \big|$ + + where $h_m(X_t,\theta_t)$ denotes the gating output of expert $m$ on the current input, and $h_{m_t}(X_t,\theta_t)$ denotes the output of the expert actually selected by the router. + - If this gap is **larger than threshold $\Gamma$**, expert $m$ has not yet converged and $\Theta_t$ should continue to be updated. + - If this gap is **smaller than threshold $\Gamma$**, the gating network is considered converged and updates to $\Theta_t$ are stopped. + - This prevents the router from continuing to update after convergence, which would otherwise destroy expert assignments. It also ensures that different experts stably serve their respective task clusters. Combined with the constraints of $L^{loc}$ and $L^{aux}$, the early termination mechanism enables the system to maintain balance and low forgetting in CL environments over the long term. + +#### Multiple Variants of Locality Loss + +- **Parameter Locality** + +$$ + L^{loc}_{param} = \sum_{m \in [M]} \pi_m(X_t,\Theta_t)\,\|w_t^{(m)} - w_{t-1}^{(m)}\|_2 +$$ + + - The method used in the preceding sections. + - Ensures that the parameter differences for the same expert across adjacent tasks are not too large. + +- **Representation Locality** — Constraints can be applied directly to the representations (hidden states) output by each expert. + + - For example: + + $$ + L^{loc}_{repr} = \sum_{m \in [M]} \pi_m(X_t,\Theta_t)\,\|f_m(X_t) - f_m(X_{t-1})\|_2 + $$ + + - Keeps similar inputs stable on the same expert. + +- **Routing Locality** — Constrains the router's assignment probabilities from jumping too drastically between tasks. 
+ + - Of the form: + + $$ + L^{loc}_{route} = \sum_{m \in [M]} \|\pi_m(X_t,\Theta_t) - \pi_m(X_{t-1},\Theta_{t-1})\|_2 + $$ + +- **Task Embedding Locality** + - If task embeddings can be constructed (e.g., via meta-learning or contrastive learning), one can define: + - Similar tasks → routed to the same expert + - Dissimilar tasks → differentiated as much as possible diff --git a/app/docs/ai/Multi-agents-system-on-Code-Translation/code-translation-intro.en.mdx b/app/docs/ai/Multi-agents-system-on-Code-Translation/code-translation-intro.en.mdx new file mode 100644 index 00000000..59e3f5e8 --- /dev/null +++ b/app/docs/ai/Multi-agents-system-on-Code-Translation/code-translation-intro.en.mdx @@ -0,0 +1,24 @@ +--- +title: Essential Reading for Getting Started with Code Translation +description: "" +date: "2025-09-19" +tags: + - tag-one +docId: qaezsrj15sudk796r5otne36 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +Recommended introductory papers: + +1. ExeCoder: Empowering Large Language Models with Executability Representation for Code Translation — Work from Microsoft, proposing some fairly creative ideas. +2. Repository-Level Compositional Code Translation and Validation — File-level approach (the longer the code, the harder the task). +3. CoTran: An LLM-based Code Translator using Reinforcement Learning with Feedback from Compiler and Symbolic Execution — A common method combining compiler feedback and RL, with a well-designed architecture. +4. Lost in Translation: A Study of Bugs Introduced by Large Language Models while Translating Code — A survey-like paper that helps you get up to speed quickly. +5. A Systematic Literature Review on Neural Code Translation — A comprehensive literature review; after reading it, you'll feel like you have a handle on everything. +6. IMPROVING COMPLEX REASONING WITH DYNAMIC PROMPT CORRUPTION: A SOFT PROMPT OPTIMIZATION APPROACH — A paper on prompt engineering; reading it may help you write better prompts. +7. Enhancing LLM-based Code Translation in Repository Context via Triple Knowledge-Augmented — Combines knowledge augmentation with code translation from an interesting angle, and also handles file-level processing. + +Feel free to share any great papers you find with everyone! diff --git a/app/docs/ai/agents-todo/agent-ecosystem.en.mdx b/app/docs/ai/agents-todo/agent-ecosystem.en.mdx new file mode 100644 index 00000000..0d7e635b --- /dev/null +++ b/app/docs/ai/agents-todo/agent-ecosystem.en.mdx @@ -0,0 +1,63 @@ +--- +title: Agent +description: "LLM Agents: CS294/194-196 course, ReAct, FireAct, and more" +status: todo +docId: ue27z7z95yzw3lhhfj7nit1c +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +This section aggregates courses and technical frameworks related to LLM agents. + +--- + +## OpenHands (formerly OpenDevin) + +- **Links**: + - [GitHub Repository](https://github.com/All-Hands-AI/OpenHands) + - [Paper arXiv:2407.16741](https://arxiv.org/pdf/2407.16741) +- **Overview**: + OpenHands is an open-source Agent platform capable of executing code / shell / web / API tasks, suited for developers' daily automation and toolchain integration. 
+ +--- + +## Kimi-Researcher + +- **Links**: + - [Official Introduction](https://moonshotai.github.io/Kimi-Researcher/) + - [Zhihu Article](https://zhuanlan.zhihu.com/p/1921119537757140195) +- **Overview**: + A reinforcement learning (RL)-driven research Agent that can self-plan, perform multi-round retrieval and reasoning, for complex tasks such as technical research and report writing. + +--- + +## OpenAI Deep Research + +- **Links**: + - [Introducing Deep Research — OpenAI](https://openai.com/index/introducing-deep-research/) + - [ChatGPT Deep Research — Wikipedia](https://en.wikipedia.org/wiki/ChatGPT_Deep_Research) +- **Overview**: + Deep Research is an Agent mode launched by OpenAI in 2025, supporting multi-step internet research tasks with the ability to browse web pages, PDFs, and image materials to synthesize reports. Suited for professional research in fields such as science, policy, and engineering. + +--- + +## KAG: Knowledge Augmented Generation + +- **Links**: + - [arXiv:2409.13731 (KAG)](https://arxiv.org/abs/2409.13731) +- **Overview**: + KAG is an LLM enhancement framework for professional domains, designed to combine **knowledge graphs + vector retrieval** to address the limitations of RAG in specialized scenarios. It places particular emphasis on the integration of knowledge logic (such as numerical values, temporal information, and expert rules) to deliver more reliable performance on question-answering and reasoning tasks. + +--- + +## DeepResearchAgent / DeepResearcher & DR Agent Survey + +- **Links**: + - [Deep Research Agents: A Systematic Examination and Roadmap (arXiv)](https://arxiv.org/abs/2506.18096) + - [DeepResearcher: Scaling Deep Research via Reinforcement Learning (arXiv:2504.03160)](https://arxiv.org/abs/2504.03160) +- **Overview**: + These works study the architectural components, retrieval methods, tool usage, and planning processes for Agents performing deep research in real-world environments. + - The DR Survey provides a taxonomy of Agent systems, key challenges, and future directions. + - DeepResearcher trains Agents using RL + web interaction, outperforming traditional retrieval- or prompt-based methods on open-domain research tasks. 
diff --git a/app/docs/ai/agents-todo/cs294-194-196/index.en.mdx b/app/docs/ai/agents-todo/cs294-194-196/index.en.mdx new file mode 100644 index 00000000..0d89429c --- /dev/null +++ b/app/docs/ai/agents-todo/cs294-194-196/index.en.mdx @@ -0,0 +1,15 @@ +--- +title: CS294/194-196 Large Language Model Agents +description: Course materials for CS294/194-196 +docId: eo5rwumxkh7twfdvlp5po9rc +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Course Information + +- Official site: https://rdi.berkeley.edu/llm-agents/f24 +- Institution: UC Berkeley +- Highlights: Systematic introduction to the theory and practice of LLM agents diff --git a/app/docs/ai/ai-math-basics/calculus-optimization/index.en.mdx b/app/docs/ai/ai-math-basics/calculus-optimization/index.en.mdx new file mode 100644 index 00000000..4130a800 --- /dev/null +++ b/app/docs/ai/ai-math-basics/calculus-optimization/index.en.mdx @@ -0,0 +1,49 @@ +--- +title: Calculus & Optimization +description: Core concepts in calculus and optimization and their applications in large models +date: "2024-01-12" +tags: + - calculus + - optimization + - derivative + - gradient + - chain-rule + - convex-optimization + - backpropagation + - sgd + - adam + - rmsprop +docId: v8m8kdjzzx7uhiz69r5m3m9o +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Core Concepts + +- Derivative +- Partial derivative +- Gradient +- Chain rule +- Taylor expansion +- Lagrange multipliers +- Convex optimization + +## Applications in Large Models + +### Backpropagation + +- A perfect embodiment of gradient computation and the chain rule. + +### Model Training + +- The core of minimizing the loss function (an optimization problem); all optimizers (SGD, Adam, RMSProp) are variants of gradient descent. + +### Activation Functions + +- Their derivative properties are critical for gradient propagation. + +### Model Convergence Analysis + +- Involves convergence theory from calculus. diff --git a/app/docs/ai/ai-math-basics/information-theory/index.en.mdx b/app/docs/ai/ai-math-basics/information-theory/index.en.mdx new file mode 100644 index 00000000..ad6cd522 --- /dev/null +++ b/app/docs/ai/ai-math-basics/information-theory/index.en.mdx @@ -0,0 +1,47 @@ +--- +title: Information Theory +description: Core concepts in information theory and their applications in large models +date: "2024-01-13" +tags: + - information-theory + - entropy + - cross-entropy + - kl-divergence + - mutual-information + - rl + - model-compression +docId: gpoh50befguf7zgsetzkvbi3 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Core Concepts + +- Information content +- Entropy +- Joint entropy +- Conditional entropy +- Mutual information +- Cross-entropy +- KL divergence (Kullback-Leibler Divergence) + +## Applications in Large Models + +### Loss Function + +- Cross-entropy loss is a measure of the difference between the predicted distribution and the true distribution. + +### Attention Mechanism + +- When computing attention weights, the softmax operation relates to probability distributions and entropy. + +### Reinforcement Learning + +- The optimization objective in policy gradient may include an entropy regularization term to encourage exploration. +- The core of TRPO / PPO algorithms is a KL divergence constraint. + +### Model Compression and Quantization + +- Evaluating quantization information loss. 
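+
+As a quick worked example of how these quantities relate, here is a small NumPy sketch (the helper names are illustrative) using the identity KL(p || q) = H(p, q) - H(p):
+
+```python
+import numpy as np
+
+def entropy(p):
+    """H(p) = -sum p log p (in nats), with the convention 0 * log 0 = 0."""
+    p = np.asarray(p, dtype=float)
+    logs = np.log(p, out=np.zeros_like(p), where=p > 0)
+    return -np.sum(p * logs)
+
+def cross_entropy(p, q):
+    """H(p, q) = -sum p log q; the usual training loss when p is a one-hot label."""
+    p, q = np.asarray(p, dtype=float), np.asarray(q, dtype=float)
+    return -np.sum(p * np.log(q))  # assumes q > 0 wherever p > 0
+
+def kl_divergence(p, q):
+    """KL(p || q) = H(p, q) - H(p) >= 0, with equality iff p == q."""
+    return cross_entropy(p, q) - entropy(p)
+
+p = np.array([0.7, 0.2, 0.1])  # "true" distribution
+q = np.array([0.6, 0.3, 0.1])  # model's predicted distribution
+print(entropy(p), cross_entropy(p, q), kl_divergence(p, q))
+```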
diff --git a/app/docs/ai/ai-math-basics/linear-algebra/index.en.mdx b/app/docs/ai/ai-math-basics/linear-algebra/index.en.mdx new file mode 100644 index 00000000..3e8a2f4f --- /dev/null +++ b/app/docs/ai/ai-math-basics/linear-algebra/index.en.mdx @@ -0,0 +1,54 @@ +--- +title: Linear Algebra +description: Core concepts in linear algebra and their applications in large models +date: "2024-01-10" +tags: + - linear-algebra + - matrix + - tensor + - embedding + - attention + - transformer + - pca + - svd +docId: l1kvojw2gvggxflrmzc7j7sm +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Core Concepts + +- Vector +- Matrix +- Tensor +- Eigenvalue / eigenvector +- SVD (Singular Value Decomposition) +- PCA (Principal Component Analysis) + +## Applications in Large Models + +### Embedding + +- Word vectors and Token embeddings are fundamentally high-dimensional vectors. + +### Attention Mechanism + +- QKV matrix multiplication +- Core computation in self-attention (dot product) + +### Transformer Architecture + +- Various layers (Linear Layer) +- Residual connections +- Feed-Forward Network + → All involve matrix operations + +### Model Parameters + +- The entire model's parameter count can be represented using matrices and tensors. + +### Dimensionality Reduction and Visualization + +- Reducing the dimensionality of embedding spaces (t-SNE, UMAP, PCA) for analysis. diff --git a/app/docs/ai/ai-math-basics/linear-algebra/resources/index.en.mdx b/app/docs/ai/ai-math-basics/linear-algebra/resources/index.en.mdx new file mode 100644 index 00000000..cac27ae8 --- /dev/null +++ b/app/docs/ai/ai-math-basics/linear-algebra/resources/index.en.mdx @@ -0,0 +1,20 @@ +--- +title: References +description: Linear algebra reference materials. +date: "2024-01-10" +tags: + - linear-algebra + - resources +docId: ba5lqs2zg1jqc30qzw3osm9v +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +- [Immersive Linear Algebra](https://textbooks.math.gatech.edu/ila/index2.html) +- _The Geometric Meaning of Linear Algebra_ (Ren Guangqian, Xie Cong, Hu Cuifang) — PDF +- [3Blue1Brown (YouTube Channel)](https://www.3blue1brown.com/) + - "Essence of Linear Algebra" video series + - "Essence of Calculus" video series + → Exceptional visualization that helps build strong geometric intuition. diff --git a/app/docs/ai/ai-math-basics/math-foundations.en.mdx b/app/docs/ai/ai-math-basics/math-foundations.en.mdx new file mode 100644 index 00000000..c4360865 --- /dev/null +++ b/app/docs/ai/ai-math-basics/math-foundations.en.mdx @@ -0,0 +1,89 @@ +--- +title: Mathematical Foundations for AI +description: "Core mathematics for AI: linear algebra, probability and statistics, calculus and optimization, information theory, numerical analysis" +date: "2025-01-27" +tags: + - mathematics + - linear-algebra + - probability + - calculus + - information-theory +docId: vcfer8dvlt80se4kmbnshx7x +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +AI and large models require a solid mathematical foundation. This section covers the core mathematical concepts needed for deep learning and large model development. + +## Core Mathematical Areas + +### 1. 
Linear Algebra + +**Core concepts**: vectors, matrices, tensors, eigenvalues/eigenvectors, SVD (Singular Value Decomposition), PCA (Principal Component Analysis) + +**Applications in large models**: + +- **Embedding**: word vectors and Token embeddings are fundamentally high-dimensional vectors +- **Attention Mechanism**: QKV matrix multiplication; core computation in self-attention (dot product) +- **Transformer architecture**: various layers (Linear Layer), residual connections, Feed-Forward Network — all involve matrix operations +- **Model parameters**: the entire model's parameter count can be represented using matrices and tensors +- **Dimensionality reduction and visualization**: reducing embedding spaces (t-SNE, UMAP, PCA) for analysis + +**References**: + +- [Immersive Linear Algebra](https://textbooks.math.gatech.edu/ila/index2.html) +- [3Blue1Brown - Essence of Linear Algebra](https://www.youtube.com/@3blue1brown) — exceptional visualization that helps build geometric intuition +- _The Geometric Meaning of Linear Algebra_ (Ren Guangqian, Xie Cong, Hu Cuifang) + +### 2. Probability and Statistics + +**Core concepts**: random variables, probability distributions (Gaussian, Bernoulli, multinomial), expectation, variance, covariance, conditional probability, Bayes' theorem, Maximum Likelihood Estimation (MLE), Maximum A Posteriori (MAP) + +**Applications in large models**: + +- **Language modeling**: P(next token | context) is conditional probability +- **Loss function**: cross-entropy loss originates from information theory and measures differences between probability distributions +- **Sampling and generation**: Top-k and Top-p (nucleus) sampling are both based on probability distributions +- **Uncertainty quantification**: confidence estimation for model predictions +- **Reinforcement learning**: optimization based on probabilistic policies + +### 3. Calculus and Optimization + +**Core concepts**: derivative, partial derivative, gradient, chain rule, Taylor expansion, Lagrange multipliers, convex optimization + +**Applications in large models**: + +- **Backpropagation**: a perfect embodiment of gradient computation and the chain rule +- **Model training**: the core of minimizing the loss function; all optimizers (SGD, Adam, RMSProp) are variants of gradient descent +- **Activation functions**: their derivative properties are critical for gradient propagation +- **Model convergence analysis**: involves convergence theory from calculus + +### 4. Information Theory + +**Core concepts**: information content, entropy, joint entropy, conditional entropy, mutual information, cross-entropy, KL divergence + +**Applications in large models**: + +- **Loss function**: cross-entropy loss measures the difference between predicted and true distributions +- **Attention mechanism**: the softmax operation relates to probability distributions and entropy when computing attention weights +- **Reinforcement learning**: entropy regularization terms in policy gradient objectives; KL divergence constraints in TRPO/PPO algorithms +- **Model compression and quantization**: evaluating quantization information loss + +### 5. 
Numerical Analysis + +**Core concepts**: floating-point precision, numerical stability, gradient clipping, learning rate scheduling + +**Applications in large models**: + +- **Preventing gradient explosion/vanishing**: large models are deep and computationally intensive, making numerical stability particularly critical +- **BFloat16/FP16 training**: understanding how different floating-point precisions affect model training +- **Optimizer selection**: some optimizers are numerically more stable + +## Study Recommendations + +1. **Combine theory with practice**: don't just derive formulas — understand how these mathematical concepts apply concretely in AI +2. **Build visual intuition**: use resources like 3Blue1Brown to develop geometric understanding +3. **Implement in code**: try implementing basic mathematical operations yourself to deepen understanding +4. **Build progressively**: start from foundational concepts and gradually move to advanced applications diff --git a/app/docs/ai/ai-math-basics/math_books.en.md b/app/docs/ai/ai-math-basics/math_books.en.md new file mode 100644 index 00000000..1d91e241 --- /dev/null +++ b/app/docs/ai/ai-math-basics/math_books.en.md @@ -0,0 +1,123 @@ +--- +title: Recommended Books on Mathematics and Data Science +description: "" +date: "2025-10-06" +tags: + - tag-one +docId: kzi6k1yg1sehlxidnxdsf59a +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Recommended Books on Mathematics and Data Science + +## Reader's Guide + +### A. Machine Learning / Data Science (Beginners – Engineers) + +- _Data with Dao, Programming Made Easy_ (数据有道,编程不难) +- Iris Book Series ③④⑤⑦ (Mathematical Elements / Power of Matrices / Statistics Simplified / Machine Learning) +- _Math with Bad Drawings_ +- _Numbers Don't Lie_ +- _The Ten Equations That Rule the World_ + +### B. Probability / Statistics / Bayesian (Researchers – Advanced Enthusiasts) + +- _Probability Theory: The Logic of Science_ (概率论沉思录) +- _The Mathematics of Selfishness_ (利己主义的数学解析) + +### C. Mathematical Foundations & General Knowledge (High School – Undergraduate & General Public) + +- _What Is Mathematics?_ (什么是数学) +- _Mathematics and Life_ 1 / 2 / 3 (数学与生活) +- _Strange Curves, Counting Rabbits…_ (怪曲线、数兔子…) + +### D. Competitions / Problem Sets / Reference Handbooks (Students – Teachers) + +- _Berkeley Problems in Mathematics_ (伯克利数学问题集) +- _Mathematical Handbook_ (数学手册) + +### E. History of Mathematics & Biographies & Philosophy (Those Interested in Cultural Background) + +- _From Mathematics to Philosophy_ (从数学到哲学) +- _Logical Dilemmas: The Life and Work of Kurt Gödel_ (哥德尔传) +- _A Mathematician's Apology_ (一个数学家的辩白) + +--- + +## 1. Iris Book Series + +- [Iris Book Series 01 · Programming Made Easy](https://space.bilibili.com/513194466?spm_id_from=333.337.search-card.all.click) + — **Dr Ginger (Jiang Weisheng)**. A Python introduction for those with no programming experience, covering variables, control flow, functions, and object-oriented programming. + +- _Iris Book Series · Data with Dao_ + Using Python as the main thread, this book introduces data acquisition, cleaning, visualization, and basic analysis workflows for complete beginners — an entry-level data science read. + +- _Iris Book Series · Mathematical Elements (Volume 3)_ + A quick refresher on the linear algebra, calculus, and probability and statistics needed for machine learning. 
+
+- _Iris Book Series · Power of Matrices (Volume 4)_
+  Focuses on linear algebra and matrix decomposition (SVD, eigenvalues, singular values) and their applications in ML.
+
+- _Iris Book Series · Statistics Simplified (Volume 5)_
+  Uses intuitive examples to explain core statistical concepts including Bayes, hypothesis testing, and regression analysis.
+
+- _Iris Book Series · Machine Learning (Volume 7)_
+  Threads through the iris dataset to explain common supervised and unsupervised learning algorithms and hands-on scikit-learn usage.
+
+- [Math with Bad Drawings](https://www.goodreads.com/book/show/36205393-math-with-bad-drawings)
+  — **Ben Orlin**. Explains functions, statistics, and game theory using cartoons with deliberately bad drawings — lighthearted and humorous.
+
+- [Numbers Don't Lie: 71 Things You Need to Know About the World](https://www.goodreads.com/book/show/50705179-numbers-don-t-lie)
+  Interprets global topics such as energy, population, and technology through the lens of data and orders of magnitude.
+
+- [The Ten Equations That Rule the World and How You Can Use Them](https://www.goodreads.com/book/show/55607293-the-ten-equations-that-rule-the-world)
+  Ten equations from linear regression to power laws, demonstrating their predictive power in finance, social networks, and sports.
+
+---
+
+## 2. Competitions / Reference Books
+
+- [Berkeley Problems in Mathematics (3rd Edition)](https://book.douban.com/subject/2066460/) — P. N. de Souza et al.
+  A collection of classic problems from UC Berkeley entrance exams and competitions, focusing on analysis, algebra, and combinatorics; complete solutions included.
+
+- [Mathematical Handbook](https://book.douban.com/subject/20418732/) — Compiled by Sichuan Mining Institute
+  A formula reference book covering common algebra, trigonometry, calculus, and statistical tables.
+
+---
+
+## 3. Probability / Bayesian
+
+- [Probability Theory: The Logic of Science](https://wap.sciencenet.cn/blog-1319915-1449152.html?mobile=1) — Edwin T. Jaynes
+  Advocates unifying probability theory using maximum entropy and a Bayesian perspective; often called the "Bayesian Bible."
+
+- [The Mathematics of Selfishness](https://book.douban.com/subject/27150468/) — Karl Sigmund
+  An introduction to evolutionary game theory, using the Prisoner's Dilemma to explain mathematical models of cooperation and altruism.
+
+---
+
+## 4. Popular Science
+
+- [Strange Curves, Counting Rabbits, & Other Mathematical Explorations](https://book.douban.com/subject/6985480/)
+  A collection of popular mathematics essays for general audiences, covering fractals, the Fibonacci sequence, topological paradoxes, and more.
+
+- [What Is Mathematics?](https://book.douban.com/subject/10455982/) — Courant & Robbins (Classic New Edition)
+  A general mathematics culture book covering number theory, geometry, calculus, and the concept of infinity.
+
+- [Mathematics and Life](https://book.douban.com/subject/26148739/) 1 / 2 / 3 — Toyama Hiraku
+  A classic Japanese popular science series that uses everyday scenarios to explain mathematical ideas, methods, and fascinations.
+
+---
+
+## 5. History of Mathematics · Biographies · Philosophy
+
+- [A Mathematician's Apology](https://book.douban.com/subject/2135227/) — G. H. Hardy
+  A confession on mathematical aesthetics and the life of research, written with great literary quality.
+ +- [Logical Dilemmas: The Life and Work of Kurt Gödel](https://book.douban.com/subject/36073022/) — John Dawson + A systematic account of Gödel's life, academic trajectory, and the background and impact of the incompleteness theorems. + +- [From Mathematics to Philosophy](https://book.douban.com/subject/36532721/) — Hao Wang + Explores the relationships among formal logic, Gödel's incompleteness, mathematical foundations, and philosophical propositions. diff --git a/app/docs/ai/ai-math-basics/numerical-analysis/index.en.mdx b/app/docs/ai/ai-math-basics/numerical-analysis/index.en.mdx new file mode 100644 index 00000000..8dbb6b7e --- /dev/null +++ b/app/docs/ai/ai-math-basics/numerical-analysis/index.en.mdx @@ -0,0 +1,40 @@ +--- +title: Numerical Analysis +description: Core concepts in numerical analysis and their applications in large models +date: "2024-01-14" +tags: + - numerical-analysis + - floating-point + - stability + - gradient-clipping + - learning-rate + - bfloat16 + - fp16 + - optimizer +docId: ebgss2sa91drisxswsh6iu8x +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Core Concepts + +- Floating-point precision +- Numerical stability +- Gradient clipping +- Learning rate scheduling + +## Applications in Large Models + +### Preventing Gradient Explosion / Vanishing + +- Large models are deep and computationally intensive, making numerical stability particularly critical. + +### BFloat16 / FP16 Training + +- Understanding how different floating-point precisions affect model training. + +### Optimizer Selection + +- Some optimizers are numerically more stable. diff --git a/app/docs/ai/ai-math-basics/probability-statistics/index.en.mdx b/app/docs/ai/ai-math-basics/probability-statistics/index.en.mdx new file mode 100644 index 00000000..393a1d53 --- /dev/null +++ b/app/docs/ai/ai-math-basics/probability-statistics/index.en.mdx @@ -0,0 +1,58 @@ +--- +title: Probability and Statistics +description: Core concepts in probability and statistics and their applications in large models +date: "2024-01-11" +tags: + - probability + - statistics + - random-variable + - distribution + - bayes + - mle + - map + - hypothesis-testing + - confidence-interval + - cross-entropy + - rlhf +docId: d5fya0gd1w8vblv8qeqgnqtu +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +## Core Concepts + +- Random variable +- Probability distributions (Gaussian, Bernoulli, multinomial) +- Expectation +- Variance +- Covariance +- Conditional probability +- Bayes' theorem +- Maximum Likelihood Estimation (MLE) +- Maximum A Posteriori (MAP) +- Hypothesis testing +- Confidence interval + +## Applications in Large Models + +### Language Modeling + +- P(next token | context) is conditional probability. + +### Loss Function + +- Cross-entropy loss originates from information theory and measures differences between probability distributions. + +### Sampling and Generation + +- Top-k and Top-p (nucleus) sampling are both based on probability distributions. + +### Uncertainty Quantification + +- Confidence estimation for model predictions. + +### Reinforcement Learning (RLHF) + +- Optimization based on probabilistic policies. 
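+
+As a concrete example of generation driven by a probability distribution, here is a minimal NumPy sketch of Top-p (nucleus) sampling (the names and the toy distribution are illustrative):
+
+```python
+import numpy as np
+
+def top_p_sample(probs, p=0.9, rng=None):
+    """Keep the smallest set of tokens whose total mass >= p, renormalize, sample."""
+    rng = rng or np.random.default_rng()
+    order = np.argsort(probs)[::-1]                           # token ids, most probable first
+    cutoff = np.searchsorted(np.cumsum(probs[order]), p) + 1  # size of the nucleus
+    nucleus = order[:cutoff]
+    renorm = probs[nucleus] / probs[nucleus].sum()            # renormalized distribution
+    return rng.choice(nucleus, p=renorm)
+
+# toy next-token distribution over a 5-token vocabulary
+probs = np.array([0.5, 0.2, 0.15, 0.1, 0.05])
+print(top_p_sample(probs, p=0.9))  # samples from the 4 tokens covering 0.95 mass
+```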
diff --git a/app/docs/ai/ai-math-basics/probability-statistics/resources/index.en.mdx b/app/docs/ai/ai-math-basics/probability-statistics/resources/index.en.mdx new file mode 100644 index 00000000..d667ea60 --- /dev/null +++ b/app/docs/ai/ai-math-basics/probability-statistics/resources/index.en.mdx @@ -0,0 +1,16 @@ +--- +title: References +description: Probability and statistics reference materials. +date: "2024-01-11" +tags: + - probability + - statistics + - resources +docId: q7kagbrpnek7b89axvssn4bo +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +- Fang Hao diff --git a/app/docs/ai/compute-platforms/model-compuational-resource-demand.en.md b/app/docs/ai/compute-platforms/model-compuational-resource-demand.en.md new file mode 100644 index 00000000..293659a3 --- /dev/null +++ b/app/docs/ai/compute-platforms/model-compuational-resource-demand.en.md @@ -0,0 +1,198 @@ +--- +title: Compute Requirements Guide +description: How to calculate the GPU memory required to train large models +date: "2025-09-20" +tags: + - compute-platforms +docId: ns7q5ehuje6oiua7as6rtnyf +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Notes on Multi-GPU Training of Large Models + +## 1. Unit Conventions + +1 GB = 1024 MB = 1024×1024 KB = 1024×1024×1024 Bytes = 1024×1024×1024×8 Bits + +**Parameter types and sizes:** + +| Parameter Type | Bytes | +| -------------- | ----- | +| FP32 | 4 | +| FP16 / BF16 | 2 | +| INT8 | 1 | +| INT4 | 0.5 | + +--- + +## 2. GPU Memory Calculation for Training Large Models + +Even with 80 GB of GPU memory, a single card cannot handle full training of models with billions of parameters. +Weights are only one part — gradients, optimizer states, and activations all consume GPU memory. + +Assuming the model has **N parameters** (e.g., 2B = 2 billion): + +1. **Weights W** + - Storage format: BF16 (2 bytes) + - Memory: $W = N \times 2$ bytes + - Example: 2B parameters → ≈ 4 GB + +2. **Gradients G** + - Storage format: BF16 (2 bytes) + - Memory: $G = N \times 2$ bytes + - Example: 2B parameters → ≈ 4 GB + +3. **Optimizer States (Adam)** + - Contains momentum V and squared gradient S, each in FP32 (4 bytes) + - Each ≈ 8 GB, total ≈ 16 GB + Note: optimizer memory also includes additional overhead (weight gradients may be copied many times during training) + 1. **Weights W** + - Model parameters stored in BF16/FP16 at 2 bytes. + + 2. **Gradients G** + - Temporary storage during backpropagation, BF16/FP16 at 2 bytes. + + 3. **Optimizer states** (vary significantly by optimizer): + - **SGD**: typically only needs the gradients themselves — **0 copies**. + - **SGDM (SGD with momentum)**: requires one momentum vector (FP32, 4 bytes). **One copy** + - **Adam/AdamW**: + - **First-order momentum (V)**: FP32 (4 bytes). + - **Second-order momentum (S)**: FP32 (4 bytes). + - So **2 state copies**. + + 4. **Master weights ($W^A$)** + - Common in mixed-precision training: although forward/backward passes use BF16, the optimizer update requires FP32 precision → so an additional FP32 copy of the weights is stored. + +4. **Activations** + - Depends on batch size, seq_len, and implementation details + - Rough estimate: ≈ 0.7–1.0 × weight size + +--- + +## 3. 
Formula Summary + +- **Standard Adam mode:** + + $W + G + W^A + V + S + 0.7W ≈ 24.8$ GB (using 2B parameters as an example) + +- **DeepSpeed ZeRO-3 mode:** + + $W + G + W^A + G^A + V + S + 0.7W ≈ 32.8$ GB + +Note: ZeRO-3 uses less memory but incurs higher communication and I/O overhead. + +--- + +## 4. Real-World Case: Mixtral-8×7B + +### Setup and Constants + +- Architecture: `d_model ≈ 4096`, `ffn_dim ≈ 14336`, **8 experts** per layer, **32 layers**, SwiGLU (gate/up/down three linear layers). +- Parameters per single expert: + $4096×14336×2 + 14336×4096 = 176,160,768$ ≈ **1.76×10^8** + → BF16 weights ≈ **352 MB/expert** +- 8 experts per layer ≈ **2.82 GB/layer** +- 32 layers total ≈ **90 GB (expert weights only)** + → Full-expert full-parameter training is impossible on **44 GB GPU memory**. + +--- + +### Case A: Router-only + +- Router parameters: `d_model × n_experts ≈ 4k × 8 = 32k` +- All 32 layers ≈ **millions of parameters** +- Extremely low overhead (MB level); memory is mainly consumed by **activations** +- Fully feasible on 44 GB, but improvement is limited + +--- + +### Case B: Partial Layers × Partial Experts + +Example: train only the bottom **6 layers**, **2 experts** per layer, plus the router. + +- Trainable parameter count: + $6 × 2 × 176,160,768 = 2.114B$ +- Weights (BF16): ≈ 4.23 GB +- Gradients (BF16): ≈ 4.23 GB +- Adam states (V+S, FP32): ≈ 16.9 GB +- Master weights (FP32): ≈ 8.46 GB +- **Total (persistent memory + gradients)**: ≈ 33.8 GB +- Adding frozen weight footprint and activation overhead, a 44 GB card requires: + - `batch=1–2` + - `seq_len ≤ 1024` + - `use_cache=False` + - `gradient_checkpointing=True` +- Feasible, but requires strict control. + +--- + +### Case C: 4-bit Full Model + LoRA + +Attach LoRA (r=16) to experts / router. + +- LoRA parameters per expert: + $r × (4096+14336 + 4096+14336 + 14336+4096) = r × 55296$ + → r=16 → 0.885M/expert +- 8 experts per layer: 7.08M +- 32 layers: 226.6M LoRA parameters +- Memory breakdown: + - Weights ≈ 0.45 GB + - Gradients ≈ 0.45 GB + - Adam + Master ≈ 2.72 GB + - Total ≈ 3.6 GB +- Well within the 44 GB memory budget + +--- + +## 5. Parallelism Strategies + +### Data Parallelism (DP) + +- Each GPU holds a full model copy, processes different batches, gradients are aggregated +- Advantage: simple +- Disadvantage: high memory redundancy + +### Distributed Data Parallelism (DDP) + +- One process per GPU, gradients synchronized in buckets +- Advantage: mainstream, stable +- Disadvantage: still requires full model on each GPU + +### ZeRO Optimization (DeepSpeed) + +- ZeRO-1: shard optimizer states +- ZeRO-2: also shard gradients +- ZeRO-3: shard parameters as well +- Advantage: memory-efficient +- Disadvantage: complex communication + +### Model Parallelism + +- **Tensor Parallelism (TP)**: split matrices across devices +- **Pipeline Parallelism (PP)**: split layers, like an assembly line +- **MoE Parallelism**: experts distributed across different devices, tokens activate a subset of experts + +--- + +## 6. Lessons Learned + +- Memory fragmentation: + `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True` + (must be set before `import torch`) +- Communication library: + NCCL > Gloo > MPI (unless in special environments) +- DDP: must synchronize random seed +- Evaluation: + - eval_mb_size can be larger + - small training batch + gradient accumulation + - disable `model.config.use_cache` +- device = "auto": HuggingFace will automatically distribute different parts of the model based on your GPU memory. 
For large models like 7B on a single 44 GB card: attention + embedding + some FFN layers typically go on GPU, while frozen modules or inactive MoE experts can offload to CPU. This is great for inference but **requires caution during training** — parameters that need gradients must reside on GPU at all times, otherwise each forward/backward pass involves moving parameters back and forth, causing catastrophic communication overhead. Therefore, **device_map=auto is not always safe for training** as it may place trainable layers on CPU, leading to slow or non-functional training. + +--- + +Author: **Yang Lewis** +Non-commercial reproduction must credit the source. +For commercial use, contact the author: **840691168ly@gmail.com** diff --git a/app/docs/ai/foundation-models/datasets/index.en.mdx b/app/docs/ai/foundation-models/datasets/index.en.mdx new file mode 100644 index 00000000..e7f7cf59 --- /dev/null +++ b/app/docs/ai/foundation-models/datasets/index.en.mdx @@ -0,0 +1,327 @@ +--- +title: Dataset Construction +description: "Large model dataset construction: data sources, processing pipelines, quality control" +date: "2025-01-27" +tags: + - dataset-construction + - data-processing + - data-quality + - common-crawl + - data-cleaning +docId: egpawb1yui58yprrsgxn9qj2 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +Data is the foundation of large models, and high-quality datasets directly impact model performance. This section provides a detailed introduction to methods and techniques for constructing large model datasets. + +## Data Sources + +### Web Data + +- **Common Crawl**: large-scale web crawl data + - Covers billions of web pages worldwide + - Rich multilingual content + - Regularly updated data snapshots + +- **Wikipedia**: high-quality encyclopedia data + - Available in multiple languages + - Structured knowledge content + - Continuously updated and maintained + +### Specialized Data + +- **Book corpora**: high-quality text data + - Project Gutenberg open-source books + - Academic publications + - Technical documentation and manuals + +- **Code data**: repositories such as GitHub + - Open-source project code + - Multiple programming languages + - Code comments and documentation + +- **Academic papers**: sources such as arXiv and PubMed + - Latest research findings + - Specialized domain knowledge + - Citation networks + +## Data Processing Pipeline + +### 1. Data Cleaning + +**Text quality filtering**: + +- Remove low-quality content (garbled text, duplicate content) +- Language detection and filtering +- Format standardization +- Encoding normalization + +**Content filtering**: + +- Remove advertisements and spam +- Filter harmful and inappropriate content +- Remove privacy-sensitive information +- Copyright content identification + +### 2. Format Standardization + +**Text normalization**: + +- Unify encoding format (UTF-8) +- Standardize punctuation +- Handle special characters +- Paragraph and line-break conventions + +**Structured processing**: + +- Extract main body text +- Remove HTML tags +- Preserve meaningful formatting information +- Unify document structure + +### 3. 
Deduplication + +**Exact deduplication**: + +- MD5 hash matching +- Identification of perfectly identical content +- Batch deduplication processing + +**Fuzzy deduplication**: + +- MinHash algorithm +- Similarity threshold settings +- Near-duplicate detection +- SimHash fingerprint matching + +**Cross-document deduplication**: + +- Paragraph-level deduplication +- Sentence-level deduplication +- n-gram overlap detection + +### 4. Quality Filtering + +**Statistical metric filtering**: + +- Document length limits +- Vocabulary richness checks +- Linguistic complexity assessment +- Punctuation ratio + +**Language model scoring**: + +- Perplexity evaluation +- Language model scoring +- Readability assessment +- Grammar correctness checking + +### 5. Privacy Protection + +**Personally Identifiable Information (PII) detection**: + +- Email address detection +- Phone number identification +- ID number filtering +- Address information handling + +**Data de-identification**: + +- Sensitive information substitution +- Anonymization processing +- Differential privacy techniques +- Encrypted data storage + +## Data Quality Control + +### Quality Assessment Metrics + +**Content quality**: + +- Information accuracy +- Logical coherence +- Linguistic fluency +- Knowledge depth + +**Diversity metrics**: + +- Topic coverage range +- Linguistic style diversity +- Source diversity +- Temporal span coverage + +**Balance considerations**: + +- Language distribution balance +- Domain knowledge balance +- Viewpoint and stance balance +- Cultural background diversity + +### Quality Assurance Process + +**Automated checks**: + +- Batch quality assessment +- Anomaly detection algorithms +- Statistical analysis reports +- Quality trend monitoring + +**Manual review**: + +- Random sampling inspection +- Expert domain review +- Annotation quality control +- Feedback loop mechanism + +## Special Data Processing + +### Multilingual Data + +**Language detection**: + +- Automatic language identification +- Mixed multilingual processing +- Dialect and variant identification +- Code-switching handling + +**Cross-lingual alignment**: + +- Parallel corpus construction +- Translation quality assessment +- Cultural adaptation + +### Multimodal Data + +**Image-text alignment**: + +- Image-text pairing +- Caption accuracy verification +- Visual content understanding +- Multimodal consistency + +**Structured data**: + +- Tabular data processing +- Knowledge graph integration +- Database content extraction + +## Data Pipeline Technologies + +### Distributed Processing + +**Big data frameworks**: + +- Apache Spark processing +- Hadoop ecosystem +- Distributed storage (HDFS) +- Streaming data processing + +**Parallelization strategies**: + +- Data sharding +- Task scheduling optimization +- Dynamic resource allocation +- Fault recovery mechanisms + +### Data Version Management + +**Version control**: + +- Dataset version tracking +- Change log management +- Rollback mechanism design +- Incremental update support + +**Metadata management**: + +- Data source information logging +- Processing pipeline tracking +- Quality metric monitoring +- Usage statistics analysis + +## Compliance Considerations + +### Laws and Regulations + +**Data compliance**: + +- GDPR privacy protection +- Copyright law requirements +- Regional regulatory compliance +- Industry standard alignment + +**Usage licenses**: + +- Understanding open-source licenses +- Commercial use restrictions +- Derivative work rules +- Attribution requirements + +### Ethical 
Considerations + +**Bias and fairness**: + +- Data bias identification +- Representativeness analysis +- Fairness evaluation metrics +- Bias mitigation strategies + +**Social impact**: + +- Content values review +- Cultural sensitivity considerations +- Social responsibility +- Negative impact assessment + +## Best Practices + +### Data Management + +1. **Establish clear data standards** +2. **Implement automated quality checks** +3. **Maintain data processing transparency** +4. **Regularly update and maintain datasets** +5. **Maintain comprehensive documentation** + +### Recommended Tools + +**Data processing tools**: + +- pandas: Python data processing +- Apache Beam: batch and stream processing +- Dask: parallel computing framework +- Ray: distributed computing platform + +**Quality checking tools**: + +- Great Expectations: data quality framework +- Apache Griffin: data quality monitoring +- Deequ: data quality testing + +## Future Trends + +1. **Increased automation**: smarter data processing pipelines +2. **Real-time data integration**: dynamic data updates and integration +3. **Privacy-preserving techniques**: federated learning and differential privacy +4. **Multimodal fusion**: more complex multimodal data processing +5. **Personalized data**: customized datasets for specific tasks + +## Study Recommendations + +1. **Theory foundation**: master data science and statistics fundamentals +2. **Engineering skills**: proficiency with big data processing tools +3. **Quality awareness**: develop sensitivity to data quality +4. **Compliance awareness**: understand relevant laws and regulations +5. **Practical experience**: participate in real dataset construction projects + +## From UNSW IT-AI Involution Hell Documentation + +- https://huggingface.co/ +- AK https://hf.co/akhaliq +- https://www.modelscope.cn/home +- https://www.kaggle.com/datasets +- UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/index.php +- ImageNet diff --git a/app/docs/ai/foundation-models/deploy-infer/index.en.mdx b/app/docs/ai/foundation-models/deploy-infer/index.en.mdx new file mode 100644 index 00000000..fcd791f4 --- /dev/null +++ b/app/docs/ai/foundation-models/deploy-infer/index.en.mdx @@ -0,0 +1,443 @@ +--- +title: Deployment and Inference +description: "Large model deployment and inference optimization: KV Cache, Flash Attention, quantization, inference frameworks" +date: "2025-01-27" +tags: + - model-deployment + - inference-optimization + - kv-cache + - flash-attention + - quantization + - vllm +docId: z157s85hnz1y37tr28y2a8h2 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +Deployment and inference of large models is the critical step of putting trained models into production. It involves inference optimization, deployment frameworks, service architecture, and more. + +## Inference Optimization Techniques + +### KV Cache + +**Core principle**: cache key-value pairs to avoid redundant computation and accelerate generation. 
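To make the principle concrete, here is a minimal single-head decode-step sketch (a runnable NumPy illustration; the names, shapes, and initialization are assumptions for demonstration, not any framework's API):

```python
import numpy as np

def decode_step(q, k_cache, v_cache):
    """One generation step: the new token's query attends to all cached K/V."""
    scores = k_cache @ q / np.sqrt(q.shape[-1])  # (t,) attention logits
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                     # softmax over cached positions
    return weights @ v_cache                     # (d,) attention output

d = 64
rng = np.random.default_rng(0)
w_q, w_k, w_v = rng.normal(size=(3, d, d)) * 0.02  # toy projection matrices
k_cache = np.empty((0, d))
v_cache = np.empty((0, d))

for _ in range(5):                           # autoregressive decode loop
    x = rng.normal(size=d)                   # hidden state of the newest token
    q = x @ w_q                              # Q is computed fresh each step and discarded
    k_cache = np.vstack([k_cache, x @ w_k])  # K/V are computed once per token...
    v_cache = np.vstack([v_cache, x @ w_v])  # ...and reused on every later step
    out = decode_step(q, k_cache, v_cache)
```

Note that each step touches every cached entry exactly once, which is where the per-token O(n·d) cost below comes from.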
+ +**Implementation**: + +- Store Keys and Values from historical sequences +- New tokens only need to compute the current Query +- Significantly reduces computational complexity +- From O(n²·d) down to O(n·d) + +**Memory management**: + +- Dynamic memory allocation +- Batch processing optimization +- Memory defragmentation +- OOM prevention mechanisms + +### Flash Attention + +**Technical characteristics**: a memory-efficient attention computation algorithm + +**Core optimizations**: + +- Tiled computation strategy +- Memory access optimization +- Reduced IO complexity +- Numerical stability guarantees + +**Performance improvements**: + +- Reduced memory usage +- Faster computation +- Support for longer sequences +- Hardware-friendly design + +### Quantization + +**Quantization methods**: + +- **INT8 quantization**: 8-bit integer representation +- **INT4 quantization**: 4-bit integer representation +- **Mixed precision**: different precision for different layers +- **Dynamic quantization**: runtime quantization + +**Quantization strategies**: + +- Weight quantization +- Activation quantization +- KV Cache quantization +- Gradient quantization + +**Tooling**: + +- PyTorch quantization +- TensorRT quantization +- ONNX quantization +- Custom quantization kernels + +### Parallel Inference + +**Model parallelism**: + +- Tensor parallelism: intra-layer parameter splitting +- Pipeline parallelism: inter-layer pipelining +- Expert parallelism: MoE model expert distribution +- Hybrid parallelism: combining multiple strategies + +**Data parallelism**: + +- Batch parallelism +- Sequence parallelism +- Dynamic batching +- Continuous batching + +## Deployment Frameworks + +### vLLM + +**Highlights**: high-throughput inference engine + +- **PagedAttention**: efficient memory management +- **Continuous batching**: dynamic batch optimization +- **Streaming output**: real-time response support +- **Multi-GPU support**: distributed inference for large models + +**Core technologies**: + +- Memory pool management +- Request scheduling optimization +- KV Cache sharing +- Inference concurrency control + +### TensorRT-LLM + +**Highlights**: NVIDIA-optimized inference framework + +- **Deep optimization**: optimized for NVIDIA GPUs +- **Operator fusion**: automatic operator fusion +- **Multi-precision**: supports FP16/INT8/INT4 +- **Plugin ecosystem**: rich plugin support + +**Optimization techniques**: + +- Graph optimization +- Memory optimization +- Kernel fusion +- Dynamic shape support + +### Text Generation Inference (TGI) + +**Highlights**: HuggingFace inference service + +- **Ease of use**: simple deployment and usage +- **Model support**: broad model compatibility +- **API standard**: standardized API interface +- **Monitoring**: built-in monitoring and logging + +**Features**: + +- Automatic batching +- Streaming responses +- Safety filtering +- Load balancing + +### FastChat + +**Highlights**: chat model deployment framework + +- **Multi-model**: supports various chat models +- **Web interface**: user-friendly UI +- **API service**: RESTful API support +- **Distributed**: multi-node deployment support + +## Service Architecture Design + +### Inference Service Architecture + +**Component design**: + +- Model loader +- Request handler +- Batch scheduler +- Response generator +- Monitoring component + +**Performance optimization**: + +- Asynchronous processing +- Connection pool management +- Caching strategies +- Resource scheduling + +### Load Balancing + +**Strategies**: + +- Round-robin scheduling +- 
Least connections +- Weighted distribution +- Health checks + +**Implementation**: + +- Nginx load balancing +- HAProxy configuration +- Kubernetes Service +- Custom load balancers + +### Scaling Strategies + +**Horizontal scaling**: + +- Instance count adjustment +- Dynamic auto-scaling +- Resource monitoring triggers +- Warm-up mechanisms + +**Vertical scaling**: + +- Resource specification adjustment +- GPU memory expansion +- CPU core increases +- Storage capacity expansion + +## Memory Optimization + +### Memory Management Strategies + +**KV Cache optimization**: + +- Paged storage +- Memory sharing +- Garbage collection +- Defragmentation + +**Model weight optimization**: + +- Weight sharing +- Lazy loading +- Memory mapping +- Compressed storage + +### Memory Monitoring + +**Monitoring metrics**: + +- Memory utilization +- OOM frequency +- Memory fragmentation rate +- GC time statistics + +**Alerting mechanisms**: + +- Threshold alerts +- Trend warnings +- Automated handling +- Failover + +## Inference Performance Optimization + +### Latency Optimization + +**Latency reduction strategies**: + +- Model warm-up +- Batch processing optimization +- Operator fusion +- Hardware acceleration + +**Time to First Token (TTFT)**: + +- Prefill optimization +- Memory pre-allocation +- Model pre-loading +- Cache warm-up + +### Throughput Optimization + +**Increasing throughput**: + +- Batch size tuning +- Concurrent request handling +- Pipeline processing +- Resource utilization improvement + +**Continuous batching**: + +- Dynamic batch adjustment +- Request priority management +- Latency sensitivity tuning +- Fairness guarantees + +### Cost Optimization + +**Compute costs**: + +- Maximize GPU utilization +- Mixed instance usage +- On-demand scaling +- Spot instance usage + +**Storage costs**: + +- Model compression +- Hot/cold data separation +- Cache strategy optimization +- Data lifecycle management + +## Quality Assurance + +### Model Validation + +**Functional testing**: + +- Output quality validation +- Boundary condition testing +- Stress testing +- Regression testing + +**Performance testing**: + +- Latency benchmarking +- Throughput testing +- Concurrency capacity testing +- Stability testing + +### Monitoring System + +**Core metrics**: + +- QPS (queries per second) +- Average response time +- P99 latency +- Error rate +- Resource utilization + +**Monitoring tools**: + +- Prometheus monitoring +- Grafana visualization +- Custom monitoring +- Alert systems + +### A/B Testing + +**Test design**: + +- Traffic splitting +- Metric comparison +- Statistical significance +- Effect evaluation + +**Implementation approaches**: + +- Canary releases +- Blue-green deployments +- Shadow testing +- Progressive rollout + +## Security and Compliance + +### Security Protection + +**Input validation**: + +- Content filtering +- Length limits +- Format checks +- Malicious input detection + +**Output control**: + +- Content moderation +- Sensitive information filtering +- Copyright protection +- Harmful content blocking + +### Privacy Protection + +**Data protection**: + +- Request log desensitization +- User information anonymization +- Encrypted data transmission +- Storage encryption + +**Compliance requirements**: + +- GDPR compliance +- Data localization +- Audit logs +- Access control + +## Fault Handling + +### Common Issues + +**Performance issues**: + +- Out-of-Memory (OOM) +- Low GPU utilization +- Latency spikes +- Throughput drops + +**Stability issues**: + +- Service crashes +- Memory leaks +- 
Network timeouts +- Model anomalies + +### Recovery Strategies + +**Automatic recovery**: + +- Health checks +- Auto-restart +- Failover +- Service degradation + +**Monitoring and alerting**: + +- Real-time monitoring +- Early warning mechanisms +- Automated handling +- Manual intervention + +## Best Practices + +### Deployment Recommendations + +1. **Incremental deployment**: start small and scale gradually +2. **Performance baselines**: establish performance benchmarks and monitoring +3. **Resource planning**: plan compute and storage resources appropriately +4. **Security first**: prioritize security and privacy protection +5. **Complete documentation**: maintain comprehensive deployment documentation + +### Operations Strategies + +1. **Automated operations**: automate as much of the operations pipeline as possible +2. **Monitoring and alerting**: build a comprehensive monitoring and alerting system +3. **Backup and recovery**: establish data backup and recovery strategies +4. **Version management**: standardize the version release process +5. **Incident response**: develop detailed incident handling procedures + +## Future Trends + +1. **Hardware co-design**: deep software-hardware co-optimization +2. **Edge deployment**: model deployment on edge computing devices +3. **Federated inference**: distributed privacy-preserving inference +4. **Adaptive optimization**: intelligent adaptive inference optimization +5. **Green computing**: low-power, environmentally friendly inference techniques + +## Study Recommendations + +1. **Systematic learning**: comprehensive understanding of the inference optimization stack +2. **Hands-on practice**: deploy and optimize inference services yourself +3. **Performance tuning**: deep dive into performance tuning techniques +4. **Framework proficiency**: become proficient with mainstream inference frameworks +5. **Stay current**: track the latest developments in optimization techniques diff --git a/app/docs/ai/foundation-models/evaluation/index.en.mdx b/app/docs/ai/foundation-models/evaluation/index.en.mdx new file mode 100644 index 00000000..42b42c17 --- /dev/null +++ b/app/docs/ai/foundation-models/evaluation/index.en.mdx @@ -0,0 +1,287 @@ +--- +title: Model Evaluation +description: "Large model evaluation system: benchmarks, evaluation metrics, Chinese and English evaluation standards" +date: "2025-01-27" +tags: + - model-evaluation + - benchmark + - mmlu + - c-eval + - evaluation-metrics +docId: lndxpf7luoeqwwde4in23xr1 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +Model evaluation is an essential means of measuring large model performance and capabilities, providing a scientific basis for model improvement and application selection. 
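Mechanically, most of the benchmarks described below reduce to exact-match scoring of model choices against gold answers; a minimal sketch (the data here is made up for illustration, not any benchmark's real format):

```python
def accuracy(predictions, references):
    """Fraction of exact matches between predicted and gold labels."""
    assert len(predictions) == len(references)
    return sum(p == r for p, r in zip(predictions, references)) / len(references)

# Hypothetical multiple-choice results (MMLU-style A/B/C/D labels)
preds = ["A", "C", "B", "D", "C"]
golds = ["A", "C", "D", "D", "B"]
print(f"accuracy = {accuracy(preds, golds):.2f}")  # prints: accuracy = 0.60
```

Real harnesses differ mainly in how they extract the predicted choice (log-likelihood comparison vs. parsing generated text), not in the scoring itself.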
+ +## Benchmark Evaluation System + +### General Capability Benchmarks + +#### MMLU (Massive Multitask Language Understanding) + +- **Scope**: multitask language understanding +- **Question count**: 15,908 multiple-choice questions +- **Subjects**: 57 disciplines from mathematics to history +- **Difficulty range**: from high school to professional level +- **Metric**: accuracy + +#### HellaSwag + +- **Goal**: commonsense reasoning ability +- **Task type**: sentence completion +- **Data source**: real-world scenario descriptions +- **Metric**: accuracy + +#### ARC (AI2 Reasoning Challenge) + +- **Focus**: scientific reasoning ability +- **Question type**: elementary school science multiple-choice +- **Difficulty levels**: Easy and Challenge +- **Characteristic**: requires multi-step reasoning + +#### GSM8K + +- **Scope**: mathematical problem solving +- **Question type**: elementary school math word problems +- **Answer format**: numerical answers +- **Focus**: mathematical reasoning chains + +### Chinese Evaluation Benchmarks + +#### C-Eval + +- **Goal**: comprehensive Chinese evaluation +- **Question count**: 13,948 questions +- **Subject coverage**: 52 subject areas +- **Difficulty range**: from middle school to professional level +- **Characteristic**: aligned with the Chinese education system + +#### CMMLU (Chinese Massive Multitask Language Understanding) + +- **Scope**: Chinese multitask evaluation +- **Question sources**: Chinese exams and textbooks +- **Subject categories**: humanities, social science, STEM, medicine, etc. +- **Metric**: multi-dimensional assessment + +#### AGIEval + +- **Characteristic**: human exam evaluation +- **Data source**: real exam questions +- **Exam types**: college entrance exam, civil service exam, bar exam, etc. +- **Value**: direct comparison with human performance + +### Domain-Specific Benchmarks + +#### HumanEval + +- **Goal**: code generation ability +- **Task type**: function implementation +- **Programming language**: primarily Python +- **Evaluation method**: unit test pass rate + +#### MATH + +- **Scope**: math competition problems +- **Difficulty level**: high school math competition level +- **Question types**: proofs, calculations +- **Evaluation method**: answer correctness + +#### BBH (Big-Bench Hard) + +- **Characteristic**: LLM challenge benchmark +- **Task source**: hard subset of Big-Bench +- **Focus**: reasoning and comprehension +- **Characteristic**: challenging for large models + +## Evaluation Methodology + +### Evaluation Design Principles + +1. **Comprehensiveness**: covers multiple model capabilities +2. **Objectivity**: avoids subjective bias and preference +3. **Reproducibility**: results can be reproduced and verified +4. **Fairness**: fair comparison across different models +5. 
**Practicality**: relevant to real-world application scenarios + +### Evaluation Dimensions + +#### Knowledge Ability + +- **Factual knowledge**: mastery of foundational facts +- **Conceptual understanding**: understanding of abstract concepts +- **Knowledge reasoning**: reasoning based on knowledge +- **Knowledge currency**: awareness of recent developments + +#### Reasoning Ability + +- **Logical reasoning**: deductive and inductive reasoning +- **Mathematical reasoning**: numerical computation and proofs +- **Commonsense reasoning**: everyday common sense +- **Causal reasoning**: understanding causal relationships + +#### Language Ability + +- **Language comprehension**: text understanding and parsing +- **Language generation**: fluent and accurate generation +- **Multilingual**: cross-lingual capabilities +- **Style adaptation**: adapting to different writing styles + +#### Safety Evaluation + +- **Harmful content**: avoidance of harmful content generation +- **Bias detection**: social bias identification +- **Privacy protection**: handling of private information +- **Adversarial robustness**: resistance to adversarial attacks + +## Evaluation Implementation + +### Evaluation Process + +1. **Benchmark selection**: choose appropriate benchmarks based on evaluation goals +2. **Environment setup**: configure evaluation environment and dependencies +3. **Model preparation**: load and configure the model under evaluation +4. **Execute evaluation**: run evaluation scripts and programs +5. **Result analysis**: compile and analyze evaluation results + +### Evaluation Frameworks + +#### OpenCompass + +- **Characteristic**: open-source evaluation framework +- **Support**: multiple models and benchmarks +- **Features**: automated evaluation pipeline +- **Visualization**: result display and comparison + +#### lm-evaluation-harness + +- **Source**: open-sourced by EleutherAI +- **Characteristic**: standardized evaluation interface +- **Support**: wide range of evaluation tasks +- **Ease of use**: simple command-line interface + +#### FlagEval + +- **Source**: BAAI (Beijing Academy of AI) +- **Characteristic**: friendly to Chinese evaluation +- **Coverage**: comprehensive evaluation dimensions +- **Standards**: rigorous scientific evaluation standards + +### Evaluation Environment + +#### Hardware Requirements + +- **GPU**: select based on model size +- **Memory**: sufficient system RAM +- **Storage**: fast SSD storage +- **Network**: stable network connection + +#### Software Environment + +- **Python**: primary programming language +- **PyTorch/TensorFlow**: deep learning frameworks +- **transformers**: model loading library +- **Evaluation tools**: specific evaluation frameworks + +## Result Analysis + +### Performance Metrics + +#### Accuracy Metrics + +- **Accuracy**: overall accuracy rate +- **Top-k accuracy**: accuracy within the top-k predictions +- **F1 score**: harmonic mean of precision and recall +- **BLEU/ROUGE**: text generation quality + +#### Efficiency Metrics + +- **Inference speed**: token generation speed +- **Memory usage**: memory footprint during inference +- **Energy consumption**: inference energy statistics +- **Cost-effectiveness**: performance-to-cost ratio + +### Comparative Analysis + +#### Model Comparison + +- **Same-scale models**: comparison of models with similar parameter counts +- **Different architectures**: comparison across different architectures +- **Development trends**: trends in model capability development +- **Pros and cons analysis**: strengths and 
weaknesses of each model + +#### Capability Analysis + +- **Strength identification**: task areas where the model excels +- **Weakness analysis**: areas where the model falls short +- **Improvement directions**: model optimization suggestions +- **Application recommendations**: suitable use-case scenarios + +### Visualization + +#### Radar Charts + +- Multi-dimensional capability display +- Comparison across different models +- Intuitive capability distribution +- Balance analysis + +#### Heatmaps + +- Fine-grained performance display +- Task dimension analysis +- Performance difference visualization +- Pattern recognition + +## Evaluation Challenges + +### Technical Challenges + +1. **Evaluation cost**: large model evaluation consumes significant resources +2. **Benchmark limitations**: existing benchmarks may lack comprehensiveness +3. **Benchmark contamination detection**: preventing models from gaming benchmarks +4. **Dynamic updates**: benchmarks require continuous updates + +### Methodological Challenges + +1. **Evaluation bias**: benchmarks themselves may contain biases +2. **Cultural differences**: fairness in cross-cultural evaluation +3. **Capability definition**: how to scientifically define and measure capabilities +4. **Ecosystem effects**: the influence of evaluation on model development + +## Future Developments + +### Evaluation Innovation + +1. **Dynamic evaluation**: real-time updated evaluation benchmarks +2. **Interactive evaluation**: multi-turn interactive evaluation modes +3. **Human-AI collaboration**: evaluation involving human experts +4. **Automation**: smarter automated evaluation systems + +### Evaluation Standards + +1. **International standards**: establishing internationally recognized evaluation standards +2. **Industry norms**: developing industry-wide evaluation specifications +3. **Certification systems**: establishing model capability certification +4. **Regulatory alignment**: aligning with regulatory requirements + +## Best Practices + +### Evaluation Strategy + +1. **Multi-dimensional evaluation**: assess comprehensively from multiple dimensions +2. **Benchmark combination**: use multiple benchmarks for cross-validation +3. **Regular evaluation**: establish periodic evaluation mechanisms +4. **Result verification**: multiple evaluation rounds to ensure reliability +5. **Transparency**: publicly disclose evaluation methods and results + +### Applying Results + +1. **Model improvement**: improve models based on evaluation findings +2. **Application guidance**: guide model selection for specific scenarios +3. **Capability matching**: match tasks to model capabilities +4. **Risk assessment**: identify model application risks +5. **Continuous monitoring**: continuously monitor model performance diff --git a/app/docs/ai/foundation-models/finetune/index.en.mdx b/app/docs/ai/foundation-models/finetune/index.en.mdx new file mode 100644 index 00000000..07a02560 --- /dev/null +++ b/app/docs/ai/foundation-models/finetune/index.en.mdx @@ -0,0 +1,348 @@ +--- +title: Model Fine-Tuning +description: "Large model fine-tuning techniques: LoRA, PEFT, fine-tuning frameworks, and other parameter-efficient methods" +date: "2025-01-27" +tags: + - fine-tuning + - lora + - peft + - parameter-efficient + - unsloth +docId: l5nes88zd54y6ao64ufkylz2 +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +Model fine-tuning is the key technique for adapting pre-trained large models to specific tasks. 
This section introduces various efficient fine-tuning methods and practical tips. + +## Fine-Tuning Overview + +### Types of Fine-Tuning + +1. **Full fine-tuning**: updates all model parameters +2. **Parameter-Efficient Fine-Tuning (PEFT)**: trains only a small number of parameters +3. **Instruction tuning**: fine-tuning on instruction-following data +4. **Alignment fine-tuning**: fine-tuning for human preference alignment + +### Fine-Tuning Challenges + +- **Compute resources**: full fine-tuning of large models is expensive +- **Catastrophic forgetting**: fine-tuning may degrade original capabilities +- **Data quality**: high-quality task data is difficult to obtain +- **Hyperparameter sensitivity**: fine-tuning hyperparameter selection is critical + +## Parameter-Efficient Fine-Tuning (PEFT) + +### Core Idea + +Achieve results comparable to full fine-tuning by training only a small number of parameters, dramatically reducing compute and storage costs. + +### Main Methods + +#### LoRA (Low-Rank Adaptation) + +**Principle**: decompose weight updates into the product of low-rank matrices + +``` +W_new = W_original + ΔW = W_original + BA +``` + +where B and A are trainable low-rank matrices. + +**Advantages**: + +- Dramatically reduces the number of trainable parameters +- Keeps pre-trained weights unchanged +- Supports multi-task LoRA merging +- Can be merged back into the original weights at inference time + +#### AdaLoRA (Adaptive LoRA) + +**Improvement**: adaptively adjusts the rank size for different layers + +- Allocates parameter budget based on importance +- Dynamically prunes less important parameters +- Further improves parameter efficiency + +#### Prefix Tuning + +**Principle**: prepends trainable prefix tokens to the input sequence + +- Only trains the prefix portion's parameters +- Keeps the model backbone unchanged +- Suited for generation tasks + +#### P-Tuning v2 + +**Improvement**: a deeper version of Prefix Tuning + +- Adds trainable parameters at every layer +- Better task adaptation capability +- Suitable for both understanding and generation tasks + +#### BitFit + +**Principle**: fine-tunes only bias parameters + +- Extremely few parameters (less than 0.1%) +- Suited for small-scale task fine-tuning +- Extremely low compute cost + +### Method Comparison + +| Method | Parameter Count | Use Case | Advantages | Disadvantages | +| ------------- | --------------- | ---------------- | ----------------------- | ---------------------- | +| LoRA | 0.1–1% | General tasks | Good results, easy impl | Need to choose rank | +| Prefix Tuning | 0.1–3% | Generation tasks | Stable results | Sequence length limits | +| P-Tuning v2 | 0.1–5% | Understanding | Strong adaptability | Slightly more params | +| BitFit | < 0.1% | Simple tasks | Minimal parameters | Limited expressiveness | + +## Fine-Tuning Frameworks and Tools + +### Recommended Frameworks + +#### LLaMA-Factory + +- **Highlights**: comprehensive fine-tuning toolkit +- **Support**: multiple models and fine-tuning methods +- **Ease of use**: web interface and configuration-driven +- **Documentation**: detailed usage tutorials + +#### Hugging Face TRL + +- **Highlights**: officially recommended framework +- **Support**: RL fine-tuning, SFT, DPO +- **Ecosystem**: deeply integrated with transformers +- **Updates**: continuously updated with latest techniques + +#### Swift Framework + +- **Source**: open-sourced by Alibaba +- **Highlights**: Chinese-friendly, supports multimodal +- **Performance**: optimized for domestic hardware +- 
**Community**: active Chinese-language community + +#### X-Tuner Framework + +- **Source**: MMDetection team +- **Highlights**: lightweight, easy to extend +- **Performance**: excellent memory optimization +- **Integration**: integrated with MMX toolset + +### Unsloth — Efficient Fine-Tuning Framework + +- **Project**: [GitHub link](https://github.com/unslothai/unsloth) +- **Highlights**: significant speed improvements (2–5x) +- **Optimization**: 80% reduction in memory usage +- **Support**: mainstream models and methods +- **Ease of use**: simple API interface + +## Fine-Tuning Practical Tips + +### Key Learning Points + +**Understand the underlying principles**: + +- Don't just run scripts — learn the underlying implementation +- Understand the KV Cache mechanism and memory management +- Master the role and implementation of Causal Mask +- Understand gradient computation and backpropagation + +### Data Preparation + +**Data formats**: + +- Instruction-response pair format +- Conversational data format +- Task-specific formats +- Multi-turn dialogue handling + +**Data quality**: + +- Data cleaning and deduplication +- Quality assessment and filtering +- Data balancing and augmentation +- Domain data collection + +### Hyperparameter Tuning + +**Key parameters**: + +- Learning rate: typically smaller than in pre-training +- LoRA rank (r): balance performance and efficiency +- LoRA alpha: controls adaptation strength +- Batch size: adjust based on hardware + +**Training strategies**: + +- Progressive learning rate scheduling +- Early stopping to prevent overfitting +- Gradient accumulation to simulate large batches +- Periodic evaluation and checkpointing + +## Multi-Task Fine-Tuning + +### Task Routing + +**Methods**: + +- Task-specific LoRA modules +- Mixture of Experts (MoE) architecture +- Conditional generation control +- Multi-head output design + +### Modular Design + +**LoRA combinations**: + +- Task-specific LoRA +- Domain-general LoRA +- Capability-enhancement LoRA +- Dynamic combination strategies + +## Advanced Fine-Tuning Techniques + +### Instruction Tuning + +**Data construction**: + +- Diverse instruction templates +- Task description variants +- Few-shot examples +- Negative sample construction + +**Training strategies**: + +- Multi-task mixed training +- Curriculum learning +- Contrastive learning enhancement +- Meta-learning methods + +### Reinforcement Learning Fine-Tuning (RLHF) + +**Process**: + +1. Supervised Fine-Tuning (SFT) +2. Reward model training +3. Reinforcement learning optimization +4. 
Iterative improvement

**Key techniques**:

- PPO algorithm optimization
- Reward model design
- Value function estimation
- Policy gradient computation

### Alignment Fine-Tuning

**Methods**:

- Constitutional AI
- DPO (Direct Preference Optimization)
- Learning from human feedback
- Value alignment

## Evaluation and Analysis

### Evaluation Metrics

**Task performance**:

- Accuracy, F1 score
- BLEU, ROUGE scores
- Human evaluation quality
- Task-specific metrics

**Model capabilities**:

- Preservation of original capabilities
- Adaptation to new tasks
- Generalization performance testing
- Robustness analysis

### Analysis Tools

**Visualization**:

- Loss curve analysis
- Attention weight visualization
- Parameter change tracking
- Performance comparison charts

**Diagnostics**:

- Overfitting detection
- Catastrophic forgetting analysis
- Parameter importance analysis
- Activation pattern analysis

## Deployment and Inference

### Model Merging

**LoRA merging**:

```python
# Merge LoRA weights back into the base model using PEFT's merge_and_unload
from peft import PeftModel

# base_model: a transformers model loaded beforehand; the adapter path is a placeholder
lora_model = PeftModel.from_pretrained(base_model, "path/to/lora-adapter")
merged_model = lora_model.merge_and_unload()
```

**Multi-LoRA switching**:

- Dynamic loading of different LoRAs
- Task-specific routing
- Memory-efficient switching
- Batch processing optimization

### Inference Optimization

**Memory optimization**:

- Quantization techniques
- Gradient checkpointing
- Dynamic batching
- KV Cache optimization

**Speed optimization**:

- Model parallel inference
- Batch processing optimization
- Hardware acceleration
- Compilation optimization

## Best Practices

### Experiment Design

1. **Establish baselines**: start with simple methods
2. **Ablation studies**: validate the contribution of each component
3. **Hyperparameter search**: systematic tuning
4. **Multiple runs**: ensure reproducibility
5. **Detailed logging**: record all experimental details

### Engineering Tips

1. **Progressive training**: from small data to large data
2. **Checkpoint management**: save and restore regularly
3. **Monitoring mechanisms**: real-time training state monitoring
4. **Error handling**: gracefully handle training exceptions
5. **Resource management**: allocate compute resources appropriately

## Future Trends

1. **Automated fine-tuning**: automatic selection of fine-tuning strategies and hyperparameters
2. **Multimodal fine-tuning**: unified fine-tuning for cross-modal tasks
3. **Personalized fine-tuning**: model adaptation to individual users
4. **Federated fine-tuning**: privacy-preserving distributed fine-tuning
5. **Continual learning**: continual adaptation without forgetting

## Study Recommendations

1. **Theory foundation**: deeply understand the mathematical principles of fine-tuning
2. **Hands-on practice**: start with simple tasks
3. **Code reading**: read the source code of excellent frameworks
4. **Experimental comparison**: compare the effectiveness of different methods
5. 
**Community participation**: be active in open-source communities and forums diff --git a/app/docs/ai/foundation-models/foundation-models-lifecycle.en.mdx b/app/docs/ai/foundation-models/foundation-models-lifecycle.en.mdx new file mode 100644 index 00000000..6cffba98 --- /dev/null +++ b/app/docs/ai/foundation-models/foundation-models-lifecycle.en.mdx @@ -0,0 +1,119 @@ +--- +title: Foundation Models +description: "The full lifecycle of foundation models: dataset construction, training, fine-tuning, deployment, and evaluation" +date: "2025-01-27" +tags: + - foundation-models + - llm-lifecycle + - model-development +docId: i88bna4sg5pr4ekhg32drv2i +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +Foundation models are the core of modern AI systems. This section covers the complete technology stack and lifecycle management from dataset construction to deployment and evaluation. + +## Core Components + +### Dataset Construction + +- See: [Dataset Construction](./datasets/) +- Data sourcing and acquisition strategies +- Data cleaning and quality control +- Privacy protection and compliance +- Multimodal data processing techniques + +### Model Training + +- See: [Model Training](./training/) +- Distributed training techniques +- MoE (Mixture of Experts) models +- Model weight merging strategies +- Training optimization and stability + +### Model Fine-Tuning + +- See: [Model Fine-Tuning](./finetune/) +- LoRA (Low-Rank Adaptation) +- PEFT (Parameter-Efficient Fine-Tuning) +- Instruction tuning and alignment +- Fine-tuning frameworks and tools + +### Deployment and Inference + +- See: [Deployment and Inference](./deploy-infer/) +- KV Cache optimization +- Flash Attention acceleration +- Quantization and parallel inference +- Inference framework comparison + +### Model Evaluation + +- See: [Model Evaluation](./evaluation/) +- Benchmark evaluation systems +- Chinese and English evaluation benchmarks +- Evaluation methods and metrics +- Result analysis and application + +### Classic QKV Interview Questions + +- See: [QKV Interview Questions](./qkv-interview/) +- KV Cache working principles +- Attention mechanism details +- Classic interview question breakdowns +- In-depth technical analysis + +## Learning Paths + +### Beginner Track + +1. Theory foundations: Transformer architecture and attention mechanism +2. Data processing: understanding the dataset construction pipeline +3. Fine-tuning practice: mastering LoRA and other parameter-efficient fine-tuning methods +4. Evaluation understanding: familiarity with mainstream benchmarks and metrics + +### Advanced Development + +1. Training optimization: distributed training and MoE +2. Inference acceleration: KV Cache, Flash Attention, etc. +3. Deployment engineering: vLLM, TensorRT, and other inference frameworks +4. Performance tuning: system-level performance analysis and optimization + +### Architecture Design + +1. Architecture trade-offs: pros and cons of different architectures and their scenarios +2. System integration: end-to-end application system design +3. Cost optimization: balancing performance, cost, and resources +4. 
Technology selection: scenario-driven technical solutions + +## Key Concepts + +### Decoder-only Architecture Advantages + +- Attention fit: causal attention naturally suits generation tasks +- Generation adaptation: natively suited for autoregressive language modeling +- Unified framework: multiple tasks unified under text generation + +### KV Cache Core Principles + +- Reuse: reusing historical KV pairs reduces computation +- Complexity reduction: O(n²) → O(n) +- Memory trade-off: trading space for time + +## Technology Trends + +1. Model efficiency: parameter-efficient training and inference optimization +2. Multimodal fusion: unified text/image/audio +3. Long-context handling: support for longer contexts +4. Edge deployment: compression for edge devices +5. Green AI: compute techniques that reduce energy consumption + +## References + +- _Hands-on Large Models_ (Zhihu column) +- _Attention is All You Need_ +- _Language Models are Few-Shot Learners_ + +> Learning tip: The stack is broad and fast-moving — choose your path based on your role and goals; balance theory with practice, and keep up with the frontier. diff --git a/app/docs/ai/foundation-models/qkv-interview/index.en.mdx b/app/docs/ai/foundation-models/qkv-interview/index.en.mdx new file mode 100644 index 00000000..8f249ffc --- /dev/null +++ b/app/docs/ai/foundation-models/qkv-interview/index.en.mdx @@ -0,0 +1,110 @@ +--- +title: Classic QKV Interview Questions +description: In-depth analysis of classic interview questions on the QKV mechanism and KV Cache in Transformers +date: "2025-01-27" +tags: + - interview-questions + - qkv + - kv-cache + - attention-mechanism + - transformer +docId: h7s6nm7h5oqnhhdq9m1mgwwo +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Classic QKV Interview Questions + +The QKV mechanism in Transformers is a hot topic in large model interviews. This section provides an in-depth breakdown of the classic interview questions. + +## Core Interview Questions + +### 1. Why Can KV Be Cached During LLM Inference? + +**Core reason**: the autoregressive generation property means KV pairs can be reused. + +**Detailed breakdown**: + +1. **Eliminate redundant computation**: the Keys and Values of historical sequences would need to be recomputed on every generation step — caching avoids this +2. **Speed up inference**: when generating a new token, only the current token's Query needs to be computed, then attended to the cached KV pairs +3. **Reduce computational complexity**: from O(n²·d) down to O(n·d), where n is sequence length and d is vector dimension +4. **Cross-request reuse**: multiple requests sharing the same prefix can share KV Cache, improving overall system throughput + +### 2. Why Can't Q Be Cached? + +**Key insight**: Q doesn't need to be cached — it's not that it can't be. + +**Reasoning**: + +1. **Dependency difference**: the output for each newly generated token only depends on that token's Q, and that Q is never needed again in subsequent inference steps +2. **No efficiency gain**: caching Q brings no efficiency improvement; each Q is generated based on the preceding sequence and has temporal dependencies +3. **Autoregressive property**: each token generation depends only on all previous tokens — computing Q is itself inherently based on the historical sequence + +### 3. Why Are Three Different Matrices WQ, WK, WV Needed? 
**Function separation**: decompose the attention mechanism into three distinct roles:

- **Query generation (WQ)**: generates "what I'm looking for"
- **Key generation (WK)**: generates "what I am"
- **Value generation (WV)**: generates "what information I contain"

**Mathematical principle**: different linear transformations learn different representation spaces, increasing the model's expressive power and flexibility.

### 4. What Is the Purpose of Multi-Head Attention?

**Core idea**: parallel specialization — different heads learn different types of attention patterns.

**Specific roles**:

1. **Information subspaces**: each head attends to different feature subspaces
2. **Attention diversity**: simultaneously captures multiple types of attention patterns
3. **Positional information**: different heads may focus on different positional relationships
4. **Semantic levels**: different heads attend to different levels of semantic information

### 5. How Is KV Cache Memory Usage Calculated?

**Formula**:

```
KV Cache memory = 2 × batch size × sequence length × num layers × num heads × per-head dim × bytes per element
```

where num heads × per-head dim equals the model's hidden dim, and the factor 2 accounts for storing both K and V.

**Optimization strategies**:

- Quantization: use INT8 or INT4 quantization for KV Cache
- Paging: PagedAttention's paged storage
- Compression: dynamically compress inactive cache entries
- Sharing: KV Cache sharing across multiple requests

## Advanced Technical Questions

### Flash Attention Optimization Principle

- **Memory access optimization**: tiled attention computation reduces data transfer between HBM and SRAM
- **Algorithm improvement**: IO complexity reduced from O(N²) to O(N²d²/M), enabling support for longer sequences

### Impact of Different Precisions on KV Cache

| Precision | Memory Usage | Compute Speed | Precision Loss |
| --------- | ------------ | ------------- | -------------- |
| FP16      | 50%          | 1.5–2x        | Minimal        |
| INT8      | 25%          | 2–3x          | Small          |
| INT4      | 12.5%        | 3–4x          | Moderate       |

## Interview Preparation Tips

### Technical Depth

1. **Understand the principles**: deeply understand the mathematical foundations of the attention mechanism
2. **Implementation details**: understand the concrete implementation of KV Cache
3. **Optimization techniques**: master related optimization techniques
4. **Performance analysis**: be able to analyze memory and compute overhead

### Communication Skills

1. **Structured answers**: follow the order of principle → implementation → optimization
2. **Use examples**: explain abstract concepts with concrete examples
3. **Back with data**: support optimization claims with specific numbers
4. **Comparative analysis**: compare the pros and cons of different approaches
diff --git a/app/docs/ai/foundation-models/training/index.en.mdx b/app/docs/ai/foundation-models/training/index.en.mdx
new file mode 100644
index 00000000..82b82087
--- /dev/null
+++ b/app/docs/ai/foundation-models/training/index.en.mdx
@@ -0,0 +1,293 @@
---
title: Model Training
description: "Large model training techniques: MoE, distributed training, model weight merging, and more"
date: "2025-01-27"
tags:
  - model-training
  - distributed-training
  - moe
  - model-merging
  - training-optimization
docId: jgz0nl0cbd4frj2dg98mdv0x
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T12:00:00Z
translatorAgent: claude-sonnet-4-6
---

Large model training is a complex engineering problem involving distributed computing, memory optimization, training stability, and more. 
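Several of the key techniques listed below (gradient accumulation, mixed precision, gradient clipping) fit into a single training step; here is a minimal PyTorch-style sketch (the toy model, fake data, and hyperparameters are placeholders, and a CUDA device is assumed):

```python
import torch

model = torch.nn.Linear(32, 4).cuda()   # toy model standing in for a real network
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = torch.nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler()    # dynamic loss scaling for FP16 stability
accum_steps = 8                         # gradient accumulation: 8x larger effective batch

loader = [(torch.randn(16, 32).cuda(), torch.randint(0, 4, (16,)).cuda())
          for _ in range(32)]           # fake batches in place of a DataLoader

for step, (inputs, labels) in enumerate(loader):
    with torch.cuda.amp.autocast():     # mixed-precision forward pass
        loss = criterion(model(inputs), labels) / accum_steps
    scaler.scale(loss).backward()       # scaled backward; gradients accumulate
    if (step + 1) % accum_steps == 0:
        scaler.unscale_(optimizer)      # unscale so clipping sees true gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)          # update (skipped automatically on overflow)
        scaler.update()                 # adjust the loss scale
        optimizer.zero_grad(set_to_none=True)
```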
+ +## Training Fundamentals + +### Training Pipeline + +1. **Data preparation**: tokenization, batching, data loading +2. **Model initialization**: weight initialization, architecture configuration +3. **Forward pass**: compute the loss function +4. **Backward pass**: gradient computation and updates +5. **Model saving**: checkpoint saving and recovery + +### Key Techniques + +- **Gradient accumulation**: simulating large-batch training +- **Mixed precision**: FP16/BF16 training acceleration +- **Gradient clipping**: preventing gradient explosion +- **Learning rate scheduling**: optimizing the convergence process + +## Distributed Training + +### Data Parallelism + +- **Principle**: different GPUs process different data batches +- **Implementation**: PyTorch DDP, DeepSpeed +- **Suited for**: scenarios with small models but large data volumes + +### Model Parallelism + +- **Tensor parallelism**: split model layers across different GPUs +- **Pipeline parallelism**: assign model layers sequentially across GPUs +- **Expert parallelism**: expert assignment for MoE models + +### Hybrid Parallelism + +- **3D parallelism**: data + tensor + pipeline parallelism +- **Zero Redundancy Optimizer**: optimizer state sharding +- **Activation recomputation**: trade compute for memory + +## MoE (Mixture of Experts) + +### Core Concepts + +**Sparse activation**: only a subset of expert networks is activated each time, achieving a balance between computational efficiency and model capacity. + +**Routing mechanism**: intelligently routes inputs to appropriate experts + +- Top-k routing: selects the k most relevant experts +- Load balancing: ensures balanced expert utilization +- Noise injection: improves routing robustness + +**Architecture design**: + +- Expert network structure +- Gating network design +- Residual connection strategies + +### Technical Challenges + +1. **Load balancing**: preventing uneven expert utilization +2. **Communication overhead**: cross-device expert calls +3. **Training stability**: routing learning convergence +4. 
**Inference optimization**: sparse model inference acceleration + +## Model Weight Merging + +**Weight merging techniques**: see the full comparison table for weight merging methods + +### Merging Strategies + +**Linear interpolation merging**: + +```python +merged_weight = alpha * weight_1 + (1 - alpha) * weight_2 +``` + +**SLERP (Spherical Linear Interpolation)**: + +- Suited for normalized weights +- Preserves the angular relationship of weight vectors +- Particularly effective for embedding layers + +**Task Arithmetic**: + +- Merging based on task vectors +- Supports combining capabilities from multiple tasks +- Controllable capability transfer + +### Use Cases + +- **Multi-task model fusion**: combining capabilities from different tasks +- **Different training stage integration**: merging weights from different training stages +- **Capability combination optimization**: balancing different capability dimensions + +## Training Optimization Techniques + +### Memory Optimization + +**Gradient checkpointing**: + +- Recompute activation values +- Reduce memory footprint +- Trade-off with computation time + +**Zero Redundancy Optimizer**: + +- ZeRO-1: optimizer state sharding +- ZeRO-2: add gradient sharding +- ZeRO-3: parameter sharding + +**CPU offloading**: + +- Store parameters on CPU +- Dynamically load to GPU +- Extended memory capacity + +### Compute Optimization + +**Operator fusion**: + +- LayerNorm fusion +- Attention operator optimization +- Custom CUDA kernels + +**Compilation optimization**: + +- TorchScript compilation +- TensorRT optimization +- ONNX conversion + +## Training Stability + +### Numerical Stability + +**Loss scaling**: + +- Automatic mixed precision +- Dynamic loss scaling +- Gradient overflow detection + +**Weight initialization**: + +- Xavier/Kaiming initialization +- Hierarchical initialization +- Pre-trained weight loading + +### Training Monitoring + +**Metric monitoring**: + +- Loss curve tracking +- Gradient norm monitoring +- Learning rate changes +- Memory usage + +**Anomaly detection**: + +- NaN/Inf detection +- Gradient explosion monitoring +- Model divergence warnings + +## Large-Scale Training Engineering + +### Hardware Configuration + +**Compute resources**: + +- GPU cluster configuration +- Memory and bandwidth requirements +- Storage system design +- Network topology optimization + +**Environment management**: + +- Docker containerization +- Environment consistency guarantees +- Dependency management +- Version control + +### Experiment Management + +**Hyperparameter search**: + +- Grid search +- Bayesian optimization +- Early stopping +- Resource budget management + +**Experiment tracking**: + +- MLflow experiment logging +- Weights & Biases monitoring +- Experiment result comparison +- Reproducibility guarantees + +## Fault Handling + +### Common Issues + +1. **Out-of-memory**: batch size adjustment, gradient accumulation +2. **Convergence issues**: learning rate adjustment, architecture optimization +3. **Communication failures**: network configuration, node recovery +4. 
**Data issues**: data validation, anomaly handling + +### Recovery Strategies + +**Checkpoint mechanism**: + +- Periodically save model state +- Save optimizer state +- Record random seed +- Resume training progress + +**Fault-tolerant design**: + +- Node failure detection +- Automatic task restart +- Elastic training architecture +- Data integrity checks + +## Performance Optimization + +### System Level + +**I/O optimization**: + +- Data prefetching +- Multi-process data loading +- Memory-mapped files +- SSD storage optimization + +**Communication optimization**: + +- AllReduce algorithm optimization +- Communication topology design +- Bandwidth utilization improvement +- Latency reduction techniques + +### Algorithm Level + +**Training strategies**: + +- Progressive training +- Curriculum learning +- Adversarial training +- Multi-stage training + +**Regularization techniques**: + +- Dropout variants +- Weight decay +- Label smoothing +- Data augmentation + +## Best Practices + +1. **Thorough experiment design**: control variables, ensure reproducibility +2. **Progressive scaling**: validate from small models to large models step by step +3. **Monitoring-driven**: real-time monitoring of training state and resource usage +4. **Documentation**: detailed recording of experiment configurations and results +5. **Team collaboration**: establish good experiment sharing mechanisms + +## Future Directions + +1. **Automated training**: automatic hyperparameter tuning and architecture search +2. **Efficient architectures**: more efficient model architecture design +3. **Hardware co-design**: software-hardware co-optimization +4. **Green AI**: reducing training energy consumption and carbon emissions +5. **Federated learning**: distributed collaborative training paradigm + +## From UNSW IT-AI Involution Hell Documentation + +- SwanLab - AI model training tracking and visualization tool +- Documentation: https://docs.swanlab.cn/guide_cloud/general/what-is-swanlab.html +- Official site: https://swanlab.cn +- GitHub: https://github.com/swanhubx/swanlab diff --git a/app/docs/ai/generative-todo/generative-models-plan.en.mdx b/app/docs/ai/generative-todo/generative-models-plan.en.mdx new file mode 100644 index 00000000..d8cf5697 --- /dev/null +++ b/app/docs/ai/generative-todo/generative-models-plan.en.mdx @@ -0,0 +1,12 @@ +--- +title: Generative Models +description: "Generative models: Diffusion and other generative techniques" +status: todo +docId: nor5ktairygnt4dorqbddo9n +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T12:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +This section aggregates generative model resources (Diffusion, etc.) — content to be added later. 
From 07bd538f3d420deeb4dcbc42ff5d2fae6c7ce692 Mon Sep 17 00:00:00 2001
From: longsizhuo
Date: Wed, 15 Apr 2026 17:45:42 +0000
Subject: [PATCH 11/19] feat(docs): i18n computer-science + jobs + all-projects
 translation complete (23 docs)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Output of translator-cs-jobs:
- computer-science: bidirectional translation
  - data-structures (en→zh, 5 docs): index / array / linked-list
  - frontend (zh→en, 2 docs)
  - cpp_backend (zh→en, 8 docs): mempool / threadpool / compilation series
  - index.mdx (zh→en)
- jobs (zh→en, 5 docs): event-keynote 2 / interview-prep 3
- all-projects (zh→en, 2 docs): ai-town / multimodal-rl

Variable names and APIs inside code blocks are preserved as-is; only comments are translated.
Frontmatter inherits the original docId and carries translatedFrom markers.
---
 app/docs/all-projects/ai-town.en.mdx          |  74 +++
 app/docs/all-projects/multimodal-rl.en.mdx    |  94 ++++
 .../1_Handwritten_threadpool.en.md            | 423 ++++++++++++++++++
 .../2_Handwritten_mempool1.en.md              | 136 ++++++
 .../cpp_backend/easy_compile/1_cpp_libs.en.md | 186 ++++++++
 .../cpp_backend/easy_compile/2_base_gcc.en.md | 154 +++++++
 .../cpp_backend/easy_compile/3_Make.en.md     |  69 +++
 .../cpp_backend/easy_compile/4_CMake.en.md    | 187 ++++++++
 .../cpp_backend/easy_compile/5_vcpkg.en.md    | 150 +++++++
 .../cpp_backend/mempool_simple.en.mdx         | 120 +++++
 .../array/01-static-array.zh.mdx              | 173 +++++++
 .../array/02-dynamic-array.zh.mdx             | 327 ++++++++++++++
 .../data-structures/array/index.zh.mdx        |  84 ++++
 .../data-structures/index.zh.mdx              |  49 ++
 .../data-structures/linked-list/index.zh.mdx  | 258 +++++++++++
 .../frontend/frontend-learning/index.en.mdx   |  93 ++++
 .../computer-science/frontend/index.en.mdx    |  30 ++
 app/docs/computer-science/index.en.mdx        |  75 ++++
 app/docs/jobs/event-keynote/coffee-chat.en.md | 137 ++++++
 .../jobs/event-keynote/event-takeway.en.md    |  30 ++
 .../jobs/interview-prep/interview-tips.en.mdx |  94 ++++
 .../jobs/interview-prep/pre-interview.en.md   |  51 +++
 ...ations-to-get-an-offer-as-a-student.en.mdx | 137 ++++++
 23 files changed, 3131 insertions(+)
 create mode 100644 app/docs/all-projects/ai-town.en.mdx
 create mode 100644 app/docs/all-projects/multimodal-rl.en.mdx
 create mode 100644 app/docs/computer-science/cpp_backend/Handwritten_pool_components/1_Handwritten_threadpool.en.md
 create mode 100644 app/docs/computer-science/cpp_backend/Handwritten_pool_components/2_Handwritten_mempool1.en.md
 create mode 100644 app/docs/computer-science/cpp_backend/easy_compile/1_cpp_libs.en.md
 create mode 100644 app/docs/computer-science/cpp_backend/easy_compile/2_base_gcc.en.md
 create mode 100644 app/docs/computer-science/cpp_backend/easy_compile/3_Make.en.md
 create mode 100644 app/docs/computer-science/cpp_backend/easy_compile/4_CMake.en.md
 create mode 100644 app/docs/computer-science/cpp_backend/easy_compile/5_vcpkg.en.md
 create mode 100644 app/docs/computer-science/cpp_backend/mempool_simple.en.mdx
 create mode 100644 app/docs/computer-science/data-structures/array/01-static-array.zh.mdx
 create mode 100644 app/docs/computer-science/data-structures/array/02-dynamic-array.zh.mdx
 create mode 100644 app/docs/computer-science/data-structures/array/index.zh.mdx
 create mode 100644 app/docs/computer-science/data-structures/index.zh.mdx
 create mode 100644 app/docs/computer-science/data-structures/linked-list/index.zh.mdx
 create mode 100644 app/docs/computer-science/frontend/frontend-learning/index.en.mdx
 create mode 100644 app/docs/computer-science/frontend/index.en.mdx
 create mode 100644 app/docs/computer-science/index.en.mdx
 create mode 
100644 app/docs/jobs/event-keynote/coffee-chat.en.md create mode 100644 app/docs/jobs/event-keynote/event-takeway.en.md create mode 100644 app/docs/jobs/interview-prep/interview-tips.en.mdx create mode 100644 app/docs/jobs/interview-prep/pre-interview.en.md create mode 100644 app/docs/jobs/interview-prep/preparations-to-get-an-offer-as-a-student.en.mdx diff --git a/app/docs/all-projects/ai-town.en.mdx b/app/docs/all-projects/ai-town.en.mdx new file mode 100644 index 00000000..eda40423 --- /dev/null +++ b/app/docs/all-projects/ai-town.en.mdx @@ -0,0 +1,74 @@ +--- +title: AI Town Design Document +description: "" +date: "2025-10-18" +tags: + - ai-project +docId: bkxwg1m9p9rnm8062wsm020w +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# AI Town Design Document + +## 1. Project Overview + +- **Type**: A lightweight simulation + social + quest-based mini-game driven by multi-agent NPCs +- **Core selling points**: NPCs "remember you" and collaborate with each other through dialogue; players can use **community contribution points** (earned by posting, submitting PRs, etc.) to obtain in-game currency and abilities, driving town events +- **Technology foundation**: Godot 4 (Microverse-style) + multi-agent framework (O-R-P-A: Observe → Retrieve → Plan → Act) + local model first (with template fallback) + +## 2. Goals (MVP) + +1. Single map + 3 NPCs (merchant / messenger / editor) + quest board (fetch / relay / check-in) +2. Dialogue with **short-term memory + end-of-day summary** +3. **Minimal community integration**: support entering a "redemption code" to receive coins / action points (future: automatic issuance via webhook) +4. Use points (or redeemed coins) to trigger 2–3 **visible world changes** (discount day / extra quests / expanded dialogue budget) + +## 3. Core Gameplay (Version 1) + +- **Loop**: Accept quest → Dialogue / collaborate with NPCs → Complete to earn coins / AP → Nightly summary generated → Events refresh next day +- **Uses of points / coins** (pick 2–3 to implement first) + - Unlock a **discount day** at the shop (all prices -10%) + - Purchase **action points** (one extra quest per day) + - Purchase **dialogue budget** (3 additional conversation turns with an NPC that day) + - Trigger a **theme-day announcement** (published by the editor NPC; NPC dialogue becomes more active) + +## 4. Open-Source Community Integration (Two Phases) + +### Phase A (MVP) — Redemption Code Verification + +- The community backend issues one-time **redemption codes** (containing point value and expiry); player enters code in-game → server verifies and voids → returns coins / AP +- **Advantage**: No login or account binding required; maximally stable and ready to ship + +### Phase B (Mid-term) — Automatic Issuance via Webhook + +- Posts, PR merges on GitHub / the site trigger a Webhook → write to `pending_rewards` +- Game launch or clicking "Sync" → fetch pending rewards → automatically credited +- Optional: bind Steam / GitHub account for stronger identity verification + +## 5. 
System Architecture (Minimal Modules) + +- **Client (Godot)** + - `Wallet` (authoritative entry point for coins / AP) + - `TaskManager`, `DialogManager`, `MemoryManager`, `CharacterManager` + - `TownEventBus` (broadcasts shop open / midday break / close / theme day) + - `RedeemPanel` (redemption code UI) + +- **Services (can be merged into community backend)** + - `/api/v1/redeem` (one-time verification and voiding) + - (Reserved) `/api/v1/rewards/pending`, `/webhooks/github` + +**Data Flow (MVP)** +Community issues code → Player enters it in-game → `redeem` verifies → Returns coins / AP → `Wallet` credits → `TownEventBus` triggers discount / quest refresh + +## 6. Scoring and Spending (Initial Draft) + +| Action | Community Points Earned | In-game Conversion (Example) | +| ---------------- | ----------------------: | ---------------------------- | +| Post approved | +80 | 80 pts = 400 coins | +| PR merged | +80 | 80 pts = 400 coins | +| Article featured | +50 | 50 pts = 1 "theme day" item | + +> Conversion rates are stored in a config file; events can apply temporary bonuses (e.g., 1.2× on weekends) diff --git a/app/docs/all-projects/multimodal-rl.en.mdx b/app/docs/all-projects/multimodal-rl.en.mdx new file mode 100644 index 00000000..cf0b5b2f --- /dev/null +++ b/app/docs/all-projects/multimodal-rl.en.mdx @@ -0,0 +1,94 @@ +--- +title: Multimodal Reinforcement Learning Project (MVP Goals) +description: Build a lightweight multimodal understanding and generation system that closes the loop from visual perception to language expression, incorporating reinforcement learning and answer-to-image generation. +date: "2025-10-17" +tags: + - projects + - multimodal + - reinforcement-learning + - RLHF +docId: ifwz8sqxqsgjrafa79pycrcm +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Multimodal Group – MVP Specification + +**Project version:** v0.1 +**Repository:** [involutionhell](https://github.com/InvolutionHell/involutionhell) + +--- + +
+## 1. Vision + +Build a lightweight multimodal understanding and generation system that enables the model to interpret images, retrieve relevant information, and produce logically coherent text output. +The goal is to close the full loop from visual perception to language expression, and further develop the ability to explain answers through generated images. + + +## 2. MVP Phase Goals + + +### Phase 1: Basic Multimodal Pipeline + +- Image content recognition (objects, scenes, semantic labels). +- Semantic retrieval (image → text / text → image). +- Generative understanding and text output. +- Model references: CLIP / SigLIP / BLIP-2 / LLaVA / Qwen-VL. + + +### Phase 2: Multimodal Reinforcement Learning + +- Incorporate user feedback and reward signals to optimise model generation and retrieval performance. +- Main directions: + 1. RLHF / DPO fine-tuning to learn user preferences. + 2. Retrieval strategy optimisation based on behavioural data. + 3. Generation quality control and consistency improvement. + +- Goal: give the system the ability to self-improve and adapt to user preferences. + + +### Phase 2.5: Answer-to-Image Generation + +- Automatically generate illustrative images from the model's text answers to aid comprehension. +- Implementation: use Stable Diffusion / SDXL to convert answer text into image prompts. +- Application examples: + - Answer "the process of black hole formation" → generate a structural diagram. + - Explain a scene from a novel → generate a conceptual illustration. + +- Goal: enable the system not only to understand images and answer questions, but also to explain answers through generated images. + + +## 3. System Architecture + +``` +[Frontend] → Upload image / Display results + ↓ +[Backend API] → FastAPI + LangChain + Vector Search + ↓ +[Multimodal Models] → CLIP / BLIP / LLaVA / Qwen-VL + ↓ +[RL Module + Answer-to-Image] (Phase 2 and 2.5) +``` + + +## 4. Milestones + +| Phase | Goal | Deliverables | +| --------- | ------------------------------------- | --------------------------------------------- | +| Phase 1 | Multimodal recognition and generation | Image recognition, retrieval, text generation | +| Phase 2 | Reinforcement learning optimisation | RLHF / DPO, retrieval strategy optimisation | +| Phase 2.5 | Answer-to-image generation | Automatic illustration generation | +| Phase 3 | Scaling and deployment | Web demo and API interface | + + +## 5. Team Responsibilities + +| Module | Owner | +| ----------------------------------------------- | -------- | +| Image recognition and encoding | Member A | +| Semantic retrieval and data processing | Member B | +| Generation module and model integration | Member C | +| Reinforcement learning and visualisation output | Member D | diff --git a/app/docs/computer-science/cpp_backend/Handwritten_pool_components/1_Handwritten_threadpool.en.md b/app/docs/computer-science/cpp_backend/Handwritten_pool_components/1_Handwritten_threadpool.en.md new file mode 100644 index 00000000..79f12d08 --- /dev/null +++ b/app/docs/computer-science/cpp_backend/Handwritten_pool_components/1_Handwritten_threadpool.en.md @@ -0,0 +1,423 @@ +--- +title: Handwritten Thread Pool +description: "" +date: "2025-09-29" +tags: + - tag-one +docId: mnjkrtrs7xk3fq538eqreuge +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Thread Pool + +> The code below uses the single-queue `BlockingQueue`. When using the double-queue version, replace all `BlockingQueue` with `BlockingQueuePro`. 
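
Before walking through the implementation, here is a minimal usage sketch of the interface described below — the `ThreadPool(int)` constructor and `Post` come from the code in this article; the task bodies are illustrative:

```cpp
#include "threadpool.h"
#include <cstdio>

int main() {
  ThreadPool pool(4); // spawn 4 worker threads, all blocked inside Pop()
  for (int i = 0; i < 8; ++i) {
    pool.Post([i] { std::printf("task %d done\n", i); });
  }
  // ~ThreadPool() cancels the queue; workers drain the remaining
  // queued tasks and exit once Pop() returns false
  return 0;
}
```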
+ +## Interface + +**Constructor**: Initializes the blocking queue with a `unique_ptr`, creates `threads_num` threads, and binds each to the `Worker` function. (All Workers block inside `Pop()` at this point.) + +**Post**: Submits a task to the thread pool. + +**Destructor**: Wakes up all consumer threads. Workers that find no tasks exit. This has nothing to do with producers. + +```cpp +#pragma once + +#include +#include +#include + +template +class BlockingQueue; + +class ThreadPool { +public: + // initialize the thread pool + explicit ThreadPool(int threads_num); + + // stop the thread pool + ~ThreadPool(); + + // submit a task to the thread pool + void Post(std::function task); + +private: + // the loop function each thread runs + void Worker(); + // task queue + std::unique_ptr>> task_queue_; + // holds all thread objects; each thread is bound to Worker() + std::vector workers_; +}; +``` + +```cpp +#include "blockingqueue.h" +#include +#include "threadpool.h" + +// Creates a blocking queue and spawns threads_num threads, each bound to Worker(). +ThreadPool::ThreadPool(int threads_num) { + task_queue_ = std::make_unique>>(); + for (size_t i = 0; i < threads_num; ++i) { + workers_.emplace_back([this] {Worker();}); +// The lambda [this] { Worker(); } captures this, so each thread can call the current object's Worker. + } +} + +// Stop the thread pool +// Cancel the queue and wake up all blocked threads +ThreadPool::~ThreadPool() { + task_queue_->Cancel(); + for(auto &worker : workers_) { + if (worker.joinable()) + worker.join(); + } +} + +// Submit a task +void ThreadPool::Post(std::function task) { + task_queue_->Push(task); +} + +// Fetch a task from task_queue_ and execute it +void ThreadPool::Worker() { + while (true) { + std::function task; + // blocking is implemented inside Pop + if (!task_queue_->Pop(task)) { + break; + } + task(); + } +} +``` + +# Blocking Queue (Single-Queue Version) + +![Design](https://cdn.nlark.com/yuque/0/2025/jpeg/43055607/1758722093302-3845f815-ddbc-4bee-a789-de63daa92cd1.jpeg) + +**Single-queue maintenance:** + +1. `nonblock_` (bool): non-blocking flag. When `true`, the queue does not block. Default is blocking (false) at construction. +2. `queue_` (`std::queue`): the underlying storage container. +3. `mutex_` (`std::mutex`): mutex for thread-safe access. +4. `not_empty_` (`std::condition_variable`): used for producer-consumer synchronization. + +```cpp +template +class BlockingQueue { +public: + BlockingQueue(bool nonblock = false) : nonblock_(nonblock) { } + // enqueue + void Push(const T &value) { + // lock_guard automatically locks/unlocks (locks on construction, unlocks on destruction) + std::lock_guard lock(mutex_); + // push element into queue + queue_.push(value); + // notify one waiting thread: the queue is not empty, a task is available + not_empty_.notify_one(); + } + // normal pop: element is returned + // exceptional pop: no element returned + bool Pop(T &value) { + // lock — if already locked by another thread, this thread blocks here + // condition_variable::wait requires unique_lock + std::unique_lock lock(mutex_); + + // The main purpose of this line is to ensure safe dequeue. + // We only proceed when the queue is non-empty. + // But what if we only check queue emptiness and Cancel() is called? + // After Cancel(), we want consumers to exit, but they'd still block here + // because they don't know whether to stop. + // So we use nonblock_ to signal consumers to stop. 
+ // + // Proceed when: queue is non-empty OR Cancel() has been called + // -> predicate is true -> continue + // Block when: queue is empty AND Cancel() has NOT been called + // -> predicate is false -> auto-unlock mutex, yield CPU, block here + not_empty_.wait(lock, [this]{ return !queue_.empty() || nonblock_; }); + if (queue_.empty()) return false; // consumer thread exits + + value = queue_.front(); + queue_.pop(); + return true; + } + + // unblock all threads waiting on this queue + void Cancel() { + // auto lock/unlock + std::lock_guard lock(mutex_); + // tell consumers to stop + nonblock_ = true; + // wake up all threads blocked in wait (each woken thread re-evaluates the predicate) + not_empty_.notify_all(); + } + +private: + bool nonblock_; + std::queue queue_; + std::mutex mutex_; + std::condition_variable not_empty_; +}; +``` + +# Blocking Queue (Double-Queue Version) + +![Design](https://cdn.nlark.com/yuque/0/2025/jpeg/43055607/1759131100901-946e59ae-cd19-4546-aa9d-a9ee658f0b5a.jpeg) + +In the single-queue version, both producers and consumers compete for the same lock. + +Double-queue design: + +- `prod_queue_`: the queue producers write to (protected by `prod_mutex_`). +- `cons_queue_`: the queue consumers read from (protected by `cons_mutex_`). +- When the consumer queue is empty, `SwapQueue_()` **swaps** the two queues, enabling batch transfer. + +Benefits: + +- **Reduced lock contention**: producers and consumers mostly do not compete for the same lock. +- **Higher throughput**: a single swap lets consumers retrieve data in bulk, reducing frequent locking. + +```cpp +template +class BlockingQueuePro { +public: + BlockingQueuePro(bool nonblock = false) : nonblock_(nonblock) {} + + void Push(const T &value) { + std::lock_guard lock(prod_mutex_); + prod_queue_.push(value); + not_empty_.notify_one(); + } + + bool Pop(T &value) { + std::unique_lock lock(cons_mutex_); + // auto-trigger swap when consumer queue is empty; if still empty after swap, return false + // no need for .wait here + if (cons_queue_.empty() && SwapQueue_() == 0) { + return false; + } + value = cons_queue_.front(); + cons_queue_.pop(); + return true; + } + + void Cancel() { + std::lock_guard lock(prod_mutex_); + nonblock_ = true; + not_empty_.notify_all(); + } + +private: + int SwapQueue_() { + std::unique_lock lock(prod_mutex_); + // block when producer queue is empty and not cancelled + // proceed when cancelled regardless of queue state + not_empty_.wait(lock, [this] {return !prod_queue_.empty() || nonblock_; }); + std::swap(prod_queue_, cons_queue_); + // return the number of items now in the consumer queue + // =0 only when producer queue was empty and cancellation was signalled + return cons_queue_.size(); + } + + bool nonblock_; + std::queue prod_queue_; + std::queue cons_queue_; + std::mutex prod_mutex_; + std::mutex cons_mutex_; + std::condition_variable not_empty_; +}; +``` + +# Benchmark + +The double-queue version is theoretically faster due to reduced lock contention. The experiment below confirms this. + +Setup: 4 producer threads, each submitting 25,000 tasks (each task runs a 1,000-iteration loop). The consumer thread count equals the optimal thread count for the current CPU (`std::thread::hardware_concurrency()`). In this WSL environment, that is 16. + +Both `single` (single-queue) and `double` (double-queue) versions are benchmarked, outputting total elapsed time and QPS. 

```cpp
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

std::atomic<int> task_counter{0};
int main() {
  const int num_producers = 4;
  const int num_tasks_per_producer = 25000; // 100,000 tasks total
  const int num_threads_in_pool = std::thread::hardware_concurrency();

  // For testing, the single and double versions are subclasses of ThreadPoolBase
  // See appendix for implementation details
  ThreadPoolSingle pool(num_threads_in_pool);

  auto start = std::chrono::high_resolution_clock::now();

  std::vector<std::thread> producers;
  // 4 producers start working
  for (int i = 0; i < num_producers; ++i) {
    producers.emplace_back(Producer, std::ref(pool), i, num_tasks_per_producer);
  }
  for (auto& p : producers) {
    p.join();
  }

  // main thread busy-waits; using std::condition_variable to wait for wakeup would be more efficient
  while (task_counter < num_producers * num_tasks_per_producer) {
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
  }

  auto end = std::chrono::high_resolution_clock::now();
  double elapsed = std::chrono::duration<double>(end - start).count();

  int total_tasks = num_producers * num_tasks_per_producer;
  std::cout << "[Single Queue] Total: " << total_tasks
            << " tasks. Time: " << elapsed
            << " seconds. QPS = " << total_tasks / elapsed << std::endl;
}
```

```cpp
void Task(int id) {
  volatile long sum = 0;
  for (int i = 0; i < 1000; ++i) {
    sum += i;
  }
  task_counter++;
}

void Producer(ThreadPoolSingle& pool, int producer_id, int num_tasks) {
  for (int i = 0; i < num_tasks; ++i) {
    int task_id = producer_id * 100000 + i;
    pool.Post([task_id]() { Task(task_id); });
  }
}
```

Results:
![](https://cdn.nlark.com/yuque/0/2025/png/43055607/1758958199070-807c1517-f594-4617-bf88-3f8228a66594.png)

After reducing task count and main-thread poll interval by 10x (tasks per producer: 2,500, poll interval: 5 ms) — the original completed too fast and the main thread wait time distorted results:

![](https://cdn.nlark.com/yuque/0/2025/png/43055607/1758958396516-f9017fa6-3732-4c52-b585-ed44e7c8b3ef.png)

Averaging across runs would be more rigorous, but the gap is large enough that measurement error does not affect the conclusion.
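
As the comment in `main` notes, the busy-wait can be replaced with a `std::condition_variable` signalled by the last finished task. A minimal sketch of that variant (the `done_mutex` / `done_cv` names are illustrative, not from the benchmark code):

```cpp
#include <atomic>
#include <condition_variable>
#include <mutex>

std::atomic<int> task_counter{0};
std::mutex done_mutex;
std::condition_variable done_cv;
const int kTotalTasks = 4 * 25000;

void Task(int id) {
  volatile long sum = 0;
  for (int i = 0; i < 1000; ++i) sum += i;
  // the task that completes the batch wakes the main thread
  if (++task_counter == kTotalTasks) {
    std::lock_guard<std::mutex> lock(done_mutex);
    done_cv.notify_one();
  }
}

// in main(), instead of the sleep loop:
// std::unique_lock<std::mutex> lock(done_mutex);
// done_cv.wait(lock, [] { return task_counter >= kTotalTasks; });
```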

# Appendix

Modified single-queue and double-queue versions

```cpp
#pragma once

#include <functional>
#include <memory>
#include <thread>
#include <vector>
#include "blockingqueue.h"
#include "blockingqueuepro.h"

// forward declarations
// blockingqueue can only be used as pointer or reference

// template <typename T>
// class BlockingQueue;
// template <typename T>
// class BlockingQueuePro;

// class ThreadPool {
// public:
//   // initialize the thread pool
//   explicit ThreadPool(int threads_num);

//   // stop the thread pool
//   ~ThreadPool();

//   // submit a task to the thread pool
//   void Post(std::function<void()> task);

// private:
//   void Worker();
//   std::unique_ptr<BlockingQueue<std::function<void()>>> task_queue_;
//   std::vector<std::thread> workers_;
// };


class ThreadPoolBase {
public:
  explicit ThreadPoolBase(int threads_num) : threads_num_(threads_num) {}
  virtual ~ThreadPoolBase() = default;

  virtual void Post(std::function<void()> task) = 0;

protected:
  int threads_num_;
  std::vector<std::thread> workers_;
};

class ThreadPoolSingle : public ThreadPoolBase {
public:
  explicit ThreadPoolSingle(int threads_num)
    : ThreadPoolBase(threads_num),
      task_queue_(std::make_unique<BlockingQueue<std::function<void()>>>()) {
    for (int i = 0; i < threads_num_; ++i) {
      workers_.emplace_back([this] { Worker(); });
    }
  }

  ~ThreadPoolSingle() {
    task_queue_->Cancel();
    for (auto &w : workers_) {
      if (w.joinable()) w.join();
    }
  }

  void Post(std::function<void()> task) override {
    task_queue_->Push(task);
  }

private:
  void Worker() {
    while (true) {
      std::function<void()> task;
      if (!task_queue_->Pop(task)) break;
      task();
    }
  }

  std::unique_ptr<BlockingQueue<std::function<void()>>> task_queue_;
};

class ThreadPoolDouble : public ThreadPoolBase {
public:
  explicit ThreadPoolDouble(int threads_num)
    : ThreadPoolBase(threads_num),
      task_queue_(std::make_unique<BlockingQueuePro<std::function<void()>>>()) {
    for (int i = 0; i < threads_num_; ++i) {
      workers_.emplace_back([this] { Worker(); });
    }
  }

  ~ThreadPoolDouble() {
    task_queue_->Cancel();
    for (auto &w : workers_) {
      if (w.joinable()) w.join();
    }
  }

  void Post(std::function<void()> task) override {
    task_queue_->Push(task);
  }

private:
  void Worker() {
    while (true) {
      std::function<void()> task;
      if (!task_queue_->Pop(task)) break;
      task();
    }
  }

  std::unique_ptr<BlockingQueuePro<std::function<void()>>> task_queue_;
};
```
diff --git a/app/docs/computer-science/cpp_backend/Handwritten_pool_components/2_Handwritten_mempool1.en.md b/app/docs/computer-science/cpp_backend/Handwritten_pool_components/2_Handwritten_mempool1.en.md
new file mode 100644
index 00000000..8e0db091
--- /dev/null
+++ b/app/docs/computer-science/cpp_backend/Handwritten_pool_components/2_Handwritten_mempool1.en.md
@@ -0,0 +1,136 @@
---
title: Handwritten Fixed-Size Memory Pool
description: ""
date: "2025-09-29"
tags:
  - tag-one
docId: xgxqqvglxyauoeh8eye7lzu6
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T08:00:00Z
translatorAgent: claude-sonnet-4-6
---

# Handwritten Fixed-Size Memory Pool

## Design Diagram

![Design](https://cdn.nlark.com/yuque/0/2025/jpeg/43055607/1758718719250-e6f52459-0f73-493b-8294-7b8f931da054.jpeg)

## Code

### Struct Definitions

#### `mempool_s` — struct that manages the memory pool

```c
typedef struct mempool_s {
    int blocksize; // size of each memory block
    int freecount; // number of remaining free blocks
    char *free_ptr; // pointer to the next free block
    char *mem; // head pointer of the entire memory pool
} mempool_t;
```

### Public Interface

#### `memp_create`: Create the memory pool

```c
int memp_create(mempool_t *m, int block_size) {

    if (!m) return -1;

    // 1. 
initialize these two simple ints + m->blocksize = block_size; + m->freecount = MEM_PAGE_SIZE / block_size; + + // 2. allocate space for the entire pool and initialize m->mem + m->mem = (char *)malloc(MEM_PAGE_SIZE); + if (!m->mem) { // allocation failed (not enough free memory) + return -2; + } + // zero-initialize the allocated space + memset(m->mem, 0, MEM_PAGE_SIZE); + + // 3. initialize free_ptr + m->free_ptr = m->mem; + + // initialize the "next pointer" inside each block + int i = 0; + char *ptr = m->mem; + for (i = 0;i < m->freecount;i ++) { + + *(char **)ptr = ptr + block_size; + ptr = ptr + block_size; + } + // the last block's "next_ptr" points to NULL + *(char **)ptr = NULL; + return 0; +} +``` + +#### `memp_alloc`: Allocate a block + +```c +void *memp_alloc(mempool_t *m) { + // pool is full + if (!m || m->freecount == 0) return NULL; + // 1. get the next free block as the return value + void *ptr = m->free_ptr; + // 2. update free_ptr + m->free_ptr = *(char **)ptr; + // 3. update freecount + m->freecount --; + + return ptr; +} +``` + +#### `memp_free`: Free a specific block + +```c +void memp_free(mempool_t *m, void *ptr) { + // equivalent to: ptr->next = m->free_ptr + // insert the freed block at the head of the free list (head insertion) + *(char **)ptr = m->free_ptr; + // update free_ptr (the head of the free block linked list) + m->free_ptr = (char *)ptr; + // update freecount + m->freecount ++; +} +``` + +#### `memp_destory`: Destroy the entire memory pool + +```c +void memp_destory(mempool_t *m) { + if (!m) return ; + // free the entire pool in one call — the pool was malloc'd as a whole, not block by block + free(m->mem); +} +``` + +## Usage Example + +```c +int main() { + mempool_t m; + memp_create(&m, 32); + + void *p1 = memp_alloc(&m); + printf("memp_alloc : %p\n", p1); + + void *p2 = memp_alloc(&m); + printf("memp_alloc : %p\n", p2); + + void *p3 = memp_alloc(&m); + printf("memp_alloc : %p\n", p3); + + memp_free(&m, p2); +} +``` + +Output: each block is exactly 32 bytes apart, as expected. + +![](https://cdn.nlark.com/yuque/0/2025/png/43055607/1759069995143-4548da88-8c23-463e-b9e7-0f7d8978f03b.png) diff --git a/app/docs/computer-science/cpp_backend/easy_compile/1_cpp_libs.en.md b/app/docs/computer-science/cpp_backend/easy_compile/1_cpp_libs.en.md new file mode 100644 index 00000000..366ae5c5 --- /dev/null +++ b/app/docs/computer-science/cpp_backend/easy_compile/1_cpp_libs.en.md @@ -0,0 +1,186 @@ +--- +title: C++ Libraries on Linux/Windows +description: "" +date: "2025-09-29" +tags: + - tag-one +docId: totx4pej5lhyt1nl4anwhakj +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# C++ Libraries on Linux/Windows + +## 1. What Does a Library Look Like? + +Every library has its own source format, but all libraries share: + +1. `.hpp`/`.h` (located under `include/` or the root directory): header files required for linking — these are mandatory. +2. `.cpp`: implements the logic declared in the headers. Some libraries are called **header-only libraries** (logic implemented directly in headers — no `.cpp`, no need to link a static or shared library). + +- On Linux: `XXX.so` (shared object / dynamic library) or `XXX.a` (static library) +- On Windows: `XXX.lib` (static library) or `XXX.dll` (dynamic library) + +## 2. How to Obtain a Library? + +> Unless otherwise noted, the environment is Linux (specifically Ubuntu). + +### Downloading a Third-Party Library + +#### Windows + +1. 
**Manually compile and install from source**: download or `git clone` a zip/7z archive, extract the source, and compile to produce the required library.
2. **Use a language-specific package manager** (C++: vcpkg; Python: pip; Java: Maven). For example, vcpkg downloads packages to `vcpkg/installed/` by default.

> A brief introduction to vcpkg:
>
> - **vcpkg** is Microsoft's open-source cross-platform C/C++ package manager.
> - Supports **Windows / Linux / macOS**, though it was originally developed for the Windows ecosystem.
> - Its main purpose is to simplify obtaining third-party C/C++ libraries and their complex build configurations.

#### Linux

1. **Compile and install from source**

> Some libraries can only be installed from source, such as the latest versions of gRPC and Protobuf.
>
> The typical workflow is: clone → run `make` or a one-click script like `build.sh`.

2. **Use the system package manager**

> `apt`, `yum`, `dnf`, etc.

3. **Use a language-specific package manager**

> vcpkg, conan, etc.

### Writing Your Own Library

After writing your `.cpp` and `.h` files, you can compile them into a static or dynamic library.

#### Package as a Static Library

```bash
g++ -c mylib.cpp -o mylib.o
ar rcs my_static_lib.a mylib.o // not recommended
// In practice, prefix the output name with "lib".
// This is more than a convention.
// When using -lmylib, the linker automatically prepends "lib" when searching,
// looking for libmylib.a or .so.
// Without the "lib" prefix, you must specify the full path (the linker can't find it automatically).
ar rcs libmy_static_lib.a mylib.o // recommended
```

#### Package as a Dynamic Library

```bash
// Simple project: one-step compilation
g++ -shared -o mylib.dll mylib.cpp
// Complex project: first compile to .o, then convert to .dll
g++ -c -fPIC mylib.cpp -o mylib.o
g++ -shared -o my_dynamic_lib.dll mylib.o
```

`-fPIC` generates Position Independent Code, which is required for dynamic libraries.

Following the steps above, you get `my_dynamic_lib.dll` (dynamic library) and `my_static_lib.a` (static library).

## 3. Where Are Downloaded Libraries Stored?

#### Python libraries may be located at

- `venv/` in the project root (a virtual environment created by `venv`; the folder name is user-defined)
- A specific environment folder under the conda directory (the conda base path is set when conda is installed; all conda environments are stored there, with the environment name as a subdirectory)

#### Frontend libraries may be located at

- `node_modules/` in the project root (created and managed by npm)

#### Java Spring projects (Maven) may be located at

- Cached at `~/.m2/repository/` (the local Maven repository, same on Windows and Linux)

#### C++ libraries (Linux)

- System built-in libraries: `/lib`, `/lib64`, `/usr/lib`, `/usr/lib64`
- apt/yum/dnf/pacman package manager libraries: `/usr/lib/x86_64-linux-gnu` (library files), `/usr/include` (headers)
- Self-compiled: `/usr/local/lib`, `/usr/local/include`
- Package managers (vcpkg/conan): dedicated paths under the user's home directory
  - vcpkg: `~/vcpkg/installed/<triplet>/lib`
  - conan: `~/.conan/data/<package>/<version>/...`
  - Custom placement: `~/lib`, `~/include`

## 4. How to Use a Library?

> Using a library requires two steps: **link the headers** and **link the library implementation**.

### Link Headers

> Without `-I` (uppercase i), the compiler searches the current file's directory by default.

1. 
Keep library headers in place and hard-code each header directory individually:

```bash
g++ -I path/to/lib1_headers -I path/to/lib2_headers ... -o output main.cpp
```

2. Consolidate headers into an `include/` directory under the project, then specify that directory once (recommended for your own libraries; avoid moving third-party headers since they may have other dependencies):

```bash
g++ -I include/ -o output main.cpp
```

(Note: the include directory specified with `-I` is combined with `#include "path/to/lib.h"` in code to form the full header path.)

> If you find pure command-line compilation tedious, writing this into `CMakeLists.txt` with CMake is much more convenient.

### Link Library Implementation

> **For non-header-only libraries, you also need to link the library implementation — either dynamic or static.**

#### Link a Dynamic Library

```bash
g++ myapp.cpp -L /path/to/library -l mylib
```

At runtime, it is recommended to place `.dll` files in the same directory as the executable. The dynamic library search order is:

1. Windows (highest to lowest priority):
   - The program's current directory (usually where the `.exe` resides).
   - Windows system directories (e.g., `C:\Windows\System32`).
   - The current user's `AppData` folder.

2. Linux (highest to lowest priority):
   - `LD_LIBRARY_PATH` environment variable (set at runtime)
   - `rpath` / `runpath` embedded in the executable
   - System cache `/etc/ld.so.cache`
   - **System default directories (most common)**: `/lib`, `/usr/lib`, `/usr/local/lib`, `/lib64` (64-bit systems). These are hard-coded into the dynamic linker.

> You can also: 1. Set the `PATH` environment variable. 2. Use `LoadLibrary("D:\\libs\\mylib.dll")` in code to load a DLL explicitly.

#### Link a Static Library

```bash
g++ main.cpp -L /path/to/lib -l<library_name>
```

Note: during compilation of a static library, only the availability of header files is checked — the compiler does not verify whether third-party libraries exist or are correctly linked. This is because the compilation stage only generates object files (`.o`) and packages them into a `.a` file; linking does not happen yet. A C++ program build can therefore be broken into three stages:

- **Compiling the static library**:
  - Only include headers (`include_directories`) — no need to specify third-party `.a` or `.so` files.
- **Compiling the main program**:
  - Only include the static library's headers.
  - No need to specify third-party `.a` or `.so` files.
- **Linking the main program**:
  - Must explicitly bring in the static library and any third-party libraries it depends on (e.g., `-lthirdparty`).

**Common compiler flags** (command-line compilation):

1. `-I` (uppercase i) + header directory: tells the compiler where to find headers (required)
2. `-L` + library directory: specifies where library files are located
3. `-l` (lowercase L) + library name: specifies the library to link; searches for `.so` or `.a` (Windows: `.dll` and `.lib`); no suffix needed; prefers dynamic libraries

The difference between `-L` and `-l`: `-L` specifies the path where the library lives; `-l` specifies the library name within that path.
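
Putting the flags together, a minimal end-to-end sketch (the file and library names are illustrative): package a static library, then compile and link a program against it:

```bash
# 1. compile the library and archive it (note the lib prefix)
g++ -c mylib.cpp -o mylib.o
ar rcs libmylib.a mylib.o

# 2. compile main.cpp: -I points at the headers, -L at the library
#    directory, -l at the library name (lib prefix and suffix dropped)
g++ main.cpp -I include/ -L . -lmylib -o myapp
```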
diff --git a/app/docs/computer-science/cpp_backend/easy_compile/2_base_gcc.en.md b/app/docs/computer-science/cpp_backend/easy_compile/2_base_gcc.en.md
new file mode 100644
index 00000000..c3ff693d
--- /dev/null
+++ b/app/docs/computer-science/cpp_backend/easy_compile/2_base_gcc.en.md
@@ -0,0 +1,154 @@
---
title: GCC/G++ Basics
description: ""
date: "2025-09-29"
tags:
  - tag-one
docId: kyu85av71b4n07hbdycbhvj9
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T08:00:00Z
translatorAgent: claude-sonnet-4-6
---

# GCC/G++ Basics

## Getting Started with g++

### Installing GCC/G++ (Linux)

Install using your distribution's package manager.

```bash
sudo apt update
sudo apt install build-essential -y
// check versions
gcc --version
g++ --version
```

```bash
// CentOS / RHEL 7:
sudo yum groupinstall "Development Tools" -y
// CentOS Stream 8 / RHEL 8+ / Fedora:
sudo dnf groupinstall "Development Tools" -y
```

### Installing GCC/G++ (Windows)

Download (URLs below) and add to your `PATH` environment variable so `gcc`/`g++` commands work from any directory.

1. MinGW

[https://osdn.net/projects/mingw/downloads/68260/mingw-get-setup.exe/](https://osdn.net/projects/mingw/downloads/68260/mingw-get-setup.exe/)

[https://www.mingw-w64.org/downloads/](https://www.mingw-w64.org/downloads/) (recommended)

**A gotcha with MinGW and the `<thread>` standard library:**

Using threads with plain MinGW 9.2.0 requires these extra steps:

1. Download additional header files from this repository: [https://github.com/meganz/mingw-std-threads](https://github.com/meganz/mingw-std-threads)

![](https://cdn.nlark.com/yuque/0/2025/png/43055607/1759049314232-221dc93b-c560-4036-b049-db786935066f.png)

Place those header files into MinGW's `include/` directory.

2. In your code, change `#include <thread>` to `#include <mingw.thread.h>` (as documented in the repository's README).
3. If compiling from the command line, add the flag `-D_WIN32_WINNT=0x0501` to tell the compiler you are targeting Windows XP or later. (This may only be necessary for the win32 variant — the mingw-win64 version might not require it.)

The problematic version I used:

![](https://cdn.nlark.com/yuque/0/2025/png/43055607/1759049360158-4e44b580-0b41-4c64-8266-3a2bf893aa12.png)

After switching to w64devkit:

![](https://cdn.nlark.com/yuque/0/2025/png/43055607/1759049365783-54107cad-7f77-497a-a679-1cc80f2c5095.png)

Also note: plain MinGW requires you to separately install `mingw32-make` via the MinGW Installer (`mingw-get.exe`).

### Basic g++ Usage Example

Create a text file (`test.txt`), rename its extension to `.cpp` (marking it as a C++ source file), open it with Notepad or VS Code, and write:

```cpp
int main(){
    return 0;
}
```

Save and close the file.

Open a terminal (cmd) in the directory containing `test.cpp` and run:

```bash
g++ test.cpp
```

An `a.exe` executable appears in the current directory. Since no output name was specified, the compiler uses the default name `a`.

To specify the output file name with `-o`:

```bash
g++ -o b test.cpp
```

This produces `b.exe` in the current directory.

`-o` stands for output; the token immediately following it is the output filename. You can also reorder the arguments:

```bash
g++ test.cpp -o b
```

(Note: the `-o` flag and its value must stay together — do not separate them.)
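
The same pattern extends to several source files at once — for example (the file names are illustrative):

```bash
g++ main.cpp utils.cpp -o app   # compile both files and link them into app
```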
+ +C/C++ compilation consists of four stages (the commands above show the all-in-one shortcut): + +Preprocessing → Compilation → Assembly → Linking + +| Stage | Input | Output & Extension | Flag (abbreviation meaning) | +| ------------- | ----------- | ------------------------------------- | --------------------------- | +| Preprocessing | `.cpp`/`.h` | Preprocessed file `.i` (Intermediate) | `-E` (Expansion) | +| Compilation | `.i` | Assembly code `.s` | `-S` (Source) | +| Assembly | `.s` | Object file `.o` | `-c` (Compile) | +| Linking | `.o` | Executable `.exe` or no extension | plain `g++` | + +**Preprocessing stage: `.cpp` → `.i`** +Handles `#include` (header inclusion), `#define` (macro expansion), `#ifdef` (conditional compilation), and similar directives. + +```bash +g++ -E test.cpp // print preprocessed output (expanded macros and included headers) to the terminal +g++ -E test.cpp -o preprocess.i // write output to preprocess.i +``` + +**Compilation stage: `.i` → `.s`** + +Translates preprocessed code into assembly. + +```bash +g++ -S preprocess.i -o assemble.s +``` + +**Assembly stage: `.s` → `.o`** + +Converts assembly into machine code, producing an object file (not directly executable on its own). + +```bash +g++ -c assemble.s -o machine.o +g++ -c test.cpp // you can also pass .cpp directly to generate the same-named .o +``` + +**Linking stage: `.o` → `.exe`** + +Links one or more object files with libraries to produce an executable. + +```bash +g++ machine.o -o test +``` + +**Generating debug information:** + +```bash +g++ -g test.cpp -o test // includes debug symbols compared to plain g++ test.cpp -o test +``` diff --git a/app/docs/computer-science/cpp_backend/easy_compile/3_Make.en.md b/app/docs/computer-science/cpp_backend/easy_compile/3_Make.en.md new file mode 100644 index 00000000..ffc97465 --- /dev/null +++ b/app/docs/computer-science/cpp_backend/easy_compile/3_Make.en.md @@ -0,0 +1,69 @@ +--- +title: Building with Make +description: "" +date: "2025-09-29" +tags: + - tag-one +docId: g6wucmr69lamd9xyxm7uunnd +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Building with Make + +### 1. How `make` Works + +`make` manages the build process based on **file dependencies** and **timestamps**, following these steps: + +1. Read the `Makefile`: `make` parses the `Makefile` to obtain build rules. +2. **Check target modification times**: `make` compares timestamps to decide whether to rebuild. For example, if `source.c` is newer than `source.o`, `make` considers `source.o` stale and re-runs the relevant compile command. +3. **Execute build rules**: if a target needs rebuilding, `make` runs the compilation and linking commands according to the dependency graph until the final target (executable or library) is produced. + +### 2. Basic `Makefile` Structure + +A `Makefile` is the configuration file `make` reads. It defines build rules, targets, dependencies, and commands. A typical `Makefile` contains: + +### Basic Syntax + +- **Target**: the file to build (usually an object file or the final executable). +- **Dependency**: files the target depends on. If a dependency is updated, the target must be rebuilt. +- **Command**: the shell command that produces the target (e.g., a compile command). Commands **must be indented with a TAB character**. + +```makefile +target: dependencies + command +``` + +### 3. 
`Makefile` Example + +Suppose we have a simple C++ project with two source files, `main.cpp` and `utils.cpp`, which produce object files `main.o` and `utils.o`, and are ultimately linked into the executable `myapp`. + +```makefile +CC = g++ # compiler: g++ +CFLAGS = -Wall -g # compile flags: -Wall enables all warnings, -g includes debug info + +# object files +OBJS = main.o utils.o + +# executable +TARGET = myapp + +# default target +all: $(TARGET) + +$(TARGET): $(OBJS) # executable depends on object files + $(CC) $(OBJS) -o $(TARGET) # link command: produces the executable + +main.o: main.cpp utils.h + $(CC) $(CFLAGS) -c main.cpp # compile command: produces main.o + +utils.o: utils.cpp utils.h + $(CC) $(CFLAGS) -c utils.cpp # compile command: produces utils.o + +clean: + rm -f $(OBJS) $(TARGET) # remove intermediate and target files +``` + +Targets serve as the entry points for execution. diff --git a/app/docs/computer-science/cpp_backend/easy_compile/4_CMake.en.md b/app/docs/computer-science/cpp_backend/easy_compile/4_CMake.en.md new file mode 100644 index 00000000..e8cf0b52 --- /dev/null +++ b/app/docs/computer-science/cpp_backend/easy_compile/4_CMake.en.md @@ -0,0 +1,187 @@ +--- +title: CMake +description: "" +date: "2025-09-29" +tags: + - tag-one +docId: xk44lx4q1gpcm1uqk8nnbg7q +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# CMake + +[https://juejin.cn/post/6844903557183832078](https://juejin.cn/post/6844903557183832078) — A CMake tutorial on Juejin + +[https://zhuanlan.zhihu.com/p/97369704](https://zhuanlan.zhihu.com/p/97369704) — A CMake tutorial on Zhihu + +CMake is used to compile a C++ project into an executable or a static/dynamic library. + +## CMake Commands + +Run `cmake --help` for more information. + +1. CMake requires a `CMakeLists.txt`, or you can pass parameters manually when invoking CMake. +2. It is recommended to create a `build/` directory inside the project directory, then run CMake from there (you can also specify the build directory with `--build`). + +```bash +cmake .. // append any extra parameters not declared in CMakeLists.txt (most parameters are usually declared there) +``` + +Running CMake from the `build/` directory keeps generated files (intermediate and final) separate from the project source. `..` tells CMake that `CMakeLists.txt` and the source code are in the parent directory (the project root). + +CMake works in two steps: first it generates build system files, then it uses those files to perform the actual build. The key commands are: + +1. Generate using a preset + +```bash +cmake --preset=default +``` + +This generates: + +- `build.ninja` +- `CMakeCache.txt` +- `CMakeFiles/` +- `cmake_install.cmake` +- `vcpkg_installed/` +- `vcpkg-manifest-install.log` + +Besides using a preset, you can also specify options manually: + +Use `-S` to set the source directory and `-B` to set the build directory. For example: + +`cmake -S .` sets the current directory as the source directory (the CMake variable **`CMAKE_SOURCE_DIR`** holds this path). + +2. Build + +```bash +cmake --build vcpkg-build // build into vcpkg-build; no need to cd into build/ and run cmake .. 
separately
```

This generates (in addition to the files from step 1):

- `build.ninja`
- `CMakeCache.txt`
- `CMakeFiles/`
- `cmake_install.cmake`
- `libfeature-extraction-lib.a` (the compiled static library, new compared to step 1)
- `vcpkg_installed/`
- `vcpkg-manifest-install.log`

## CMakeLists.txt

### Understanding `find_package` in Depth

```
find_package(<PackageName> [optional args: CONFIG REQUIRED])
```

`find_package` does **not** eagerly load all components of a library. Components are loaded lazily when referenced by `target_link_libraries` or other commands that need them. It merely declares a global target.

`CONFIG` — use the library's own config file (e.g., `spdlogConfig.cmake`) rather than CMake's built-in find module.

`REQUIRED` — abort with an error if the package is not found.

### `.cmake` Files

`CMakeLists.txt` and `.cmake` files share exactly the same syntax. A `.cmake` file is essentially a reusable module for `CMakeLists.txt`. Include one as follows:

```cmake
include(cmakes/gtests_main.cmake)
include(cmakes/gtests_fe.cmake)
```

Typically used with conditionals, for example to enable unit tests optionally:

```cmake
option(BUILD_TESTS "Build tests" OFF)
if (BUILD_TESTS)
  include(cmakes/gtests_main.cmake)
  include(cmakes/gtests_fe.cmake)
endif ()
```

### CMake Parameters and Their g++ Equivalents

Every CMake command parameter has a corresponding g++ flag, except for internal-only parameters (helpers used within the CMake script itself).

**Required configuration items in `CMakeLists.txt` and their g++ equivalents:**

1. **Define the build target** — executable or library

```cmake
add_executable(my_program main.cpp)
# or
add_library(my_library STATIC my_library.cpp)
```

`add_executable` corresponds to:

```bash
g++ -o my_program main.cpp
```

`add_library` corresponds to:

```bash
g++ -c my_library.cpp -o my_library.o
ar rcs libmy_library.a my_library.o
```

2. **Link dependency libraries**

```cmake
target_link_libraries(my_program gtest gtest_main)
```

3. **Include directories** (for your own headers and third-party headers)

```cmake
include_directories(/path/to/include)
# or, scoped to a single target
target_include_directories(my_program PRIVATE /path/to/include)
```

4. **Set build type**

```cmake
set(CMAKE_BUILD_TYPE Release) # or Debug
```

**CMake-internal parameters (no g++ equivalent):**

1. Specify the minimum CMake version: `cmake_minimum_required(VERSION 3.10)`
2. Set project name and version: `project(ProjectName VERSION 1.0 LANGUAGES CXX)`
3. Find a package: `find_package(PackageName CONFIG REQUIRED)`

**Package search order** (from ChatGPT — may need revision):

CMake searches for a package's config file in this order:

- Paths specified in the `CMAKE_PREFIX_PATH` environment variable.
- System default install paths (e.g., `/usr/lib/cmake`, `/usr/local/lib/cmake`).
- The path specified by `CMAKE_INSTALL_PREFIX`.
- If using a package manager like vcpkg, the path specified in its toolchain file.

(Default vcpkg path: `[vcpkg-root]/installed/[triplet]/share/[package]/[package]Config.cmake`)

For reference:

- Package headers install to: `[vcpkg-root]/installed/[triplet]/include/`
- Library files install to: `[vcpkg-root]/installed/[triplet]/lib/`

### Running Without `CMakeLists.txt`

Required command-line parameters when not using a `CMakeLists.txt`:

1. Specify the source directory (required)
2. Set build type: `-D CMAKE_BUILD_TYPE=Release` or `Debug`
3. 
Choose a build generator:
   - `-G "Unix Makefiles"` — Unix make generator
   - `-G "MinGW Makefiles"` — Windows make generator
   - `-G "Ninja"` — Ninja generator (requires separate installation)
   - `-G "Visual Studio 16 2019"` — Visual Studio 2019 generator
   - `-G "Xcode"` — Apple Xcode generator
diff --git a/app/docs/computer-science/cpp_backend/easy_compile/5_vcpkg.en.md b/app/docs/computer-science/cpp_backend/easy_compile/5_vcpkg.en.md
new file mode 100644
index 00000000..32444e07
--- /dev/null
+++ b/app/docs/computer-science/cpp_backend/easy_compile/5_vcpkg.en.md
@@ -0,0 +1,150 @@
---
title: vcpkg Package Manager
description: ""
date: "2025-09-29"
tags:
  - tag-one
docId: gtqamuq3tftmvzstbunkgbo5
lang: en
translatedFrom: zh
translatedAt: 2026-04-15T08:00:00Z
translatorAgent: claude-sonnet-4-6
---

# vcpkg Package Manager

vcpkg has two modes: **Classic mode** and **Manifest mode**.

Classic mode: run `vcpkg install` to download packages, then specify the include and library directories in `CMakeLists.txt`.

Manifest mode: write a `vcpkg.json` manifest file.

## Configuration

Download and set up the environment:

```bash
git clone https://github.com/microsoft/vcpkg.git
cd vcpkg/
./bootstrap-vcpkg.sh
```

Recommended: add vcpkg to your environment variables:

```bash
echo 'export VCPKG_ROOT="$HOME/vcpkg"' >> ~/.bashrc
// adjust the path if you cloned vcpkg somewhere other than ~
```

1. Add vcpkg to your project:

```bash
// create and initialize the manifest files vcpkg.json and vcpkg-configuration.json
vcpkg new --application
// add a dependency (e.g. fmt); this appends "fmt" to the dependencies list in vcpkg.json
vcpkg add port fmt // only modifies vcpkg.json without validation — same as editing the file manually
```

vcpkg reads the manifest (`vcpkg.json`) to determine which dependencies to install and integrate with CMake, providing the packages the project needs.

2. Add library information to `CMakeLists.txt`:

```cmake
find_package(fmt CONFIG REQUIRED)
target_link_libraries(HelloWorld PRIVATE fmt::fmt)
```

3. Run the CMake configuration:

Create a `CMakePresets.json` file and set the toolchain (`CMAKE_TOOLCHAIN_FILE`) to point to vcpkg's built-in CMake toolchain. When this toolchain is active, CMake automatically links libraries installed by vcpkg.

```json
{
  "version": 2,
  "configurePresets": [
    {
      "name": "vcpkg",
      "generator": "Ninja", // (or "MinGW Makefiles") — equivalent to the -G "" flag shown earlier
      "binaryDir": "${sourceDir}/build",
      "cacheVariables": {
        "CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
        // {VCPKG_ROOT} can be set in CMakeUserPresets.json
        // or configured as an environment variable
      }
    }
  ]
}
```

```json
{
  "version": 2,
  "configurePresets": [
    {
      "name": "default",
      "inherits": "vcpkg",
      "environment": {
        "VCPKG_ROOT": "<path to vcpkg>"
      }
    }
  ]
}
```

## Usage

1. Package your own library and manage it with vcpkg (register it in a vcpkg registry).
2. Download and use third-party libraries managed by vcpkg.

**Creating Your Own vcpkg Registry**

In addition to the official vcpkg git registry at https://github.com/microsoft/vcpkg, you can create your own git registry (there is also a filesystem registry type, but this section focuses on git registries).

**What is a vcpkg registry?**

A registry stores metadata about libraries (not the library source code itself — storing source code would make it bloated).
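
Concretely, a git registry's repository contains only port metadata. A sketch of the conventional layout (the `my-lib` port name is illustrative; the directory names follow vcpkg's registry conventions):

```
ports/
  my-lib/
    vcpkg.json       # name, version, and dependencies of the port
    portfile.cmake   # how to fetch and build the actual sources
versions/
  baseline.json      # the "baseline" commit resolves versions against this
  m-/
    my-lib.json      # per-version history for the port
```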
+ +**What is it used for?** + +It is referenced in `vcpkg-configuration.json`, as shown below: + +```bash +{ + "default-registry": { + "kind": "git", + "repository": "https://github.com/microsoft/vcpkg.git", + "baseline": "234534dfvbsdvw43434f" + }, // the default registry is required — it loads the official registry (thousands of packages) + "registries": [ // custom registries + { + "kind": "git", + "repository": "https://github.com/xxx/xxx.git", // repository URL + "baseline": "d3e4723c1224t34fsdsvd0e4c2615f6d75", // version baseline + "reference": "main", // branch name + "packages": [ + "datastax-cpp-driver", // library names included in this registry + "cpp-common", + "ppconsul", + "leveldb", + "grpc", + "polaris-cpp" + ] + }, + { + "kind": "git", + "repository": "git@gitlab.xxxxx/xxxx.git", + "baseline": "15efa5017d9a3esdvsdvsdvwecs1d316", + "reference": "main", + "packages": [ + "feature-generation-lib", + "nps-client-brpc", + "brpc", + "cybercore-sdk-cpp", + "opentelemetry-cpp", + "mv-protocols-cpp", + "feature-extraction-lib" + ] + } + ] +} +``` diff --git a/app/docs/computer-science/cpp_backend/mempool_simple.en.mdx b/app/docs/computer-science/cpp_backend/mempool_simple.en.mdx new file mode 100644 index 00000000..b5384a7d --- /dev/null +++ b/app/docs/computer-science/cpp_backend/mempool_simple.en.mdx @@ -0,0 +1,120 @@ +--- +title: Handwritten Memory Pool (Simple Fixed-Size) +description: "" +date: "2025-09-27" +tags: + - tag-one +docId: q8290wmhyofuiskzn1ph63ta +lang: en +translatedFrom: zh +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# Handwritten Memory Pool (Simple Fixed-Size) + +# Simple Version (Fixed-Size Blocks) + +## Design Diagram + +![Design](https://cdn.nlark.com/yuque/0/2025/jpeg/43055607/1758718719250-e6f52459-0f73-493b-8294-7b8f931da054.jpeg) + +## Code Structure + +```c +typedef struct mempool_s { + int blocksize; // size of each memory block + int freecount; // number of remaining free blocks + char *free_ptr; // pointer to the next free block + char *mem; // head pointer of the entire memory pool +} mempool_t; +``` + +```c +int memp_create(mempool_t *m, int block_size) { + + if (!m) return -1; + + // 1. initialize these two simple ints + m->blocksize = block_size; + m->freecount = MEM_PAGE_SIZE / block_size; + + // 2. allocate space for the entire pool and initialize m->mem + m->mem = (char *)malloc(MEM_PAGE_SIZE); + if (!m->mem) { // allocation failed (not enough free memory) + return -2; + } + // zero-initialize the allocated space + memset(m->mem, 0, MEM_PAGE_SIZE); + + // 3. initialize free_ptr + m->free_ptr = m->mem; + + // initialize the "next pointer" inside each block + int i = 0; + char *ptr = m->mem; + for (i = 0;i < m->freecount;i ++) { + + *(char **)ptr = ptr + block_size; + ptr = ptr + block_size; + } + // the last block's "next_ptr" points to NULL + *(char **)ptr = NULL; + return 0; +} +``` + +```c +void *memp_alloc(mempool_t *m) { + // pool is full + if (!m || m->freecount == 0) return NULL; + // 1. get the next free block as the return value + void *ptr = m->free_ptr; + // 2. update free_ptr + m->free_ptr = *(char **)ptr; + // 3. 
update freecount
    m->freecount --;

    return ptr;
}
```

```c
void memp_free(mempool_t *m, void *ptr) {
    // equivalent to: ptr->next = m->free_ptr
    // insert the block to be freed at the head of the free list (head insertion)
    *(char **)ptr = m->free_ptr;
    // update free_ptr (the head of the free block linked list)
    m->free_ptr = (char *)ptr;
    // update freecount
    m->freecount ++;
}
```

```c
void memp_destory(mempool_t *m) {
    if (!m) return ;
    // free the entire pool in one call, since the pool was malloc'd as a whole, not block by block
    free(m->mem);
}
```

## Usage Example

```c
int main() {
    mempool_t m;
    memp_create(&m, 32);

    void *p1 = memp_alloc(&m);
    printf("memp_alloc : %p\n", p1);

    void *p2 = memp_alloc(&m);
    printf("memp_alloc : %p\n", p2);

    void *p3 = memp_alloc(&m);
    printf("memp_alloc : %p\n", p3);

    memp_free(&m, p2);
}
```
diff --git a/app/docs/computer-science/data-structures/array/01-static-array.zh.mdx b/app/docs/computer-science/data-structures/array/01-static-array.zh.mdx
new file mode 100644
index 00000000..fb8c3a7d
--- /dev/null
+++ b/app/docs/computer-science/data-structures/array/01-static-array.zh.mdx
@@ -0,0 +1,173 @@
---
title: 静态数组
description: 静态数组的实现原理、特性与使用场景
date: "2024-01-04"
tags:
  - static-array
  - memory-management
docId: gmpls10e2dz0bbizotvhglc8
lang: zh
translatedFrom: en
translatedAt: 2026-04-15T08:00:00Z
translatorAgent: claude-sonnet-4-6
---

# 静态数组

静态数组是数组最基本的形式,其大小在编译时确定,在程序运行期间无法改变。

## 内存布局

静态数组在内存中连续存储:

```
内存地址: 1000 1004 1008 1012 1016
数组:     [10]  [20]  [30]  [40]  [50]
下标:      0     1     2     3     4
```

假设每个整数占 4 字节,数组元素 `arr[i]` 的内存地址为:

```
address = base_address + i * element_size
```

## 特性分析

### 时间复杂度

- **访问**:O(1) —— 通过下标直接计算内存地址
- **查找**:O(n) —— 需要遍历整个数组
- **插入**:O(n) —— 需要移动后续元素
- **删除**:O(n) —— 需要移动后续元素

### 空间复杂度

- **存储**:O(n) —— n 个元素
- **额外空间**:O(1) —— 无需额外指针或元数据

## 代码实现

### C++ 实现

```cpp
#include <iostream>
using namespace std;

int main() {
    // 声明静态数组
    int arr[5] = {10, 20, 30, 40, 50};

    // 访问元素
    cout << "First element: " << arr[0] << endl;

    // 修改元素
    arr[2] = 35;

    // 遍历数组
    for (int i = 0; i < 5; i++) {
        cout << arr[i] << " ";
    }

    return 0;
}
```

### JavaScript 实现

```javascript
// JavaScript 中的数组实际上是动态的,但我们可以模拟静态数组的行为
class StaticArray {
  constructor(size) {
    this.size = size;
    this.data = new Array(size);
  }

  get(index) {
    if (index < 0 || index >= this.size) {
      throw new Error("Index out of bounds");
    }
    return this.data[index];
  }

  set(index, value) {
    if (index < 0 || index >= this.size) {
      throw new Error("Index out of bounds");
    }
    this.data[index] = value;
  }

  length() {
    return this.size;
  }
}

// 使用示例
const arr = new StaticArray(5);
arr.set(0, 10);
arr.set(1, 20);
console.log(arr.get(0)); // 10
```

## 优缺点

### 优点

1. **内存效率高**:无额外元数据开销
2. **对缓存友好**:连续内存布局提升访问效率
3. **简单直接**:易于实现和使用
4. **编译期优化**:编译器可进行更多优化

### 缺点

1. **大小固定**:运行时无法改变大小
2. **内存浪费**:若无法充分利用所有空间
3. 
**插入/删除效率低**:需要移动大量元素 + +## 应用场景 + +静态数组特别适用于: + +- **嵌入式系统**:内存受限,需要精确控制内存 +- **高性能计算**:需要最大化内存访问效率 +- **系统编程**:底层系统代码,需要可预测的内存布局 +- **固定大小数据集**:如像素数组、音频采样等 + +## 实际示例 + +### 图像处理 + +```cpp +// 处理 640x480 的灰度图像 +unsigned char image[640 * 480]; + +// 访问像素 (x, y) +int getPixel(int x, int y) { + return image[y * 640 + x]; +} + +// 设置像素值 +void setPixel(int x, int y, unsigned char value) { + image[y * 640 + x] = value; +} +``` + +### 查找表 + +```cpp +// 预计算查找表 +const int SQUARE_TABLE[101] = { + 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, + // ... 预计算 0-100 的平方值 +}; + +int getSquare(int n) { + return SQUARE_TABLE[n]; // O(1) 查找 +} +``` + +## 总结 + +静态数组是理解所有数组类型的基础。尽管存在大小固定的局限,其高效性和简洁性在特定场景下不可替代。 + +下一节我们将学习动态数组如何解决静态数组大小固定的问题。 diff --git a/app/docs/computer-science/data-structures/array/02-dynamic-array.zh.mdx b/app/docs/computer-science/data-structures/array/02-dynamic-array.zh.mdx new file mode 100644 index 00000000..45cf5a92 --- /dev/null +++ b/app/docs/computer-science/data-structures/array/02-dynamic-array.zh.mdx @@ -0,0 +1,327 @@ +--- +title: 动态数组 +description: 动态数组的实现原理、扩容机制与性能分析 +date: "2024-01-05" +tags: + - dynamic-array + - resizing + - amortized-analysis +docId: nuojcaq1s6r5nggul0uq3r3j +lang: zh +translatedFrom: en +translatedAt: 2026-04-15T08:00:00Z +translatorAgent: claude-sonnet-4-6 +--- + +# 动态数组 + +动态数组解决了静态数组大小固定的问题,可在运行时动态调整大小,是许多高级数据结构和算法的基础。 + +## 核心概念 + +动态数组通过以下机制实现动态扩展: + +1. **容量(Capacity)**:当前已分配内存能容纳的元素数量 +2. **大小(Size)**:当前实际存储的元素数量 +3. **扩容策略**:当 size 超过 capacity 时,重新分配更大的内存空间 + +``` +容量: [_ _ _ _ _ _ _ _] (capacity = 8) +大小: [1 2 3 4 _ _ _ _] (size = 4) +``` + +## 扩容机制 + +### 常见扩容策略 + +1. **倍增扩容**:每次将容量扩大为原来的两倍 +2. **黄金比例扩容**:增长因子为 1.5 或 1.618 +3. **固定增量扩容**:每次增加固定数量的空间 + +```javascript +// 倍增扩容示例 +function resize(oldCapacity) { + return oldCapacity * 2; // 或 oldCapacity + oldCapacity +} +``` + +### 扩容过程 + +``` +初始状态:[1 2 3 4] capacity=4, size=4 + +添加元素 5: +1. 检测到 size == capacity +2. 分配新内存:capacity = 4 * 2 = 8 +3. 复制旧数据:[1 2 3 4 _ _ _ _] +4. 添加新元素:[1 2 3 4 5 _ _ _] +5. 
释放旧内存 +``` + +## 代码实现 + +### JavaScript 实现 + +```javascript +class DynamicArray { + constructor() { + this.capacity = 2; + this.size = 0; + this.data = new Array(this.capacity); + } + + // 获取元素 + get(index) { + if (index < 0 || index >= this.size) { + throw new Error("Index out of bounds"); + } + return this.data[index]; + } + + // 设置元素 + set(index, value) { + if (index < 0 || index >= this.size) { + throw new Error("Index out of bounds"); + } + this.data[index] = value; + } + + // 在末尾添加元素 + push(value) { + // 检查是否需要扩容 + if (this.size >= this.capacity) { + this.resize(); + } + + this.data[this.size] = value; + this.size++; + } + + // 移除最后一个元素 + pop() { + if (this.size === 0) { + throw new Error("Array is empty"); + } + + const value = this.data[this.size - 1]; + this.size--; + + // 可选:缩容以节省内存 + if (this.size < this.capacity / 4) { + this.shrink(); + } + + return value; + } + + // 扩容 + resize() { + const oldCapacity = this.capacity; + this.capacity *= 2; + const newData = new Array(this.capacity); + + // 复制旧数据 + for (let i = 0; i < this.size; i++) { + newData[i] = this.data[i]; + } + + this.data = newData; + console.log(`Expanded: ${oldCapacity} -> ${this.capacity}`); + } + + // 缩容 + shrink() { + if (this.capacity <= 2) return; + + const oldCapacity = this.capacity; + this.capacity = Math.floor(this.capacity / 2); + const newData = new Array(this.capacity); + + for (let i = 0; i < this.size; i++) { + newData[i] = this.data[i]; + } + + this.data = newData; + console.log(`Shrunk: ${oldCapacity} -> ${this.capacity}`); + } + + // 在指定位置插入元素 + insert(index, value) { + if (index < 0 || index > this.size) { + throw new Error("Index out of bounds"); + } + + if (this.size >= this.capacity) { + this.resize(); + } + + // 右移元素 + for (let i = this.size; i > index; i--) { + this.data[i] = this.data[i - 1]; + } + + this.data[index] = value; + this.size++; + } + + // 删除指定位置的元素 + remove(index) { + if (index < 0 || index >= this.size) { + throw new Error("Index out of bounds"); + } + + const value = this.data[index]; + + // 向左移动元素 + for (let i = index; i < this.size - 1; i++) { + this.data[i] = this.data[i + 1]; + } + + this.size--; + + if (this.size < this.capacity / 4) { + this.shrink(); + } + + return value; + } + + length() { + return this.size; + } + + toString() { + const elements = []; + for (let i = 0; i < this.size; i++) { + elements.push(this.data[i]); + } + return `[${elements.join(", ")}] (size: ${this.size}, capacity: ${this.capacity})`; + } +} +``` + +### 使用示例 + +```javascript +const arr = new DynamicArray(); + +// 添加元素 +arr.push(1); +arr.push(2); +arr.push(3); // 触发扩容 +console.log(arr.toString()); // [1, 2, 3] (size: 3, capacity: 4) + +// 插入元素 +arr.insert(1, 10); +console.log(arr.toString()); // [1, 10, 2, 3] (size: 4, capacity: 4) + +// 删除元素 +arr.remove(0); +console.log(arr.toString()); // [10, 2, 3] (size: 3, capacity: 4) +``` + +## 性能分析 + +### 时间复杂度 + +| 操作 | 平均情况 | 最坏情况 | 说明 | +| ---------------- | -------- | -------- | ----------------------- | +| 访问 | O(1) | O(1) | 直接索引访问 | +| 搜索 | O(n) | O(n) | 需要遍历 | +| 插入(末尾) | O(1)\* | O(n) | 摊还 O(1),偶尔需要扩容 | +| 插入(任意位置) | O(n) | O(n) | 需要移动元素 | +| 删除(末尾) | O(1)\* | O(1) | 摊还 O(1) | +| 删除(任意位置) | O(n) | O(n) | 需要移动元素 | + +\*摊还时间复杂度 + +### 摊还分析 + +虽然单次扩容操作需要 O(n) 时间,但通过摊还分析可以证明: + +- 连续进行 n 次 `push` 操作的总时间复杂度为 O(n) +- 因此单次 `push` 操作的摊还时间复杂度为 O(1) + +**证明思路**: + +- 假设从空数组开始,进行 n 次 push 操作 +- 扩容发生在大小为 1, 2, 4, 8, ..., 2^k 时 +- 总的复制操作次数为:1 + 2 + 4 + ... + 2^k < 2n +- 所以平均每次 push 的成本为 (n + 2n) / n = 3 = O(1) + +## 优化策略 + +### 1. 
选择合适的增长因子

```javascript
// 不同的增长策略
const GROWTH_STRATEGIES = {
  DOUBLE: (capacity) => capacity * 2, // 快速增长,可能浪费内存
  GOLDEN_RATIO: (capacity) => Math.floor(capacity * 1.5), // 平衡增长
  FIBONACCI: (capacity) => capacity + previousCapacity, // 渐进增长
};
```

### 2. 预分配容量

```javascript
// 如果知道大概的数据量,可以预分配容量
class DynamicArray {
  constructor(initialCapacity = 2) {
    this.capacity = initialCapacity;
    this.size = 0;
    this.data = new Array(this.capacity);
  }

  // 预留容量
  reserve(minCapacity) {
    if (minCapacity > this.capacity) {
      this.capacity = minCapacity;
      this.resize();
    }
  }
}
```

### 3. 内存对齐优化

```cpp
// C++ 中考虑内存对齐
template <typename T>
class DynamicArray {
private:
    T* data;
    size_t size;
    size_t capacity;

    // 确保容量是 2 的幂次,有利于内存对齐
    size_t nextPowerOfTwo(size_t n) {
        size_t power = 1;
        while (power < n) power <<= 1;
        return power;
    }
};
```

## 实际应用

### 1. 编程语言中的动态数组

- **JavaScript**:`Array`
- **Python**:`list`
- **Java**:`ArrayList`
- **C++**:`std::vector`
- **C#**:`List<T>`

### 2. 应用场景

- **缓冲区**:网络数据包缓冲、文件读取缓冲
- **集合**:实现栈、队列等其他数据结构
- **图形编程**:顶点数组、像素缓冲
- **数据处理**:动态添加和处理数据项

## 总结

动态数组是现代编程中最重要的数据结构之一,它兼具数组高效访问的特性和灵活调整大小的能力。理解其扩容机制和摊还分析,对于编写高效代码至关重要。

虽然动态数组解决了静态数组的大小限制问题,但在某些场景下,链表等其他数据结构可能更为合适。下一节我们将学习链表相关知识。
diff --git a/app/docs/computer-science/data-structures/index.zh.mdx b/app/docs/computer-science/data-structures/index.zh.mdx
new file mode 100644
index 00000000..f10987e1
--- /dev/null
+++ b/app/docs/computer-science/data-structures/index.zh.mdx
@@ -0,0 +1,49 @@
---
title: 数据结构基础
description: 数据结构的基本概念与核心知识
date: "2024-01-02"
tags:
  - data-structures
  - fundamentals
docId: vti0bt2qlnr681msbk6igznc
lang: zh
translatedFrom: en
translatedAt: 2026-04-15T08:00:00Z
translatorAgent: claude-sonnet-4-6
---

# 数据结构基础

数据结构是计算机科学的基础,研究如何在计算机中组织和存储数据,以实现高效的访问与修改。

## 什么是数据结构?
+
+## 时间复杂度
+
+| 操作 | 时间复杂度 |
+| ---- | ---------- |
+| 访问 | O(1)       |
+| 查找 | O(n)       |
+| 插入 | O(n)       |
+| 删除 | O(n)       |
+
+## 应用场景
+
+数组适用于:
+
+- 需要频繁随机访问元素的场景
+- 数据量相对固定的情况
+- 需要高效遍历的算法
+- 作为其他数据结构的底层基础
+
+---
+
+继续学习数组的具体实现和高级应用!
diff --git a/app/docs/computer-science/data-structures/index.zh.mdx b/app/docs/computer-science/data-structures/index.zh.mdx
new file mode 100644
index 00000000..f10987e1
--- /dev/null
+++ b/app/docs/computer-science/data-structures/index.zh.mdx
@@ -0,0 +1,49 @@
+---
+title: 数据结构基础
+description: 数据结构的基本概念与核心知识
+date: "2024-01-02"
+tags:
+  - data-structures
+  - fundamentals
+docId: vti0bt2qlnr681msbk6igznc
+lang: zh
+translatedFrom: en
+translatedAt: 2026-04-15T08:00:00Z
+translatorAgent: claude-sonnet-4-6
+---
+
+# 数据结构基础
+
+数据结构是计算机科学的基础,研究如何在计算机中组织和存储数据,以实现高效的访问与修改。
+
+## 什么是数据结构?
+
+数据结构是一种组织、管理和存储数据的格式,使数据的访问和修改更加高效。为算法选择合适的数据结构,是设计高效程序的关键。
+
+## 主要分类
+
+### 线性数据结构
+
+- [数组](/computer-science/data-structures/array) - 最基础的数据结构
+- [链表](/computer-science/data-structures/linked-list) - 动态数据结构
+- 栈 - 后进先出(LIFO)
+- 队列 - 先进先出(FIFO)
+
+### 非线性数据结构
+
+- 树 - 层次结构
+- 图 - 复杂关系结构
+- 哈希表 - 快速查找结构
+
+## 学习重点
+
+学习数据结构时,重点关注以下方面:
+
+1. **时间复杂度** - 操作效率
+2. **空间复杂度** - 内存占用
+3. **应用场景** - 何时选用何种结构
+4. **实现细节** - 如何编写具体代码
+
+## 下一步
+
+建议从最基础的数组入手,逐步过渡到更复杂的数据结构。每种结构都有其独特的优势与适用场景。
diff --git a/app/docs/computer-science/data-structures/linked-list/index.zh.mdx b/app/docs/computer-science/data-structures/linked-list/index.zh.mdx
new file mode 100644
index 00000000..9c271aad
--- /dev/null
+++ b/app/docs/computer-science/data-structures/linked-list/index.zh.mdx
@@ -0,0 +1,258 @@
+---
+title: 链表
+description: 链表的基本概念、类型与应用场景
+date: "2024-01-06"
+tags:
+  - linked-list
+  - pointers
+  - dynamic-data-structure
+docId: lt9yrqt0ksl2liabq9ocw0z4
+lang: zh
+translatedFrom: en
+translatedAt: 2026-04-15T08:00:00Z
+translatorAgent: claude-sonnet-4-6
+---
+
+# 链表
+
+链表是一种线性数据结构,其元素不存储在连续的内存位置,而是通过指针相互链接。每个元素(称为节点)包含数据和一个指向下一个节点的指针。
+
+## 基本概念
+
+### 节点结构
+
+```
+┌─────────────┐    ┌─────────────┐    ┌─────────────┐
+│ data | next │───▶│ data | next │───▶│ data | null │
+└─────────────┘    └─────────────┘    └─────────────┘
+     节点 1              节点 2              节点 3
+```
+
+每个节点包含:
+
+- **数据域(data)**:存储实际数据
+- **指针域(next)**:指向下一个节点
+
+### 链表与数组的对比
+
+| 特性       | 数组          | 链表                 |
+| ---------- | ------------- | -------------------- |
+| 内存布局   | 连续          | 分散                 |
+| 访问方式   | 随机访问 O(1) | 顺序访问 O(n)        |
+| 插入/删除  | O(n)          | O(1)                 |
+| 内存开销   | 低            | 高(需存储额外指针) |
+| 缓存友好性 | 好            | 差                   |
+
+## 链表类型
+
+### 单链表
+
+- [单链表详解](/computer-science/data-structures/linked-list/singly-linked-list)
+- 每个节点只有一个指向下一节点的指针
+- 只能从头到尾单向遍历
+
+### 双链表
+
+- [双链表详解](/computer-science/data-structures/linked-list/doubly-linked-list)
+- 每个节点有两个指针:prev 和 next
+- 可双向遍历
+
+### 循环链表
+
+- 最后一个节点指向第一个节点
+- 形成环形结构
+
+## 基本操作
+
+### 创建节点
+
+```javascript
+class ListNode {
+  constructor(data) {
+    this.data = data;
+    this.next = null;
+  }
+}
+```
+
+### 遍历链表
+
+```javascript
+function traverse(head) {
+  let current = head;
+  while (current !== null) {
+    console.log(current.data);
+    current = current.next;
+  }
+}
+```
+
+### 查找元素
+
+```javascript
+function search(head, target) {
+  let current = head;
+  let index = 0;
+
+  while (current !== null) {
+    if (current.data === target) {
+      return index;
+    }
+    current = current.next;
+    index++;
+  }
+
+  return -1; // 未找到
+}
+```
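+
+### 头部插入与删除
+
+头部操作只需改动头指针,与链表长度无关,因此都是 O(1)(复用上文的 `ListNode`,仅为示意):
+
+```javascript
+// 头部插入 - O(1):新节点指向原头节点,返回新的头节点
+function insertAtHead(head, data) {
+  const node = new ListNode(data);
+  node.next = head;
+  return node;
+}
+
+// 头部删除 - O(1):头指针后移一位
+function removeHead(head) {
+  if (head === null) return null;
+  return head.next;
+}
+```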
+
+## 时间复杂度分析
+
+| 操作             | 时间复杂度 | 说明           |
+| ---------------- | ---------- | -------------- |
+| 访问             | O(n)       | 需要从头遍历   |
+| 查找             | O(n)       | 需要遍历搜索   |
+| 插入(头部)     | O(1)       | 直接修改头指针 |
+| 插入(尾部)     | O(n)       | 需要找到尾节点 |
+| 插入(已知位置) | O(1)       | 直接修改指针   |
+| 删除(头部)     | O(1)       | 直接修改头指针 |
+| 删除(已知节点) | O(1)       | 直接修改指针   |
+| 删除(按值)     | O(n)       | 需要先查找     |
+
+## 优缺点
+
+### 优点
+
+1. **动态大小**:可在运行时动态增长和缩减
+2. **高效的插入/删除**:在已知位置的插入/删除为 O(1)
+3. **按需分配内存**:节点随用随建,不需要预留多余容量
+4. **灵活性强**:可实现复杂数据结构
+
+### 缺点
+
+1. **额外内存开销**:每个节点需要存储指针
+2. **不支持随机访问**:无法直接访问第 i 个元素
+3. **缓存性能差**:节点在内存中不连续
+4. **指针管理复杂**:容易出现内存泄漏或悬空指针
+
+## 应用场景
+
+### 适合使用的情况
+
+- **频繁插入/删除**:尤其是在列表中间操作
+- **大小未知**:运行时数据量变化较大
+- **实现其他结构**:如栈、队列、图的邻接表
+- **撤销操作**:编辑器的撤销功能
+
+### 不适合使用的情况
+
+- **需要随机访问**:频繁通过下标访问元素
+- **内存敏感**:对内存使用有严格要求
+- **缓存敏感**:需要高性能顺序访问
+
+## 实际应用示例
+
+### 1. 音乐播放列表
+
+```javascript
+class Song {
+  constructor(title, artist) {
+    this.title = title;
+    this.artist = artist;
+    this.next = null;
+  }
+}
+
+class Playlist {
+  constructor() {
+    this.head = null;
+    this.current = null;
+  }
+
+  addSong(title, artist) {
+    const newSong = new Song(title, artist);
+    if (!this.head) {
+      this.head = newSong;
+      this.current = newSong;
+    } else {
+      let last = this.head;
+      while (last.next) {
+        last = last.next;
+      }
+      last.next = newSong;
+    }
+  }
+
+  nextSong() {
+    if (this.current && this.current.next) {
+      this.current = this.current.next;
+      return this.current;
+    }
+    return null;
+  }
+}
+```
+
+### 2. 浏览器历史记录
+
+```javascript
+class HistoryEntry {
+  constructor(url, title) {
+    this.url = url;
+    this.title = title;
+    this.next = null;
+  }
+}
+
+class BrowserHistory {
+  constructor() {
+    this.head = null;
+    this.maxSize = 100;
+    this.size = 0;
+  }
+
+  visit(url, title) {
+    const entry = new HistoryEntry(url, title);
+    entry.next = this.head;
+    this.head = entry;
+    this.size++;
+
+    // 限制历史记录大小
+    if (this.size > this.maxSize) {
+      this.removeLast();
+    }
+  }
+
+  // 移除链表尾部(最早)的一条记录
+  removeLast() {
+    if (!this.head) return;
+    if (!this.head.next) {
+      this.head = null;
+    } else {
+      let current = this.head;
+      while (current.next.next) {
+        current = current.next;
+      }
+      current.next = null;
+    }
+    this.size--;
+  }
+
+  getHistory() {
+    const history = [];
+    let current = this.head;
+    while (current) {
+      history.push({
+        url: current.url,
+        title: current.title,
+      });
+      current = current.next;
+    }
+    return history;
+  }
+}
+```
+
+## 学习建议
+
+1. **从基础开始**:先掌握单链表的基本操作
+2. **用图形辅助理解**:画图跟踪指针的变化过程
+3. **注意边界情况**:空链表、单节点、头尾节点的特殊处理
+4. **多练习指针操作**:熟练掌握指针的增删改查
+5. **对比学习**:与数组对比,理解各自的适用场景
+
+## 下一步
+
+建议按以下顺序学习:
+
+1. [单链表](/computer-science/data-structures/linked-list/singly-linked-list) - 掌握基本概念和操作
+2. [双链表](/computer-science/data-structures/linked-list/doubly-linked-list) - 理解双向指针的优势
+3. 循环链表 - 了解特殊链表变体
+4. 链表高级应用 - 如 LRU 缓存、跳表等
+
+掌握链表是理解更复杂数据结构(如树和图)的必要基础!
diff --git a/app/docs/computer-science/frontend/frontend-learning/index.en.mdx b/app/docs/computer-science/frontend/frontend-learning/index.en.mdx
new file mode 100644
index 00000000..5b56b912
--- /dev/null
+++ b/app/docs/computer-science/frontend/frontend-learning/index.en.mdx
@@ -0,0 +1,93 @@
+---
+title: What Frontend Skills Should 2025 New Graduates Learn?
+description: ""
+date: "2025-09-19"
+tags:
+  - frontend
+docId: uzoqs57kwc4tfut4wvgnbjhf
+lang: en
+translatedFrom: zh
+translatedAt: 2026-04-15T08:00:00Z
+translatorAgent: claude-sonnet-4-6
+---
+
+The author is a recent graduate who has worked in China for 8 months. The following reflects my understanding of frontend learning for the 2025 autumn recruitment season. Please feel free to point out any errors or omissions.
+
+#### Prerequisites
+
+1. The ability to effectively use Google Search and leverage LLMs to find information.
+
+#### HTML
+
+1. Understand the purpose of `<meta>` tags, such as `viewport` and `keywords`.
+
+2. Use `` and `