[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-llm-推論":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"f692f9ba-d4c2-4bdf-aa5c-5669027c5b6e","LLM 推論","llm-推論",2,null,[11,20],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"c701c93e-a74b-49a7-ac72-40ed577a6e92","nvidia-b300-vs-h200-deepseek-perf-zh","NVIDIA B300 對 H200：DeepSeek 實…","B300 有 288GB HBM3e 和 8TB\u002Fs 頻寬。這篇直接比 H200，拆解 DeepSeek 推論、KV cache、雲端成本與部署取捨。","industry","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1775161680437-1ibz.png","zh","2026-04-02T20:27:38.70665+00:00",{"id":21,"slug":22,"title":23,"summary":24,"category":25,"image_url":26,"cover_image":26,"language":18,"created_at":27},"fdb08bdf-a3bd-4c4d-acaf-ce8035f24449","turboquant-google-paper-explained-zh","TurboQuant 是什麼？Google 新論文重點","Google 的 TurboQuant 盯上 LLM 的 KV cache 瓶頸，用低位元量化降低記憶體用量與推論成本。這篇帶你看它在解什麼問題、和其他優化法差在哪。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1775160957331-6iua.png","2026-04-02T20:15:40.07166+00:00"]