[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-long-context-inference":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"beebd757-436f-4c56-b227-5482b5918c4c","long-context inference","long-context-inference",0,null,[11],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"bfbd028b-4704-4de5-8f54-55625836952f","5-kv-cache-takeaways-for-llamacpp-users-en","5 KV cache takeaways for llama.cpp users","5 takeaways from TurboQuant: under-3-bit KV cache compression, memory savings, and the tradeoffs llama.cpp users should watch.","industry","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1779285258553-domr.png","en","2026-05-20T13:53:43.522918+00:00"]