[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-benchlm":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"b13bfc1e-0547-40df-bc95-1126506a1298","BenchLM","benchlm",2,null,[11,20,28],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"a5281bf5-661d-4288-b00e-0aa245e1fb03","why-coding-benchmarks-are-finally-telling-the-truth-en","Why coding benchmarks are finally telling the truth","BenchLM’s coding leaderboard says LiveCodeBench and SWE-bench Pro are the only signals that still matter.","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1778670666742-3yxf.png","en","2026-05-13T11:10:26.190997+00:00",{"id":21,"slug":22,"title":23,"summary":24,"category":25,"image_url":26,"cover_image":26,"language":18,"created_at":27},"0c006cb0-0acc-43c4-baba-ab78092f0d9b","kimi-k2-6-benchlm-2026-scores-en","Kimi K2.6 Scores: BenchLM’s 2026 Breakdown","Kimi K2.6 ranks #12 overall on BenchLM, with strong coding and agentic scores, plus a 256K context window and open weights.","model-release","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1777900276785-cezo.png","2026-05-04T13:10:39.364394+00:00",{"id":29,"slug":30,"title":31,"summary":32,"category":25,"image_url":33,"cover_image":33,"language":18,"created_at":34},"cb45188a-2e6e-4ac7-95f0-39cbd2f7d7a2","gpt-5-4-benchmarks-2026-scores-rankings-en","GPT-5.4 Scores 97.6 in Knowledge Benchmarks","GPT-5.4 tops knowledge benchmarks with 97.6, ranks #2 overall on BenchLM, and posts a 1.05M-token context window.","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776082204490-nq2r.png","2026-04-13T12:09:40.792366+00:00"]