[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-gpt-53-codex":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"25a0528a-b3e9-4b2f-ab06-39dc153230d4","GPT-5.3 Codex","gpt-53-codex",0,null,[11],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"a5281bf5-661d-4288-b00e-0aa245e1fb03","why-coding-benchmarks-are-finally-telling-the-truth-en","Why coding benchmarks are finally telling the truth","BenchLM’s coding leaderboard says LiveCodeBench and SWE-bench Pro are the only signals that still matter.","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1778670666742-3yxf.png","en","2026-05-13T11:10:26.190997+00:00"]