[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-llm-as-judge":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"9eaab736-4b70-447b-a84d-6f3249165921","LLM-as-judge","llm-as-judge",1,null,[11],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"082ebaa3-ad6f-421a-860a-8566846fb9c1","llm-judge-reliability-conformal-transitivity-zh","LLM 評審別只看平均分","這篇論文提醒：LLM 當評審時，平均表現看起來穩，不代表每個輸入都可靠。作者用 transitivity 檢查與 conformal prediction sets，抓出輸入層級的不一致與不確定性。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776406189176-acr2.png","zh","2026-04-17T06:09:32.920971+00:00"]