[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-transitivity":3},{"tag":4,"articles":9},{"id":5,"name":6,"slug":6,"article_count":7,"description_zh":8,"description_en":8},"30bd8151-e8ea-4d2d-9552-c6234550a671","transitivity",1,null,[10],{"id":11,"slug":12,"title":13,"summary":14,"category":15,"image_url":16,"cover_image":16,"language":17,"created_at":18},"082ebaa3-ad6f-421a-860a-8566846fb9c1","llm-judge-reliability-conformal-transitivity-zh","LLM 評審別只看平均分","這篇論文提醒：LLM 當評審時，平均表現看起來穩，不代表每個輸入都可靠。作者用 transitivity 檢查與 conformal prediction sets，抓出輸入層級的不一致與不確定性。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776406189176-acr2.png","zh","2026-04-17T06:09:32.920971+00:00"]