[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-safe-reinforcement-learning":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"6dd191a0-3d85-48dc-9cc2-383cff4ef256","safe reinforcement learning","safe-reinforcement-learning",1,null,[11],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"947e3be0-2b4b-4719-90d1-ddd1ac80f18a","safe-continual-rl-changing-environments-zh","安全持續學習還沒解題","這篇 arXiv 研究把安全 RL 和持續 RL 放在一起看，指出環境一變，現有方法常常顧不了安全，也守不住舊行為。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776838196623-anqk.png","zh","2026-04-22T06:09:32.609993+00:00"]