[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"tag-non-stationary-environments":3},{"tag":4,"articles":10},{"id":5,"name":6,"slug":7,"article_count":8,"description_zh":9,"description_en":9},"97745400-0c4c-4c5e-9adb-8d10c8039b31","non-stationary environments","non-stationary-environments",2,null,[11],{"id":12,"slug":13,"title":14,"summary":15,"category":16,"image_url":17,"cover_image":17,"language":18,"created_at":19},"947e3be0-2b4b-4719-90d1-ddd1ac80f18a","safe-continual-rl-changing-environments-zh","安全持續學習還沒解題","這篇 arXiv 研究把安全 RL 和持續 RL 放在一起看，指出環境一變，現有方法常常顧不了安全，也守不住舊行為。","research","https:\u002F\u002Fxxdpdyhzhpamafnrdkyq.supabase.co\u002Fstorage\u002Fv1\u002Fobject\u002Fpublic\u002Fcovers\u002Finline-1776838196623-anqk.png","zh","2026-04-22T06:09:32.609993+00:00"]