{"data":{"id":"ae497967-0d37-4e36-b4f1-a7185f4706c5","title":"A Simple Unified Uncertainty-Guided Framework for Offline-to-Online Reinforcement Learning","summary":"This paper presents SUNG, a framework for offline-to-online reinforcement learning (RL), which is training an AI agent first on existing data and then improving it through live interactions. The framework addresses two main problems: limited exploration due to offline data constraints and distribution shift (when the agent encounters data patterns it wasn't trained on). SUNG uses uncertainty estimation via a VAE (variational autoencoder, a type of neural network that learns data patterns) to guide both exploration (trying new actions) and exploitation (using known good actions), achieving strong performance on standard benchmarks.","solution":"N/A -- no mitigation discussed in source.","labels":["research"],"sourceUrl":"http://ieeexplore.ieee.org/document/11267513","publishedAt":"2025-11-25T13:16:21.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":[],"issueType":"research","affectedPackages":null,"affectedVendors":[],"affectedVendorsRaw":[],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2025-11-25T13:16:21.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":null,"aiComponentTargeted":"model","llmSpecific":false,"classifierConfidence":0.85,"researchCategory":"peer_reviewed","atlasIds":null}}