{"data":{"id":"7d6197e5-0de8-4771-b985-56b0a39c67f6","title":"The Most Overestimated Q Value Regularization in High-Dimensional Discrete Action Spaces for Offline Reinforcement Learning","summary":"This paper addresses a problem in offline reinforcement learning (RL, a type of AI training that learns from pre-collected data without needing new real-world interaction) where Q value overestimation (the AI incorrectly thinking certain actions are better than they actually are) causes training problems in robotic tasks with many possible actions. The researchers propose MQR (most overestimated Q value regularization), an algorithm that specifically penalizes the single action with the worst overestimation rather than equally penalizing all actions, and demonstrate it achieves 99.04% success rates in real-world robotic grasping tasks.","solution":"N/A -- no mitigation discussed in source.","labels":["research"],"sourceUrl":"http://ieeexplore.ieee.org/document/11304592","publishedAt":"2025-12-19T13:19:26.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":[],"issueType":"research","affectedPackages":null,"affectedVendors":[],"affectedVendorsRaw":[],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2025-12-19T13:19:26.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":null,"aiComponentTargeted":"model","llmSpecific":false,"classifierConfidence":0.85,"researchCategory":"peer_reviewed","atlasIds":null}}