{"data":{"id":"48c9fafb-8e2d-4da7-9b65-6a231b2d5d52","title":"CVE-2025-46560: vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs. Versions starting from 0.8.0 and p","summary":"vLLM (a system for running large language models efficiently) versions 0.8.0 through 0.8.4 have a critical performance bug in how it processes multimodal input (text, images, audio). The bug uses an inefficient algorithm (quadratic time complexity, meaning processing time grows with the square of the input size) when replacing placeholder tokens (special markers like <|audio_|> that get expanded into repeated tokens), which allows attackers to crash or freeze the system by sending specially crafted malicious inputs.","solution":"This issue has been patched in version 0.8.5.","labels":["security"],"sourceUrl":"https://nvd.nist.gov/vuln/detail/CVE-2025-46560","publishedAt":"2025-04-30T05:15:52.097Z","cveId":"CVE-2025-46560","cweIds":["CWE-1333"],"cvssScore":"6.5","cvssSeverity":"medium","severity":"medium","attackType":["denial_of_service"],"issueType":"vulnerability","affectedPackages":null,"affectedVendors":[],"affectedVendorsRaw":["vLLM"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":"unknown","epssScore":0.00574,"patchAvailable":null,"disclosureDate":null,"capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":["availability"],"aiComponentTargeted":"inference","llmSpecific":true,"classifierConfidence":0.95,"researchCategory":null,"atlasIds":null}}