{"data":{"id":"56d482da-641e-43c7-ad60-88718cf5f946","title":"GHSA-5jv2-g5wq-cmr4: vLLM: GGUF dequantize kernel int truncation exposes uninitialized GPU memory in multi-tenant serving","summary":"vLLM's GGUF dequantize kernels (specialized functions for decompressing quantized neural network weights) use a 32-bit integer parameter to specify tensor size, which truncates when processing very large tensors. This causes the output tensor to be only partially filled by the processing kernel, leaving the remaining memory uninitialized. In multi-tenant deployments (where multiple users share GPU hardware), this unfilled memory may retain sensitive data from other users' previous requests, leaking their information.","solution":"Change the `int k` parameter to `int64_t k` in the `to_cuda_ggml_t` function pointer type declaration at `ggml-common.h:1067`. The source text states: \"This is a single root cause -- the `int` type on the `k` parameter in `to_cuda_ggml_t` -- with a single fix: change `int k` to `int64_t k`. \nAll dequantize functions inherit this type through the same typedef.\"","labels":["security"],"sourceUrl":"https://github.com/advisories/GHSA-5jv2-g5wq-cmr4","publishedAt":"2026-06-17T14:03:11.000Z","cveId":"CVE-2026-53923","cweIds":null,"cvssScore":null,"cvssSeverity":"medium","severity":"medium","attackType":["data_extraction"],"issueType":"vulnerability","affectedPackages":["vllm@>= 0.5.5, <= 0.23.0"],"affectedVendors":[],"affectedVendorsRaw":["vLLM"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":"unknown","epssScore":0,"patchAvailable":null,"disclosureDate":"2026-06-17T14:03:11.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":["confidentiality"],"aiComponentTargeted":"inference","llmSpecific":false,"classifierConfidence":0.95,"researchCategory":null,"atlasIds":null}}