{"data":{"id":"75bab064-0585-4de1-bb0f-c9d0bfa0e4ec","title":"NAP-Tuning: Neural Augmented Prompt Tuning for Adversarially Robust Vision-Language Models","summary":"Vision-Language Models (VLMs, AI systems that understand both images and text together) like CLIP are powerful but vulnerable to adversarial attacks (malicious inputs designed to fool AI systems, especially in images). This research presents NAP-Tuning, a method that uses learnable text prompts and lightweight neural modules called TokenRefiners to clean up distorted features inside the model's layers, making these systems more resistant to such attacks while keeping normal performance intact.","solution":"N/A -- no mitigation discussed in source.","labels":["safety","research"],"sourceUrl":"http://ieeexplore.ieee.org/document/11368741","publishedAt":"2026-01-30T13:17:15.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":["model_evasion"],"issueType":"research","affectedPackages":null,"affectedVendors":["NVIDIA"],"affectedVendorsRaw":["CLIP","Vision-Language Models","ViT"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2026-01-30T13:17:15.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"advanced","impactType":["safety"],"aiComponentTargeted":"model","llmSpecific":false,"classifierConfidence":0.85,"researchCategory":"peer_reviewed","atlasIds":null}}