{"data":{"id":"a5dfebc2-4120-4fd3-a1d8-f952096c1f5a","title":"Complementary Text-Guided Attention for Zero-Shot Adversarial Robustness","summary":"CLIP and similar vision-language models (AI systems trained on paired images and text to understand both) are vulnerable to adversarial examples (carefully crafted image modifications designed to fool AI systems). Researchers proposed two methods, TGA-ZSR and Comp-TGA, that use text-guided attention (the model's focus on image regions based on text descriptions) to make these models more robust, achieving 9.58% and 11.95% improvements in accuracy when tested on adversarial examples.","solution":"N/A -- no mitigation discussed in source.","labels":["research","safety"],"sourceUrl":"http://ieeexplore.ieee.org/document/11417939","publishedAt":"2026-03-02T13:18:34.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":["model_evasion"],"issueType":"research","affectedPackages":null,"affectedVendors":["OpenAI"],"affectedVendorsRaw":["CLIP"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2026-03-02T13:18:34.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"advanced","impactType":["safety"],"aiComponentTargeted":"model","llmSpecific":false,"classifierConfidence":0.85,"researchCategory":"peer_reviewed","atlasIds":null}}