{"data":{"id":"45886efb-e2aa-4d45-8aa8-098842e012e8","title":"ZUMA: Training-Free Zero-Shot Unified Multimodal Anomaly Detection","summary":"ZUMA is a training-free framework for multimodal anomaly detection (MAD, identifying unusual patterns using both image and 3D data together) that works without needing labeled training examples, addressing privacy concerns. It uses CLIP (a model trained on images and text) and introduces cross-domain calibration (a technique that bridges differences between how CLIP was trained and how 3D point cloud data works) and dynamic semantic interaction (using natural language descriptions as reference points to spot anomalies) to detect defects in 2D images, 3D objects, or both together without requiring training.","solution":"N/A -- no mitigation discussed in source.","labels":["research"],"sourceUrl":"http://ieeexplore.ieee.org/document/11367454","publishedAt":"2026-01-29T13:23:19.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":[],"issueType":"research","affectedPackages":null,"affectedVendors":[],"affectedVendorsRaw":["OpenAI (CLIP)"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2026-01-29T13:23:19.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":null,"aiComponentTargeted":"model","llmSpecific":false,"classifierConfidence":0.85,"researchCategory":"peer_reviewed","atlasIds":null}}