{"data":{"id":"61471733-33d2-42ec-9794-c4032cf882d7","title":"Introducing Gemma 4 12B: a unified, encoder-free multimodal model","summary":"Google DeepMind introduced Gemma 4 12B, a multimodal AI model (a system that processes text, images, and audio together) designed to run efficiently on laptop computers with 16GB of memory. The model uses an encoder-free architecture (meaning it processes images and audio directly without separate translation layers), achieving performance comparable to larger models while reducing memory usage and latency. It supports native audio inputs and includes Multi-Token Prediction drafters to speed up response generation.","solution":"N/A -- no mitigation discussed in source.","labels":["industry"],"sourceUrl":"https://deepmind.google/blog/introducing-gemma-4-12b-a-unified-encoder-free-multimodal-model/","publishedAt":"2026-06-09T14:10:19.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":[],"issueType":"news","affectedPackages":null,"affectedVendors":["Google"],"affectedVendorsRaw":["Google DeepMind","Gemma 4 12B","Gemma 4 26B","HuggingFace","Ollama","LM Studio","llama.cpp","MLX","SGLang","vLLM","Unsloth"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2026-06-09T14:10:19.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":null,"aiComponentTargeted":"model","llmSpecific":true,"classifierConfidence":0.95,"researchCategory":null,"atlasIds":null}}