{"data":{"id":"6b0f9f54-6b08-4796-bafc-06732318ebcf","title":"Jailbreak and Guard Aligned Language Models With Only Few In-Context Demonstrations","summary":"This research shows that large language models can be tricked or protected using in-context learning (ICL, a technique where an AI learns from examples provided in its current input rather than from training). The researchers developed two methods: an In-Context Attack that uses harmful examples to make LLMs produce unsafe outputs, and an In-Context Defense that uses refusal examples to strengthen safety. The study demonstrates that both attacking and defending LLM safety through carefully chosen demonstrations are effective and scalable.","solution":"N/A -- no mitigation discussed in source.","labels":["security","research"],"sourceUrl":"https://ieeexplore.ieee.org/document/11370531","publishedAt":"2026-02-02T13:17:25.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":["jailbreak","prompt_injection"],"issueType":"research","affectedPackages":null,"affectedVendors":[],"affectedVendorsRaw":[],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2026-02-02T13:17:25.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":["safety","integrity"],"aiComponentTargeted":"model","llmSpecific":true,"classifierConfidence":0.92,"researchCategory":"peer_reviewed","atlasIds":null}}