{"data":{"id":"28cdaa3e-ad48-4c33-abdb-adbe4d97e248","title":"Harmful Fine-tuning Attacks and Defenses for Large Language Models: A Survey","summary":"This academic survey examines harmful fine-tuning attacks (methods where attackers modify an AI model's training process to make it behave dangerously) and the defenses designed to stop them. The paper reviews different types of attacks, how they work, and various protection strategies researchers have developed to keep large language models safe from this threat.","solution":"N/A -- the source surveys existing defenses against harmful fine-tuning attacks, but no specific mitigation is identified here.","labels":["security","research"],"sourceUrl":"https://dl.acm.org/doi/abs/10.1145/3817114?af=R","publishedAt":"2026-06-24T12:01:14.584Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":["model_poisoning"],"issueType":"research","affectedPackages":null,"affectedVendors":[],"affectedVendorsRaw":[],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":null,"capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":["integrity","safety"],"aiComponentTargeted":"training_data","llmSpecific":true,"classifierConfidence":0.92,"researchCategory":"peer_reviewed","atlasIds":null}}