{"data":{"id":"7cd87027-6608-46ca-a064-548bc0e2d03c","title":"Auditing the Gatekeepers: Fuzzing \"AI Judges\" to Bypass Security Controls","summary":"Researchers discovered that AI judges (LLMs acting as automated security gatekeepers to enforce safety policies) can be manipulated through prompt injection (tricking an AI by hiding instructions in its input) using stealthy formatting symbols rather than obvious gibberish. They created a tool called AdvJudge-Zero, a fuzzer (software that finds vulnerabilities by testing with unexpected inputs), which automatically identifies innocent-looking character sequences that exploit the model's decision-making logic to bypass security controls.","solution":"Palo Alto Networks customers are better protected through Prisma AIRS and the Unit 42 AI Security Assessment service. Organizations concerned about potential compromise can contact the Unit 42 Incident Response team.","labels":["security","research"],"sourceUrl":"https://unit42.paloaltonetworks.com/fuzzing-ai-judges-security-bypass/","publishedAt":"2026-03-10T10:00:29.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":["prompt_injection","jailbreak"],"issueType":"news","affectedPackages":null,"affectedVendors":[],"affectedVendorsRaw":[],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":null,"capecIds":null,"crossRefCount":0,"attackSophistication":"advanced","impactType":["integrity","safety"],"aiComponentTargeted":"model","llmSpecific":true,"classifierConfidence":0.92,"researchCategory":null,"atlasIds":null}}