{"data":{"id":"45efc10d-c886-4539-81c3-a3ffbbb2362d","title":"Breaking Instruction Hierarchy in OpenAI's gpt-4o-mini","summary":"OpenAI released gpt-4o-mini with safety improvements aimed at strengthening 'instruction hierarchy,' which is supposed to prevent users from tricking the AI into ignoring its built-in rules through commands like 'ignore all previous instructions.' However, researchers have already demonstrated bypasses of this protection, and analysis shows that system instructions (the AI's core rules) still cannot be fully trusted as a security boundary (a hard limit that stops attackers).","solution":"N/A -- no mitigation discussed in source.","labels":["security","safety"],"sourceUrl":"https://embracethered.com/blog/posts/2024/chatgpt-gpt-4o-mini-instruction-hierarchie-bypasses/","publishedAt":"2024-07-22T13:14:05.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"medium","attackType":["jailbreak"],"issueType":"news","affectedPackages":null,"affectedVendors":["OpenAI"],"affectedVendorsRaw":["OpenAI","gpt-4o-mini"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":null,"capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":["safety","integrity"],"aiComponentTargeted":"model","llmSpecific":true,"classifierConfidence":0.85,"researchCategory":null,"atlasIds":null}}