{"data":{"id":"230bfe9c-82a4-4f0f-9029-0adcb2c28670","title":"Turn specs into evals for any agent with ASSERT","summary":"ASSERT is an open-source framework that automatically converts written behavior requirements into evaluation tests for AI systems (like chatbots or agents). Instead of manually creating tests, ASSERT takes plain-language specifications and generates test scenarios, metrics, and scorecards to check whether an AI system behaves as intended, addressing the problem that generic evaluation metrics often miss application-specific requirements.","solution":"N/A -- no mitigation discussed in source.","labels":["research","safety"],"sourceUrl":"https://commandline.microsoft.com/assert-written-intent-executable-evals/","publishedAt":"2026-06-10T16:00:00.000Z","cveId":null,"cweIds":null,"cvssScore":null,"cvssSeverity":null,"severity":"info","attackType":[],"issueType":"news","affectedPackages":null,"affectedVendors":["Microsoft"],"affectedVendorsRaw":["Microsoft"],"classifierModel":"claude-haiku-4-5-20251001","classifierPromptVersion":"v3","cvssVector":null,"attackVector":null,"attackComplexity":null,"privilegesRequired":null,"userInteraction":null,"exploitMaturity":null,"epssScore":null,"patchAvailable":null,"disclosureDate":"2026-06-10T16:00:00.000Z","capecIds":null,"crossRefCount":0,"attackSophistication":"moderate","impactType":["safety"],"aiComponentTargeted":"agent","llmSpecific":false,"classifierConfidence":0.75,"researchCategory":null,"atlasIds":null}}