diff --git a/messages/agent.scorer.create.md b/messages/agent.scorer.create.md new file mode 100644 index 00000000..3144194e --- /dev/null +++ b/messages/agent.scorer.create.md @@ -0,0 +1,87 @@ +# summary + +Create an agent scorer definition using an interactive interview or a spec file. + +# description + +Creates an AiAgentScorerDefinition metadata XML file either interactively (prompting for each field) or from a YAML spec file. + +Run with no flags to start the interactive interview. The command prompts you for the scorer's data type, input scope, engine type, output values, and agent associations. + +Alternatively, provide a --spec flag pointing to a YAML file that defines the scorer. This is useful for repeatable automation or when the scorer has many output values. + +Use --preview to see the generated XML without writing it to disk. + +# flags.api-name.summary + +API name of the scorer definition. + +# flags.agent-api-name.summary + +API name of the agent to associate with this scorer. + +# flags.data-type.summary + +Data type produced by the scorer (Text, Number, or OpenEnded). + +# flags.label.summary + +Display label for the scorer version. + +# flags.description.summary + +Description of what this scorer evaluates. + +# flags.engine-type.summary + +Engine type for scoring (Manual or PromptTemplate). + +# flags.status.summary + +Initial status of the scorer version (Available or Draft). + +# flags.spec.summary + +Path to a scorer spec YAML file. Bypasses interactive prompts. + +# flags.spec-schema.summary + +Output the JSON Schema for the --spec YAML file and exit. + +# flags.output-dir.summary + +Output directory for the generated metadata XML files (scorer definition and prompt template). + +# flags.preview.summary + +Preview the generated XML without writing to disk. 
+ +# examples + +- Show the JSON Schema for the spec YAML file: + + <%= config.bin %> <%= command.id %> --spec-schema + +- Create a scorer interactively: + + <%= config.bin %> <%= command.id %> + +- Create a scorer from a spec file: + + <%= config.bin %> <%= command.id %> --spec specs/expert-analysis-scorer.yaml + +- Preview the XML that would be generated: + + <%= config.bin %> <%= command.id %> --spec specs/expert-analysis-scorer.yaml --preview + +- Create a manual scorer with flags (non-interactive): + + <%= config.bin %> <%= command.id %> --api-name Expert_Analysis --data-type Text --engine-type Manual --label Expert_Analysis --agent-api-name My_Agent --status Available + +- Create a prompt-based scorer (generates both scorer definition and prompt template): + + <%= config.bin %> <%= command.id %> --api-name sentiment_analysis --data-type Text --engine-type PromptTemplate --label sentiment_analysis --agent-api-name My_Agent + +# error.missingRequiredFlags + +Missing required flags: %s. When using --json, all required flags must be provided. 
diff --git a/package.json b/package.json index 17160dc7..9ec907eb 100644 --- a/package.json +++ b/package.json @@ -95,6 +95,10 @@ "description": "Command to validate an Agent Script file.", "external": true }, + "scorer": { + "description": "Commands to create and manage agent scorers.", + "external": true + }, "adl": { "description": "Commands to manage Agentforce Data Libraries.", "external": true, diff --git a/schemas/agent-scorer-create__spec.json b/schemas/agent-scorer-create__spec.json new file mode 100644 index 00000000..37ae4d5e --- /dev/null +++ b/schemas/agent-scorer-create__spec.json @@ -0,0 +1,189 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$ref": "#/definitions/ScorerSpecFile", + "definitions": { + "ScorerSpecFile": { + "type": "object", + "description": "YAML spec file for creating an agent scorer definition via `sf agent scorer create --spec `.", + "properties": { + "apiName": { + "type": "string", + "description": "API name of the scorer definition. Max 35 characters, must start with a letter, only alphanumerics and underscores.", + "pattern": "^[A-Za-z][A-Za-z0-9_]{0,34}$", + "maxLength": 35 + }, + "dataType": { + "type": "string", + "enum": ["Text", "Number", "LightningType"], + "description": "Data type produced by the scorer. Use 'Text' for categorical labels, 'Number' for numeric scales, 'LightningType' for open-ended evaluations." + }, + "scorerType": { + "type": "string", + "enum": ["Predefined", "OpenEnded"], + "description": "Set to 'OpenEnded' when dataType is 'LightningType' for free-form evaluation." 
+ }, + "lightningType": { + "type": "string", + "enum": [ + "lightning__textType", + "lightning__multilineTextType", + "lightning__richTextType", + "lightning__numberType", + "lightning__integerType", + "lightning__booleanType", + "lightning__dateType", + "lightning__dateTimeType", + "lightning__dateTimeStringType", + "lightning__urlType", + "lightning__objectType", + "lightning__listType" + ], + "description": "Required when dataType is 'LightningType'. Specifies the lightning type for open-ended values." + }, + "semanticType": { + "type": "string", + "enum": ["Dimension", "Measurement"], + "description": "How this scorer is used in analytics. 'Dimension' for categorical grouping, 'Measurement' for numeric aggregation." + }, + "inputScope": { + "type": "string", + "enum": ["Session", "Intent"], + "default": "Session", + "description": "Whether the scorer evaluates an entire session or a single intent within a session." + }, + "label": { + "type": "string", + "description": "Display label for the scorer version.", + "minLength": 1 + }, + "description": { + "type": "string", + "description": "Human-readable description of what this scorer evaluates." + }, + "engineType": { + "type": "string", + "enum": ["Manual", "PromptTemplate"], + "description": "'Manual' for human-evaluated scoring, 'PromptTemplate' for LLM-evaluated scoring." + }, + "promptContent": { + "type": "string", + "description": "Prompt text for PromptTemplate engine type. Use {!$Input:Session} to reference the session data, {!$Input:AllowedLabels} for allowed output values, and {!$Input:FallbackLabel} for the fallback value. Ignored when engineType is 'Manual'." + }, + "promptTemplateName": { + "type": "string", + "description": "API name of an existing prompt template to use instead of generating a new one. Mutually exclusive with promptContent." + }, + "status": { + "type": "string", + "enum": ["Available", "Draft"], + "default": "Draft", + "description": "Initial status of the scorer version." 
+ }, + "agentAssociation": { + "$ref": "#/definitions/AgentAssociation" + }, + "outputEnumValues": { + "type": "array", + "description": "Output value definitions. Required for 'Text' dataType. For 'Text' scorers, exactly one value must have isFallback: true.", + "items": { + "$ref": "#/definitions/OutputEnumValue" + } + }, + "specification": { + "$ref": "#/definitions/NumberSpecification", + "description": "Required when dataType is 'Number'. Defines the numeric scale." + } + }, + "required": ["apiName", "dataType", "label", "engineType", "agentAssociation"], + "additionalProperties": false + }, + "AgentAssociation": { + "type": "object", + "description": "Associates the scorer with an agent in the org.", + "properties": { + "agentApiName": { + "type": "string", + "description": "API name of the agent to associate with this scorer." + }, + "isActive": { + "type": "boolean", + "description": "Whether scoring is active for this agent association." + }, + "samplingRate": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1.0, + "description": "Fraction of sessions to score (0.0 to 1.0). Only relevant when isActive is true." + }, + "inputScope": { + "type": "string", + "enum": ["Session", "Intent"], + "description": "Override input scope for this specific agent association." + } + }, + "required": ["agentApiName", "isActive"], + "additionalProperties": false + }, + "OutputEnumValue": { + "type": "object", + "description": "A possible output value for the scorer.", + "properties": { + "value": { + "type": "string", + "description": "The output label (e.g., 'Good', 'Bad', 'N/A').", + "minLength": 1 + }, + "outcomeType": { + "type": "string", + "enum": ["Pass", "Fail", "NotApplicable"], + "description": "Maps this value to a pass/fail outcome for reporting." + }, + "isFallback": { + "type": "boolean", + "default": false, + "description": "Whether this is the fallback value. Exactly one value must be the fallback for Text scorers." 
+ }, + "isSystemFallback": { + "type": "boolean", + "default": false, + "description": "Whether this is a system-generated fallback. Typically false for user-defined scorers." + } + }, + "required": ["value", "outcomeType"], + "additionalProperties": false + }, + "NumberSpecification": { + "type": "object", + "properties": { + "valueSpecification": { + "type": "object", + "description": "Defines the numeric scale. The number of generated values ((max - min) / step + 1) must not exceed 101.", + "properties": { + "min": { + "type": "number", + "description": "Minimum value of the scale." + }, + "max": { + "type": "number", + "description": "Maximum value of the scale. Must be greater than min." + }, + "step": { + "type": "number", + "exclusiveMinimum": 0, + "description": "Step size between values." + }, + "threshold": { + "type": "number", + "description": "Optional threshold value (must be between min and max)." + } + }, + "required": ["min", "max", "step"], + "additionalProperties": false + } + }, + "required": ["valueSpecification"], + "additionalProperties": false + } + } +} diff --git a/src/commands/agent/scorer/create.ts b/src/commands/agent/scorer/create.ts new file mode 100644 index 00000000..70812e94 --- /dev/null +++ b/src/commands/agent/scorer/create.ts @@ -0,0 +1,785 @@ +/* + * Copyright 2026, Salesforce, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import { join, resolve, dirname } from 'node:path'; +import { mkdirSync, readFileSync, writeFileSync, existsSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { SfCommand, Flags, toHelpSection } from '@salesforce/sf-plugins-core'; +import { Messages, EnvironmentVariable } from '@salesforce/core'; +import { Agent } from '@salesforce/agents'; +import { XMLBuilder } from 'fast-xml-parser'; +import { confirm, select, input as inquirerInput } from '@inquirer/prompts'; +import YAML from 'yaml'; +import { FlaggablePrompt, makeFlags, promptForFlag } from '../../../flags.js'; +import { theme } from '../../../inquirer-theme.js'; + +Messages.importMessagesDirectoryFromMetaUrl(import.meta.url); +const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.scorer.create'); + +export type AgentScorerCreateResult = { + path: string; + apiName: string; + contents: string; + promptTemplatePath?: string; +}; + +export type ScorerSpecFile = { + apiName: string; + dataType: 'Text' | 'Number' | 'LightningType'; + scorerType?: 'Predefined' | 'OpenEnded'; + lightningType?: string; + semanticType?: 'Dimension' | 'Measurement'; + inputScope?: 'Session' | 'Intent'; + label: string; + description?: string; + engineType: 'Manual' | 'PromptTemplate'; + promptContent?: string; + promptTemplateName?: string; + status?: 'Available' | 'Draft'; + agentAssociation: { + agentApiName: string; + isActive: boolean; + samplingRate?: number; + inputScope?: 'Session' | 'Intent'; + }; + outputEnumValues?: Array<{ + value: string; + outcomeType: 'Pass' | 'Fail' | 'NotApplicable'; + isFallback?: boolean; + isSystemFallback?: boolean; + }>; + specification?: { + valueSpecification: { + min: number; + max: number; + step: number; + threshold?: number; + }; + }; +}; + +const MAX_ENUM_VALUES = 101; + +const SUPPORTED_LIGHTNING_TYPES = [ + 'lightning__textType', + 'lightning__multilineTextType', + 'lightning__richTextType', + 'lightning__numberType', + 
// Allowed answers for the select-style prompts, shared by `options` and `validate`.
const DATA_TYPE_OPTIONS = ['Text', 'Number', 'OpenEnded'];
const ENGINE_TYPE_OPTIONS = ['Manual', 'PromptTemplate'];
const STATUS_OPTIONS = ['Draft', 'Available'];

/** Builds a validator that accepts only members of `allowed` and otherwise yields `error`. */
const oneOf =
  (allowed: string[], error: string) =>
  (d: string): boolean | string =>
    allowed.includes(d) || error;

/** Accepts 1-35 characters, starting with a letter, containing only alphanumerics and underscores. */
function validateScorerApiName(d: string): boolean | string {
  if (d.length === 0) return 'API name cannot be empty';
  if (d.length > 35) return 'API name cannot exceed 35 characters';
  return /^[A-Za-z][A-Za-z0-9_]*$/.test(d) ? true : 'Must start with letter, only alphanumerics and underscores';
}

/**
 * Prompt definitions that double as command flags (expanded via `makeFlags`).
 * Entries marked `required` are the ones the JSON-mode check in the interview insists on.
 */
const FLAGGABLE_PROMPTS = {
  label: {
    message: messages.getMessage('flags.label.summary'),
    promptMessage: 'Scorer label (display name)',
    validate: (d: string): boolean | string => (d.length > 0 ? true : 'Label cannot be empty'),
    required: true,
  },
  'api-name': {
    message: messages.getMessage('flags.api-name.summary'),
    promptMessage: 'Scorer API name',
    validate: validateScorerApiName,
    required: true,
  },
  'data-type': {
    message: messages.getMessage('flags.data-type.summary'),
    promptMessage: 'What data type does this scorer produce?',
    options: DATA_TYPE_OPTIONS,
    validate: oneOf(DATA_TYPE_OPTIONS, 'Invalid data type'),
    required: true,
  },
  description: {
    message: messages.getMessage('flags.description.summary'),
    promptMessage: 'Description (optional, press Enter to skip)',
    // Free-form and optional: anything, including an empty answer, is accepted.
    validate: (): boolean | string => true,
  },
  'engine-type': {
    message: messages.getMessage('flags.engine-type.summary'),
    promptMessage: 'Scoring engine type',
    options: ENGINE_TYPE_OPTIONS,
    validate: oneOf(ENGINE_TYPE_OPTIONS, 'Invalid engine type'),
    required: true,
  },
  status: {
    message: messages.getMessage('flags.status.summary'),
    promptMessage: 'Initial status',
    options: STATUS_OPTIONS,
    validate: oneOf(STATUS_OPTIONS, 'Invalid status'),
    default: 'Draft',
  },
} satisfies Record<string, FlaggablePrompt>;
/**
 * Interactively collects output values, one per round, until the user declines to add another.
 * At least one value is always collected because the first round runs unconditionally.
 *
 * @returns The collected values in the order they were entered.
 */
async function promptForOutputEnumValues(): Promise<OutputEnumValue[]> {
  const collected: OutputEnumValue[] = [];
  let keepGoing: boolean;

  do {
    // Rounds are inherently sequential: each prompt depends on the previous answer.
    // eslint-disable-next-line no-await-in-loop
    const { value, outcomeType, isFallback, isSystemFallback, addMore } = await promptForSingleEnumValue(
      collected.length
    );
    collected.push({ value, outcomeType, isFallback, isSystemFallback });
    keepGoing = addMore;
  } while (keepGoing);

  return collected;
}
/**
 * Expands a numeric scale into the discrete output values a Number scorer can produce.
 *
 * Each value is computed as `min + i * step` (rather than by repeated addition) and rounded to
 * nine decimal places so that binary floating-point noise such as `0.30000000000000004` is
 * rendered as `0.3`.
 *
 * @param spec Scale definition. `threshold` is not consulted here.
 * @param maxValues Largest number of values the scale may expand to; defaults to `MAX_ENUM_VALUES`.
 * @returns One entry per scale point, each tagged `NotApplicable` and never a fallback.
 *          Empty when `max` is below `min`.
 * @throws Error when `min`, `max` or `step` is not a finite number, when `step` is not positive
 *         (the scale would never terminate), or when the scale has more than `maxValues` points.
 */
function generateNumberEnumValues(
  spec: NumberSpecification,
  maxValues: number = MAX_ENUM_VALUES
): OutputEnumValue[] {
  const { min, max, step } = spec;

  if (![min, max, step].every((n) => Number.isFinite(n))) {
    throw new Error(`Number scale requires finite min, max and step (got min=${min}, max=${max}, step=${step}).`);
  }
  if (step <= 0) {
    throw new Error(`Number scale step must be greater than 0 (got ${step}).`);
  }

  // The small tolerance keeps an end point such as 0.3 / 0.1 = 2.9999999999999996 on the scale.
  const count = Math.max(0, Math.floor((max - min) / step + 1e-9) + 1);
  if (count > maxValues) {
    throw new Error(`Number scale would generate ${count} values (max ${maxValues}).`);
  }

  return Array.from({ length: count }, (_, i) => ({
    value: String(Math.round((min + i * step) * 1e9) / 1e9),
    outcomeType: 'NotApplicable',
    isFallback: false,
    isSystemFallback: false,
  }));
}
buildScorerXml(spec: ScorerSpecFile): string { + const engine: Record = {}; + if (spec.engineType === 'PromptTemplate') { + engine.engineRef = spec.promptTemplateName ?? spec.apiName; + } + engine.engineType = spec.engineType; + + const agentAssociationXml: Record = { + agentApiName: spec.agentAssociation.agentApiName, + ...(spec.agentAssociation.inputScope ? { inputScope: spec.agentAssociation.inputScope } : {}), + isActive: spec.agentAssociation.isActive, + samplingRate: spec.agentAssociation.samplingRate ?? 1.0, + }; + + const scorerVersion: Record = { + agentAssociation: agentAssociationXml, + ...(spec.description ? { description: spec.description } : {}), + engine, + label: spec.label, + }; + + // For Number type with specification, generate enum values from spec + if (spec.dataType === 'Number' && spec.specification) { + const numSpec = spec.specification.valueSpecification; + const enumValues = generateNumberEnumValues(numSpec); + scorerVersion.outputEnumValue = enumValues.map((v) => ({ + isFallback: false, + isSystemFallback: false, + outcomeType: v.outcomeType, + value: v.value, + })); + scorerVersion.specification = { + valueSpecification: { + min: numSpec.min, + max: numSpec.max, + step: numSpec.step, + ...(numSpec.threshold != null ? { threshold: numSpec.threshold } : {}), + }, + }; + } else if (spec.outputEnumValues) { + scorerVersion.outputEnumValue = spec.outputEnumValues.map((v) => ({ + isFallback: v.isFallback ?? false, + isSystemFallback: v.isSystemFallback ?? false, + outcomeType: v.outcomeType, + value: v.value, + })); + } + + scorerVersion.status = spec.status ?? 'Draft'; + scorerVersion.versionNumber = 1; + + const definition: Record = { + '@_xmlns': 'http://soap.sforce.com/2006/04/metadata', + dataType: spec.dataType, + inputScope: spec.inputScope ?? 
/**
 * Picks the prompt template type that matches a scorer.
 *
 * Open-ended scorers take precedence; otherwise a `Measurement` semantic type selects the
 * measurement template and everything else falls back to the multilabel template.
 *
 * @param spec Scorer definition; only `scorerType` and `semanticType` are read.
 * @returns The fully qualified template type name.
 */
function getPromptTemplateType(spec: ScorerSpecFile): string {
  const { scorerType, semanticType } = spec;

  if (scorerType !== 'OpenEnded') {
    return semanticType === 'Measurement'
      ? 'agentforce_session_tracing__scorerMeasurement'
      : 'agentforce_session_tracing__scorerMultilabel';
  }

  return 'agentforce_session_tracing__scorerOpenEnded';
}
/**
 * Derives a suggested API name from a display label.
 *
 * The result is meant to pass the API-name validation used by this command: it starts with a
 * letter, contains only alphanumerics and underscores, and is at most `maxLength` characters.
 * Runs of underscores are collapsed and a trailing underscore is dropped so that padded or
 * punctuated labels ("  My - Scorer ") still produce a clean name ("My_Scorer").
 *
 * @param label Display label entered by the user.
 * @param maxLength Maximum length of the generated name; defaults to the 35-character limit.
 * @returns The suggested API name, or an empty string when the label contains no usable letters.
 */
function labelToApiName(label: string, maxLength = 35): string {
  return (
    label
      .trim()
      .replace(/\s+/g, '_')
      .replace(/[^A-Za-z0-9_]/g, '')
      // Removing punctuation can leave adjacent underscores behind ("a - b" -> "a__b").
      .replace(/_{2,}/g, '_')
      // An API name must start with a letter, so drop any leading digits/underscores.
      .replace(/^[^A-Za-z]+/, '')
      .slice(0, Math.max(0, maxLength))
      // Truncation may have cut right after a separator.
      .replace(/_+$/, '')
  );
}
/**
 * Reads a scorer spec from a YAML file and checks that it has the basic shape the rest of the
 * command relies on.
 *
 * Only structural prerequisites are verified here (top-level mapping, the keys the spec schema
 * lists as required, and an `agentAssociation` mapping); value-level rules are enforced later.
 *
 * @param specPath Path to the YAML spec file, resolved against the current working directory.
 * @returns The parsed spec.
 * @throws Error when the file is empty or not a mapping, or when a required key is missing.
 */
private loadSpecFromFile(specPath: string): ScorerSpecFile {
  const parsed: unknown = YAML.parse(readFileSync(resolve(specPath), 'utf8'));
  this.log(`Reading scorer spec from ${specPath}`);

  // An empty document parses to null and a scalar document to a primitive; neither is a spec.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`Scorer spec ${specPath} must be a YAML mapping of scorer properties.`);
  }

  const candidate = parsed as Record<string, unknown>;
  const requiredKeys = ['apiName', 'dataType', 'label', 'engineType', 'agentAssociation'];
  const missing = requiredKeys.filter((key) => candidate[key] == null);
  if (missing.length > 0) {
    throw new Error(`Scorer spec ${specPath} is missing required properties: ${missing.join(', ')}.`);
  }

  const association = candidate.agentAssociation;
  if (typeof association !== 'object' || Array.isArray(association)) {
    throw new Error(`Scorer spec ${specPath}: agentAssociation must be a mapping.`);
  }

  return parsed as ScorerSpecFile;
}
/**
 * Renders the scorer definition (and, when needed, a generated prompt template) and either
 * previews the XML or writes it below the output directory.
 *
 * A prompt template is generated only for the `PromptTemplate` engine when the spec does not
 * name an existing template. In preview mode nothing is written. When the scorer file already
 * exists and JSON output is not enabled, the user is asked before it is overwritten.
 *
 * @param spec Scorer definition collected from the spec file or the interview.
 * @param flags Parsed command flags; `output-dir` and `preview` are read.
 * @returns Paths and contents of what was (or would be) generated; `path` and `contents` are
 *          empty when the user declines to overwrite an existing file.
 * @throws Error when `apiName` is not a valid API name, or when a Text scorer does not have
 *         exactly one fallback output value.
 */
private async generateOutput(
  spec: ScorerSpecFile,
  flags: Record<string, unknown>
): Promise<AgentScorerCreateResult> {
  // apiName becomes part of the output file names, so reject anything that is not a plain
  // identifier (a spec file could otherwise smuggle in path separators).
  if (typeof spec.apiName !== 'string' || !/^[A-Za-z][A-Za-z0-9_]{0,34}$/.test(spec.apiName)) {
    throw new Error(
      `Invalid scorer API name "${String(spec.apiName)}": it must start with a letter, contain only alphanumerics and underscores, and be at most 35 characters.`
    );
  }

  if (spec.dataType === 'Text') {
    // A missing list counts as "no fallback" so that a Text spec without output values is rejected too.
    const fallbackCount = (spec.outputEnumValues ?? []).filter((v) => v.isFallback).length;
    if (fallbackCount !== 1) {
      throw new Error(`Text scorers must have exactly 1 fallback value, but found ${fallbackCount}.`);
    }
  }

  const scorerXml = buildScorerXml(spec);
  const outputDir = resolve(flags['output-dir'] as string);
  const scorerDir = join(outputDir, 'aiAgentScorerDefinitions');
  const scorerFileName = `${spec.apiName}.aiAgentScorerDefinition-meta.xml`;
  const scorerPath = join(scorerDir, scorerFileName);

  let promptTemplatePath: string | undefined;
  let promptTemplateXml: string | undefined;
  if (spec.engineType === 'PromptTemplate' && !spec.promptTemplateName) {
    const content = spec.promptContent ?? buildDefaultPromptContent(spec);
    promptTemplateXml = buildPromptTemplateXml(spec.apiName, content, spec);
    promptTemplatePath = join(outputDir, 'genAiPromptTemplates', `${spec.apiName}.genAiPromptTemplate-meta.xml`);
  }

  if (flags.preview) {
    this.log('\n--- Scorer Definition (preview) ---\n');
    this.log(scorerXml);
    if (promptTemplateXml) {
      this.log('\n--- Prompt Template (preview) ---\n');
      this.log(promptTemplateXml);
    }
    return { path: scorerPath, apiName: spec.apiName, contents: scorerXml, promptTemplatePath };
  }

  mkdirSync(scorerDir, { recursive: true });
  // With JSON output enabled there is nobody to ask, so an existing file is overwritten.
  if (existsSync(scorerPath) && !this.jsonEnabled()) {
    const overwrite = await confirm({
      message: `${scorerFileName} already exists. Overwrite?`,
      default: false,
      theme,
    });
    if (!overwrite) {
      this.log('Operation canceled.');
      return { path: '', apiName: spec.apiName, contents: '' };
    }
  }
  writeFileSync(scorerPath, scorerXml);
  this.log(`\nScorer definition written to: ${scorerPath}`);

  if (promptTemplateXml && promptTemplatePath) {
    mkdirSync(dirname(promptTemplatePath), { recursive: true });
    writeFileSync(promptTemplatePath, promptTemplateXml);
    this.log(`Prompt template written to: ${promptTemplatePath}`);
  }

  return { path: scorerPath, apiName: spec.apiName, contents: scorerXml, promptTemplatePath };
}
/**
 * Asks how a prompt-template scorer gets its template.
 *
 * Nothing is asked for other engine types. Choosing to generate a template yields no settings
 * here; choosing an existing template asks for its API name.
 *
 * @param engineType Engine type selected for the scorer.
 * @returns `{ promptTemplateName }` when an existing template was chosen, otherwise an empty object.
 */
private async promptForEngineConfig(
  engineType: string
): Promise<{ promptContent?: string; promptTemplateName?: string }> {
  if (engineType === 'PromptTemplate') {
    this.log();
    this.styledHeader('Prompt Template');

    const templateSource = await select({
      message: 'Prompt template source',
      choices: [
        { name: 'Generate a new default prompt template', value: 'generate' },
        { name: 'Use an existing prompt template', value: 'existing' },
      ],
      theme,
    });

    if (templateSource === 'existing') {
      const requireName = (d: string): boolean | string => (d.length > 0 ? true : 'Name cannot be empty');
      const promptTemplateName = await inquirerInput({
        message: 'Existing prompt template API name',
        validate: requireName,
        theme,
      });
      return { promptTemplateName };
    }
  }

  return {};
}
association', + choices: [ + { name: 'Session', value: 'Session' }, + { name: 'Intent', value: 'Intent' }, + ], + default: 'Session', + theme, + }); + agentAssociation.inputScope = associationInputScope as 'Session' | 'Intent'; + + if (engineType === 'PromptTemplate') { + const isActive = await confirm({ + message: 'Activate scoring for this agent?', + default: false, + theme, + }); + agentAssociation.isActive = isActive; + + if (isActive) { + const samplingRateStr = await promptForFlag({ + message: 'Sampling rate (0.0 - 1.0)', + promptMessage: 'Sampling rate (0.0 to 1.0, where 1.0 = score every session)', + validate: (d: string): boolean | string => { + const n = parseFloat(d); + if (isNaN(n) || n < 0 || n > 1) return 'Must be between 0.0 and 1.0'; + return true; + }, + default: '1.0', + }); + agentAssociation.samplingRate = parseFloat(samplingRateStr); + } + } + + return agentAssociation; + } + +} + +function buildDefaultPromptContent(spec: Partial): string { + if (spec.scorerType === 'OpenEnded') { + return [ + 'Analyze the following agent-user conversation and provide your evaluation.', + '', + 'Your response must conform to the expected data type.', + '', + 'session audit data:', + '{!$Input:Session}', + ].join('\n'); + } + + return [ + 'Analyze the following agent-user conversation and evaluate it based on your scoring criteria.', + '', + 'Respond with ONLY one of the allowed values: {!$Input:AllowedLabels}', + 'or fallback to: {!$Input:FallbackLabel}', + '', + 'session audit data:', + '{!$Input:Session}', + ].join('\n'); +} diff --git a/test/commands/agent/scorer/create.test.ts b/test/commands/agent/scorer/create.test.ts new file mode 100644 index 00000000..357659b3 --- /dev/null +++ b/test/commands/agent/scorer/create.test.ts @@ -0,0 +1,1062 @@ +/* + * Copyright 2026, Salesforce, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call, @typescript-eslint/no-explicit-any */ + +import { join } from 'node:path'; +import { mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { expect } from 'chai'; +import esmock from 'esmock'; +import sinon from 'sinon'; +import YAML from 'yaml'; +import { TestContext, MockTestOrgData } from '@salesforce/core/testSetup'; +import { stubSfCommandUx } from '@salesforce/sf-plugins-core'; +import type { ScorerSpecFile } from '../../../../src/commands/agent/scorer/create.js'; + +function makeTextSpec(overrides: Partial = {}): ScorerSpecFile { + return { + apiName: 'Test_Scorer', + dataType: 'Text', + inputScope: 'Session', + label: 'Test Scorer', + description: 'A test scorer', + engineType: 'Manual', + status: 'Draft', + agentAssociation: { + agentApiName: 'My_Agent', + isActive: false, + }, + outputEnumValues: [ + { value: 'Positive', outcomeType: 'Pass', isFallback: false, isSystemFallback: false }, + { value: 'Negative', outcomeType: 'Fail', isFallback: false, isSystemFallback: false }, + { value: 'Neutral', outcomeType: 'NotApplicable', isFallback: true, isSystemFallback: false }, + ], + ...overrides, + }; +} + +function makeNumberSpec(overrides: Partial = {}): ScorerSpecFile { + return { + apiName: 'Numeric_Scorer', + dataType: 'Number', + inputScope: 'Session', + label: 'Numeric Scorer', + engineType: 'Manual', + status: 'Available', + agentAssociation: { + agentApiName: 'My_Agent', + isActive: 
false, + }, + specification: { + valueSpecification: { + min: 0, + max: 5, + step: 1, + }, + }, + ...overrides, + }; +} + +function makeOpenSpec(overrides: Partial = {}): ScorerSpecFile { + return { + apiName: 'Open_Scorer', + dataType: 'LightningType', + scorerType: 'OpenEnded', + lightningType: 'lightning__textType', + inputScope: 'Session', + label: 'Open Scorer', + engineType: 'PromptTemplate', + status: 'Draft', + agentAssociation: { + agentApiName: 'My_Agent', + isActive: true, + samplingRate: 0.5, + inputScope: 'Intent', + }, + ...overrides, + }; +} + +function makePromptTemplateSpec(overrides: Partial = {}): ScorerSpecFile { + return { + apiName: 'Prompt_Scorer', + dataType: 'Text', + inputScope: 'Session', + label: 'Prompt Scorer', + engineType: 'PromptTemplate', + status: 'Draft', + promptContent: 'Evaluate this session.\n\n{!$Input:Session}', + agentAssociation: { + agentApiName: 'My_Agent', + isActive: true, + samplingRate: 1.0, + }, + outputEnumValues: [ + { value: 'Pass', outcomeType: 'Pass', isFallback: false, isSystemFallback: false }, + { value: 'Fail', outcomeType: 'Fail', isFallback: true, isSystemFallback: false }, + ], + ...overrides, + }; +} + +type WrittenFile = { path: string; content: string }; + +async function loadMockedCommand( + yamlSpec: ScorerSpecFile, + opts?: { existsSync?: () => boolean; confirmResult?: boolean } +): Promise<{ Command: any; writtenFiles: WrittenFile[]; createdDirs: string[] }> { + const yamlContent = YAML.stringify(yamlSpec); + const writtenFiles: WrittenFile[] = []; + const createdDirs: string[] = []; + const fileExists = opts?.existsSync ?? 
(() => false); + + const fsMock: Record = { + readFileSync: () => yamlContent, + writeFileSync: (path: string, content: string) => { + writtenFiles.push({ path, content }); + }, + mkdirSync: (path: string) => { + createdDirs.push(path); + }, + existsSync: fileExists, + }; + + const mocks: Record = { 'node:fs': fsMock }; + + if (opts?.confirmResult !== undefined) { + mocks['@inquirer/prompts'] = { + confirm: sinon.stub().resolves(opts.confirmResult), + select: sinon.stub().resolves('Text'), + input: sinon.stub().resolves(''), + }; + } + + const mod = await esmock('../../../../src/commands/agent/scorer/create.js', mocks); + return { Command: mod.default, writtenFiles, createdDirs }; +} + +describe('agent scorer create', () => { + const $$ = new TestContext(); + let testOrg: MockTestOrgData; + + before(async function () { + // Warm up esmock to check it can load the module + try { + await esmock('../../../../src/commands/agent/scorer/create.js', { + 'node:fs': { + readFileSync: () => '', + writeFileSync: () => {}, + mkdirSync: () => {}, + existsSync: () => false, + }, + }); + } catch (e: any) { + console.error('esmock warmup failed:', e.message); + this.skip(); + } + }); + + beforeEach(async () => { + stubSfCommandUx($$.SANDBOX); + testOrg = new MockTestOrgData(); + await $$.stubAuths(testOrg); + }); + + afterEach(() => { + $$.restore(); + }); + + describe('--spec flag (YAML-driven) with --preview', () => { + it('should create a Text scorer from a YAML spec', async () => { + const { Command } = await loadMockedCommand(makeTextSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.apiName).to.equal('Test_Scorer'); + expect(result.contents).to.include('AiAgentScorerDefinition'); + expect(result.contents).to.include('Text'); + expect(result.contents).to.include('Session'); + expect(result.contents).to.include('Manual'); + 
expect(result.contents).to.include('Draft'); + expect(result.contents).to.include('My_Agent'); + expect(result.contents).to.include('Positive'); + expect(result.contents).to.include('Negative'); + expect(result.contents).to.include('Neutral'); + }); + + it('should create a Number scorer with specification', async () => { + const { Command } = await loadMockedCommand(makeNumberSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'numeric-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.apiName).to.equal('Numeric_Scorer'); + expect(result.contents).to.include('Number'); + expect(result.contents).to.include('0'); + expect(result.contents).to.include('5'); + expect(result.contents).to.include('1'); + expect(result.contents).to.include('0'); + expect(result.contents).to.include('5'); + expect(result.contents).to.include('Available'); + }); + + it('should create a Number scorer with threshold', async () => { + const spec = makeNumberSpec({ + specification: { valueSpecification: { min: 1, max: 10, step: 1, threshold: 7 } }, + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'threshold-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('7'); + expect(result.contents).to.include('1'); + expect(result.contents).to.include('10'); + }); + + it('should create an OpenEnded (LightningType) scorer', async () => { + const { Command } = await loadMockedCommand(makeOpenSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'open-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.apiName).to.equal('Open_Scorer'); + expect(result.contents).to.include('LightningType'); + expect(result.contents).to.include('lightning__textType'); + expect(result.contents).to.include('OpenEnded'); + expect(result.contents).to.include('Session'); + }); + + it('should 
include inputScope in agent association when specified', async () => { + const { Command } = await loadMockedCommand(makeOpenSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'open-scorer.yaml', + '--preview', + '--json', + ]); + + const agentAssocBlock = result.contents.substring( + result.contents.indexOf(''), + result.contents.indexOf('') + ''.length + ); + expect(agentAssocBlock).to.include('Intent'); + }); + + it('should include outputEnumValues for OpenEnded scorer when provided', async () => { + const spec = makeOpenSpec({ + outputEnumValues: [ + { value: 'GOOD', outcomeType: 'Pass', isFallback: false, isSystemFallback: false }, + { value: 'BAD', outcomeType: 'Fail', isFallback: false, isSystemFallback: false }, + { value: 'N/A', outcomeType: 'NotApplicable', isFallback: true, isSystemFallback: false }, + ], + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'open-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('LightningType'); + expect(result.contents).to.include('OpenEnded'); + expect(result.contents).to.include('GOOD'); + expect(result.contents).to.include('BAD'); + expect(result.contents).to.include('N/A'); + expect(result.contents).to.include('Pass'); + expect(result.contents).to.include('Fail'); + expect(result.contents).to.include('NotApplicable'); + expect(result.contents).to.include('true'); + }); + + it('should not include outputEnumValue for OpenEnded scorer when none provided', async () => { + const { Command } = await loadMockedCommand(makeOpenSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'open-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).not.to.include(''); + expect(result.contents).not.to.include(''); + }); + + it('should generate prompt template path for PromptTemplate engine', async () 
=> { + const { Command } = await loadMockedCommand(makePromptTemplateSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'prompt-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.promptTemplatePath).to.be.a('string'); + expect(result.promptTemplatePath).to.include('genAiPromptTemplates'); + expect(result.promptTemplatePath).to.include('Prompt_Scorer.genAiPromptTemplate-meta.xml'); + expect(result.contents).to.include('Prompt_Scorer'); + expect(result.contents).to.include('PromptTemplate'); + }); + + it('should not generate prompt template for Manual engine', async () => { + const { Command } = await loadMockedCommand(makeTextSpec({ engineType: 'Manual' })); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'manual-scorer.yaml', + '--preview', + '--json', + ]); + + expect(result.promptTemplatePath).to.be.undefined; + expect(result.contents).not.to.include(''); + expect(result.contents).to.include('Manual'); + }); + + it('should use promptTemplateName as engineRef and skip prompt template file generation', async () => { + const spec = makePromptTemplateSpec({ promptTemplateName: 'My_Existing_Template' }); + const { Command, writtenFiles } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + expect(result.contents).to.include('My_Existing_Template'); + expect(result.contents).to.include('PromptTemplate'); + expect(result.promptTemplatePath).to.be.undefined; + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile).to.be.undefined; + expect(writtenFiles).to.have.length(1); + }); + + it('should omit inputScope from agent association XML when not specified', async () => { + const spec = makeTextSpec(); + spec.agentAssociation.inputScope = undefined; + const { Command } = await 
loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + const agentAssocBlock = result.contents.substring( + result.contents.indexOf(''), + result.contents.indexOf('') + ''.length + ); + expect(agentAssocBlock).to.include('My_Agent'); + expect(agentAssocBlock).not.to.include(''); + }); + + it('should include semanticType when set', async () => { + const { Command } = await loadMockedCommand(makeTextSpec({ semanticType: 'Dimension' })); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('Dimension'); + }); + + it('should include description when provided', async () => { + const { Command } = await loadMockedCommand(makeTextSpec({ description: 'Evaluates politeness' })); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('Evaluates politeness'); + }); + + it('should omit description when not provided', async () => { + const { Command } = await loadMockedCommand(makeTextSpec({ description: undefined })); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).not.to.include(''); + }); + + it('should default samplingRate to 1.0', async () => { + const spec = makeTextSpec(); + spec.agentAssociation.samplingRate = undefined; + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('1'); + }); + + it('should use custom samplingRate', async () => { + const spec = makeTextSpec(); + spec.agentAssociation.samplingRate = 0.25; + const { Command } = await 
loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('0.25'); + }); + + it('should set versionNumber to 1', async () => { + const { Command } = await loadMockedCommand(makeTextSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('1'); + }); + }); + + describe('prompt template type selection', () => { + it('should use scorerOpenEnded type for OpenEnded scorerType', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makeOpenSpec()); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include('agentforce_session_tracing__scorerOpenEnded'); + }); + + it('should use scorerMeasurement type for Measurement semanticType', async () => { + const { Command, writtenFiles } = await loadMockedCommand( + makePromptTemplateSpec({ semanticType: 'Measurement' }) + ); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include('agentforce_session_tracing__scorerMeasurement'); + }); + + it('should use scorerMultilabel type for default Text scorers', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makePromptTemplateSpec()); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + 
expect(promptFile!.content).to.include('agentforce_session_tracing__scorerMultilabel'); + }); + }); + + describe('number enum value generation', () => { + it('should generate correct values for integer steps', async () => { + const spec = makeNumberSpec({ + specification: { valueSpecification: { min: 0, max: 3, step: 1 } }, + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('0'); + expect(result.contents).to.include('1'); + expect(result.contents).to.include('2'); + expect(result.contents).to.include('3'); + }); + + it('should generate correct values for decimal steps', async () => { + const spec = makeNumberSpec({ + specification: { valueSpecification: { min: 0, max: 1, step: 0.5 } }, + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('0'); + expect(result.contents).to.include('0.5'); + expect(result.contents).to.include('1'); + }); + + it('should set outcomeType to NotApplicable for number values', async () => { + const spec = makeNumberSpec({ + specification: { valueSpecification: { min: 1, max: 2, step: 1 } }, + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + const matches = result.contents.match(/NotApplicable<\/outcomeType>/g); + expect(matches).to.have.length(2); + }); + + it('should handle large step generating few values', async () => { + const spec = makeNumberSpec({ + specification: { valueSpecification: { min: 0, max: 100, step: 50 } }, + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', 
testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('0'); + expect(result.contents).to.include('50'); + expect(result.contents).to.include('100'); + }); + }); + + describe('XML structure', () => { + it('should include XML declaration and namespace', async () => { + const { Command } = await loadMockedCommand(makeTextSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include(''); + expect(result.contents).to.include('xmlns="http://soap.sforce.com/2006/04/metadata"'); + }); + + it('should include isActive in agent association', async () => { + const spec = makeTextSpec(); + spec.agentAssociation.isActive = true; + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('true'); + }); + + it('should include isFallback and isSystemFallback', async () => { + const spec = makeTextSpec({ + outputEnumValues: [ + { value: 'Good', outcomeType: 'Pass', isFallback: false, isSystemFallback: false }, + { value: 'Bad', outcomeType: 'Fail', isFallback: true, isSystemFallback: false }, + ], + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('false'); + expect(result.contents).to.include('true'); + expect(result.contents).to.include('false'); + }); + + it('should include label in scorerVersion', async () => { + const { Command } = await loadMockedCommand(makeTextSpec({ label: 'My Custom Label' })); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + 
expect(result.contents).to.include(''); + }); + }); + + describe('file writing', () => { + it('should write scorer XML to correct path', async () => { + const { Command, writtenFiles, createdDirs } = await loadMockedCommand(makeTextSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + expect(result.path).to.include('/tmp/out'); + expect(result.path).to.include('aiAgentScorerDefinitions'); + expect(result.path).to.include('Test_Scorer.aiAgentScorerDefinition-meta.xml'); + expect(writtenFiles).to.have.length(1); + expect(writtenFiles[0].content).to.include('AiAgentScorerDefinition'); + expect(createdDirs.some((d) => d.includes('aiAgentScorerDefinitions'))).to.be.true; + }); + + it('should write both scorer and prompt template for PromptTemplate', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makePromptTemplateSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + expect(writtenFiles).to.have.length(2); + const scorerFile = writtenFiles.find((f) => f.path.includes('aiAgentScorerDefinitions')); + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(scorerFile).to.not.be.undefined; + expect(promptFile).to.not.be.undefined; + expect(promptFile!.path).to.include('Prompt_Scorer.genAiPromptTemplate-meta.xml'); + expect(promptFile!.content).to.include('GenAiPromptTemplate'); + expect(result.promptTemplatePath).to.equal(promptFile!.path); + }); + + it('should not write files with --preview', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makeTextSpec()); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(writtenFiles).to.have.length(0); + }); + + it('should use default prompt content when 
promptContent not in spec', async () => { + const spec = makePromptTemplateSpec(); + delete (spec as any).promptContent; + const { Command, writtenFiles } = await loadMockedCommand(spec); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include('{!$Input:Session}'); + expect(promptFile!.content).to.include('{!$Input:AllowedLabels}'); + expect(promptFile!.content).to.include('{!$Input:FallbackLabel}'); + }); + + it('should use OpenEnded default prompt for OpenEnded type', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makeOpenSpec()); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include('{!$Input:Session}'); + expect(promptFile!.content).not.to.include('{!$Input:AllowedLabels}'); + }); + }); + + describe('output directory', () => { + it('should default to force-app/main/default', async () => { + const { Command } = await loadMockedCommand(makeTextSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.path).to.include('force-app/main/default/aiAgentScorerDefinitions'); + }); + + it('should use custom --output-dir', async () => { + const { Command } = await loadMockedCommand(makeTextSpec()); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/custom/path', + '--preview', + '--json', + ]); + + expect(result.path).to.include('/custom/path/aiAgentScorerDefinitions'); + }); + }); + + describe('overwrite behavior', () => { + it('should cancel when user declines 
overwrite', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makeTextSpec(), { + existsSync: () => true, + confirmResult: false, + }); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + ]); + + expect(result.path).to.equal(''); + expect(result.contents).to.equal(''); + expect(writtenFiles).to.have.length(0); + }); + + it('should skip overwrite prompt in --json mode', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makeTextSpec(), { + existsSync: () => true, + }); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + expect(result.path).to.not.equal(''); + expect(writtenFiles).to.have.length(1); + }); + }); + + describe('prompt template XML details', () => { + it('should include developerName and masterLabel matching apiName', async () => { + const { Command, writtenFiles } = await loadMockedCommand( + makePromptTemplateSpec({ apiName: 'My_Prompt_Scorer' }) + ); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include('My_Prompt_Scorer'); + expect(promptFile!.content).to.include('My_Prompt_Scorer'); + }); + + it('should set overridable to false and visibility to Global', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makePromptTemplateSpec()); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include('false'); + expect(promptFile!.content).to.include('Global'); + }); + + it('should set primaryModel and status 
Published', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makePromptTemplateSpec()); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include('sfdc_ai__DefaultOpenAIGPT4OmniMini'); + expect(promptFile!.content).to.include('Published'); + }); + + it('should include Session input with correct definition', async () => { + const { Command, writtenFiles } = await loadMockedCommand(makePromptTemplateSpec()); + + await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--output-dir', '/tmp/out', + '--json', + ]); + + const promptFile = writtenFiles.find((f) => f.path.includes('genAiPromptTemplates')); + expect(promptFile!.content).to.include( + 'lightningtype://propertyType/agentforce_session_tracing__stdmDetailViewType' + ); + expect(promptFile!.content).to.include('Input:Session'); + }); + }); + + describe('--json mode error handling', () => { + it('should throw when required flags are missing', async () => { + const { Command } = await loadMockedCommand(makeTextSpec()); + + try { + await Command.run(['--target-org', testOrg.username, '--json']); + expect.fail('should have thrown'); + } catch (err: unknown) { + const error = err as { message: string }; + expect(error.message).to.include('Missing required flags'); + } + }); + + it('should list all missing required flags', async () => { + const { Command } = await loadMockedCommand(makeTextSpec()); + + try { + await Command.run(['--target-org', testOrg.username, '--label', 'Foo', '--json']); + expect.fail('should have thrown'); + } catch (err: unknown) { + const error = err as { message: string }; + expect(error.message).to.include('api-name'); + expect(error.message).to.include('data-type'); + expect(error.message).to.include('engine-type'); + 
expect(error.message).to.include('agent-api-name'); + } + }); + }); + + describe('Text scorer fallback validation', () => { + let tmpDir: string; + let specFile: string; + + beforeEach(() => { + tmpDir = join(process.cwd(), 'tmp-test-fallback-' + Date.now()); + mkdirSync(tmpDir, { recursive: true }); + specFile = join(tmpDir, 'scorer.yaml'); + }); + + afterEach(() => { + rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('should throw when Text scorer has no fallback value', async () => { + const spec = makeTextSpec({ + outputEnumValues: [ + { value: 'Good', outcomeType: 'Pass', isFallback: false, isSystemFallback: false }, + { value: 'Bad', outcomeType: 'Fail', isFallback: false, isSystemFallback: false }, + ], + }); + writeFileSync(specFile, YAML.stringify(spec)); + const { Command } = await loadMockedCommand(spec); + + try { + await Command.run([ + '--target-org', testOrg.username, + '--spec', specFile, + '--preview', + '--json', + ]); + expect.fail('should have thrown'); + } catch (err: unknown) { + const error = err as { message: string }; + expect(error.message).to.include('exactly 1 fallback value'); + expect(error.message).to.include('found 0'); + } + }); + + it('should throw when Text scorer has multiple fallback values', async () => { + const spec = makeTextSpec({ + outputEnumValues: [ + { value: 'Good', outcomeType: 'Pass', isFallback: true, isSystemFallback: false }, + { value: 'Bad', outcomeType: 'Fail', isFallback: true, isSystemFallback: false }, + ], + }); + writeFileSync(specFile, YAML.stringify(spec)); + const { Command } = await loadMockedCommand(spec); + + try { + await Command.run([ + '--target-org', testOrg.username, + '--spec', specFile, + '--preview', + '--json', + ]); + expect.fail('should have thrown'); + } catch (err: unknown) { + const error = err as { message: string }; + expect(error.message).to.include('exactly 1 fallback value'); + expect(error.message).to.include('found 2'); + } + }); + + it('should pass when Text scorer 
has exactly 1 fallback value', async () => { + const spec = makeTextSpec({ + outputEnumValues: [ + { value: 'Good', outcomeType: 'Pass', isFallback: false, isSystemFallback: false }, + { value: 'Bad', outcomeType: 'Fail', isFallback: false, isSystemFallback: false }, + { value: 'N/A', outcomeType: 'NotApplicable', isFallback: true, isSystemFallback: false }, + ], + }); + writeFileSync(specFile, YAML.stringify(spec)); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', specFile, + '--preview', + '--json', + ]); + + expect(result.apiName).to.equal('Test_Scorer'); + expect(result.contents).to.include('N/A'); + }); + }); + + describe('edge cases', () => { + it('should handle LightningType with no outputEnumValues', async () => { + const spec: ScorerSpecFile = { + apiName: 'Lightning_Scorer', + dataType: 'LightningType', + scorerType: 'OpenEnded', + lightningType: 'lightning__numberType', + inputScope: 'Session', + label: 'Lightning Scorer', + engineType: 'Manual', + agentAssociation: { agentApiName: 'Agent_X', isActive: false }, + }; + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('LightningType'); + expect(result.contents).to.include('lightning__numberType'); + }); + + it('should handle single output enum value', async () => { + const spec = makeTextSpec({ + outputEnumValues: [ + { value: 'Only', outcomeType: 'NotApplicable', isFallback: true, isSystemFallback: false }, + ], + }); + const { Command } = await loadMockedCommand(spec); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('Only'); + expect(result.contents).to.include('NotApplicable'); + 
expect(result.contents).to.include('true'); + }); + + it('should include scorerType Predefined when set', async () => { + const { Command } = await loadMockedCommand(makeTextSpec({ scorerType: 'Predefined' })); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('Predefined'); + }); + + it('should include Measurement semanticType in XML', async () => { + const { Command } = await loadMockedCommand(makeNumberSpec({ semanticType: 'Measurement' })); + + const result = await Command.run([ + '--target-org', testOrg.username, + '--spec', 'test.yaml', + '--preview', + '--json', + ]); + + expect(result.contents).to.include('Measurement'); + }); + }); +});