Semaphor
Semantic Domains

Agent Guide

Build semantic domain templates programmatically with complete schema reference and validation rules

This guide is for AI agents and automation scripts that generate domain templates as JSON. It provides the complete schema specification, construction rules, and validation checklist needed to produce valid templates programmatically.

Approach

Domain templates can be applied in two ways:

  1. Code Editor -- Generate JSON, paste into the Domain Editor's Code Editor tab
  2. Management API -- POST /api/management/v1/domains with the template in the request body

Both paths run the same Zod-based validation. A template that passes validation in one path will pass in the other.


Template JSON Schema

Top-Level Structure

{
  "schemaVersion": "2.0",
  "datasets": [],
  "relationships": [],
  "calculatedMetrics": {},
  "calculatedDimensions": {},
  "grainMappings": []
}
FieldTypeRequiredDefaultDescription
schemaVersion"2.0"No"2.0"Schema version (always "2.0")
datasetsSemanticDataset[]Yes[]Array of dataset definitions
relationshipsRelationship[]No[]Array of join definitions
calculatedMetricsRecord<string, CalculatedMetric>No{}Named calculated metric definitions
calculatedDimensionsRecord<string, CalculatedDimension>No{}Named calculated dimension definitions
grainMappingsGrainMapping[]No[]Cross-dataset dimension alignment

SemanticDataset

{
  "name": "orders",
  "label": "Orders",
  "description": "Customer order transactions",
  "type": "physical",
  "connectionId": "conn_abc123",
  "connectionType": "PostgreSQL",
  "catalog": null,
  "schema": "public",
  "table": "orders",
  "sql": null,
  "datamodelId": null,
  "primaryKey": ["order_id"],
  "fields": {
    "identifiers": [],
    "dimensions": [],
    "metrics": []
  }
}
FieldTypeRequiredConstraints
namestringYesUnique within domain
labelstringYesDisplay name
descriptionstringNo
typeenumYes"physical" or "virtual"
connectionIdstringYesMust match an existing connection
connectionTypeenumYesSee connection types below
catalogstringNoFor BigQuery/Snowflake
schemastringNoDatabase schema
tablestringConditionalRequired if type: "physical"
sqlstringConditionalRequired if type: "virtual"
datamodelIdstringNoReference to a DataModel
primaryKeystring[]NoDefaults to []. Set for fan-out detection
fieldsobjectYesContains identifiers, dimensions, metrics

Connection types: "PostgreSQL", "MySQL", "MSSQL", "BigQuery", "Redshift", "Snowflake", "clickhouse", "S3", "S3Tables", "GoogleSheets", "FileUpload", "API", "none"

IdentifierField

{
  "name": "order_id",
  "label": "Order ID",
  "description": "Unique order identifier",
  "dataType": "number",
  "type": "primary",
  "hidden": false,
  "tags": ["key"]
}
FieldTypeRequiredConstraints
namestringYesColumn name
labelstringNoDisplay name
descriptionstringNo
dataTypestringYes"string", "number", "boolean", "date", "datetime", "json", "geo"
typeenumYes"primary" or "foreign"
hiddenbooleanNo
tagsstring[]No

DimensionField

{
  "name": "order_date",
  "label": "Order Date",
  "dataType": "date",
  "granularity": "day",
  "isDisplayField": false,
  "dateFormat": null,
  "customFormat": null,
  "hidden": false,
  "tags": []
}
FieldTypeRequiredConstraints
namestringYesColumn name
labelstringNoDisplay name
dataTypestringYesSee data types above
granularityenumNo"day", "week", "month", "quarter", "year", "hour", "minute", "second"
isDisplayFieldbooleanNoDefault display label for dataset
dateFormatstringNoDisplay format for dates
customFormatstringNoCustom format string
hiddenbooleanNo
tagsstring[]No

MetricField

{
  "name": "amount",
  "label": "Revenue",
  "dataType": "number",
  "sourceField": "amount",
  "expression": null,
  "format": null,
  "unit": "USD",
  "isDefault": false,
  "hidden": false,
  "tags": []
}
FieldTypeRequiredConstraints
namestringYesMetric name
labelstringNoDisplay name
dataTypestringYesUsually "number"
sourceFieldstringConditionalPhysical column. Required if no expression
expressionstringConditionalSQL expression. Required if no sourceField
formatstringNoDisplay format hint
unitstringNoUnit label
isDefaultbooleanNoMark as default metric
hiddenbooleanNo
filtersarrayNoDefault metric filters
tagsstring[]No

Relationship

{
  "id": "rel_001",
  "name": "orders_to_customers",
  "sourceDataset": "orders",
  "sourceFields": ["customer_id"],
  "targetDataset": "customers",
  "targetFields": ["id"],
  "cardinality": "many_to_one",
  "defaultJoinType": "LEFT",
  "isAutoJoin": true,
  "joinPriority": 1,
  "discoveredBy": "user",
  "confidence": "high",
  "isActive": true,
  "description": "Orders belong to customers"
}
FieldTypeRequiredConstraints
idstringNoAuto-generated if omitted
namestringYesUnique name
sourceDatasetstringYesMust exist in datasets
sourceFieldsstring[]YesMin 1 field
targetDatasetstringYesMust exist in datasets
targetFieldsstring[]YesMin 1 field, same length as sourceFields
cardinalityenumNo"one_to_one" (default), "one_to_many", "many_to_one", "many_to_many"
defaultJoinTypeenumYes"INNER", "LEFT", "RIGHT", "FULL"
isAutoJoinbooleanYesMust be true for automatic join resolution
joinPrioritynumberNoHigher = preferred when multiple paths exist
discoveredByenumYes"user", "auto", "fk_constraint", "ai"
confidenceenumNo"high", "medium", "low"
isActivebooleanYesSet false to disable
descriptionstringNo

CalculatedMetric

{
  "label": "Average Order Value",
  "description": "Revenue per order",
  "expression": "{total_revenue} / NULLIF({order_count}, 0)",
  "inputs": {
    "total_revenue": { "dataset": "orders", "field": "amount", "aggregate": "SUM" },
    "order_count": { "dataset": "orders", "field": "order_id", "aggregate": "COUNT" }
  },
  "metricType": "derived",
  "aggregationStrategy": "default",
  "format": { "type": "currency", "decimals": 2, "currency": "USD" },
  "tags": ["kpi"]
}
FieldTypeRequiredConstraints
labelstringYesDisplay name
descriptionstringNo
expressionstringYesMin 1 character. Uses {input_name} tokens
inputsobjectYesMap of name to ColumnInputReference or MetricInputReference
metricTypeenumNo"base", "derived" (default), "calculated"
aggregationStrategyenumNo"default", "symmetric_aggregate", "aggregate_then_join", "weighted"
formatFormatSpecNoSee below
tagsstring[]No
filtersarrayNoMetric-level filters

Input types:

Input TypeShapeUse Case
Column reference{ "dataset": "...", "field": "...", "aggregate": "SUM" }Points to a physical column
Metric reference{ "metric": "..." }Points to another calculated metric

Aggregate values: "SUM", "COUNT", "AVG", "MIN", "MAX", "MEDIAN", "DISTINCT"

CalculatedDimension

{
  "label": "Order Month",
  "description": "Monthly time grouping",
  "expression": "DATE_TRUNC('month', {order_date})",
  "inputs": {
    "order_date": { "dataset": "orders", "field": "order_date" }
  },
  "dataType": "date"
}
FieldTypeRequiredConstraints
labelstringYesDisplay name
descriptionstringNo
expressionstringYesSQL expression with {input_name} tokens
inputsobjectYesMap of name to ColumnInputReference or DimensionInputReference
dataTypeenumNo"string", "number", "boolean", "date", "datetime"
grainMappingobjectNoMap of dataset name to column name

Input types:

Input TypeShapeUse Case
Column reference{ "dataset": "...", "field": "..." }Points to a physical column (no aggregate)
Dimension reference{ "dimension": "..." }Points to another calculated dimension

FormatSpec

{
  "type": "currency",
  "decimals": 2,
  "currency": "USD",
  "prefix": "$",
  "suffix": "",
  "thousandsSeparator": true
}
FieldTypeRequiredConstraints
typeenumYes"number", "currency", "percentage", "date", "string"
decimalsintegerNo0-10
currencystringNoCurrency code (e.g., "USD")
prefixstringNo
suffixstringNo
thousandsSeparatorbooleanNoDefault true

GrainMapping

{
  "sourceDimension": "order_month",
  "targetDataset": "returns",
  "targetColumn": "return_date"
}
FieldTypeRequiredDescription
sourceDimensionstringYesName of a calculated dimension
targetDatasetstringYesDataset to map the dimension to
targetColumnstringYesColumn in the target dataset

Construction Rules

Follow these rules to produce valid templates:

Naming

  • Dataset name: unique within the domain, use snake_case
  • Calculated metric/dimension keys: unique, use snake_case
  • Field name: must match the physical column name in the database
  • Relationship name: descriptive (e.g., orders_to_customers)

Required Field Combinations

  • Physical datasets must have table
  • Virtual datasets must have sql
  • Metrics must have either sourceField or expression (not both required, but at least one)
  • Identifiers must have type set to "primary" or "foreign"

Token Rules

  • Token names in expressions ({name}) must exactly match keys in the inputs map
  • Token names are case-sensitive
  • Every token in the expression must have a corresponding input
  • Every input should be referenced by at least one token (unused inputs produce warning E006)
  • No braces {} inside SQL string literals (parser limitation)
  • No escape sequences like {{ or }}

Aggregation Rules

  • If the expression wraps a token in an aggregate (SUM({amount})), the input does not need aggregate
  • If the expression uses a bare token ({total_revenue} / {order_count}), each input must specify aggregate
  • Metric references ({ "metric": "..." }) never need aggregate -- the referenced metric provides its own

Reference Rules

  • Calculated metrics can reference: column references and other metric references
  • Calculated dimensions can reference: column references and other dimension references
  • Calculated dimensions cannot reference metrics (error E007)
  • Maximum nesting depth: 10 levels
  • No circular references (error E001)

Relationship Rules

  • sourceDataset and targetDataset must exist in datasets
  • sourceFields and targetFields must have the same length
  • Set isAutoJoin: true for fields to be auto-joinable in the explorer
  • Set isActive: true for the relationship to be used

Validation Checklist

Before submitting a template, verify:

  • schemaVersion is "2.0"
  • Every dataset has a unique name
  • Every dataset has connectionId and connectionType
  • Physical datasets have table; virtual datasets have sql
  • Every metric has sourceField or expression
  • Every identifier has type: "primary" or type: "foreign"
  • All calculated metric expressions use valid {token} syntax
  • All tokens in expressions have matching input keys
  • All input keys are used in expressions
  • Column references point to existing datasets and fields
  • Metric references point to existing calculated metrics
  • Dimension references point to existing calculated dimensions
  • No circular metric or dimension references
  • Relationship sourceDataset/targetDataset exist in datasets
  • Relationship sourceFields/targetFields are same length
  • Important datasets have primaryKey set (for fan-out detection)

Validation Error Codes

CodeSeverityCause
E001ErrorCircular dependency in metric/dimension references
E002ErrorInput references unknown metric name
E003ErrorInput references unknown field in dataset
E004ErrorInput references unknown dataset
E005ErrorToken in expression not found in inputs
E006WarningInput defined but not used in expression
E007ErrorCalculated dimension references a metric
E008ErrorInvalid filter syntax on metric
E009ErrorInvalid expression (braces in string literals)
E010ErrorInput requires aggregate but none specified
E011ErrorInput references unknown calculated dimension
W001WarningDataset missing primary key (fan-out detection disabled)
W002WarningRelationship may cause metric inflation

Generating Templates from Database Metadata

Here's a step-by-step algorithm for AI agents generating a domain from a database schema:

Step 1: Map Tables to Datasets

For each table in the target schema:

{
  "name": "<table_name_snake_case>",
  "label": "<Human Readable Name>",
  "type": "physical",
  "connectionId": "<connection_id>",
  "connectionType": "<database_type>",
  "schema": "<schema_name>",
  "table": "<table_name>",
  "primaryKey": ["<primary_key_column>"],
  "fields": { "identifiers": [], "dimensions": [], "metrics": [] }
}

Step 2: Classify Columns into Fields

For each column in each table:

Column TypeClassificationRule
Primary keyIdentifier (type: "primary")Always
Foreign keyIdentifier (type: "foreign")Always
Numeric, non-keyMetricSet sourceField to column name
Date/DateTimeDimensionSet granularity: "day" for dates
String/BooleanDimensionDefault classification

Step 3: Detect Relationships

For each foreign key constraint:

{
  "name": "<source_table>_to_<target_table>",
  "sourceDataset": "<source_table>",
  "sourceFields": ["<fk_column>"],
  "targetDataset": "<target_table>",
  "targetFields": ["<pk_column>"],
  "cardinality": "many_to_one",
  "defaultJoinType": "LEFT",
  "isAutoJoin": true,
  "discoveredBy": "auto",
  "isActive": true
}

Step 4: Generate Common KPIs

Based on the metric fields, create standard calculated metrics:

{
  "total_<metric_name>": {
    "label": "Total <Metric Label>",
    "expression": "SUM({<metric_name>})",
    "inputs": {
      "<metric_name>": { "dataset": "<dataset>", "field": "<field>" }
    }
  }
}

Step 5: Validate

Run the template through validation and fix any errors before submission.


Complete JSON Example

A production-ready template for an e-commerce analytics domain:

{
  "schemaVersion": "2.0",
  "datasets": [
    {
      "name": "orders",
      "label": "Orders",
      "type": "physical",
      "connectionId": "conn_1",
      "connectionType": "PostgreSQL",
      "schema": "public",
      "table": "orders",
      "primaryKey": ["order_id"],
      "fields": {
        "identifiers": [
          { "name": "order_id", "dataType": "number", "type": "primary", "label": "Order ID" },
          { "name": "customer_id", "dataType": "number", "type": "foreign", "label": "Customer ID" }
        ],
        "dimensions": [
          { "name": "order_date", "dataType": "date", "label": "Order Date", "granularity": "day" },
          { "name": "status", "dataType": "string", "label": "Status" }
        ],
        "metrics": [
          { "name": "amount", "dataType": "number", "sourceField": "amount", "label": "Revenue" },
          { "name": "cost", "dataType": "number", "sourceField": "cost", "label": "Cost" }
        ]
      }
    },
    {
      "name": "customers",
      "label": "Customers",
      "type": "physical",
      "connectionId": "conn_1",
      "connectionType": "PostgreSQL",
      "schema": "public",
      "table": "customers",
      "primaryKey": ["id"],
      "fields": {
        "identifiers": [
          { "name": "id", "dataType": "number", "type": "primary", "label": "Customer ID" }
        ],
        "dimensions": [
          { "name": "segment", "dataType": "string", "label": "Customer Segment" },
          { "name": "region", "dataType": "string", "label": "Region" }
        ],
        "metrics": []
      }
    },
    {
      "name": "returns",
      "label": "Returns",
      "type": "physical",
      "connectionId": "conn_1",
      "connectionType": "PostgreSQL",
      "schema": "public",
      "table": "returns",
      "primaryKey": [],
      "fields": {
        "identifiers": [],
        "dimensions": [
          { "name": "return_date", "dataType": "date", "label": "Return Date" }
        ],
        "metrics": [
          { "name": "amount", "dataType": "number", "sourceField": "amount", "label": "Return Amount" }
        ]
      }
    }
  ],
  "relationships": [
    {
      "name": "orders_to_customers",
      "sourceDataset": "orders",
      "sourceFields": ["customer_id"],
      "targetDataset": "customers",
      "targetFields": ["id"],
      "cardinality": "many_to_one",
      "defaultJoinType": "LEFT",
      "isAutoJoin": true,
      "discoveredBy": "user",
      "isActive": true
    }
  ],
  "calculatedMetrics": {
    "revenue": {
      "label": "Total Revenue",
      "expression": "SUM({amount})",
      "inputs": {
        "amount": { "dataset": "orders", "field": "amount" }
      }
    },
    "cost": {
      "label": "Total Cost",
      "expression": "SUM({cost_amount})",
      "inputs": {
        "cost_amount": { "dataset": "orders", "field": "cost" }
      }
    },
    "profit": {
      "label": "Profit",
      "expression": "{revenue} - {cost}",
      "inputs": {
        "revenue": { "metric": "revenue" },
        "cost": { "metric": "cost" }
      }
    },
    "profit_margin": {
      "label": "Profit Margin %",
      "expression": "{profit} / NULLIF({revenue}, 0) * 100",
      "inputs": {
        "profit": { "metric": "profit" },
        "revenue": { "metric": "revenue" }
      },
      "format": { "type": "percentage", "decimals": 1 }
    },
    "net_revenue": {
      "label": "Net Revenue",
      "description": "Revenue minus returns",
      "expression": "{total_sales} - COALESCE({total_returns}, 0)",
      "inputs": {
        "total_sales": { "dataset": "orders", "field": "amount", "aggregate": "SUM" },
        "total_returns": { "dataset": "returns", "field": "amount", "aggregate": "SUM" }
      }
    }
  },
  "calculatedDimensions": {
    "order_month": {
      "label": "Order Month",
      "expression": "DATE_TRUNC('month', {order_date})",
      "inputs": {
        "order_date": { "dataset": "orders", "field": "order_date" }
      },
      "dataType": "date"
    },
    "order_size": {
      "label": "Order Size",
      "expression": "CASE WHEN {amount} > 1000 THEN 'Large' WHEN {amount} > 100 THEN 'Medium' ELSE 'Small' END",
      "inputs": {
        "amount": { "dataset": "orders", "field": "amount" }
      },
      "dataType": "string"
    }
  },
  "grainMappings": [
    {
      "sourceDimension": "order_month",
      "targetDataset": "returns",
      "targetColumn": "return_date"
    }
  ]
}

Next Steps