# OpenAPI contract for the DataScreenIQ service: document version and metadata.
openapi: 3.0.3
info:
  title: DataScreenIQ API
  # Folded scalar: the line breaks below collapse to single spaces.
  description: >-
    Real-time data quality screening at the edge.
    POST your rows, get PASS / WARN / BLOCK back in milliseconds.
  # Version of this API contract (not the OpenAPI spec version above); quoted
  # so it can never be re-typed by a parser.
  version: '1.0.0'
  contact:
    email: app@datascreeniq.com
    url: https://datascreeniq.com
  license:
    name: Commercial
    url: https://datascreeniq.com/terms.html
# Base URLs the paths in this document are resolved against; only one server
# (labelled Production) is declared.
servers:
- url: https://api.datascreeniq.com
  description: Production
# Document-level default: every operation requires the ApiKeyAuth scheme unless
# the operation declares its own `security` key (e.g. `security: []`).
security:
- ApiKeyAuth: []
# Reusable definitions referenced elsewhere in this document via `$ref` and
# `security` requirements.
components:
  securitySchemes:
    # API key carried in the X-API-Key request header; this is the scheme named
    # by the document-level `security` requirement.
    ApiKeyAuth:
      type: apiKey
      in: header
      name: X-API-Key
      description: Your DataScreenIQ API key (dsiq_live_...)
    # HTTP bearer-token scheme.
    # NOTE(review): none of the operations visible in this document reference
    # BearerAuth in a `security` requirement - confirm which "dashboard
    # endpoints" are meant to accept it.
    BearerAuth:
      type: http
      scheme: bearer
      description: Session token from OTP sign-in (for dashboard endpoints)
  schemas:
    # One input record. No properties are declared and additionalProperties is
    # true, so any keys and value types validate. Used as the item type of
    # ScreenRequest.rows.
    Row:
      type: object
      description: A single data row as a flat key-value object
      additionalProperties: true
      example:
        order_id: ORD-001
        amount: 99.5
        email: alice@corp.com
        status: paid
    # JSON request body of POST /v1/screen: a source name, the rows to screen,
    # and optional per-request overrides.
    ScreenRequest:
      type: object
      required:
      - source
      - rows
      properties:
        source:
          type: string
          description: Pipeline or table name used for baseline tracking
          example: orders
        rows:
          type: array
          items:
            $ref: '#/components/schemas/Row'
          minItems: 1
          # Mirrors the 413 response documented on POST /v1/screen
          # ("max 10,000 rows per request") so generated clients and validators
          # can reject oversized batches before sending them.
          maxItems: 10000
          description: Array of row objects to screen (at most 10,000 per request)
        options:
          type: object
          description: Optional screening overrides
          properties:
            full_scan:
              type: boolean
              default: false
              description: If true, analyze all rows instead of sampling
            thresholds:
              type: object
              description: Override default quality thresholds
              # NOTE(review): the defaults suggest these are fractions in
              # [0, 1], but no minimum/maximum is declared - confirm the
              # accepted range before adding bounds.
              properties:
                null_rate_warn:
                  type: number
                  default: 0.3
                null_rate_block:
                  type: number
                  default: 0.7
                type_mismatch_warn:
                  type: number
                  default: 0.05
                type_mismatch_block:
                  type: number
                  default: 0.2
    # Inferred description of one field: its dominant type and a 0..1
    # confidence. Used as the value type of ScreenResponse.schema.
    SchemaField:
      type: object
      properties:
        type:
          type: string
          enum:
          - string
          - number
          - boolean
          - object
          - array
          # Quoted so it stays the string "null" rather than a YAML null.
          - 'null'
          - mixed
          description: Dominant type detected for this field
        confidence:
          type: number
          minimum: 0
          maximum: 1
          description: >-
            Fraction of non-null values matching the dominant type
            (1.0 = fully consistent)
    # Type-mismatch report entry. Used as the value type of
    # ScreenResponse.issues.type_mismatches.
    TypeMismatchDetail:
      type: object
      properties:
        expected:
          type: string
          example: number
        found:
          type: array
          items:
            type: string
          example:
          - string
        # No `type` is declared, so a value of any JSON type validates here.
        sample_value:
          description: One example bad value
          example: broken
        # presumably the fraction of values that mismatched - TODO confirm
        rate:
          type: number
          example: 0.33
        severity:
          type: string
          enum:
          - warning
          - critical
          example: critical
    # Null-rate report entry. Used as the value type of
    # ScreenResponse.issues.null_rates.
    NullRateDetail:
      type: object
      properties:
        # presumably the observed null fraction - TODO confirm
        actual:
          type: number
          example: 0.75
        # presumably the limit that `actual` was compared against - TODO confirm
        threshold:
          type: number
          example: 0.3
        severity:
          type: string
          enum:
          - warning
          - critical
          example: critical
    # One schema-drift event. Used as the item type of ScreenResponse.drift.
    DriftEvent:
      type: object
      properties:
        field:
          type: string
          example: amount
        kind:
          type: string
          enum:
          - field_added
          - field_removed
          - type_changed
          - null_spike
          - cardinality_change
          - new_enum_value
          - row_count_anomaly
          - empty_string_spike
          example: type_changed
        # NOTE(review): this vocabulary (info/warn/block) differs from the
        # warning/critical enum used by TypeMismatchDetail and NullRateDetail -
        # confirm the difference is intentional.
        severity:
          type: string
          enum:
          - info
          - warn
          - block
          example: block
        detail:
          type: string
          example: Field changed type from number to mixed
        previous:
          type: string
          example: number
        current:
          type: string
          example: mixed
    # 200 response body of POST /v1/screen: verdict, inferred schema, detected
    # issues, drift events and processing statistics.
    ScreenResponse:
      type: object
      properties:
        request_id:
          type: string
          example: batch_8fb151d5771f0cec
        status:
          type: string
          enum:
          - PASS
          - WARN
          - BLOCK
          description: Quality verdict
          example: BLOCK
        health_score:
          type: number
          minimum: 0
          maximum: 1
          description: Overall data health (1.0 = perfect)
          example: 0.34
        decision:
          type: object
          properties:
            # Same three values as the top-level `status` enum.
            action:
              type: string
              enum:
              - PASS
              - WARN
              - BLOCK
            reason:
              type: string
              # Double-quoted because the text contains ": " and apostrophes.
              example: "Type mismatch in: 'amount'; High null rate in 'email' (75%)"
        # Map whose values are SchemaField objects; the example keys are field
        # names.
        schema:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/SchemaField'
          example:
            order_id:
              type: string
              confidence: 1.0
            amount:
              type: mixed
              confidence: 0.67
            email:
              type: string
              confidence: 1.0
        schema_fingerprint:
          type: string
          description: SHA-256 hash of the schema structure (not values)
          example: 59b492b0e8ab0ca0...
        issues:
          type: object
          description: Only present when issues are detected — empty keys are omitted
          properties:
            # The map-typed entries below are presumably keyed by field name -
            # TODO confirm.
            type_mismatches:
              type: object
              additionalProperties:
                $ref: '#/components/schemas/TypeMismatchDetail'
            null_rates:
              type: object
              additionalProperties:
                $ref: '#/components/schemas/NullRateDetail'
            outliers:
              type: object
              additionalProperties:
                type: object
                properties:
                  count:
                    type: integer
                  # Empty schemas: min/max accept a value of any type.
                  min: {}
                  max: {}
                  severity:
                    type: string
            duplicate_fields:
              type: array
              items:
                type: string
            row_count_anomaly:
              type: boolean
            new_enum_values:
              type: object
              additionalProperties:
                type: array
                items:
                  type: string
        drift:
          type: array
          items:
            $ref: '#/components/schemas/DriftEvent'
          description: Schema drift events (omitted if empty)
        stats:
          type: object
          properties:
            rows_received:
              type: integer
              example: 4
            rows_sampled:
              type: integer
              example: 4
            sample_ratio:
              type: number
              example: 1.0
            sample_version:
              type: string
              example: v2
            source:
              type: string
              example: orders
        latency_ms:
          type: integer
          example: 38
        timestamp:
          type: string
          format: date-time
    # Error body referenced by the 400/401/413/429 responses of POST /v1/screen:
    # a human-readable message plus a string code.
    ErrorResponse:
      type: object
      properties:
        message:
          type: string
          example: Invalid API key.
        code:
          type: string
          example: AUTH_INVALID
    # Record of one screening job. Returned by GET /v1/jobs (as list items) and
    # by GET /v1/jobs/{id}.
    Job:
      type: object
      properties:
        id:
          type: string
        source:
          type: string
        # Same three verdict values as ScreenResponse.status.
        status:
          type: string
          enum:
          - PASS
          - WARN
          - BLOCK
        health_score:
          type: number
        rows_received:
          type: integer
        drift_count:
          type: integer
        latency_ms:
          type: integer
        created_at:
          type: string
          format: date-time
    # API key metadata as listed by GET /v1/keys. Only `key_prefix` is declared
    # here; the full key value is not part of this schema.
    ApiKey:
      type: object
      properties:
        id:
          type: string
        name:
          type: string
        key_prefix:
          type: string
          example: dsiq_live_43df...
        created_at:
          type: string
          format: date-time
        # NOTE(review): not marked nullable - confirm what is returned for a
        # key that has never been used.
        last_used:
          type: string
          format: date-time
paths:
  # Core screening endpoint: accepts a JSON ScreenRequest or a raw CSV body and
  # answers with a ScreenResponse.
  /v1/screen:
    post:
      summary: Screen a data payload
      description: >-
        Submit rows for quality screening.
        Returns a PASS, WARN, or BLOCK verdict
        with detailed issue breakdown and schema drift detection.
      operationId: screenData
      tags:
      - Screening
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ScreenRequest'
            example:
              source: orders
              rows:
              - order_id: ORD-001
                amount: 99.5
                email: alice@corp.com
              - order_id: ORD-002
                amount: broken
                email: null
              - order_id: ORD-003
                amount: 75.0
                email: null
          text/csv:
            schema:
              type: string
            # Literal block scalar: newlines are kept as written and the
            # trailing newline is stripped (`|-`).
            example: |-
              order_id,amount,email
              ORD-001,99.50,alice@corp.com
              ORD-002,broken,
              ORD-003,75.00,
      parameters:
      # Not marked `required` in the schema because it only applies to CSV
      # uploads (see its description).
      - name: X-Source
        in: header
        description: Source name (required when Content-Type is text/csv)
        schema:
          type: string
      responses:
        '200':
          description: Screening complete
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ScreenResponse'
        '400':
          description: Invalid payload
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '413':
          description: Payload too large (max 10,000 rows per request)
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '429':
          description: Rate limit or plan row limit exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
  # Liveness endpoint returning a status string and a version string.
  /v1/health:
    get:
      summary: Health check
      description: Returns API status. No authentication required.
      operationId: healthCheck
      tags:
      - System
      # Empty requirement list overrides the document-level ApiKeyAuth default,
      # making this operation callable without credentials.
      security: []
      responses:
        '200':
          description: API is healthy
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    example: ok
                  version:
                    type: string
                    example: 1.0.0
  # Clears the stored baseline of one source, identified by the `source` path
  # parameter.
  /v1/schema/{source}:
    delete:
      summary: Reset baseline for a source
      description: >-
        Clears the schema baseline for a specific source.
        The next screen call will build a fresh baseline.
      operationId: resetSourceBaseline
      tags:
      - Screening
      parameters:
      - name: source
        in: path
        required: true
        schema:
          type: string
        example: orders
      responses:
        '200':
          description: Baseline reset
          content:
            application/json:
              schema:
                type: object
                properties:
                  ok:
                    type: boolean
                  # The example echoes the example value of the `source` path
                  # parameter.
                  reset:
                    type: string
                    example: orders
                  message:
                    type: string
  # Account-wide variant of DELETE /v1/schema/{source}: takes no parameters and
  # returns the same ok/reset/message response shape.
  /v1/schema:
    delete:
      summary: Reset all baselines
      description: Clears schema baselines for all sources in your account.
      operationId: resetAllBaselines
      tags:
      - Screening
      responses:
        '200':
          description: All baselines reset
          content:
            application/json:
              schema:
                type: object
                properties:
                  ok:
                    type: boolean
                  reset:
                    type: string
                    example: all
                  message:
                    type: string
  # Job history listing. Both query parameters are optional; the response wraps
  # the Job array in a `jobs` property.
  /v1/jobs:
    get:
      summary: List screening jobs
      description: Returns recent screening jobs for your account.
      operationId: listJobs
      tags:
      - Jobs
      parameters:
      # NOTE(review): a maximum is declared but no minimum, so zero and
      # negative values validate against this schema - confirm how the
      # server treats them.
      - name: limit
        in: query
        schema:
          type: integer
          default: 50
          maximum: 200
      - name: source
        in: query
        schema:
          type: string
        description: Filter by source name
      responses:
        '200':
          description: List of jobs
          content:
            application/json:
              schema:
                type: object
                properties:
                  jobs:
                    type: array
                    items:
                      $ref: '#/components/schemas/Job'
  # Single-job lookup by the `id` path parameter.
  /v1/jobs/{id}:
    get:
      summary: Get a job
      description: >-
        Returns full details for a specific screening job
        including the complete quality report.
      operationId: getJob
      tags:
      - Jobs
      parameters:
      - name: id
        in: path
        required: true
        schema:
          type: string
      responses:
        # NOTE(review): the description promises the complete quality report,
        # but the Job schema only declares summary fields - confirm the real
        # response shape.
        '200':
          description: Job details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Job'
        # NOTE(review): no body schema is declared for this error - confirm
        # whether ErrorResponse is returned here.
        '404':
          description: Job not found
  # Aggregated statistics over a window of `days`: account-level totals plus a
  # `sources` array with one object per source.
  /v1/stats:
    get:
      summary: Aggregate statistics
      description: Returns aggregated quality statistics grouped by source.
      operationId: getStats
      tags:
      - Jobs
      parameters:
      # NOTE(review): maximum is declared without a minimum - confirm how
      # zero or negative values are handled.
      - name: days
        in: query
        schema:
          type: integer
          default: 30
          maximum: 365
        description: Number of days to aggregate
      responses:
        '200':
          description: Aggregated stats
          content:
            application/json:
              schema:
                type: object
                properties:
                  totalJobs:
                    type: integer
                  passCount:
                    type: integer
                  warnCount:
                    type: integer
                  blockCount:
                    type: integer
                  # NOTE(review): named avgHealth here but avgHealthPct in the
                  # per-source objects below - confirm whether the units differ.
                  avgHealth:
                    type: number
                  totalRows:
                    type: integer
                  sourceCount:
                    type: integer
                  periodDays:
                    type: integer
                  sources:
                    type: array
                    items:
                      type: object
                      properties:
                        source:
                          type: string
                        totalJobs:
                          type: integer
                        passCount:
                          type: integer
                        warnCount:
                          type: integer
                        blockCount:
                          type: integer
                        avgHealthPct:
                          type: number
                        totalRows:
                          type: integer
                        totalDrift:
                          type: integer
                        lastRun:
                          type: string
                          format: date-time
  # API key collection: GET lists ApiKey metadata, POST creates a key and
  # returns its `key` string together with id and name.
  /v1/keys:
    get:
      summary: List API keys
      operationId: listKeys
      tags:
      - API Keys
      responses:
        '200':
          description: List of API keys
          content:
            application/json:
              schema:
                type: object
                properties:
                  keys:
                    type: array
                    items:
                      $ref: '#/components/schemas/ApiKey'
    post:
      summary: Create API key
      operationId: createKey
      tags:
      - API Keys
      # No `required: true` on the body and no `required` list on the schema,
      # so both the body and `name` are optional as specified.
      requestBody:
        content:
          application/json:
            schema:
              type: object
              properties:
                name:
                  type: string
                  example: Production key
      responses:
        '201':
          description: Key created — store it now, not shown again in full
          content:
            application/json:
              schema:
                type: object
                properties:
                  key:
                    type: string
                    example: dsiq_live_...
                  id:
                    type: string
                  name:
                    type: string
  # Deletes one API key by its `id` path parameter. Neither response declares a
  # body schema.
  /v1/keys/{id}:
    delete:
      summary: Delete API key
      operationId: deleteKey
      tags:
      - API Keys
      parameters:
      - name: id
        in: path
        required: true
        schema:
          type: string
      responses:
        '200':
          description: Key deleted
        '404':
          description: Key not found
  # Read-only billing summary: plan name, usage counters and the billing period
  # bounds.
  /v1/billing:
    get:
      summary: Get billing info
      description: Returns current plan, row usage, and billing period.
      operationId: getBilling
      tags:
      - Billing
      responses:
        '200':
          description: Billing info
          content:
            application/json:
              schema:
                type: object
                properties:
                  # Includes `developer`, which is absent from the `tier` enum
                  # of POST /v1/billing/checkout.
                  plan:
                    type: string
                    enum:
                    - developer
                    - starter
                    - growth
                    - scale
                  rowsProcessed:
                    type: integer
                  requestCount:
                    type: integer
                  periodStart:
                    type: string
                    format: date-time
                  periodEnd:
                    type: string
                    format: date-time
  # Starts a checkout for the requested tier; the response carries the URL to
  # redirect to.
  /v1/billing/checkout:
    post:
      summary: Create checkout session
      description: Creates a Stripe checkout session. Returns a redirect URL.
      operationId: createCheckout
      tags:
      - Billing
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
              - tier
              properties:
                # `developer` (listed in the plan enum of GET /v1/billing) is
                # not selectable here - presumably the free plan; TODO confirm.
                tier:
                  type: string
                  enum:
                  - starter
                  - growth
                  - scale
      responses:
        '200':
          description: Checkout URL
          content:
            application/json:
              schema:
                type: object
                properties:
                  url:
                    type: string
                    format: uri
  # Creates a billing portal session. No request body is declared; the response
  # carries the portal URL.
  /v1/billing/portal:
    post:
      summary: Open billing portal
      description: Creates a Stripe billing portal session for managing subscriptions.
      operationId: openPortal
      tags:
      - Billing
      responses:
        '200':
          description: Portal URL
          content:
            application/json:
              schema:
                type: object
                properties:
                  url:
                    type: string
                    format: uri
# Tag catalogue: each name below is used in the `tags` list of at least one
# operation under `paths`.
tags:
- name: Screening
  description: Core data quality screening endpoints
- name: Jobs
  description: Job history and statistics
- name: API Keys
  description: Manage your API keys
- name: Billing
  description: Plan and subscription management
- name: System
  description: Health and status
