# OpenAPI 3.1 description of the BlueDXP official-data-sources module.
openapi: 3.1.0
info:
  title: BlueDXP — Official authority & open-data registry
  version: 1.0.0
  summary: Public registry of authoritative trade, customs, ports, tax and statistics endpoints, plus an authenticated knowledge layer for verification & integration tracking.
  # The numbered list below must stay in sync with the entries under `paths`.
  description: |
    Three endpoints belong to this module:

    1. **`/api/official-data-sources`** — public, read-only, ETag-cacheable.
       The static seed `data/seeds/official-data-sources.registry.json` is wrapped
       with query modes (filter, group, stats, by-id) and an optional knowledge
       merge (`?knowledge=1`).
    2. **`/api/official-data-sources-verify`** — authenticated, RBAC-guarded.
       Reads the per-source DB knowledge row + verification log and accepts
       POST writes (mark verified, change integration_status, attach an
       Intel Map node, append a note). Every POST also writes an audit row to
       `official_data_sources_verification_log`.
    3. **`/api/cron-official-data-sources-health`** — cron-only, magic-key auth.
       HEAD-probes registry URLs and writes health_check rows to the same log.

    Policy: this module is for **integration planning + human verification**.
    Do **not** bulk-scrape government or commercial sites. Prefer documented
    APIs, authenticated sandboxes, licensed data products or explicit dataset
    downloads. See `data/seeds/official-data-sources.registry.json` for the
    full policy text.
  contact:
    name: BlueDXP Platform
    url: https://bluedxp.app
  license:
    name: UNLICENSED — internal use
    url: https://bluedxp.app/terms

# Base URLs for this API; the list order is unchanged.
servers:
  - description: Production (canonical — used by .github/workflows/deploy-vercel.yml smoke tests)
    url: 'https://wajeeh.app'
  - description: Production (alternate brand alias)
    url: 'https://app.bluedxp.com'
  - description: Local dev / playwright
    url: 'http://127.0.0.1:4173'

# Tag catalogue used to group operations in rendered documentation.
tags:
  - description: 'Read-only, ETag-cacheable endpoints. No auth required.'
    name: 'Public registry'
  - description: 'Authenticated read + write of the per-source operational state.'
    name: 'Knowledge & verification'
  - description: 'System-only HEAD-probe job invoked by Vercel cron.'
    name: 'Cron'

paths:
  # Public, unauthenticated registry resource: GET for data, HEAD for cache
  # validation, OPTIONS for CORS pre-flight.
  /api/official-data-sources:
    get:
      tags: [Public registry]
      summary: Fetch the registry — full or filtered, with optional grouping / stats / by-id.
      description: |
        Default behaviour returns the entire registry document. Combine with
        `jurisdiction`, `category`, `q` to slice. Use `id`, `stats`, or `group`
        for purpose-built shapes. Add `knowledge=1` to merge platform-default
        DB knowledge fields onto each source (returns 503 if Postgres is not
        configured on the server). Always sets a stable `ETag`.
      parameters:
        - $ref: '#/components/parameters/Jurisdiction'
        - $ref: '#/components/parameters/Category'
        - $ref: '#/components/parameters/Search'
        - $ref: '#/components/parameters/SourceId'
        - $ref: '#/components/parameters/Stats'
        - $ref: '#/components/parameters/Group'
        - $ref: '#/components/parameters/Knowledge'
        - $ref: '#/components/parameters/IfNoneMatch'
      responses:
        '200':
          description: OK — full document, filtered slice, by-id detail, group, or stats.
          headers:
            ETag:
              description: Strong validator over the response body.
              schema: { type: string }
            Cache-Control:
              schema: { type: string, example: 'public, max-age=120, stale-while-revalidate=600' }
          content:
            application/json:
              schema:
                # One body shape per query mode (default/filter, id, stats, group).
                oneOf:
                  - $ref: '#/components/schemas/RegistryDocument'
                  - $ref: '#/components/schemas/SourceDetailEnvelope'
                  - $ref: '#/components/schemas/StatsEnvelope'
                  - $ref: '#/components/schemas/GroupEnvelope'
        '304':
          description: Not modified (ETag matched).
        '404':
          description: 'Returned when `?id=` names a source id that is not in the registry.'
          content:
            application/json:
              schema: { $ref: '#/components/schemas/ErrorEnvelope' }
        '500':
          description: Registry file missing or invalid JSON on the server.
          content:
            application/json:
              schema: { $ref: '#/components/schemas/ErrorEnvelope' }
        '503':
          description: '`?knowledge=1` requested but Postgres is not configured on the server.'
          content:
            application/json:
              schema: { $ref: '#/components/schemas/ErrorEnvelope' }
    head:
      tags: [Public registry]
      summary: Same ETag as GET — for cache validation.
      # NOTE(review): only If-None-Match is declared here; confirm whether HEAD
      # honours the same query filters as GET.
      parameters:
        - $ref: '#/components/parameters/IfNoneMatch'
      responses:
        '200':
          description: OK — body intentionally empty.
          # The ETag is the whole point of this operation, so declare it.
          headers:
            ETag:
              description: Same strong validator that GET returns.
              schema: { type: string }
        '304': { description: Not modified. }
    options:
      tags: [Public registry]
      summary: CORS pre-flight.
      responses:
        '204': { description: No content. }

  # Authenticated knowledge / verification resource: GET reads one source or
  # the re-check queue, POST records a verification action.
  /api/official-data-sources-verify:
    get:
      tags: [Knowledge & verification]
      summary: Read knowledge for one source, or the verification queue.
      description: |
        Two modes:
          - `?id=<source_id>` returns the merged shape: `{ source, knowledge, verification_log }`.
          - `?queue=1` returns rows due for re-check (never verified or older than `stale_after_days`).
      # Either credential type is accepted (the list entries are alternatives).
      security:
        - BearerAuth: []
        - ApiKeyAuth: []
      parameters:
        - in: query
          name: id
          schema: { type: string }
          description: Source id to inspect.
        - in: query
          name: queue
          schema: { type: string, enum: ['1', 'true'] }
          description: When set, returns the verification queue instead.
        - in: query
          name: stale_after_days
          schema: { type: integer, minimum: 1, maximum: 3650, default: 90 }
        - in: query
          name: limit
          schema: { type: integer, minimum: 1, maximum: 500, default: 100 }
        - in: query
          name: organization_id
          description: PLATFORM_ADMIN only. Other roles are scoped to their own org.
          schema: { type: string }
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                # `anyOf`, not `oneOf`: neither referenced schema declares
                # `required` keys or forbids extra properties, so any object
                # satisfies both and `oneOf` would reject every response.
                anyOf:
                  - $ref: '#/components/schemas/KnowledgeView'
                  - $ref: '#/components/schemas/VerificationQueue'
        '400': { description: Missing/invalid query params. }
        '401': { description: Auth missing. }
        '403': { description: 'Missing `official_data_sources:read` permission.' }
        '404': { description: Source not in the static registry. }
        '503': { description: Postgres not configured on the server. }
    post:
      tags: [Knowledge & verification]
      summary: Mark verified, change integration_status, record health, attach Intel Map node, or append a note.
      description: |
        Always writes to the verification_log. The `action` controls what
        kind of log entry is recorded; `integration_status` and `health_status`
        update the knowledge row inline. Restricted to PLATFORM_ADMIN,
        OPERATIONS_MANAGER, and COMPLIANCE_OFFICER.
      security:
        - BearerAuth: []
        - ApiKeyAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema: { $ref: '#/components/schemas/VerifyRequest' }
            # One example per common action.
            examples:
              quickVerify:
                summary: Quick mark-as-verified
                value:
                  id: sa-zatca-customs
                  action: verified
                  mark_verified: true
              attachIntelMap:
                summary: Attach an Intel Map node + verify
                value:
                  id: sa-zatca-customs
                  action: verified
                  mark_verified: true
                  integration_status: verified
                  intel_map_node_key: ksa-zatca-bonded-regime
                  intel_map_layer_kind: bonded_zone
                  notes: 'Confirmed homepage + sandbox developer portal.'
              healthCheck:
                summary: Operator-reported health change
                value:
                  id: ae-adcp
                  action: health_check
                  health_status: blocked_by_waf
                  http_status: 403
                  url_checked: https://www.adports.ae/
                  notes: 'Cloudflare WAF response after geo block.'
      responses:
        '200':
          description: OK — knowledge row + log entry id returned.
          content:
            application/json:
              schema: { $ref: '#/components/schemas/VerifyResponse' }
        '400': { description: Bad request body. }
        '401': { description: Auth missing. }
        '403': { description: 'Missing `official_data_sources:verify` permission.' }
        '404': { description: Source not in the static registry. }
        '503': { description: Postgres not configured on the server. }

  # Scheduled health-probe operation; authentication is via the `key` query
  # parameter rather than a security scheme.
  /api/cron-official-data-sources-health:
    get:
      summary: HEAD-probe N due rows; write health_check log entries.
      tags:
        - Cron
      description: |
        Cron-only. Authenticated by `?key=<BLUEDXP_ADMIN_KEY>`. Wired into
        `vercel.json` `crons` to run daily.
      parameters:
        - name: key
          in: query
          required: true
          description: BLUEDXP_ADMIN_KEY (private, never user-supplied).
          schema:
            type: string
        - name: limit
          in: query
          schema:
            type: integer
            minimum: 1
            maximum: 200
            default: 20
        - name: recheck_after_hours
          in: query
          description: Skip rows whose health was probed within this window.
          schema:
            type: integer
            minimum: 1
            default: 168
        - name: dry_run
          in: query
          description: Probe but do not write to the DB.
          schema:
            type: string
            enum:
              - '1'
              - 'true'
      responses:
        '200':
          description: Probe summary.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CronHealthSummary'
        '401':
          description: Missing or wrong admin key.
        '405':
          description: Use GET.
        '500':
          description: Registry file missing on the server.
        '503':
          description: 'Postgres not configured (and `dry_run` not set).'

components:
  # Reusable request parameters, referenced via `#/components/parameters/<Name>`.
  parameters:
    Jurisdiction:
      name: jurisdiction
      in: query
      description: ISO 3166-1 alpha-2 (or `GCC`, `INTL`, `EU`).
      schema:
        type: string
        example: SA
    Category:
      name: category
      in: query
      schema:
        type: string
        enum:
          - open_data_portal
          - documented_api
          - clearance_trade_platform
          - ports_maritime
          - tax_compliance
          - statistics_trade
          - standards_identifiers
          - integration_marketplace
    Search:
      name: q
      in: query
      description: Free-text search across name, notes, URLs, ID and categories.
      schema:
        type: string
    SourceId:
      name: id
      in: query
      schema:
        type: string
        example: sa-zatca-customs
    # Flag-style parameters: only the strings '1' and 'true' are accepted.
    Stats:
      name: stats
      in: query
      schema:
        type: string
        enum:
          - '1'
          - 'true'
    Group:
      name: group
      in: query
      schema:
        type: string
        enum:
          - jurisdiction
          - category
          - integration_type
          - access_model
    Knowledge:
      name: knowledge
      in: query
      description: Merge platform-default DB knowledge fields onto each source. Requires Postgres.
      schema:
        type: string
        enum:
          - '1'
          - 'true'
    # The only header parameter in this set.
    IfNoneMatch:
      name: If-None-Match
      in: header
      description: Strong ETag for cache validation.
      schema:
        type: string

  # Credential types that operations can list under `security`.
  securitySchemes:
    # HTTP bearer token.
    BearerAuth:
      description: BlueDXP-issued JWT (`bdxp_access_token`).
      scheme: bearer
      bearerFormat: JWT
      type: http
    # API key carried in a request header.
    ApiKeyAuth:
      name: X-Api-Key
      in: header
      type: apiKey

  schemas:
    # Registry payload: top-level metadata plus the `sources` array.
    RegistryDocument:
      type: object
      required: [schema_version, sources, jurisdictions, categories, policy]
      properties:
        schema_version: { type: integer, example: 1 }
        # OpenAPI 3.1 has no `nullable` keyword; null is allowed via a type array.
        updated_at: { type: [string, 'null'], format: date-time }
        purpose: { type: string }
        policy: { type: string }
        coverage_note: { type: string }
        jurisdictions:
          type: array
          items: { type: string }
        categories:
          type: array
          items: { type: string }
        verification_checklist:
          type: array
          items: { type: string }
        sources:
          type: array
          items: { $ref: '#/components/schemas/Source' }
        filter_applied:
          type: object
          additionalProperties: true
        registry_slice:
          type: object
          properties:
            total_sources: { type: integer }
            returned: { type: integer }

    # One registry entry.
    Source:
      type: object
      required: [id, name, jurisdiction, categories, official_urls]
      properties:
        id: { type: string, example: sa-zatca-customs }
        name: { type: string }
        jurisdiction: { type: string, example: SA }
        # OpenAPI 3.1 has no `nullable` keyword; optional-null fields below use
        # a type array that includes 'null'.
        iso3166_alpha2: { type: [string, 'null'], example: SA }
        categories:
          type: array
          items: { type: string }
        integration_type: { type: [string, 'null'], example: REST }
        access_model: { type: [string, 'null'], example: 'partner-program' }
        official_urls:
          type: array
          items: { type: string, format: uri }
        documentation_urls:
          type: array
          items: { type: string, format: uri }
        notes: { type: [string, 'null'] }

    # Wrapper around a single `source`.
    SourceDetailEnvelope:
      type: object
      required: [source]
      properties:
        schema_version: { type: integer }
        # OpenAPI 3.1 has no `nullable` keyword; null is allowed via a type array.
        updated_at: { type: [string, 'null'], format: date-time }
        policy: { type: string }
        source: { $ref: '#/components/schemas/Source' }
        filter_applied:
          type: object
          properties: { id: { type: string } }

    # Wrapper around aggregate counts.
    StatsEnvelope:
      type: object
      required: [stats]
      properties:
        schema_version: { type: integer }
        # OpenAPI 3.1 has no `nullable` keyword; null is allowed via a type array.
        updated_at: { type: [string, 'null'], format: date-time }
        stats:
          type: object
          required: [total_sources]
          properties:
            total_sources: { type: integer }
            total_jurisdictions: { type: integer }
            total_categories: { type: integer }
            # Each `by_*` map has free-form keys with integer values.
            by_jurisdiction: { type: object, additionalProperties: { type: integer } }
            by_category: { type: object, additionalProperties: { type: integer } }
            by_integration_type: { type: object, additionalProperties: { type: integer } }
            by_access_model: { type: object, additionalProperties: { type: integer } }
            iso_coverage:
              type: object
              properties:
                with_iso: { type: integer }
                without_iso: { type: integer }

    # Wrapper around sources bucketed by one grouping dimension.
    GroupEnvelope:
      type: object
      required: [group_by, groups]
      properties:
        schema_version: { type: integer }
        # OpenAPI 3.1 has no `nullable` keyword; null is allowed via a type array.
        updated_at: { type: [string, 'null'], format: date-time }
        group_by: { type: string, enum: [jurisdiction, category, integration_type, access_model] }
        groups:
          type: array
          items:
            type: object
            properties:
              key: { type: string }
              count: { type: integer }
              sources:
                type: array
                items: { $ref: '#/components/schemas/Source' }

    # Per-source knowledge record: integration and health state plus timestamps.
    KnowledgeRow:
      type: object
      properties:
        id: { type: string }
        # OpenAPI 3.1 has no `nullable` keyword; fields that may be null use a
        # type array that includes 'null'.
        organization_id: { type: [string, 'null'] }
        integration_status:
          type: string
          enum: [unverified, in_evaluation, contracted, verified, live, deprecated, blocked]
        health_status:
          type: string
          enum: [unknown, reachable, unreachable, redirect, blocked_by_waf]
        intel_map_node_key: { type: [string, 'null'] }
        intel_map_layer_kind: { type: [string, 'null'] }
        last_verified_at: { type: [string, 'null'], format: date-time }
        last_verified_by: { type: [string, 'null'] }
        last_health_checked_at: { type: [string, 'null'], format: date-time }
        extra: { type: object, additionalProperties: true }
        updated_at: { type: string, format: date-time }

    # One row of the verification log.
    VerificationLogEntry:
      type: object
      properties:
        id: { type: string }
        source_id: { type: string }
        # OpenAPI 3.1 has no `nullable` keyword; fields that may be null use a
        # type array that includes 'null'.
        organization_id: { type: [string, 'null'] }
        user_id: { type: string }
        user_email: { type: [string, 'null'] }
        action:
          type: string
          enum: [verified, unverified, health_check, status_change, note, integration_status_change]
        previous_status: { type: [string, 'null'] }
        new_status: { type: [string, 'null'] }
        url_checked: { type: [string, 'null'] }
        http_status: { type: [integer, 'null'] }
        notes: { type: [string, 'null'] }
        metadata: { type: object, additionalProperties: true }
        created_at: { type: string, format: date-time }

    # A source combined with its knowledge row and verification log entries.
    KnowledgeView:
      type: object
      properties:
        ok: { type: boolean }
        source: { $ref: '#/components/schemas/Source' }
        knowledge: { $ref: '#/components/schemas/KnowledgeRow' }
        verification_log:
          type: array
          items: { $ref: '#/components/schemas/VerificationLogEntry' }
        # OpenAPI 3.1 has no `nullable` keyword; null is allowed via a type array.
        organization_id: { type: [string, 'null'] }

    # List of queue rows plus the parameters that produced it.
    VerificationQueue:
      type: object
      properties:
        ok: { type: boolean }
        queue:
          type: array
          items:
            # Each row must satisfy both the Source and KnowledgeRow schemas.
            allOf:
              - $ref: '#/components/schemas/Source'
              - $ref: '#/components/schemas/KnowledgeRow'
        count: { type: integer }
        stale_after_days: { type: integer }
        # OpenAPI 3.1 has no `nullable` keyword; null is allowed via a type array.
        organization_id: { type: [string, 'null'] }

    # Request body for a verification write; `id` and `action` are mandatory.
    VerifyRequest:
      type: object
      required: [id, action]
      properties:
        id: { type: string }
        action:
          type: string
          enum: [verified, unverified, health_check, status_change, note, integration_status_change]
        mark_verified: { type: boolean, default: false }
        integration_status:
          type: string
          enum: [unverified, in_evaluation, contracted, verified, live, deprecated, blocked]
        health_status:
          type: string
          enum: [unknown, reachable, unreachable, redirect, blocked_by_waf]
        # OpenAPI 3.1 has no `nullable` keyword; fields that may be null use a
        # type array that includes 'null'.
        intel_map_node_key: { type: [string, 'null'] }
        intel_map_layer_kind: { type: [string, 'null'] }
        url_checked: { type: [string, 'null'], format: uri }
        http_status: { type: [integer, 'null'] }
        notes: { type: [string, 'null'] }
        metadata: { type: object, additionalProperties: true }
        organization_id:
          type: [string, 'null']
          description: PLATFORM_ADMIN only.

    # Result of a verification write: the knowledge row, the new log entry id,
    # and the prior statuses.
    VerifyResponse:
      type: object
      properties:
        ok: { type: boolean }
        knowledge: { $ref: '#/components/schemas/KnowledgeRow' }
        log_entry_id: { type: string }
        previous:
          type: object
          properties:
            # OpenAPI 3.1 has no `nullable` keyword; null is allowed via a type array.
            integration_status: { type: [string, 'null'] }
            health_status: { type: [string, 'null'] }

    # Run-level counters plus one `results` entry per probed source.
    CronHealthSummary:
      type: object
      properties:
        ok: { type: boolean }
        started_at: { type: string, format: date-time }
        finished_at: { type: string, format: date-time }
        dry_run: { type: boolean }
        recheck_after_hours: { type: integer }
        requested_limit: { type: integer }
        available_rows: { type: integer }
        processed: { type: integer }
        succeeded: { type: integer }
        failed: { type: integer }
        results:
          type: array
          items:
            type: object
            properties:
              id: { type: string }
              name: { type: string }
              # OpenAPI 3.1 has no `nullable` keyword; fields that may be null
              # use a type array that includes 'null'.
              url: { type: [string, 'null'] }
              http_status: { type: integer }
              classification:
                type: string
                enum: [reachable, unreachable, redirect, blocked_by_waf, skipped, unknown]
              health: { type: string }
              server: { type: [string, 'null'] }
              location: { type: [string, 'null'] }
              error: { type: [string, 'null'] }
              db_error: { type: [string, 'null'] }
              log_error: { type: [string, 'null'] }

    # Error body; only `ok` and `error` are mandatory.
    ErrorEnvelope:
      type: object
      required:
        - ok
        - error
      properties:
        ok:
          type: boolean
          default: false
        error:
          type: string
        detail:
          type: string
        id:
          type: string
        path:
          type: string
