OpenAPI Specification

This is the OpenAPI specification file for the Extractable API.

File Details

File: api-swagger.yaml

Type: OpenAPI 3.0 Specification

Format: YAML

Preview

# Version of the OpenAPI Specification this document is written against.
openapi: 3.0.0
# General information about the API: title, release version, contact, summary.
info:
  title: 'RLS Document Extraction API'
  version: '1.0.0'
  contact:
    name: 'API Support'
  description: >-
    REST API that accepts ANY document type (PDF, JPEG, PNG) and uses AI to
    automatically identify and classify documents. The system routes
    identified documents to appropriate extraction workflows based on a
    configurable document type registry. Currently supports Statement of
    Repair (SOR) and Insurance Certificate extraction, with easy extensibility
    for new document types.

# Base URL against which every entry under `paths` is resolved.
servers:
  - description: 'Production server'
    url: 'https://api.extractable.xyz'

# Default security requirement for all operations. An operation can override
# it by declaring its own `security` entry.
security:
  - bearerAuth: []

paths:
  # Submission endpoint. The request is only accepted here (202); the job is
  # then tracked through the job-status endpoint further down.
  /api/documents/extract:
    post:
      summary: Submit any document for identification and extraction
      description: >-
        Upload ANY document (PDF, JPEG, or PNG) for automatic AI-powered
        identification. The system will analyze the document, determine its
        type using the document type registry, and route it to the appropriate
        extraction workflow if a match is found. Unidentified documents are
        marked as 'unknown' type.
      operationId: extractDocument
      tags:
        - Document Extraction
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              # Only the file part is mandatory; the other parts are optional.
              required:
                - file
              properties:
                file:
                  type: string
                  format: binary
                  description: The document file to extract data from
                webhookUrl:
                  type: string
                  format: uri
                  description: Optional webhook URL to receive extraction results
                metadata:
                  type: object
                  description: Optional metadata to attach to the extraction job
                  additionalProperties: true
      responses:
        '202':
          description: Document accepted for processing
          content:
            application/json:
              schema:
                # Short acknowledgement (job id, status, polling URL), not the
                # full job record.
                $ref: '#/components/schemas/ExtractionJobAccepted'
        '400':
          description: Bad request - Invalid file or parameters
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized - Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '413':
          description: File too large
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '429':
          description: Too many requests - Rate limit exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Job-status endpoint: returns the full job record for a given job id.
  /api/documents/extract/{jobId}:
    get:
      summary: Get extraction job status and results
      description: >-
        Get the status of a document extraction job. When the job is
        completed, this endpoint also returns the full extraction results
        including identified document type, confidence scores, and extracted
        data.
      operationId: getExtractionJob
      tags:
        - Document Extraction
      parameters:
        - name: jobId
          in: path
          required: true
          description: The extraction job ID
          schema:
            type: string
            format: uuid
      responses:
        '200':
          description: Extraction job details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionJobResponse'
        '401':
          description: Unauthorized - Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Registry listing: the document types the service can identify.
  /api/documents/types:
    get:
      summary: List supported document types
      description: >-
        Get a list of all document types currently registered in the system
        for automatic identification and extraction
      operationId: getDocumentTypes
      tags:
        - Document Extraction
      responses:
        '200':
          description: List of supported document types
          content:
            application/json:
              schema:
                type: object
                properties:
                  types:
                    type: array
                    items:
                      type: object
                      properties:
                        typeCode:
                          type: string
                          example: sor
                        displayName:
                          type: string
                          example: Statement of Repair
                        description:
                          type: string
                          example: HUD 203(k) Statement of Repair documents
                        confidenceThreshold:
                          type: number
                          format: float
                          example: 0.85
                        extractionSupported:
                          type: boolean
                          example: true
                          description: Whether full data extraction is available for this type
        # This operation inherits the document-level bearerAuth requirement,
        # so it documents the same auth/server failures as its siblings.
        '401':
          description: Unauthorized - Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  /api/health:
    get:
      summary: Health check
      description: Check if the API is operational
      operationId: healthCheck
      tags:
        - System
      # Empty list overrides the document-level requirement: no token needed.
      security: []
      responses:
        '200':
          description: API is healthy
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    example: ok
                  timestamp:
                    type: string
                    format: date-time

components:
  securitySchemes:
    # HTTP bearer authentication; the scheme name is what `security` refers to.
    bearerAuth:
      type: http
      scheme: bearer
      description: Use your API key as the bearer token

  schemas:
    # Acknowledgement body for the 202 response of POST /api/documents/extract.
    # It has its own name so it does not collide with the full job record
    # schema, ExtractionJobResponse (duplicate mapping keys are invalid YAML).
    ExtractionJobAccepted:
      type: object
      required:
        - jobId
        - status
        - pollingUrl
        - createdAt
      properties:
        jobId:
          type: string
          format: uuid
          example: '123e4567-e89b-12d3-a456-426614174000'
        status:
          type: string
          enum: [pending, processing, completed, failed]
          example: pending
        pollingUrl:
          type: string
          format: uri
          # Example host kept in line with the server declared under `servers`.
          example: 'https://api.extractable.xyz/api/documents/extract/123e4567-e89b-12d3-a456-426614174000'
        createdAt:
          type: string
          format: date-time
          # Quoted so YAML 1.1 loaders keep a string instead of a timestamp.
          example: '2024-01-01T00:00:00Z'

    # Job record returned by GET /api/documents/extract/{jobId}: job status
    # plus, depending on that status, classification and extraction results.
    ExtractionJobResponse:
      type: object
      required:
        - jobId
        - status
        - fileName
        - fileType
        - createdAt
        - updatedAt
      properties:
        jobId:
          type: string
          format: uuid
        status:
          type: string
          enum: [pending, processing, completed, failed]
        fileName:
          type: string
          example: invoice.pdf
        fileType:
          type: string
          enum: [pdf, jpeg, png]
        documentType:
          type: string
          description: >-
            Identified document type code from registry (e.g., 'sor',
            'insurance_cert') or 'unknown' if not matched
        progress:
          type: integer
          minimum: 0
          maximum: 100
          example: 75
        classificationConfidence:
          type: number
          format: float
          minimum: 0
          maximum: 1
          example: 0.92
          description: Confidence score of the document type identification
        alternativeTypes:
          type: array
          description: Other potential document type matches below the threshold
          items:
            type: object
            properties:
              typeCode:
                type: string
                example: invoice
              displayName:
                type: string
                example: Invoice
              confidence:
                type: number
                format: float
                example: 0.65
        error:
          type: string
          description: Error message if status is failed
        extractedData:
          type: object
          # OpenAPI 3.0 has no 'null' type, so a `type: 'null'` branch is not
          # valid here; a null value is allowed through `nullable: true`.
          nullable: true
          description: >-
            Extracted structured data (only available when status is
            'completed', schema varies by document type, null for unknown
            types)
          # NOTE(review): neither referenced schema declares required fields,
          # so an object may satisfy both branches; confirm whether `anyOf` or
          # a discriminator is intended for strict validation.
          oneOf:
            - $ref: '#/components/schemas/SORExtractedData'
            - $ref: '#/components/schemas/InsuranceCertExtractedData'
        metadata:
          type: object
          description: Any metadata provided during submission
          additionalProperties: true
        processingTime:
          type: number
          format: float
          example: 12.5
          description: Processing time in seconds (only available when completed)
        createdAt:
          type: string
          format: date-time
        updatedAt:
          type: string
          format: date-time
        completedAt:
          type: string
          format: date-time
          description: Timestamp when processing completed (if applicable)

    # Result payload for Statement of Repair documents. Three top-level parts:
    # documentMetadata, constructionSections and recapSummary. No property is
    # marked as required.
    SORExtractedData:
      description: Extracted data from Statement of Repair document
      type: object
      properties:
        documentMetadata:
          type: object
          properties:
            consultant: {type: string}
            propertyAddress: {type: string}
            borrowerName: {type: string}
            lenderName: {type: string}
            inspectionDate: {type: string, format: date}
            contractorInfo:
              type: object
              properties:
                name: {type: string}
                license: {type: string}
                phone: {type: string}
        # Array of sections; each section carries its own array of line items.
        constructionSections:
          type: array
          items:
            type: object
            properties:
              sectionName: {type: string}
              lineItems:
                type: array
                items:
                  type: object
                  properties:
                    description: {type: string}
                    quantity: {type: number}
                    unit: {type: string}
                    materialCost: {type: number}
                    laborCost: {type: number}
                    totalCost: {type: number}
        recapSummary:
          type: object
          properties:
            subtotal: {type: number}
            generalConditions: {type: number}
            overheadAndProfit: {type: number}
            grandTotal: {type: number}

    # Result payload for Insurance Certificate documents: certificate
    # identifiers, the parties named on it, and an array of coverages.
    # No property is marked as required.
    InsuranceCertExtractedData:
      description: Extracted data from Insurance Certificate document
      type: object
      properties:
        certificateNumber: {type: string}
        issueDate: {type: string, format: date}
        producer:
          type: object
          properties:
            name: {type: string}
            address: {type: string}
            phone: {type: string}
        insured:
          type: object
          properties:
            name: {type: string}
            address: {type: string}
        coverages:
          type: array
          items:
            type: object
            properties:
              # A property literally named `type`, restricted to four values.
              type:
                type: string
                enum:
                  - general_liability
                  - auto_liability
                  - umbrella
                  - workers_comp
              insurer: {type: string}
              policyNumber: {type: string}
              effectiveDate: {type: string, format: date}
              expirationDate: {type: string, format: date}
              # Free-form map: arbitrary keys, each with a numeric value.
              limits:
                type: object
                additionalProperties: {type: number}
        certificateHolder:
          type: object
          properties:
            name: {type: string}
            address: {type: string}

    # Error body referenced by the 4xx/5xx responses of the operations.
    # `error` and `message` are always present; `details` is an optional
    # free-form object.
    ErrorResponse:
      type: object
      required: [error, message]
      properties:
        error:
          example: invalid_request
          type: string
        message:
          example: The provided file format is not supported
          type: string
        details:
          additionalProperties: true
          type: object

# Tag definitions; the names here are the ones used in each operation's
# `tags` list.
tags:
  - name: 'Document Extraction'
    description: 'Operations for document extraction and processing'
  - name: 'System'
    description: 'System health and status operations'