OpenAPI Specification
This is the OpenAPI specification file for the Extractable API.
File Details
File: api-swagger.yaml
Type: OpenAPI 3.0 Specification
Format: YAML
Preview
# OpenAPI document header: spec version, API metadata, server list, and the
# default security requirement for operations.
openapi: 3.0.0
info:
  title: RLS Document Extraction API
  description: >-
    REST API that accepts ANY document type (PDF, JPEG, PNG) and uses AI to
    automatically identify and classify documents. The system routes
    identified documents to appropriate extraction workflows based on a
    configurable document type registry. Currently supports Statement of
    Repair (SOR) and Insurance Certificate extraction, with easy
    extensibility for new document types.
  version: 1.0.0
  contact:
    name: API Support
servers:
  - url: https://api.extractable.xyz
    description: Production server
# Document-level default: operations require bearerAuth unless they declare
# their own `security` list.
security:
  - bearerAuth: []
# Operations exposed by the API. Each operation inherits the document-level
# bearerAuth requirement unless it declares its own `security` list.
paths:
  /api/documents/extract:
    # Submit a document; processing is asynchronous (202 Accepted).
    post:
      summary: Submit any document for identification and extraction
      description: >-
        Upload ANY document (PDF, JPEG, or PNG) for automatic AI-powered
        identification. The system will analyze the document, determine its
        type using the document type registry, and route it to the
        appropriate extraction workflow if a match is found. Unidentified
        documents are marked as 'unknown' type.
      operationId: extractDocument
      tags:
        - Document Extraction
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required:
                - file
              properties:
                file:
                  type: string
                  format: binary
                  description: The document file to extract data from
                webhookUrl:
                  type: string
                  format: uri
                  description: Optional webhook URL to receive extraction results
                metadata:
                  type: object
                  description: Optional metadata to attach to the extraction job
                  additionalProperties: true
      responses:
        '202':
          description: Document accepted for processing
          content:
            application/json:
              schema:
                # NOTE(review): ExtractionJobResponse is the job-status schema
                # that requires fileName, fileType and updatedAt. Confirm the
                # 202 body really carries those fields; if it only returns
                # jobId/status/pollingUrl/createdAt it needs its own schema.
                $ref: '#/components/schemas/ExtractionJobResponse'
        '400':
          description: Bad request - Invalid file or parameters
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized - Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '413':
          description: File too large
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '429':
          description: Too many requests - Rate limit exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  /api/documents/extract/{jobId}:
    # Poll a previously submitted job by its UUID.
    get:
      summary: Get extraction job status and results
      description: >-
        Get the status of a document extraction job. When the job is
        completed, this endpoint also returns the full extraction results
        including identified document type, confidence scores, and extracted
        data.
      operationId: getExtractionJob
      tags:
        - Document Extraction
      parameters:
        - name: jobId
          in: path
          required: true
          description: The extraction job ID
          schema:
            type: string
            format: uuid
      responses:
        '200':
          description: Extraction job details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractionJobResponse'
        '401':
          description: Unauthorized - Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  /api/documents/types:
    # Enumerate the document types registered for identification.
    get:
      summary: List supported document types
      description: >-
        Get a list of all document types currently registered in the system
        for automatic identification and extraction
      operationId: getDocumentTypes
      tags:
        - Document Extraction
      responses:
        '200':
          description: List of supported document types
          content:
            application/json:
              schema:
                type: object
                properties:
                  types:
                    type: array
                    items:
                      type: object
                      properties:
                        typeCode:
                          type: string
                          example: sor
                        displayName:
                          type: string
                          example: Statement of Repair
                        description:
                          type: string
                          example: HUD 203(k) Statement of Repair documents
                        confidenceThreshold:
                          type: number
                          format: float
                          example: 0.85
                        extractionSupported:
                          type: boolean
                          example: true
                          description: Whether full data extraction is available for this type
        # This operation inherits the document-level bearerAuth requirement,
        # so the 401 case is documented like the other secured operations.
        '401':
          description: Unauthorized - Invalid or missing API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  /api/health:
    get:
      summary: Health check
      description: Check if the API is operational
      operationId: healthCheck
      tags:
        - System
      # Empty list overrides the document-level requirement: no auth needed.
      security: []
      responses:
        '200':
          description: API is healthy
          content:
            application/json:
              schema:
                type: object
                properties:
                  status:
                    type: string
                    example: ok
                  timestamp:
                    type: string
                    format: date-time
# Reusable security schemes and schemas referenced from `paths`.
components:
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      description: Use your API key as the bearer token

  schemas:
    # Lightweight job acknowledgement (job id, status, polling URL).
    # This schema was originally declared under the key ExtractionJobResponse,
    # colliding with the full job-status schema below; duplicate mapping keys
    # are invalid YAML and most parsers silently keep only the last one, so
    # this definition was unreachable. It now has a unique name.
    # NOTE(review): no operation references this schema yet. The 202 response
    # of POST /api/documents/extract is the likely consumer - TODO confirm.
    ExtractionJobAccepted:
      type: object
      required:
        - jobId
        - status
        - pollingUrl
        - createdAt
      properties:
        jobId:
          type: string
          format: uuid
          example: 123e4567-e89b-12d3-a456-426614174000
        status:
          type: string
          enum: [pending, processing, completed, failed]
          example: pending
        pollingUrl:
          type: string
          format: uri
          # NOTE(review): this example host (draw.extractable.xyz) differs
          # from the server declared under `servers` (api.extractable.xyz).
          # Confirm which host is correct.
          example: https://draw.extractable.xyz/api/documents/extract/123e4567-e89b-12d3-a456-426614174000
        createdAt:
          type: string
          format: date-time
          # Quoted so YAML 1.1 loaders keep it a string instead of converting
          # it to a native timestamp object.
          example: '2024-01-01T00:00:00Z'

    # Full job record: status, classification result and, once completed,
    # the extracted data.
    ExtractionJobResponse:
      type: object
      required:
        - jobId
        - status
        - fileName
        - fileType
        - createdAt
        - updatedAt
      properties:
        jobId:
          type: string
          format: uuid
        status:
          type: string
          enum: [pending, processing, completed, failed]
        fileName:
          type: string
          example: invoice.pdf
        fileType:
          type: string
          enum: [pdf, jpeg, png]
        documentType:
          type: string
          description: >-
            Identified document type code from registry (e.g., 'sor',
            'insurance_cert') or 'unknown' if not matched
        progress:
          type: integer
          minimum: 0
          maximum: 100
          example: 75
        classificationConfidence:
          type: number
          format: float
          minimum: 0
          maximum: 1
          example: 0.92
          description: Confidence score of the document type identification
        alternativeTypes:
          type: array
          description: Other potential document type matches below the threshold
          items:
            type: object
            properties:
              typeCode:
                type: string
                example: invoice
              displayName:
                type: string
                example: Invoice
              confidence:
                type: number
                format: float
                example: 0.65
        error:
          type: string
          description: Error message if status is failed
        extractedData:
          type: object
          # OpenAPI 3.0 has no 'null' type; nullability is expressed with
          # `nullable: true`, so the former `type: 'null'` oneOf branch
          # (valid only from OpenAPI 3.1) has been removed.
          nullable: true
          description: >-
            Extracted structured data (only available when status is
            'completed', schema varies by document type, null for unknown
            types)
          # NOTE(review): neither variant declares required properties or
          # `additionalProperties: false`, so a single object can validate
          # against both and strict oneOf validation would reject it.
          # Consider a discriminator or anyOf.
          oneOf:
            - $ref: '#/components/schemas/SORExtractedData'
            - $ref: '#/components/schemas/InsuranceCertExtractedData'
        metadata:
          type: object
          description: Any metadata provided during submission
          additionalProperties: true
        processingTime:
          type: number
          format: float
          example: 12.5
          description: Processing time in seconds (only available when completed)
        createdAt:
          type: string
          format: date-time
        updatedAt:
          type: string
          format: date-time
        completedAt:
          type: string
          format: date-time
          description: Timestamp when processing completed (if applicable)

    SORExtractedData:
      type: object
      description: Extracted data from Statement of Repair document
      properties:
        documentMetadata:
          type: object
          properties:
            consultant:
              type: string
            propertyAddress:
              type: string
            borrowerName:
              type: string
            lenderName:
              type: string
            inspectionDate:
              type: string
              format: date
        contractorInfo:
          type: object
          properties:
            name:
              type: string
            license:
              type: string
            phone:
              type: string
        constructionSections:
          type: array
          items:
            type: object
            properties:
              sectionName:
                type: string
              lineItems:
                type: array
                items:
                  type: object
                  properties:
                    description:
                      type: string
                    quantity:
                      type: number
                    unit:
                      type: string
                    materialCost:
                      type: number
                    laborCost:
                      type: number
                    totalCost:
                      type: number
        recapSummary:
          type: object
          properties:
            subtotal:
              type: number
            generalConditions:
              type: number
            overheadAndProfit:
              type: number
            grandTotal:
              type: number

    InsuranceCertExtractedData:
      type: object
      description: Extracted data from Insurance Certificate document
      properties:
        certificateNumber:
          type: string
        issueDate:
          type: string
          format: date
        producer:
          type: object
          properties:
            name:
              type: string
            address:
              type: string
            phone:
              type: string
        insured:
          type: object
          properties:
            name:
              type: string
            address:
              type: string
        coverages:
          type: array
          items:
            type: object
            properties:
              type:
                type: string
                enum: [general_liability, auto_liability, umbrella, workers_comp]
              insurer:
                type: string
              policyNumber:
                type: string
              effectiveDate:
                type: string
                format: date
              expirationDate:
                type: string
                format: date
              # Free-form map of limit name to numeric amount.
              limits:
                type: object
                additionalProperties:
                  type: number
        certificateHolder:
          type: object
          properties:
            name:
              type: string
            address:
              type: string

    # Error envelope shared by all non-2xx responses.
    ErrorResponse:
      type: object
      required:
        - error
        - message
      properties:
        error:
          type: string
          example: invalid_request
        message:
          type: string
          example: The provided file format is not supported
        details:
          type: object
          additionalProperties: true
# Tag catalogue; the names match the `tags` entries used by the operations.
tags:
  - name: Document Extraction
    description: Operations for document extraction and processing
  - name: System
    description: System health and status operations