Qubrid AI · Schema
Qubrid AI RAG Entities
Schema definitions for Qubrid AI RAG API entities including knowledge bases, documents, RAG queries, and source references.
Artificial Intelligence, Cloud Computing, GPU, Inference, Large Language Models, Machine Learning, NVIDIA, Serverless
JSON Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://platform.qubrid.com/schemas/qubrid-ai/rag.json",
"title": "Qubrid AI RAG Entities",
"description": "Schema definitions for Qubrid AI RAG API entities including knowledge bases, documents, RAG queries, and source references.",
"type": "object",
"$defs": {
"KnowledgeBase": {
"type": "object",
"title": "Knowledge Base",
"description": "A knowledge base containing enterprise documents that are chunked, embedded, and indexed for retrieval-augmented generation.",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the knowledge base."
},
"name": {
"type": "string",
"description": "The display name of the knowledge base.",
"maxLength": 256
},
"description": {
"type": "string",
"description": "A description of the knowledge base and the type of data it contains.",
"maxLength": 1024
},
"embedding_model": {
"type": "string",
"description": "The embedding model used to generate vector representations of document chunks."
},
"chunk_size": {
"type": "integer",
"description": "The target size of each document chunk in tokens.",
"minimum": 64,
"maximum": 4096
},
"chunk_overlap": {
"type": "integer",
"description": "The number of tokens of overlap between consecutive document chunks.",
"minimum": 0,
"maximum": 1024
},
"document_count": {
"type": "integer",
"description": "The total number of documents in the knowledge base.",
"minimum": 0
},
"chunk_count": {
"type": "integer",
"description": "The total number of chunks across all documents.",
"minimum": 0
},
"status": {
"type": "string",
"enum": ["ready", "processing", "error"],
"description": "The current status of the knowledge base."
},
"created_at": {
"type": "string",
"format": "date-time",
"description": "The timestamp when the knowledge base was created."
},
"updated_at": {
"type": "string",
"format": "date-time",
"description": "The timestamp when the knowledge base was last updated."
}
}
},
"Document": {
"type": "object",
"title": "Document",
"description": "A document uploaded to a knowledge base that is chunked and embedded for retrieval.",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the document."
},
"name": {
"type": "string",
"description": "The original filename of the uploaded document."
},
"size_bytes": {
"type": "integer",
"description": "The size of the document file in bytes.",
"minimum": 0
},
"format": {
"type": "string",
"description": "The file format of the document: pdf, txt, csv, docx, md, or html.",
"enum": ["pdf", "txt", "csv", "docx", "md", "html"]
},
"chunk_count": {
"type": "integer",
"description": "The number of chunks the document was split into.",
"minimum": 0
},
"status": {
"type": "string",
"enum": ["processing", "ready", "error"],
"description": "The processing status of the document."
},
"created_at": {
"type": "string",
"format": "date-time",
"description": "The timestamp when the document was uploaded."
}
}
},
"RagQueryRequest": {
"type": "object",
"title": "RAG Query Request",
"description": "A request to perform retrieval-augmented generation against a knowledge base.",
"required": ["knowledge_base_id", "query", "model"],
"properties": {
"knowledge_base_id": {
"type": "string",
"description": "The identifier of the knowledge base to query against."
},
"query": {
"type": "string",
"description": "The natural language question or query to answer.",
"minLength": 1
},
"model": {
"type": "string",
"description": "The identifier of the LLM to use for generating the response."
},
"top_k": {
"type": "integer",
"description": "The number of most relevant document chunks to retrieve.",
"minimum": 1,
"maximum": 20,
"default": 5
},
"temperature": {
"type": "number",
"description": "Sampling temperature for response generation.",
"minimum": 0,
"maximum": 2,
"default": 0.7
},
"max_tokens": {
"type": "integer",
"description": "Maximum number of tokens to generate in the response.",
"minimum": 1
},
"include_sources": {
"type": "boolean",
"description": "Whether to include source document references in the response.",
"default": true
}
}
},
"RagQueryResponse": {
"type": "object",
"title": "RAG Query Response",
"description": "The response from a RAG query containing the generated answer and source references.",
"properties": {
"answer": {
"type": "string",
"description": "The generated response grounded in retrieved document context."
},
"model": {
"type": "string",
"description": "The model used to generate the response."
},
"sources": {
"type": "array",
"description": "Source document references used as context for the response.",
"items": {
"$ref": "#/$defs/SourceReference"
}
},
"usage": {
"type": "object",
"description": "Token usage statistics for the RAG query.",
"properties": {
"prompt_tokens": {
"type": "integer",
"description": "Number of tokens in the prompt including retrieved context."
},
"completion_tokens": {
"type": "integer",
"description": "Number of tokens in the generated response."
},
"total_tokens": {
"type": "integer",
"description": "Total number of tokens used by the RAG query."
}
}
}
}
},
"SourceReference": {
"type": "object",
"title": "Source Reference",
"description": "A reference to a source document chunk used as context in a RAG response.",
"properties": {
"document_id": {
"type": "string",
"description": "The identifier of the source document."
},
"document_name": {
"type": "string",
"description": "The name of the source document."
},
"chunk_text": {
"type": "string",
"description": "The text content of the retrieved chunk."
},
"relevance_score": {
"type": "number",
"description": "The semantic similarity score between the query and this chunk.",
"minimum": 0,
"maximum": 1
}
}
}
}
}