Skip to content

Document

RESTful document endpoints for collections.

delete_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user)) async

Delete a document from a collection.

Removes the document record and attempts to delete associated vectors.

Parameters:

Name Type Description Default
collection_id str

Collection identifier.

Path(..., description='Collection ID')
document_id str

Document identifier.

Path(..., description='Document ID')
requesting_user User

Authenticated user injected by dependency.

Depends(get_current_user)

Returns:

Type Description
dict

Confirmation message.

Raises:

Type Description
HTTPException

404 if not found; 400 if document not in collection; 403 if deletion is forbidden.

Source code in routers/document.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
@router.delete("/collections/{collection_id}/documents/{document_id}")
async def delete_document(
    collection_id: str = Path(..., description="Collection ID"),
    document_id: str = Path(..., description="Document ID"),
    requesting_user: User = Depends(get_current_user),
) -> dict:
    """
    Remove a single document (and, best-effort, its vectors) from a collection.

    The collection's ownership is validated first, then the document is
    looked up and checked against both the collection and the caller. Vector
    deletion is attempted before the record is removed; a non-HTTP failure
    there is logged and does not stop the record from being deleted.

    Args:
        collection_id (str): Collection identifier taken from the URL path.
        document_id (str): Document identifier taken from the URL path.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        A dict with a single ``message`` key confirming the deletion.

    Raises:
        HTTPException: 404 if the collection or document is not found; 400 if
            the document is not part of the collection; 403 if the caller does
            not own the collection or the document.
    """
    # The helper raises 404/403 on its own; its return value is not needed here.
    await get_collection_and_validate_ownership(collection_id, requesting_user)

    target = await DocumentModel.find_by_id(document_id)
    if not target:
        raise HTTPException(status_code=404, detail="Document not found")

    # Pick the first failing check (collection membership, then ownership)
    # and raise once at the bottom.
    rejection = None
    if target.collection_id != collection_id:
        rejection = (400, "Document does not belong to this collection")
    elif target.user_id != requesting_user.id:
        rejection = (403, "You are not allowed to delete this document")

    if rejection is not None:
        status, reason = rejection
        raise HTTPException(status_code=status, detail=reason)

    store = VectorStoreManager()
    try:
        store.delete_docs_by_metadata_filter(
            collection_name=collection_id,
            metadata={"document_id": document_id},
        )
    except HTTPException:
        # HTTP errors from the vector store are surfaced to the client as-is.
        raise
    except Exception as e:
        # Best-effort cleanup: any other failure is only logged so the
        # document record can still be removed below.
        logger.error(f"Failed to delete vectors for document {document_id}: {e}")

    await target.delete()

    confirmation = {"message": "Document and embeddings deleted successfully"}
    return confirmation

get_collection_and_validate_ownership(collection_id, requesting_user) async

Get collection and validate user ownership.

Source code in routers/document.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
async def get_collection_and_validate_ownership(
    collection_id: str, requesting_user: User
) -> Collection:
    """
    Load a collection and make sure the requesting user owns it.

    Args:
        collection_id (str): Identifier passed to ``Collection.find_by_id``.
        requesting_user (User): User whose ``id`` must match the collection's
            ``user_id``.

    Returns:
        The collection that was found.

    Raises:
        HTTPException: 404 if no collection is found; 403 if the collection's
            ``user_id`` differs from the requesting user's ``id``.
    """
    found = await Collection.find_by_id(collection_id)

    if not found:
        raise HTTPException(status_code=404, detail="Collection not found")

    owned_by_someone_else = found.user_id != requesting_user.id
    if owned_by_someone_else:
        raise HTTPException(
            status_code=403, detail="You are not allowed to access this collection"
        )

    return found

get_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user)) async

Get a specific document from a collection.

Parameters:

Name Type Description Default
collection_id str

Collection identifier.

Path(..., description='Collection ID')
document_id str

Document identifier.

Path(..., description='Document ID')
requesting_user User

Authenticated user injected by dependency.

Depends(get_current_user)

Returns:

Type Description
Document

Document details.

Raises:

Type Description
HTTPException

404 if not found; 400 if document not in collection; 403 if access is forbidden.

Source code in routers/document.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
@router.get(
    "/collections/{collection_id}/documents/{document_id}", response_model=DocumentModel
)
async def get_document(
    collection_id: str = Path(..., description="Collection ID"),
    document_id: str = Path(..., description="Document ID"),
    requesting_user: User = Depends(get_current_user),
) -> DocumentModel:
    """
    Fetch one document that lives in the given collection.

    Args:
        collection_id (str): Collection identifier taken from the URL path.
        document_id (str): Document identifier taken from the URL path.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        The matching document.

    Raises:
        HTTPException: 404 if the collection or document is not found; 400 if
            the document is not part of the collection; 403 if the caller does
            not own the collection or the document.
    """
    # Collection-level 404/403 checks are delegated to the shared helper.
    await get_collection_and_validate_ownership(collection_id, requesting_user)

    found = await DocumentModel.find_by_id(document_id)
    if not found:
        raise HTTPException(status_code=404, detail="Document not found")

    # Collection membership is checked before ownership; the happy path
    # returns from the else branch.
    if found.collection_id != collection_id:
        status, reason = 400, "Document does not belong to this collection"
    elif found.user_id != requesting_user.id:
        status, reason = 403, "You are not allowed to access this document"
    else:
        return found

    raise HTTPException(status_code=status, detail=reason)

list_documents(collection_id=Path(..., description='Collection ID'), pagination=Depends(), requesting_user=Depends(get_current_user)) async

List documents in a collection.

Parameters:

Name Type Description Default
collection_id str

Collection identifier.

Path(..., description='Collection ID')
pagination Pagination

Pagination parameters.

Depends()
requesting_user User

Authenticated user injected by dependency.

Depends(get_current_user)

Returns:

Type Description
PaginatedResponse[Document]

Paginated documents for the collection.

Raises:

Type Description
HTTPException

404 if collection is not found; 403 if access is forbidden.

Source code in routers/document.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
@router.get(
    "/collections/{collection_id}/documents",
    response_model=PaginatedResponse[DocumentModel],
)
async def list_documents(
    collection_id: str = Path(..., description="Collection ID"),
    pagination: Pagination = Depends(),
    requesting_user: User = Depends(get_current_user),
) -> PaginatedResponse[DocumentModel]:
    """
    Return one page of the documents stored in a collection.

    Args:
        collection_id (str): Collection identifier taken from the URL path.
        pagination (Pagination): Supplies the ``limit`` and ``page`` values
            forwarded to the query.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        The paginated result of querying documents by ``collection_id``.

    Raises:
        HTTPException: 404 if collection is not found; 403 if access is forbidden.
    """
    await get_collection_and_validate_ownership(collection_id, requesting_user)

    # Only documents of this collection, ordered on "timestamp" with
    # direction -1 (presumably newest first; confirm against the model layer).
    scope = {"collection_id": collection_id}
    ordering = [("timestamp", -1)]

    page_of_documents = await DocumentModel.find_all_with_pagination(
        filter_dict=scope,
        limit=pagination.limit,
        page=pagination.page,
        sort=ordering,
    )
    return page_of_documents

upload_documents(collection_id=Path(..., description='Collection ID'), files=File(...), metadata_urls=Form(default=None), metadata_names=Form(default=None), embeddings_model=Form(default=DEFAULT_EMBEDDING_MODEL), chunk_size=Form(default=DEFAULT_CHUNK_SIZE), chunk_overlap=Form(default=DEFAULT_CHUNK_OVERLAP), requesting_user=Depends(get_current_user)) async

Upload documents to a collection.

Stores document records and triggers asynchronous parsing, chunking, and vectorization for retrieval.

Parameters:

Name Type Description Default
collection_id str

Collection identifier.

Path(..., description='Collection ID')
files list[UploadFile]

One or more files to ingest.

File(...)
metadata_urls list[str] | str | None

Optional list or single URL per file.

Form(default=None)
metadata_names list[str] | str | None

Optional list or single display name per file.

Form(default=None)
embeddings_model str

Embeddings model to use for vectorization.

Form(default=DEFAULT_EMBEDDING_MODEL)
chunk_size int

Chunk size for splitting documents.

Form(default=DEFAULT_CHUNK_SIZE)
chunk_overlap int

Overlap between chunks.

Form(default=DEFAULT_CHUNK_OVERLAP)
requesting_user User

Authenticated user injected by dependency.

Depends(get_current_user)

Returns:

Type Description
dict

Service response with ingestion details.

Raises:

Type Description
HTTPException

404 if collection is not found; 403 if access is forbidden; 500 for processing errors.

Source code in routers/document.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def _metadata_value_for_file(
    values: Optional[List[str] | str], index: int
) -> Optional[str]:
    """Return the metadata entry for the file at ``index``, or None if absent."""
    if not values:
        return None
    # A lone string is treated as a one-element list (it belongs to the first
    # file). Indexing it directly would yield a single character.
    entries = [values] if isinstance(values, str) else values
    return entries[index] if index < len(entries) else None


@router.post("/collections/{collection_id}/documents")
async def upload_documents(
    collection_id: str = Path(..., description="Collection ID"),
    files: List[UploadFile] = File(...),
    metadata_urls: Optional[List[str] | str] = Form(default=None),
    metadata_names: Optional[List[str] | str] = Form(default=None),
    embeddings_model: str = Form(default=DEFAULT_EMBEDDING_MODEL),
    chunk_size: int = Form(default=DEFAULT_CHUNK_SIZE),
    chunk_overlap: int = Form(default=DEFAULT_CHUNK_OVERLAP),
    requesting_user: User = Depends(get_current_user),
) -> dict:
    """
    Upload documents to a collection.

    Builds one document record per uploaded file, hands the files to
    ``document_service.add_documents`` for ingestion, and persists the records
    only after the service reports success.

    Args:
        collection_id (str): Collection identifier.
        files (list[UploadFile]): One or more files to ingest.
        metadata_urls (list[str] | str | None): Optional source URLs, matched
            to files by position. A single string applies to the first file;
            files without a matching entry get no source URL.
        metadata_names (list[str] | str | None): Optional list or single
            display name per file; forwarded to the ingestion service.
        embeddings_model (str): Embeddings model to use when the collection
            does not define its own.
        chunk_size (int): Chunk size for splitting documents.
        chunk_overlap (int): Overlap between chunks.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        The ``data`` payload of the ingestion service result.

    Raises:
        HTTPException: 404 if collection is not found; 403 if access is
            forbidden; 500 if the service reports failure or any other error
            occurs during processing.
    """
    collection = await get_collection_and_validate_ownership(
        collection_id, requesting_user
    )

    logger.info(
        f"Received {len(files)} files for processing in collection {collection_id}"
    )

    docs_data = [
        DocumentModel(
            user_id=requesting_user.id,
            collection_id=collection_id,
            name=file.filename,
            filename=file.filename,
            # An upload may arrive without a filename; splitext(None) would
            # raise TypeError, so fall back to an empty extension.
            file_type=os.path.splitext(file.filename or "")[1].lstrip("."),
            source_url=_metadata_value_for_file(metadata_urls, i),
        )
        for i, file in enumerate(files)
    ]

    try:
        # The collection's own embeddings model wins over the form value.
        effective_model = collection.embeddings_model or embeddings_model
        result = await document_service.add_documents(
            collection_name=collection_id,
            files=files,
            request=AddDocumentRequest(
                embeddings_model=effective_model,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                metadata_urls=metadata_urls,
                metadata_names=metadata_names,
            ),
            metadata_urls=metadata_urls,
            metadata_names=metadata_names,
        )

        if not result.success:
            raise HTTPException(status_code=500, detail=result.error)

        # Records are written only after ingestion succeeded.
        await DocumentModel.bulk_create(docs_data)
        return result.data
    except HTTPException:
        # Already a well-formed HTTP error; propagate it unchanged.
        raise
    except Exception as e:
        logger.error(f"Error processing documents: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Error processing documents: {str(e)}"
        ) from e