Document

RESTful document endpoints for collections.

`delete_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user))` `async`

Delete a document from a collection.

Removes the document record and attempts to delete associated vectors.

Parameters:

Name	Type	Description	Default
`collection_id`	`str`	Collection identifier.	`Path(..., description='Collection ID')`
`document_id`	`str`	Document identifier.	`Path(..., description='Document ID')`
`requesting_user`	`User`	Authenticated user injected by dependency.	`Depends(get_current_user)`

Returns:

Type	Description
`dict`	Confirmation message.

Raises:

Type	Description
`HTTPException`	404 if not found; 400 if document not in collection; 403 if deletion is forbidden.

Source code in routers/document.py

@router.delete("/collections/{collection_id}/documents/{document_id}")
async def delete_document(
    collection_id: str = Path(..., description="Collection ID"),
    document_id: str = Path(..., description="Document ID"),
    requesting_user: User = Depends(get_current_user),
) -> dict:
    """
    Delete a document from a collection.

    Validates that the requesting user owns both the collection and the
    document, attempts to delete the document's vectors from the vector store,
    and then deletes the document record itself.

    Vector deletion is best-effort: any non-HTTP error raised by the vector
    store is logged (with traceback) and the document record is still deleted.

    Args:
        collection_id (str): Collection identifier.
        document_id (str): Document identifier.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        Confirmation message.

    Raises:
        HTTPException: 404 if the collection or document is not found; 400 if
            the document is not in the collection; 403 if the collection or
            document belongs to another user.
    """
    await get_collection_and_validate_ownership(collection_id, requesting_user)

    document = await DocumentModel.find_by_id(document_id)
    if not document:
        raise HTTPException(status_code=404, detail="Document not found")

    if document.collection_id != collection_id:
        raise HTTPException(
            status_code=400, detail="Document does not belong to this collection"
        )

    if document.user_id != requesting_user.id:
        raise HTTPException(
            status_code=403, detail="You are not allowed to delete this document"
        )

    vector_store = VectorStoreManager()
    try:
        vector_store.delete_docs_by_metadata_filter(
            collection_name=collection_id,
            metadata={"document_id": document_id},
        )
    except HTTPException:
        # Deliberate HTTP errors from the vector store layer are passed through
        # unchanged so the client sees the intended status code.
        raise
    except Exception as e:
        # Best-effort cleanup: keep going so the document record is still
        # removed, but record the traceback so the failure can be diagnosed.
        # NOTE(review): the response below still reports the embeddings as
        # deleted in this case - confirm whether callers rely on that message.
        logger.error(
            f"Failed to delete vectors for document {document_id}: {e}",
            exc_info=True,
        )

    await document.delete()
    return {"message": "Document and embeddings deleted successfully"}

`get_collection_and_validate_ownership(collection_id, requesting_user)` `async`

Get collection and validate user ownership.

Source code in routers/document.py

async def get_collection_and_validate_ownership(
    collection_id: str, requesting_user: User
) -> Collection:
    """
    Load a collection and ensure it belongs to the requesting user.

    Args:
        collection_id (str): Collection identifier.
        requesting_user (User): User whose ownership is being checked.

    Returns:
        The collection, when it exists and is owned by the user.

    Raises:
        HTTPException: 404 if the collection is not found; 403 if it belongs to another user.
    """
    found = await Collection.find_by_id(collection_id)

    # A missing collection is reported before any ownership comparison.
    if not found:
        raise HTTPException(status_code=404, detail="Collection not found")

    owner_id = found.user_id
    if owner_id != requesting_user.id:
        raise HTTPException(
            status_code=403, detail="You are not allowed to access this collection"
        )

    return found

`get_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user))` `async`

Get a specific document from a collection.

Parameters:

Name	Type	Description	Default
`collection_id`	`str`	Collection identifier.	`Path(..., description='Collection ID')`
`document_id`	`str`	Document identifier.	`Path(..., description='Document ID')`
`requesting_user`	`User`	Authenticated user injected by dependency.	`Depends(get_current_user)`

Returns:

Type	Description
`Document`	Document details.

Raises:

Type	Description
`HTTPException`	404 if not found; 400 if document not in collection; 403 if access is forbidden.

Source code in routers/document.py

@router.get(
    "/collections/{collection_id}/documents/{document_id}", response_model=DocumentModel
)
async def get_document(
    collection_id: str = Path(..., description="Collection ID"),
    document_id: str = Path(..., description="Document ID"),
    requesting_user: User = Depends(get_current_user),
) -> DocumentModel:
    """
    Fetch a single document that lives in the given collection.

    The collection's ownership is validated first; the document is then looked
    up and checked against both the collection and the requesting user.

    Args:
        collection_id (str): Collection identifier.
        document_id (str): Document identifier.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        Document details.

    Raises:
        HTTPException: 404 if not found; 400 if document not in collection; 403 if access is forbidden.
    """
    await get_collection_and_validate_ownership(collection_id, requesting_user)

    record = await DocumentModel.find_by_id(document_id)

    # Checks run in the same order as separate guards would: existence,
    # collection membership, then ownership. The first failing check wins.
    if not record:
        failure = (404, "Document not found")
    elif record.collection_id != collection_id:
        failure = (400, "Document does not belong to this collection")
    elif record.user_id != requesting_user.id:
        failure = (403, "You are not allowed to access this document")
    else:
        return record

    status, reason = failure
    raise HTTPException(status_code=status, detail=reason)

`list_documents(collection_id=Path(..., description='Collection ID'), pagination=Depends(), requesting_user=Depends(get_current_user))` `async`

List documents in a collection.

Parameters:

Name	Type	Description	Default
`collection_id`	`str`	Collection identifier.	`Path(..., description='Collection ID')`
`pagination`	`Pagination`	Pagination parameters.	`Depends()`
`requesting_user`	`User`	Authenticated user injected by dependency.	`Depends(get_current_user)`

Returns:

Type	Description
`PaginatedResponse[Document]`	Paginated documents for the collection.

Raises:

Type	Description
`HTTPException`	404 if collection is not found; 403 if access is forbidden.

Source code in routers/document.py

@router.get(
    "/collections/{collection_id}/documents",
    response_model=PaginatedResponse[DocumentModel],
)
async def list_documents(
    collection_id: str = Path(..., description="Collection ID"),
    pagination: Pagination = Depends(),
    requesting_user: User = Depends(get_current_user),
) -> PaginatedResponse[DocumentModel]:
    """
    Return one page of the documents stored in a collection.

    Args:
        collection_id (str): Collection identifier.
        pagination (Pagination): Pagination parameters.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        Paginated documents for the collection.

    Raises:
        HTTPException: 404 if collection is not found; 403 if access is forbidden.
    """
    # Ownership check only; the returned collection itself is not needed here.
    await get_collection_and_validate_ownership(collection_id, requesting_user)

    collection_filter = {"collection_id": collection_id}
    # Sorted on "timestamp" with direction -1 (presumably descending, i.e.
    # newest first - confirm against find_all_with_pagination).
    sort_spec = [("timestamp", -1)]

    page_of_documents = await DocumentModel.find_all_with_pagination(
        filter_dict=collection_filter,
        limit=pagination.limit,
        page=pagination.page,
        sort=sort_spec,
    )
    return page_of_documents

`upload_documents(collection_id=Path(..., description='Collection ID'), files=File(...), metadata_urls=Form(default=None), metadata_names=Form(default=None), embeddings_model=Form(default=DEFAULT_EMBEDDING_MODEL), chunk_size=Form(default=DEFAULT_CHUNK_SIZE), chunk_overlap=Form(default=DEFAULT_CHUNK_OVERLAP), requesting_user=Depends(get_current_user))` `async`

Upload documents to a collection.

Stores document records and triggers asynchronous parsing, chunking, and vectorization for retrieval.

Parameters:

Name	Type	Description	Default
`collection_id`	`str`	Collection identifier.	`Path(..., description='Collection ID')`
`files`	`list[UploadFile]`	One or more files to ingest.	`File(...)`
`metadata_urls`	`list[str] \| str \| None`	Optional source URL for each file, given as a list or as a single string.	`Form(default=None)`
`metadata_names`	`list[str] \| str \| None`	Optional display name for each file, given as a list or as a single string.	`Form(default=None)`
`embeddings_model`	`str`	Embeddings model to use for vectorization.	`Form(default=DEFAULT_EMBEDDING_MODEL)`
`chunk_size`	`int`	Chunk size for splitting documents.	`Form(default=DEFAULT_CHUNK_SIZE)`
`chunk_overlap`	`int`	Overlap between chunks.	`Form(default=DEFAULT_CHUNK_OVERLAP)`
`requesting_user`	`User`	Authenticated user injected by dependency.	`Depends(get_current_user)`

Returns:

Type	Description
`dict`	Service response with ingestion details.

Raises:

Type	Description
`HTTPException`	404 if collection is not found; 403 if access is forbidden; 500 for processing errors.

Source code in routers/document.py

def _metadata_value_at(
    values: Optional[List[str] | str], index: int
) -> Optional[str]:
    """Return the metadata entry for the file at ``index``, or None if absent."""
    if not values:
        return None
    # A lone string is treated as a one-element list so that indexing never
    # picks out individual characters of the string.
    if isinstance(values, str):
        values = [values]
    return values[index] if index < len(values) else None


@router.post("/collections/{collection_id}/documents")
async def upload_documents(
    collection_id: str = Path(..., description="Collection ID"),
    files: List[UploadFile] = File(...),
    metadata_urls: Optional[List[str] | str] = Form(default=None),
    metadata_names: Optional[List[str] | str] = Form(default=None),
    embeddings_model: str = Form(default=DEFAULT_EMBEDDING_MODEL),
    chunk_size: int = Form(default=DEFAULT_CHUNK_SIZE),
    chunk_overlap: int = Form(default=DEFAULT_CHUNK_OVERLAP),
    requesting_user: User = Depends(get_current_user),
) -> dict:
    """
    Upload documents to a collection.

    Passes the uploaded files to the document service for ingestion and, once
    the service reports success, stores one document record per file.

    The collection's own ``embeddings_model`` takes precedence over the
    ``embeddings_model`` form value when it is set.

    Args:
        collection_id (str): Collection identifier.
        files (list[UploadFile]): One or more files to ingest.
        metadata_urls (list[str] | str | None): Optional source URL for each
            file, matched to files by position. A single string applies to the
            first file only; files without a matching entry get no source URL.
        metadata_names (list[str] | str | None): Optional display names,
            forwarded unchanged to the document service.
        embeddings_model (str): Embeddings model to use for vectorization.
        chunk_size (int): Chunk size for splitting documents.
        chunk_overlap (int): Overlap between chunks.
        requesting_user (User): Authenticated user injected by dependency.

    Returns:
        Service response with ingestion details.

    Raises:
        HTTPException: 404 if collection is not found; 403 if access is forbidden; 500 for processing errors.
    """
    collection = await get_collection_and_validate_ownership(
        collection_id, requesting_user
    )

    logger.info(
        f"Received {len(files)} files for processing in collection {collection_id}"
    )

    docs_data = [
        DocumentModel(
            user_id=requesting_user.id,
            collection_id=collection_id,
            name=file.filename,
            filename=file.filename,
            # An upload may arrive without a filename; fall back to an empty
            # extension instead of letting os.path.splitext fail on None.
            file_type=os.path.splitext(file.filename or "")[1].lstrip("."),
            # Position-based lookup that tolerates a single string or a list
            # shorter than the number of files.
            source_url=_metadata_value_at(metadata_urls, i),
        )
        for i, file in enumerate(files)
    ]

    try:
        effective_model = collection.embeddings_model or embeddings_model
        result = await document_service.add_documents(
            collection_name=collection_id,
            files=files,
            request=AddDocumentRequest(
                embeddings_model=effective_model,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                metadata_urls=metadata_urls,
                metadata_names=metadata_names,
            ),
            metadata_urls=metadata_urls,
            metadata_names=metadata_names,
        )

        if not result.success:
            raise HTTPException(status_code=500, detail=result.error)

        # Records are persisted only after the service reports success.
        await DocumentModel.bulk_create(docs_data)
        return result.data
    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except Exception as e:
        logger.error(f"Error processing documents: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Error processing documents: {str(e)}"
        ) from e

Document

delete_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user)) async

get_collection_and_validate_ownership(collection_id, requesting_user) async

get_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user)) async

list_documents(collection_id=Path(..., description='Collection ID'), pagination=Depends(), requesting_user=Depends(get_current_user)) async

`delete_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user))` `async`

`get_collection_and_validate_ownership(collection_id, requesting_user)` `async`

`get_document(collection_id=Path(..., description='Collection ID'), document_id=Path(..., description='Document ID'), requesting_user=Depends(get_current_user))` `async`

`list_documents(collection_id=Path(..., description='Collection ID'), pagination=Depends(), requesting_user=Depends(get_current_user))` `async`