Ingestion Jobs
Monitor the progress of document ingestion into a knowledge base. Ingestion jobs are created automatically when a data source is added or synced. Use these endpoints to track processing status and troubleshoot failures.
Endpoints
GET https://api.tensoras.ai/v1/knowledge-bases/{kb_id}/ingestion-jobs
GET https://api.tensoras.ai/v1/knowledge-bases/{kb_id}/ingestion-jobs/{job_id}
Authentication
Authorization: Bearer tns_your_key_here
List Ingestion Jobs
Retrieve all ingestion jobs for a knowledge base.
Request
GET /v1/knowledge-bases/{kb_id}/ingestion-jobs
| Parameter | Type | Required | Description |
|---|---|---|---|
kb_id | string | Yes | The knowledge base ID (path parameter). |
limit | integer | No | Maximum number of results. Default: 20, max: 100. |
after | string | No | Cursor for pagination. Returns jobs created after this job ID. |
status | string | No | Filter by job status. One of "queued", "processing", "completed", "failed". |
Response Body
{
"object": "list",
"data": [
{
"id": "job_abc123",
"object": "ingestion_job",
"knowledge_base_id": "kb_abc123",
"data_source_id": "ds_xyz789",
"status": "completed",
"documents_processed": 15,
"documents_failed": 0,
"chunks_created": 482,
"started_at": 1709123456,
"completed_at": 1709124056,
"created_at": 1709123450,
"error": null
},
{
"id": "job_def456",
"object": "ingestion_job",
"knowledge_base_id": "kb_abc123",
"data_source_id": "ds_uvw321",
"status": "processing",
"documents_processed": 8,
"documents_failed": 1,
"chunks_created": 245,
"started_at": 1709125000,
"completed_at": null,
"created_at": 1709124990,
"error": null
}
],
"has_more": false
}
Get Ingestion Job
Retrieve details about a specific ingestion job.
Request
GET /v1/knowledge-bases/{kb_id}/ingestion-jobs/{job_id}
| Parameter | Type | Required | Description |
|---|---|---|---|
kb_id | string | Yes | The knowledge base ID (path parameter). |
job_id | string | Yes | The ingestion job ID (path parameter). |
Response Body
{
"id": "job_abc123",
"object": "ingestion_job",
"knowledge_base_id": "kb_abc123",
"data_source_id": "ds_xyz789",
"status": "completed",
"documents_processed": 15,
"documents_failed": 0,
"chunks_created": 482,
"started_at": 1709123456,
"completed_at": 1709124056,
"created_at": 1709123450,
"error": null,
"document_details": [
{
"document_id": "doc_001",
"document_name": "product-guide.pdf",
"status": "completed",
"chunks_created": 32,
"error": null
},
{
"document_id": "doc_002",
"document_name": "api-reference.md",
"status": "completed",
"chunks_created": 18,
"error": null
}
]
}
| Field | Type | Description |
|---|---|---|
id | string | The unique job identifier. |
object | string | Always "ingestion_job". |
knowledge_base_id | string | The ID of the knowledge base being ingested into. |
data_source_id | string | The ID of the data source that triggered this job. |
status | string | The current status of the job. See statuses below. |
documents_processed | integer | The number of documents successfully processed so far. |
documents_failed | integer | The number of documents that failed to process. |
chunks_created | integer | The total number of chunks created. |
started_at | integer or null | Unix timestamp of when processing started. |
completed_at | integer or null | Unix timestamp of when the job finished. |
created_at | integer | Unix timestamp of when the job was created. |
error | object or null | Error details if the job failed. |
document_details | array | Per-document processing details (only included in the single job endpoint). |
Job Statuses
| Status | Description |
|---|---|
queued | The job has been created and is waiting to be picked up for processing. |
processing | Documents are actively being chunked, embedded, and indexed. |
completed | The job finished processing all documents. Note that individual documents may still have failed; check documents_failed and document_details. |
failed | The job failed entirely. Check the error field for details. |
Examples
List Ingestion Jobs
curl
curl https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs \
-H "Authorization: Bearer tns_your_key_here"Python
# List all ingestion jobs for a knowledge base and print a one-line
# summary (status, document count, chunk count) for each.
import requests

response = requests.get(
    "https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs",
    headers={"Authorization": "Bearer tns_your_key_here"},
)
response.raise_for_status()

# The list endpoint wraps results in {"object": "list", "data": [...], "has_more": ...}.
jobs = response.json()["data"]
for job in jobs:
    print(
        f"{job['id']}: {job['status']} - "
        f"{job['documents_processed']} docs, {job['chunks_created']} chunks"
    )

Node.js
const response = await fetch(
"https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs",
{
headers: { Authorization: "Bearer tns_your_key_here" },
}
);
const { data: jobs } = await response.json();
for (const job of jobs) {
console.log(
`${job.id}: ${job.status} - ${job.documents_processed} docs, ${job.chunks_created} chunks`
);
}Get Ingestion Job Details
curl
curl https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs/job_abc123 \
-H "Authorization: Bearer tns_your_key_here"Python
# Fetch a single ingestion job and print its status plus per-document details.
import requests

response = requests.get(
    "https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs/job_abc123",
    headers={"Authorization": "Bearer tns_your_key_here"},
)
response.raise_for_status()
job = response.json()

print(f"Job: {job['id']}")
print(f"Status: {job['status']}")
print(f"Documents processed: {job['documents_processed']}")
print(f"Documents failed: {job['documents_failed']}")
print(f"Chunks created: {job['chunks_created']}")

# document_details is only included in the single-job endpoint's response.
if job.get("document_details"):
    print("\nDocument details:")
    for doc in job["document_details"]:
        status_icon = "OK" if doc["status"] == "completed" else "FAIL"
        print(f" [{status_icon}] {doc['document_name']}: {doc['chunks_created']} chunks")

Node.js
const response = await fetch(
"https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs/job_abc123",
{
headers: { Authorization: "Bearer tns_your_key_here" },
}
);
const job = await response.json();
console.log(`Job: ${job.id}`);
console.log(`Status: ${job.status}`);
console.log(`Documents processed: ${job.documents_processed}`);
console.log(`Documents failed: ${job.documents_failed}`);
console.log(`Chunks created: ${job.chunks_created}`);
if (job.document_details) {
console.log("\nDocument details:");
for (const doc of job.document_details) {
const status = doc.status === "completed" ? "OK" : "FAIL";
console.log(` [${status}] ${doc.document_name}: ${doc.chunks_created} chunks`);
}
}Poll for Job Completion
Python
# Poll an ingestion job every 5 seconds until it reaches a terminal state.
import time

import requests

API_BASE = "https://api.tensoras.ai/v1"
HEADERS = {"Authorization": "Bearer tns_your_key_here"}
kb_id = "kb_abc123"
job_id = "job_abc123"

while True:
    response = requests.get(
        f"{API_BASE}/knowledge-bases/{kb_id}/ingestion-jobs/{job_id}",
        headers=HEADERS,
    )
    job = response.json()
    print(f"Status: {job['status']} | Docs: {job['documents_processed']} | Chunks: {job['chunks_created']}")
    # "completed" and "failed" are the terminal statuses.
    if job["status"] in ("completed", "failed"):
        break
    time.sleep(5)

if job["status"] == "completed":
    print(f"Ingestion complete! {job['chunks_created']} chunks indexed.")
else:
    # The "error" key is always present (null until the job fails), so
    # dict.get's default would never apply; use `or` to handle the null case.
    print(f"Ingestion failed: {job.get('error') or 'Unknown error'}")

Node.js
const API_BASE = "https://api.tensoras.ai/v1";
const headers = { Authorization: "Bearer tns_your_key_here" };
const kbId = "kb_abc123";
const jobId = "job_abc123";
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
while (true) {
const response = await fetch(
`${API_BASE}/knowledge-bases/${kbId}/ingestion-jobs/${jobId}`,
{ headers }
);
const job = await response.json();
console.log(
`Status: ${job.status} | Docs: ${job.documents_processed} | Chunks: ${job.chunks_created}`
);
if (job.status === "completed" || job.status === "failed") {
if (job.status === "completed") {
console.log(`Ingestion complete! ${job.chunks_created} chunks indexed.`);
} else {
console.log(`Ingestion failed: ${job.error || "Unknown error"}`);
}
break;
}
await sleep(5000);
}Filter by Status
curl
# List only failed ingestion jobs
curl "https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs?status=failed" \
-H "Authorization: Bearer tns_your_key_here"Python
import requests
response = requests.get(
"https://api.tensoras.ai/v1/knowledge-bases/kb_abc123/ingestion-jobs",
headers={"Authorization": "Bearer tns_your_key_here"},
params={"status": "failed"},
)
failed_jobs = response.json()["data"]
for job in failed_jobs:
print(f"{job['id']}: {job['error']}")Error Handling
{
"error": {
"message": "Ingestion job 'job_abc123' not found",
"type": "not_found_error",
"param": "job_id",
"code": "ingestion_job_not_found"
}
}