Annotations
Access annotations through client.annotations in the Datature Vi SDK.
Annotations in Datature Vi describe the content of an image in machine-readable form. For phrase grounding datasets, an annotation pairs a text caption with one or more bounding boxes. For visual question answering (VQA) datasets, an annotation pairs a question with its answer. The annotations resource lets you upload annotation files in bulk, list and inspect individual annotations, download exports, and delete entries you no longer need.
- Vi SDK installed with authentication configured
- A secret key for API authentication
- A dataset with uploaded assets to annotate
- Familiarity with annotation formats: phrase grounding or VQA
Methods
list()
List annotations for an asset.
annotations = client.annotations.list(
dataset_id="dataset_abc123",
asset_id="asset_xyz789"
)
for annotation in annotations.items:
print(f"ID: {annotation.annotation_id}")
print(f"Type: {annotation.spec.bound_type}")

from vi.api.types import PaginationParams
annotations = client.annotations.list(
dataset_id="dataset_abc123",
asset_id="asset_xyz789",
pagination=PaginationParams(page_size=100)
)
for annotation in annotations.items:
print(f"{annotation.annotation_id}: {annotation.spec.bound_type}")

for page in client.annotations.list(
dataset_id="dataset_abc123",
asset_id="asset_xyz789"
):
for annotation in page.items:
print(f"{annotation.annotation_id}: {annotation.spec.bound_type}")

all_annotations = list(
client.annotations.list(
dataset_id="dataset_abc123",
asset_id="asset_xyz789"
).all_items()
)
print(f"Total annotations: {len(all_annotations)}")

Returns: PaginatedResponse[Annotation]
get()
Get a specific annotation by ID.
annotation = client.annotations.get(
dataset_id="dataset_abc123",
asset_id="asset_xyz789",
annotation_id="annotation_xyz789"
)
print(f"Type: {annotation.spec.bound_type}")
print(f"Content: {annotation.spec.contents}")

annotation = client.annotations.get(
dataset_id="dataset_abc123",
asset_id="asset_xyz789",
annotation_id="annotation_xyz789"
)
if hasattr(annotation.spec.contents, 'contents'):
# Caption annotation
print(f"Caption: {annotation.spec.contents.contents}")

annotation = client.annotations.get(
dataset_id="dataset_abc123",
asset_id="asset_xyz789",
annotation_id="annotation_xyz789"
)
if hasattr(annotation.spec.contents, 'bound'):
# Phrase grounding bounding box
print(f"Bounds: {annotation.spec.contents.bound}")
print(f"Target span: {annotation.spec.contents.target}")

annotation = client.annotations.get(
dataset_id="dataset_abc123",
asset_id="asset_xyz789",
annotation_id="annotation_xyz789"
)
annotation.info()  # Prints formatted annotation summary

Returns: Annotation
upload()
Upload annotation files to a dataset. Accepts JSONL files, folders of JSONL files, or a list of file paths.
result = client.annotations.upload(
dataset_id="dataset_abc123",
paths="annotations.jsonl",
wait_until_done=True
)
print(f"Imported: {result.total_annotations}")

result = client.annotations.upload(
dataset_id="dataset_abc123",
paths="./annotations/",
wait_until_done=True
)
print(result.summary())

result = client.annotations.upload(
dataset_id="dataset_abc123",
paths=["batch1.jsonl", "batch2.jsonl"],
wait_until_done=True
)

def upload_and_verify(dataset_id: str, annotations_path: str) -> bool:
"""Upload annotations and verify the count matches."""
with open(annotations_path) as f:
local_count = sum(1 for _ in f)
result = client.annotations.upload(
dataset_id=dataset_id,
paths=annotations_path,
wait_until_done=True
)
print(f"Local: {local_count}")
print(f"Imported: {result.total_annotations}")
return result.total_annotations == local_count
success = upload_and_verify("dataset_abc123", "annotations.jsonl")

import json
from pathlib import Path
def create_and_upload_annotations(
    dataset_id: str,
    images_dir: str,
    output_file: str = "generated_annotations.jsonl",
):
    """Create one caption annotation per *.jpg image and upload the batch.

    Scans *images_dir* (non-recursively) for ``*.jpg`` files, writes one
    Vi JSONL record per image to *output_file*, then uploads that file and
    blocks until the import session completes.

    Args:
        dataset_id: ID of the target Vi dataset.
        images_dir: Directory containing the ``*.jpg`` images.
        output_file: Path of the intermediate JSONL file to write.
            Defaults to ``generated_annotations.jsonl`` in the working
            directory (the original hard-coded name, kept for
            backward compatibility).

    Returns:
        The result of ``client.annotations.upload`` (an
        AnnotationUploadResult), since ``wait_until_done=True``.
    """
    # sorted() makes the output file deterministic; Path.glob order is
    # filesystem-dependent otherwise.
    records = [
        {
            # Asset ID is derived from the file name without extension.
            "asset_id": image_path.stem,
            "caption": f"Image of {image_path.stem}",
            "grounded_phrases": [],  # no grounded boxes in this example
        }
        for image_path in sorted(Path(images_dir).glob("*.jpg"))
    ]
    with open(output_file, "w") as f:
        # One JSON object per line — the Vi JSONL upload format.
        f.writelines(json.dumps(record) + "\n" for record in records)
    return client.annotations.upload(
        dataset_id=dataset_id,
        paths=output_file,
        wait_until_done=True,
    )
result = create_and_upload_annotations("dataset_abc123", "./images")

Annotation upload is currently supported for:
- Phrase grounding datasets: Upload captions with grounded phrases and bounding boxes
- VQA datasets: Upload question-answer pairs
- Freeform text datasets: Upload custom annotation schemas
Upload format is Vi JSONL for all dataset types.
Returns: AnnotationUploadResult
delete()
Delete an annotation.
deleted = client.annotations.delete(
dataset_id="dataset_abc123",
asset_id="asset_xyz789",
annotation_id="annotation_xyz789"
)

annotations = client.annotations.list(
dataset_id="dataset_abc123",
asset_id="asset_xyz789"
)
for annotation in annotations.items:
client.annotations.delete(
dataset_id="dataset_abc123",
asset_id="asset_xyz789",
annotation_id=annotation.annotation_id
)
print(f"Deleted: {annotation.annotation_id}")

Returns: DeletedAnnotation
wait_until_done()
Wait for an import session to complete. Use this after an async upload to block until ingestion finishes.
status = client.annotations.wait_until_done(
dataset_id="dataset_abc123",
annotation_import_session_id="session_id"
)
print(f"Total: {status.status.annotations.status_count}")

result = client.annotations.upload(
dataset_id="dataset_abc123",
paths="annotations.jsonl",
wait_until_done=False
)
# Do other work...
status = client.annotations.wait_until_done(
dataset_id="dataset_abc123",
annotation_import_session_id=result.session_id
)
print(f"Import complete: {status.status.annotations.status_count}")

Returns: AnnotationImportSession
download()
Download annotations without assets. Faster than downloading the full dataset when you only need annotation files.
result = client.annotations.download(
dataset_id="dataset_abc123"
)
print(result.summary())

from vi.api.resources.datasets.types import (
DatasetExportFormat,
DatasetExportSettings
)
result = client.annotations.download(
dataset_id="dataset_abc123",
export_settings=DatasetExportSettings(
format=DatasetExportFormat.COCO
),
save_dir="./annotations"
)
print(f"Downloaded to: {result.save_dir}")

result = client.annotations.download(
dataset_id="dataset_abc123",
export_settings={"format": DatasetExportFormat.YOLO_DARKNET},
save_dir="./yolo_annotations"
)

result = client.annotations.download(
dataset_id="dataset_abc123",
dataset_export_id="export_xyz789"
)

result = client.annotations.download(
dataset_id="dataset_abc123",
show_progress=False,
overwrite=True
)

from pathlib import Path
import json
result = client.annotations.download(
dataset_id="dataset_abc123",
export_settings={"format": DatasetExportFormat.VI_JSONL},
save_dir="./annotations"
)
annotation_files = Path(result.save_dir).glob("*.jsonl")
for file in annotation_files:
with open(file) as f:
for line in f:
annotation = json.loads(line)
print(f"Asset: {annotation['asset_id']}")

Available formats depend on your dataset type.
Phrase grounding datasets: CsvFourCorner, CsvWidthHeight, Coco, PascalVoc, YoloDarknet, YoloKerasPytorch, ViJsonl, ViTfrecord
VQA datasets: ViJsonl, ViTfrecord
Freeform text datasets: ViJsonl
The ViFull format is not supported for annotation-only downloads. Using an incompatible format raises a ViValidationError.
Returns: DatasetDownloadResult (see Datasets API)
Annotation formats
Phrase grounding format
Each line in the JSONL file represents one image with its caption and bounding boxes.
{
"asset_id": "asset_123",
"caption": "A cat sitting on a couch",
"grounded_phrases": [
{
"phrase": "cat",
"bbox": [0.1, 0.2, 0.5, 0.7]
},
{
"phrase": "couch",
"bbox": [0.6, 0.2, 0.9, 0.7]
}
]
}

GroundedPhrase fields:
VQA format
Each line in the JSONL file represents one image with one or more question-answer pairs.
{
"asset_id": "asset_123",
"contents": {
"interactions": [
{
"question": "What is the cat doing?",
"answer": "Sitting on a couch"
}
]
}
}

Interaction fields:
Freeform text format
Each line in the JSONL file represents one image with its freeform text annotation.
{
"asset_id": "asset_123",
"contents": {
"response": "Component: Bearing assembly #4\nStatus: FAIL\nDefects found: surface pitting, minor corrosion\nSeverity: High"
}
}

{
"asset_id": "asset_123",
"contents": {
"response": "{\"vendor\": \"Acme Supply Co.\", \"invoice_number\": \"INV-2024-00142\", \"total\": \"918.00\", \"currency\": \"USD\"}"
}
}

For structured JSON output use cases (invoice extraction, inspection reports), embed the JSON as a string in the response field. The model learns to reproduce the format from your training examples. See Structured Data Extraction for the full setup guide.
Coordinate system
Bounding box coordinates are normalized to the [0, 1] range relative to image dimensions:
- x_min: left edge (0 = leftmost, 1 = rightmost)
- y_min: top edge (0 = top, 1 = bottom)
- x_max: right edge
- y_max: bottom edge
def to_pixels(bbox: list[float], width: int, height: int) -> list[int]:
    """Map a normalized [x_min, y_min, x_max, y_max] box to pixel units.

    X coordinates are scaled by *width* and y coordinates by *height*;
    each scaled value is truncated toward zero with int().
    """
    x_min, y_min, x_max, y_max = bbox
    scales = (width, height, width, height)
    return [int(v * s) for v, s in zip((x_min, y_min, x_max, y_max), scales)]
# Example: [192, 216, 960, 756] for a 1920x1080 image
pixel_bbox = to_pixels([0.1, 0.2, 0.5, 0.7], 1920, 1080)

def from_pixels(bbox: list[int], width: int, height: int) -> list[float]:
"""Convert pixel coordinates to normalized bbox."""
x_min, y_min, x_max, y_max = bbox
return [
x_min / width,
y_min / height,
x_max / width,
y_max / height
]

Response types
Annotation
from vi.api.resources.datasets.annotations.responses import Annotation

Methods: info() → prints a formatted annotation summary.

AnnotationSpec

from vi.api.resources.datasets.annotations.responses import AnnotationSpec

AnnotationContent

Union type for annotation content variants.

from vi.api.resources.datasets.annotations.responses import AnnotationContent

AnnotationContent = Caption | PhraseGroundingBoundingBox

Caption

from vi.api.resources.datasets.annotations.responses import Caption

PhraseGroundingBoundingBox

from vi.api.resources.datasets.annotations.responses import PhraseGroundingBoundingBox

AnnotationAggregation

from vi.api.resources.datasets.annotations.responses import AnnotationAggregation

AnnotationUploadResult

Methods: summary() → returns a summary string.

AnnotationImportSession

from vi.api.resources.datasets.annotations.responses import AnnotationImportSession

AnnotationImportSpec

from vi.api.resources.datasets.annotations.responses import AnnotationImportSpec

AnnotationImportSessionStatus

from vi.api.resources.datasets.annotations.responses import AnnotationImportSessionStatus

AnnotationFileStatus

from vi.api.resources.datasets.annotations.responses import AnnotationFileStatus

AnnotationStatus

from vi.api.resources.datasets.annotations.responses import AnnotationStatus

Enums

Roles

from vi.api.resources.datasets.annotations.responses import Roles

FailurePolicy

from vi.api.resources.datasets.annotations.responses import FailurePolicy

AnnotationImportSource

from vi.api.resources.datasets.annotations.responses import AnnotationImportSource

ConditionReason

from vi.api.resources.datasets.annotations.responses import ConditionReason

Related resources
Updated about 1 month ago
