Datasets
Access datasets through client.datasets in the Datature Vi SDK.
The datasets resource is the root container for all your training data. Datasets hold assets (images) and annotations, and they belong to your organization. Every other data-related operation (uploading images, creating annotations, exporting for training) starts here.
- Vi SDK installed with authentication configured
- A secret key for API authentication
- Familiarity with Vi SDK basics
The SDK does not support creating new datasets. Use the Datature Vi web interface to create a dataset, then use the SDK to manage, export, and download it.
Methods
list()
List all datasets in your organization.
datasets = client.datasets.list()
for dataset in datasets.items:
print(f"{dataset.name}: {dataset.dataset_id}")

from vi.api.types import PaginationParams
pagination = PaginationParams(page_size=50)
datasets = client.datasets.list(pagination=pagination)
# Iterate through all pages
for page in datasets:
for dataset in page.items:
print(f"{dataset.name}: {dataset.statistic.asset_total} assets")

all_datasets = list(client.datasets.list().all_items())
print(f"Total datasets: {len(all_datasets)}")

Returns: PaginatedResponse[Dataset]
get()
Get a specific dataset by ID.
dataset = client.datasets.get("dataset_abc123")
print(f"Name: {dataset.name}")
print(f"Assets: {dataset.statistic.asset_total}")

dataset = client.datasets.get("dataset_abc123")
dataset.info() # Prints formatted dataset summary

dataset = client.datasets.get("dataset_abc123")
print(f"Total assets: {dataset.statistic.asset_total}")
print(f"Annotated: {dataset.statistic.asset_annotated}")
print(f"Annotations: {dataset.statistic.annotation_total}")
if dataset.statistic.asset_total > 0:
rate = dataset.statistic.asset_annotated / dataset.statistic.asset_total * 100
print(f"Annotation rate: {rate:.1f}%")

Returns: Dataset
create_export()
Create a new dataset export.
export = client.datasets.create_export(dataset_id="dataset_abc123")

from vi.api.resources.datasets.types import (
DatasetExportSettings,
DatasetExportFormat,
DatasetExportOptions
)
export_settings = DatasetExportSettings(
format=DatasetExportFormat.VI_FULL,
options=DatasetExportOptions(
normalized=True,
split_ratio=0.8 # 80% training, 20% validation
)
)
export = client.datasets.create_export(
dataset_id="dataset_abc123",
export_settings=export_settings
)
print(f"Export ID: {export.dataset_export_id}")

from vi.api.resources.datasets.types import (
DatasetExportSettings,
DatasetExportFormat
)
export_settings = DatasetExportSettings(
format=DatasetExportFormat.VI_JSONL
)
export = client.datasets.create_export(
dataset_id="dataset_abc123",
export_settings=export_settings
)

Returns: DatasetExport
list_exports()
List all exports for a dataset.
exports = client.datasets.list_exports("dataset_abc123")
for export in exports.items:
print(f"Export: {export.dataset_export_id}")
print(f" Format: {export.spec.format}")

exports = client.datasets.list_exports("dataset_abc123")
ready_exports = [
e for e in exports.items
if e.status.download_url is not None
]
for export in ready_exports:
print(f"Export {export.dataset_export_id} ready: {export.status.download_url.url}")

Returns: PaginatedResponse[DatasetExport]
get_export()
Get a specific export by ID.
export = client.datasets.get_export(
dataset_id="dataset_abc123",
dataset_export_id="export_xyz789"
)
if export.status.download_url:
print(f"Export ready: {export.status.download_url.url}")
print(f"Expires at: {export.status.download_url.expires_at}")
else:
print("Export still processing...")

import time
def wait_for_export(dataset_id: str, export_id: str, timeout: int = 300) -> str:
    """Poll an export until it exposes a download URL, then return that URL.

    Relies on a ``client`` object being available in the enclosing scope,
    as in the other examples on this page.

    Args:
        dataset_id: ID of the dataset that owns the export.
        export_id: ID of the export to poll.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The URL string taken from the export's ``status.download_url``.

    Raises:
        TimeoutError: If no download URL appears before ``timeout`` elapses.
    """
    # Use the monotonic clock for the deadline: wall-clock adjustments
    # (NTP sync, manual changes) must not shorten or stretch the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        export = client.datasets.get_export(dataset_id, export_id)
        if export.status.download_url:
            return export.status.download_url.url
        time.sleep(5)  # fixed pause between polls
    raise TimeoutError("Export not ready")
url = wait_for_export("dataset_abc123", "export_xyz789")

Returns: DatasetExport
download()
Download a dataset with all assets and annotations.
downloaded = client.datasets.download(
dataset_id="dataset_abc123",
save_dir="./data"
)
print(downloaded.summary())

from vi.api.resources.datasets.types import (
DatasetExportSettings,
DatasetExportFormat,
DatasetExportOptions
)
settings = DatasetExportSettings(
format=DatasetExportFormat.VI_FULL,
options=DatasetExportOptions(
normalized=True,
split_ratio=0.8
)
)
downloaded = client.datasets.download(
dataset_id="dataset_abc123",
export_settings=settings,
save_dir="./data",
show_progress=True
)
print(f"Saved to: {downloaded.save_dir}")
print(f"Size: {downloaded.size_mb:.2f} MB")
print(f"Splits: {downloaded.splits}")

downloaded = client.datasets.download(
dataset_id="dataset_abc123",
save_dir="./annotations",
annotations_only=True
)

downloaded = client.get_dataset(
dataset_id="dataset_abc123",
save_path="./data"
)

For annotation-only downloads, client.annotations.download() is a cleaner alternative:
result = client.annotations.download(
dataset_id="dataset_abc123",
save_dir="./annotations"
)

This wraps client.datasets.download(annotations_only=True) with a more descriptive name. See the Annotations API →
Returns: DatasetDownloadResult
delete()
Delete a dataset permanently.
deleted = client.datasets.delete("dataset_abc123")
print(f"Deleted: {deleted.dataset_id}")

dataset = client.datasets.get("dataset_abc123")
print(f"About to delete: {dataset.name}")
print(f" Assets: {dataset.statistic.asset_total}")
print(f" Annotations: {dataset.statistic.annotation_total}")
confirm = input("Delete? (yes/no): ")
if confirm.lower() == "yes":
client.datasets.delete("dataset_abc123")
print("Deleted.")

Returns: DeletedDataset
bulk_delete_assets()
Bulk delete assets from a dataset using a filter query.
response = client.datasets.bulk_delete_assets(
dataset_id="dataset_abc123",
filter_criteria='{"status": "error"}',
strict_query=True
)

response = client.datasets.bulk_delete_assets(
dataset_id="dataset_abc123",
filter_criteria='{"metadata.annotations.total": 0}'
)

Returns: BulkAssetDeletionSession
Response types
Dataset
from vi.api.resources.datasets.responses import Dataset

Methods: info() → prints a formatted dataset summary.
DatasetStatistic
from vi.api.resources.datasets.responses import DatasetStatistic
DatasetAccess
from vi.api.resources.datasets.responses import DatasetAccess
AssetStatusDetail
from vi.api.resources.datasets.responses import AssetStatusDetail
DatasetExport
from vi.api.resources.datasets.responses import DatasetExport
DatasetExportSpec
from vi.api.resources.datasets.responses import DatasetExportSpec
DatasetExportStatus
from vi.api.resources.datasets.responses import DatasetExportStatus
DatasetExportDownloadUrl
from vi.api.resources.datasets.responses import DatasetExportDownloadUrl
DatasetDownloadResult
Methods: summary() → returns a summary string. info() → prints detailed info.
DeletedDataset
from vi.api.resources.datasets.responses import DeletedDataset
DeletedDatasetStatus
from vi.api.resources.datasets.responses import DeletedDatasetStatus
BulkAssetDeletionSession
from vi.api.resources.datasets.responses import BulkAssetDeletionSession
BulkAssetDeletionSpec
from vi.api.resources.datasets.types import BulkAssetDeletionSpec
BulkAssetDeletionStatus
from vi.api.resources.datasets.responses import BulkAssetDeletionStatus
Request types
DatasetExportSettings
from vi.api.resources.datasets.types import DatasetExportSettings
DatasetExportOptions
from vi.api.resources.datasets.types import DatasetExportOptions
Enums
DatasetType
from vi.api.resources.datasets.types import DatasetType
DatasetContent
from vi.api.resources.datasets.types import DatasetContent
DatasetExportFormat
from vi.api.resources.datasets.types import DatasetExportFormat
Related resources
Updated about 1 month ago
