Datasets

Dataset resource

Access datasets through client.datasets.

📋

Prerequisites

Get started with Vi SDK →


Methods

list()

List all datasets in the organization.

# Basic usage
datasets = client.datasets.list()

for dataset in datasets.items:
    print(f"{dataset.name}: {dataset.dataset_id}")
# With custom pagination
from vi.api.types import PaginationParams

pagination = PaginationParams(page_size=50)
datasets = client.datasets.list(pagination=pagination)

# Iterate through all pages
for page in datasets:
    for dataset in page.items:
        print(f"{dataset.name}: {dataset.statistic.asset_total} assets")
# Collect all datasets
all_datasets = list(client.datasets.list().all_items())
print(f"Total datasets: {len(all_datasets)}")

Parameters:

| Parameter | Type | Description | Default |
| --- | --- | --- | --- |
| `pagination` | `PaginationParams` | Pagination settings | `None` |

Returns: PaginatedResponse[Dataset]


get()

Get a specific dataset by ID.

# Basic usage
dataset = client.datasets.get("dataset_abc123")

print(f"Name: {dataset.name}")
print(f"Assets: {dataset.statistic.asset_total}")
# Display detailed information
dataset = client.datasets.get("dataset_abc123")
dataset.info()  # Prints formatted dataset summary
# Access dataset statistics
dataset = client.datasets.get("dataset_abc123")

print(f"Total assets: {dataset.statistic.asset_total}")
print(f"Annotated: {dataset.statistic.asset_annotated}")
print(f"Annotations: {dataset.statistic.annotation_total}")

# Calculate annotation rate
if dataset.statistic.asset_total > 0:
    rate = dataset.statistic.asset_annotated / dataset.statistic.asset_total * 100
    print(f"Annotation rate: {rate:.1f}%")

Parameters:

| Parameter | Type | Description |
| --- | --- | --- |
| `dataset_id` | `str` | Dataset identifier |

Returns: Dataset


create_export()

Create a new dataset export.

# Basic export with defaults
export = client.datasets.create_export(dataset_id="dataset_abc123")
# Custom export settings
from vi.api.resources.datasets.types import (
    DatasetExportSettings,
    DatasetExportFormat,
    DatasetExportOptions
)

export_settings = DatasetExportSettings(
    format=DatasetExportFormat.VI_FULL,
    options=DatasetExportOptions(
        normalized=True,
        split_ratio=0.8  # 80% training, 20% validation
    )
)

export = client.datasets.create_export(
    dataset_id="dataset_abc123",
    export_settings=export_settings
)

print(f"Export ID: {export.dataset_export_id}")
# Export annotations only (JSONL format)
from vi.api.resources.datasets.types import (
    DatasetExportSettings,
    DatasetExportFormat
)

export_settings = DatasetExportSettings(
    format=DatasetExportFormat.VI_JSONL
)

export = client.datasets.create_export(
    dataset_id="dataset_abc123",
    export_settings=export_settings
)

Parameters:

| Parameter | Type | Description |
| --- | --- | --- |
| `dataset_id` | `str` | Dataset identifier |
| `export_settings` | `DatasetExportSettings` | Export configuration |

Returns: DatasetExport


list_exports()

List exports for a dataset.

# List all exports
exports = client.datasets.list_exports("dataset_abc123")

for export in exports.items:
    print(f"Export: {export.dataset_export_id}")
    print(f"  Format: {export.spec.format}")
# Find ready exports with download URLs
exports = client.datasets.list_exports("dataset_abc123")

ready_exports = [
    e for e in exports.items
    if e.status.download_url is not None
]

for export in ready_exports:
    print(f"Export {export.dataset_export_id} ready: {export.status.download_url.url}")

Parameters:

| Parameter | Type | Description |
| --- | --- | --- |
| `dataset_id` | `str` | Dataset identifier |
| `pagination` | `PaginationParams` | Pagination settings |

Returns: PaginatedResponse[DatasetExport]


get_export()

Get a specific export.

# Check export status
export = client.datasets.get_export(
    dataset_id="dataset_abc123",
    dataset_export_id="export_xyz789"
)

if export.status.download_url:
    print(f"Export ready: {export.status.download_url.url}")
    print(f"Expires at: {export.status.download_url.expires_at}")
else:
    print("Export still processing...")
# Poll until export is ready
import time

def wait_for_export(
    dataset_id: str,
    export_id: str,
    timeout: int = 300,
    poll_interval: float = 5.0,
) -> str:
    """Wait for export to be ready and return download URL.

    Args:
        dataset_id: Dataset identifier.
        export_id: Export identifier.
        timeout: Maximum number of seconds to wait before giving up.
        poll_interval: Seconds to pause between two status checks.

    Returns:
        The download URL of the export.

    Raises:
        TimeoutError: If the export exposes no download URL within
            ``timeout`` seconds.
    """
    # Use the monotonic clock so that system clock adjustments cannot
    # shorten or extend the waiting period.
    deadline = time.monotonic() + timeout
    while True:
        # Check first, so that even a zero timeout polls the export once.
        export = client.datasets.get_export(dataset_id, export_id)
        if export.status.download_url:
            return export.status.download_url.url

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("Export not ready")

        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))

url = wait_for_export("dataset_abc123", "export_xyz789")

Parameters:

| Parameter | Type | Description |
| --- | --- | --- |
| `dataset_id` | `str` | Dataset identifier |
| `dataset_export_id` | `str` | Export identifier |

Returns: DatasetExport


download()

Download a dataset with all assets and annotations.

# Basic download
downloaded = client.datasets.download(
    dataset_id="dataset_abc123",
    save_dir="./data"
)

print(downloaded.summary())
# Download with custom export settings
from vi.api.resources.datasets.types import (
    DatasetExportSettings,
    DatasetExportFormat,
    DatasetExportOptions
)

settings = DatasetExportSettings(
    format=DatasetExportFormat.VI_FULL,
    options=DatasetExportOptions(
        normalized=True,
        split_ratio=0.8
    )
)

downloaded = client.datasets.download(
    dataset_id="dataset_abc123",
    export_settings=settings,
    save_dir="./data",
    show_progress=True
)

print(f"Saved to: {downloaded.save_dir}")
print(f"Size: {downloaded.size_mb:.2f} MB")
print(f"Splits: {downloaded.splits}")
# Download annotations only
downloaded = client.datasets.download(
    dataset_id="dataset_abc123",
    save_dir="./annotations",
    annotations_only=True
)
# Using the convenience method on client
downloaded = client.get_dataset(
    dataset_id="dataset_abc123",
    save_path="./data"
)
💡

Annotations-only download convenience method

For annotation-only downloads, you can use the more intuitive client.annotations.download() method:

# Recommended for annotations-only downloads
result = client.annotations.download(
    dataset_id="dataset_abc123",
    save_dir="./annotations"
)

This is a convenience wrapper for client.datasets.download(annotations_only=True) that provides a cleaner API.

Learn more about annotations API →

Parameters:

| Parameter | Type | Description | Default |
| --- | --- | --- | --- |
| `dataset_id` | `str` | Dataset identifier | Required |
| `dataset_export_id` | `str` | Specific export ID | `None` |
| `export_settings` | `DatasetExportSettings` | Export configuration | `None` |
| `annotations_only` | `bool` | Download only annotations | `False` |
| `save_dir` | `str \| Path` | Save directory | Required |
| `overwrite` | `bool` | Overwrite existing | `False` |
| `show_progress` | `bool` | Show progress bars | `True` |

Returns: DatasetDownloadResult


delete()

Delete a dataset permanently.

# Delete a dataset
deleted = client.datasets.delete("dataset_abc123")
print(f"Deleted: {deleted.dataset_id}")
# Delete with confirmation
dataset = client.datasets.get("dataset_abc123")
print(f"About to delete: {dataset.name}")
print(f"  Assets: {dataset.statistic.asset_total}")
print(f"  Annotations: {dataset.statistic.annotation_total}")

confirm = input("Delete? (yes/no): ")
if confirm.lower() == "yes":
    client.datasets.delete("dataset_abc123")
    print("Deleted.")

Parameters:

| Parameter | Type | Description |
| --- | --- | --- |
| `dataset_id` | `str` | Dataset identifier |

Returns: DeletedDataset


bulk_delete_assets()

Bulk delete assets from a dataset.

# Delete assets by status
response = client.datasets.bulk_delete_assets(
    dataset_id="dataset_abc123",
    filter_criteria='{"status": "error"}',
    strict_query=True
)
# Delete unannotated assets
response = client.datasets.bulk_delete_assets(
    dataset_id="dataset_abc123",
    filter_criteria='{"metadata.annotations.total": 0}'
)

Parameters:

| Parameter | Type | Description |
| --- | --- | --- |
| `dataset_id` | `str` | Dataset identifier |
| `filter_criteria` | `str` | Filter query |
| `strict_query` | `bool` | Strict query mode |

Returns: BulkAssetDeletionSession


Response types

Dataset

Main dataset response object.

from vi.api.resources.datasets.responses import Dataset

| Property | Type | Description |
| --- | --- | --- |
| `dataset_id` | `str` | Unique identifier |
| `name` | `str` | Dataset name |
| `owner` | `str` | Owner identifier |
| `organization_id` | `str` | Organization ID |
| `type` | `DatasetType` | Dataset type (phrase-grounding, vqa) |
| `content` | `DatasetContent` | Content type (Image) |
| `create_date` | `int` | Creation timestamp (Unix ms) |
| `statistic` | `DatasetStatistic` | Dataset statistics |
| `users` | `dict[str, User \| str]` | Users with access |
| `tags` | `dict[str, int]` | Tag counts |
| `status` | `int` | Status code |
| `last_accessed` | `int` | Last access timestamp |
| `is_locked` | `bool` | Lock status |
| `access` | `DatasetAccess` | Access settings |
| `asset_statuses` | `dict[str, AssetStatusDetail]` | Asset status definitions |
| `self_link` | `str` | API link |
| `etag` | `str` | Entity tag |
| `description` | `str \| None` | Optional description |

Methods:

| Method | Returns | Description |
| --- | --- | --- |
| `info()` | `None` | Display formatted dataset information |

DatasetStatistic

from vi.api.resources.datasets.responses import DatasetStatistic

| Property | Type | Description |
| --- | --- | --- |
| `asset_total` | `int` | Total number of assets |
| `annotation_total` | `int` | Total number of annotations |
| `asset_annotated` | `int` | Number of annotated assets |
| `tags_count` | `dict[str, int]` | Tag distribution |

DatasetAccess

from vi.api.resources.datasets.responses import DatasetAccess

| Property | Type | Description |
| --- | --- | --- |
| `is_public` | `bool \| None` | Public accessibility |
| `is_read_only` | `bool \| None` | Read-only mode |
| `is_hidden` | `bool \| None` | Hidden from listings |

AssetStatusDetail

from vi.api.resources.datasets.responses import AssetStatusDetail

| Property | Type | Description |
| --- | --- | --- |
| `description` | `str` | Status description |
| `color` | `str` | Color code |
| `create_date` | `int \| None` | Creation timestamp |

DatasetExport

from vi.api.resources.datasets.responses import DatasetExport

| Property | Type | Description |
| --- | --- | --- |
| `organization_id` | `str` | Organization ID |
| `dataset_id` | `str` | Dataset ID |
| `dataset_export_id` | `str` | Export identifier |
| `spec` | `DatasetExportSpec` | Export specification |
| `status` | `DatasetExportStatus` | Export status |
| `metadata` | `ResourceMetadata` | Metadata |
| `self_link` | `str` | API link |
| `etag` | `str` | Entity tag |

DatasetExportSpec

from vi.api.resources.datasets.responses import DatasetExportSpec

| Property | Type | Description |
| --- | --- | --- |
| `format` | `DatasetExportFormat` | Export format |
| `options` | `DatasetExportOptions` | Export options |

DatasetExportStatus

from vi.api.resources.datasets.responses import DatasetExportStatus

| Property | Type | Description |
| --- | --- | --- |
| `conditions` | `list[ResourceCondition]` | Status conditions |
| `download_url` | `DatasetExportDownloadUrl \| None` | Download URL when ready |

DatasetExportDownloadUrl

from vi.api.resources.datasets.responses import DatasetExportDownloadUrl

| Property | Type | Description |
| --- | --- | --- |
| `url` | `str` | Download URL |
| `expires_at` | `int` | Expiration timestamp |

DatasetDownloadResult

Result returned after downloading a dataset.

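| Property | Type | Description |
| --- | --- | --- |
| `save_dir` | `Path` | Save directory |
| `size_mb` | `float` | Total size in MB |
| `splits` | `list[str]` | Available splits |
| `assets_count` | `int` | Number of assets |
| `annotations_count` | `int` | Number of annotations |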

Methods:

| Method | Returns | Description |
| --- | --- | --- |
| `summary()` | `str` | Get summary string |
| `info()` | `None` | Print detailed info |

DeletedDataset

from vi.api.resources.datasets.responses import DeletedDataset

| Property | Type | Description |
| --- | --- | --- |
| `kind` | `str` | Resource kind |
| `user` | `str` | User who deleted |
| `organization_id` | `str` | Organization ID |
| `dataset_id` | `str` | Deleted dataset ID |
| `self_link` | `str` | API link |
| `etag` | `str` | Entity tag |
| `metadata` | `ResourceMetadata` | Metadata |
| `status` | `DeletedDatasetStatus` | Deletion status |

DeletedDatasetStatus

from vi.api.resources.datasets.responses import DeletedDatasetStatus

| Property | Type | Description |
| --- | --- | --- |
| `conditions` | `list[ResourceCondition]` | Deletion conditions |

BulkAssetDeletionSession

from vi.api.resources.datasets.responses import BulkAssetDeletionSession

| Property | Type | Description |
| --- | --- | --- |
| `kind` | `str` | Resource kind |
| `organization_id` | `str` | Organization ID |
| `dataset_id` | `str` | Dataset ID |
| `delete_many_assets_session_id` | `str` | Session identifier |
| `self_link` | `str` | API link |
| `etag` | `str` | Entity tag |
| `metadata` | `ResourceMetadata` | Metadata |
| `spec` | `BulkAssetDeletionSpec` | Deletion spec |
| `status` | `BulkAssetDeletionStatus` | Deletion status |

BulkAssetDeletionSpec

from vi.api.resources.datasets.types import BulkAssetDeletionSpec

| Property | Type | Description |
| --- | --- | --- |
| `filter` | `str \| dict \| None` | Filter criteria |
| `metadata_query` | `str \| None` | Metadata query |
| `rule_query` | `str \| None` | Rule query |
| `strict_query` | `bool \| None` | Strict query mode |

BulkAssetDeletionStatus

from vi.api.resources.datasets.responses import BulkAssetDeletionStatus

| Property | Type | Description |
| --- | --- | --- |
| `conditions` | `list[ResourceCondition]` | Deletion conditions |

Request types

DatasetExportSettings

from vi.api.resources.datasets.types import DatasetExportSettings

| Property | Type | Description | Default |
| --- | --- | --- | --- |
| `format` | `DatasetExportFormat` | Export format | `VI_FULL` |
| `options` | `DatasetExportOptions` | Export options | `DatasetExportOptions()` |

DatasetExportOptions

from vi.api.resources.datasets.types import DatasetExportOptions

| Property | Type | Description | Default |
| --- | --- | --- | --- |
| `normalized` | `bool` | Normalize coordinates | `True` |
| `split_ratio` | `float \| None` | Train/validation split ratio | `None` |

Enums

DatasetType

from vi.api.resources.datasets.types import DatasetType

| Value | Description |
| --- | --- |
| `PHRASE_GROUNDING` | Phrase grounding dataset |
| `VQA` | Visual question answering dataset |

DatasetContent

from vi.api.resources.datasets.types import DatasetContent

| Value | Description |
| --- | --- |
| `IMAGE` | Image content |

DatasetExportFormat

from vi.api.resources.datasets.types import DatasetExportFormat

| Value | Description |
| --- | --- |
| `VI_FULL` | Full dataset with assets |
| `VI_JSONL` | JSONL format annotations |

Related resources