Getting Started

📋

Prerequisites

Before you begin, make sure you have:

  1. ✅ Installed Vi SDK (pip install vi-sdk[all])
  2. ✅ A Datature account with API access
  3. ✅ Your secret key and organization ID (learn how to create them)
📘

New to Datature Vi?

If you don't have an account yet, sign up at vi.datature.com

Quick navigation:


Initialize the client

First, create a client instance to interact with the Vi platform:

import vi

# Initialize with credentials
client = vi.Client(
    secret_key="your-secret-key-here",
    organization_id="your-organization-id-here"
)

print(f"Connected to organization: {client.organizations.id}")

# Explore what's available
client.help()  # Shows common operations and examples
💡

Discover features

The SDK has built-in help methods to discover available operations:

# Quick reference for all operations
client.help()

# Help for specific resources
client.datasets.help()
client.assets.help()

# Inspect any object for detailed information
asset.info()  # Shows rich details about an asset
dataset.info()  # Shows dataset statistics
💡

Environment variables

For better security, use environment variables:

export DATATURE_VI_SECRET_KEY="your-secret-key"
export DATATURE_VI_ORGANIZATION_ID="your-organization-id"

Then:

import os
import vi

client = vi.Client(
    secret_key=os.getenv("DATATURE_VI_SECRET_KEY"),
    organization_id=os.getenv("DATATURE_VI_ORGANIZATION_ID")
)

List your datasets

View all datasets in your organization:

# Direct iteration (simplest)
for dataset in client.datasets:
    print(f"📊 {dataset.name}")
    print(f"   ID: {dataset.dataset_id}")
    print(f"   Assets: {dataset.statistic.asset_total}")
    print(f"   Annotations: {dataset.statistic.annotation_total}")
💡

Manual pagination

For manual page-by-page iteration:

# Iterate page by page
for page in client.datasets.list():
    for dataset in page.items:
        print(f"Dataset: {dataset.name}")
📝

Dataset info

Use .info() to see detailed information about any dataset:

dataset = client.datasets.get("your-dataset-id")
dataset.info()  # Shows rich formatted details

Download a dataset

Download a complete dataset with assets and annotations:

# Download dataset
result = client.get_dataset(
    dataset_id="your-dataset-id",
    save_dir="./data"
)

# Show summary
print(result.summary())

# Access download details
print(f"Total size: {result.size_mb:.1f} MB")
print(f"Splits: {', '.join(result.splits)}")

# Show detailed information
result.info()

The dataset will be saved to ./data/your-dataset-id/ with the following structure:

your-dataset-id/
├── metadata.json
├── dump/
│   ├── annotations/
│   │   └── annotations.jsonl
│   └── assets/
│       ├── image1.jpg
│       ├── image2.jpg
│       └── ...
├── training/
│   └── ...
└── validation/
    └── ...

Load dataset for training

Use the dataset loader to prepare data for training:

from vi.dataset.loaders import ViDataset

# Load dataset
dataset = ViDataset("./data/your-dataset-id")

# Get dataset info
info = dataset.info()
print(f"📊 Dataset: {info.name}")
print(f"   Total assets: {info.total_assets}")
print(f"   Total annotations: {info.total_annotations}")
print(f"   Training: {info.splits['training'].assets} assets")
print(f"   Validation: {info.splits['validation'].assets} assets")

# Iterate through training data
for asset, annotations in dataset.training.iter_pairs():
    print(f"\n🖼️  {asset.filename}")
    print(f"   Size: {asset.width}x{asset.height}")

    for ann in annotations:
        if hasattr(ann.contents, 'caption'):
            # Phrase grounding annotation
            print(f"   📝 Caption: {ann.contents.caption}")
            print(f"      Phrases: {len(ann.contents.grounded_phrases)}")
        elif hasattr(ann.contents, 'interactions'):
            # VQA annotation
            print(f"   💬 Q&A pairs: {len(ann.contents.interactions)}")

Upload assets

Upload images to an existing dataset:

# Upload a single image (simple)
result = client.assets.upload(
    dataset_id="your-dataset-id",
    paths="path/to/image.jpg"
)
print(f"✓ Uploaded: {result.total_succeeded} assets")
print(result.summary())  # Shows detailed upload summary

# Upload a folder of images
result = client.assets.upload(
    dataset_id="your-dataset-id",
    paths="path/to/images/",
    wait_until_done=True  # Wait for processing to complete
)
print(f"✓ Uploaded: {result.total_succeeded}/{result.total_files} assets")
print(f"Success rate: {result.success_rate:.1f}%")

# View detailed summary
print(result.summary())

List assets

View assets in a dataset:

# Direct iteration (simplest) - use __call__ syntax for parameters
for asset in client.assets(dataset_id="your-dataset-id"):
    print(f"🖼️  {asset.filename}")
    print(f"   Size: {asset.metadata.width}x{asset.metadata.height}")

# Inspect individual asset details
first_asset = next(iter(client.assets(dataset_id="your-dataset-id")))
first_asset.info()  # Shows rich formatted asset information

Run inference

Use a trained model for inference:

from vi.inference import ViModel

# Load model with credentials and run ID
model = ViModel(
    secret_key="your-secret-key",
    organization_id="your-organization-id",
    run_id="your-run-id"
)

# Run inference on single image - returns (result, error) tuple
result, error = model(
    source="path/to/test_image.jpg",
    user_prompt="Describe this image in detail"
)

if error is None:
    print(f"Result: {result}")
else:
    print(f"Error: {error}")

# Batch inference on specific files
images = ["image1.jpg", "image2.jpg", "image3.jpg"]
results = model(source=images, user_prompt="Describe this image", show_progress=True)

for img, (result, error) in zip(images, results):
    if error is None:
        print(f"{img}: {result}")

# Process entire folder directly
results = model(
    source="./test_images/",  # Folder path
    user_prompt="Describe this image",
    recursive=True,  # Include subdirectories
    show_progress=True
)

for result, error in results:
    if error is None:
        print(f"Success: {result}")

Deploy with NVIDIA NIM

Deploy models with NVIDIA NIM containers for optimized GPU-accelerated inference:

from vi.deployment.nim import NIMDeployer, NIMPredictor, NIMConfig

# Step 1: Deploy NIM container
config = NIMConfig(
    nvidia_api_key="nvapi-...",  # Your NGC API key
    run_id="your-run-id",  # Optional: deploy with custom weights
    port=8000
)

deployer = NIMDeployer(config)
result = deployer.deploy()

print(f"✓ Container deployed on port {result.port}")

# Step 2: Run inference
predictor = NIMPredictor(config=config)

# Process images
result = predictor(
    source="image.jpg",
    user_prompt="What objects are in this image?",
    stream=False
)
print(f"Result: {result.result}")

# Step 3: Stop container when done
NIMDeployer.stop(result.container_name)

Key features:

  • Optimized inference — GPU-accelerated with NVIDIA optimizations
  • Custom weights — Deploy models trained on Datature Vi
  • Video support — Process videos with Cosmos-Reason2 models
  • Sampling control — Configure temperature, top-p, and guided decoding

Learn more about NIM deployment →


Error handling

Always handle errors appropriately:

from vi import (
    ViError,
    ViNotFoundError,
    ViAuthenticationError,
    ViValidationError
)

try:
    dataset = client.datasets.get("invalid-id")
except ViNotFoundError as e:
    print(f"❌ Dataset not found: {e.message}")
    if e.suggestion:
        print(f"💡 Suggestion: {e.suggestion}")
except ViAuthenticationError as e:
    print(f"❌ Authentication failed: {e.message}")
    print("Please check your API credentials")
except ViValidationError as e:
    print(f"❌ Validation error: {e.message}")
except ViError as e:
    print(f"❌ Error: {e.error_code} - {e.message}")

Complete workflow example

Here's a complete workflow combining multiple operations:

import vi
from vi.dataset.loaders import ViDataset
from vi.api.types import PaginationParams

# Initialize client
client = vi.Client(
    secret_key="your-secret-key",
    organization_id="your-organization-id"
)

# 1. List and select a dataset
print("📊 Available datasets:")
datasets = client.datasets.list()
for dataset in datasets.items[:5]:  # Show first 5
    print(f"   - {dataset.name} ({dataset.dataset_id})")

# 2. Download the dataset
dataset_id = datasets.items[0].dataset_id
downloaded = client.get_dataset(
    dataset_id=dataset_id,
    save_dir="./data"
)
print(f"\n✓ Downloaded to: {downloaded.save_dir}")

# 3. Load and explore the dataset
dataset = ViDataset(downloaded.save_dir)
info = dataset.info()
print(f"\n📊 Dataset Info:")
print(f"   Name: {info.name}")
print(f"   Total assets: {info.total_assets}")
print(f"   Total annotations: {info.total_annotations}")

# 4. Iterate through training data
print(f"\n🎓 Training data (first 3):")
for i, (asset, annotations) in enumerate(dataset.training.iter_pairs()):
    if i >= 3:
        break
    print(f"   {i+1}. {asset.filename} - {len(annotations)} annotations")

# 5. Upload new assets
print(f"\n📤 Uploading new assets...")
try:
    upload_result = client.assets.upload(
        dataset_id=dataset_id,
        paths="path/to/new/images/",
        wait_until_done=True
    )
    print(f"✓ Uploaded {upload_result.total_succeeded} assets")
    print(upload_result.summary())
except Exception as e:
    print(f"❌ Upload failed: {e}")

print(f"\n✅ Workflow complete!")

Common operations cheat sheet

| Operation | Code |
| --- | --- |
| Initialize client | `client = vi.Client(secret_key, organization_id)` |
| List datasets | `client.datasets.list()` |
| Get dataset | `client.datasets.get(dataset_id)` |
| Download dataset | `client.get_dataset(dataset_id, save_dir)` |
| Upload assets | `client.assets.upload(dataset_id, paths)` |
| List assets | `client.assets.list(dataset_id)` |
| Run inference | `ViModel(secret_key, organization_id, run_id)` |
| List runs | `client.runs.list()` |

Related resources