Performance & Optimization
Vectra provides several performance optimization features for high-throughput applications.
Async Batch Operations
Process large vector sets concurrently with automatic chunking:
require 'vectra'
client = Vectra::Client.new(provider: :pinecone, api_key: ENV['PINECONE_API_KEY'])
# Create a batch processor with 4 concurrent workers
batch = Vectra::Batch.new(client, concurrency: 4)
# Async upsert with automatic chunking
vectors = 10_000.times.map { |i| { id: "vec_#{i}", values: Array.new(384) { rand } } }
result = batch.upsert_async(
index: 'my-index',
vectors: vectors,
chunk_size: 100,
on_progress: proc { |stats|
progress = stats[:percentage]
processed = stats[:processed]
total = stats[:total]
chunk = stats[:current_chunk] + 1
total_chunks = stats[:total_chunks]
puts "Progress: #{progress}% (#{processed}/#{total})"
puts " Chunk #{chunk}/#{total_chunks} | Success: #{stats[:success_count]}, Failed: #{stats[:failed_count]}"
}
)
puts "Upserted: #{result[:upserted_count]} vectors in #{result[:chunks]} chunks"
puts "Errors: #{result[:errors].size}" if result[:errors].any?
Progress Tracking
Monitor batch operations in real-time with progress callbacks:
batch.upsert_async(
index: 'my-index',
vectors: large_vector_array,
chunk_size: 100,
on_progress: proc { |stats|
# stats contains:
# - processed: number of processed vectors
# - total: total number of vectors
# - percentage: progress percentage (0-100)
# - current_chunk: current chunk index (0-based)
# - total_chunks: total number of chunks
# - success_count: number of successful chunks
# - failed_count: number of failed chunks
puts "Progress: #{stats[:percentage]}% (#{stats[:processed]}/#{stats[:total]})"
}
)
Batch Delete
ids = 1000.times.map { |i| "vec_#{i}" }
result = batch.delete_async(
index: 'my-index',
ids: ids,
chunk_size: 100
)
Batch Fetch
ids = ['vec_1', 'vec_2', 'vec_3']
vectors = batch.fetch_async(
index: 'my-index',
ids: ids,
chunk_size: 50
)
Streaming Results
For large query result sets, use streaming to reduce memory usage:
stream = Vectra::Streaming.new(client, page_size: 100)
# Stream with a block
stream.query_each(
index: 'my-index',
vector: query_vector,
total: 1000
) do |match|
process_match(match)
end
# Or use lazy enumerator
results = stream.query_stream(
index: 'my-index',
vector: query_vector,
total: 1000
)
# Only fetches what you need
results.take(50).each { |m| puts m.id }
Caching Layer
Cache frequently queried vectors to reduce database load:
# Create cache with 5-minute TTL
cache = Vectra::Cache.new(ttl: 300, max_size: 1000)
# Wrap client with caching
cached_client = Vectra::CachedClient.new(client, cache: cache)
# First query hits the database
result1 = cached_client.query(index: 'idx', vector: vec, top_k: 10)
# Second identical query returns cached result
result2 = cached_client.query(index: 'idx', vector: vec, top_k: 10)
# Invalidate cache when data changes
cached_client.invalidate_index('idx')
# Clear the entire cache
cached_client.clear_cache
Cache Statistics
stats = cache.stats
puts "Cache size: #{stats[:size]}/#{stats[:max_size]}"
puts "TTL: #{stats[:ttl]} seconds"
Connection Pooling (pgvector)
For pgvector, use connection pooling with warmup:
# Configure the provider and connection pool
Vectra.configure do |config|
config.provider = :pgvector
config.host = ENV['DATABASE_URL']
config.pool_size = 10
config.pool_timeout = 5
end
client = Vectra::Client.new
# Warm up connections at startup
client.provider.warmup_pool(5)
# Check pool stats
stats = client.provider.pool_stats
puts "Available connections: #{stats[:available]}"
puts "Checked out: #{stats[:checked_out]}"
# Shutdown pool when done
client.provider.shutdown_pool
Configuration Options
Vectra.configure do |config|
# Provider settings
config.provider = :pinecone
config.api_key = ENV['PINECONE_API_KEY']
# Timeouts
config.timeout = 30
config.open_timeout = 10
# Retry settings
config.max_retries = 3
config.retry_delay = 1
# Batch operations
config.batch_size = 100
config.async_concurrency = 4
# Connection pooling (pgvector)
config.pool_size = 10
config.pool_timeout = 5
# Caching
config.cache_enabled = true
config.cache_ttl = 300
config.cache_max_size = 1000
end
Benchmarking
Run the included benchmarks:
# Batch operations benchmark
bundle exec ruby benchmarks/batch_operations_benchmark.rb
# Connection pooling benchmark
bundle exec ruby benchmarks/connection_pooling_benchmark.rb
Best Practices
- Batch Size: Use batch sizes of 100-500 for optimal throughput
- Concurrency: Set concurrency to 2-4x the number of CPU cores
- Connection Pool: Size the pool to the expected number of concurrent requests plus 20% headroom
- Cache TTL: Set TTL based on data freshness requirements
- Warmup: Always warm up connections in production