Vectorizer and vector index config
Starting with Weaviate Python client v4.16.0, the vectorizer configuration API has been updated.
Starting with Weaviate JS/TS client v3.8.0, the vectorizer configuration API has been updated.
Action required: Update to the latest client version and migrate your code to use the new vectorizer configuration API.
Specify a vectorizer
Specify a vectorizer for a collection.
Additional information
Collection level settings override default values and general configuration parameters such as environment variables.
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vector_config=Configure.Vectors.text2vec_openai(),
properties=[
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
],
)
import { vectors, dataType } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectors.text2VecOpenAI(),
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'body', dataType: dataType.TEXT },
],
})
// Additional configuration not shown
// Define the vectorizer in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorizer("text2vec-openai") // Vectorizer of your choice, e.g. text2vec-openai or text2vec-cohere
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Vectorizer: "text2vec-openai",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "body",
DataType: schema.DataTypeText.PropString(),
},
},
}
Specify vectorizer settings
Defining a collection with Configure.Vectors.text2vec_xxx() using Python client library versions 4.16.0 to 4.16.3 will throw an error if no properties are defined and vectorize_collection_name is not set to True.
This is addressed in version 4.16.4 of the Weaviate Python client. See this FAQ entry for more details: Invalid properties error in Python client versions 4.16.0 to 4.16.3.
To configure how a vectorizer works (i.e. what model to use) with a specific collection, set the vectorizer parameters.
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import Configure
client.collections.create(
"Article",
vector_config=Configure.Vectors.text2vec_cohere(
model="embed-multilingual-v2.0", vectorize_collection_name=True
),
)
import { vectors } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectors.text2VecCohere({
model: 'embed-multilingual-v2.0',
}),
})
// Additional configuration not shown
// Define the module settings
Map<String, Object> text2vecOpenAI = new HashMap<>();
Map<String, Object> text2vecOpenAISettings = new HashMap<>();
text2vecOpenAISettings.put("vectorizePropertyName", false);
text2vecOpenAISettings.put("model", "text-embedding-3-small"); // Set the model of your choice, e.g. text-embedding-3-small
text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings);
Map<Object, Object> moduleConfig = new HashMap<>();
moduleConfig.put("text2vec-openai", text2vecOpenAI);
// Set the module config in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.moduleConfig(moduleConfig) // Set the module config
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Vectorizer: "text2vec-cohere",
ModuleConfig: map[string]interface{}{
"text2vec-cohere": map[string]interface{}{
"model": "embed-multilingual-v2.0",
"vectorizeClassName": true,
},
},
}
Define named vectors
v1.24
You can define multiple named vectors per collection. This allows each object to be represented by multiple vector embeddings, each with its own vector index.
As such, each named vector configuration can include its own vectorizer and vector index settings.
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"ArticleNV",
vector_config=[
# Set a named vector with the "text2vec-cohere" vectorizer
Configure.Vectors.text2vec_cohere(
name="title",
source_properties=["title"], # (Optional) Set the source property(ies)
vector_index_config=Configure.VectorIndex.hnsw(), # (Optional) Set vector index options
),
# Set another named vector with the "text2vec-openai" vectorizer
Configure.Vectors.text2vec_openai(
name="title_country",
source_properties=[
"title",
"country",
], # (Optional) Set the source property(ies)
vector_index_config=Configure.VectorIndex.hnsw(), # (Optional) Set vector index options
),
# Set a named vector for your own uploaded vectors
Configure.Vectors.self_provided(
name="custom_vector",
vector_index_config=Configure.VectorIndex.hnsw(), # (Optional) Set vector index options
),
],
properties=[ # Define properties
Property(name="title", data_type=DataType.TEXT),
Property(name="country", data_type=DataType.TEXT),
],
)
import { vectors, dataType, configure } from 'weaviate-client';
await client.collections.create({
name: 'ArticleNV',
vectorizers: [
// Set a named vector with the "text2vec-cohere" vectorizer
vectors.text2VecCohere({
name: 'title',
sourceProperties: ['title'], // (Optional) Set the source property(ies)
vectorIndexConfig: configure.vectorIndex.hnsw() // (Optional) Set the vector index configuration
}),
// Set a named vector with the "text2vec-openai" vectorizer
vectors.text2VecOpenAI({
name: 'title_country',
sourceProperties: ['title','country'], // (Optional) Set the source property(ies)
vectorIndexConfig: configure.vectorIndex.hnsw() // (Optional) Set the vector index configuration
}),
// Set a named vector for your own uploaded vectors
vectors.selfProvided({
name: 'custom_vector',
vectorIndexConfig: configure.vectorIndex.hnsw() // (Optional) Set the vector index configuration
})
],
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'country', dataType: dataType.TEXT },
],
})
// Additional configuration not shown
// Define the vectorizers configurations
Map<String, Object> text2vecOpenAI = new HashMap<>();
Map<String, Object> text2vecOpenAISettings = new HashMap<>();
text2vecOpenAISettings.put("properties", new String[] { "name" });
text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings);
Map<String, Object> text2vecCohere = new HashMap<>();
Map<String, Object> text2vecCohereSettings = new HashMap<>();
text2vecCohereSettings.put("properties", new String[] { "body" });
text2vecCohere.put("text2vec-cohere", text2vecCohereSettings);
// Define the vector configurations
Map<String, WeaviateClass.VectorConfig> vectorConfig = new HashMap<>();
vectorConfig.put("name_vector", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(text2vecOpenAI)
.build());
vectorConfig.put("body_vector", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(text2vecCohere)
.build());
// Define the vectorizers in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorConfig(vectorConfig)
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "ArticleNV",
Description: "Collection of articles with named vectors",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "country",
DataType: schema.DataTypeText.PropString(),
},
},
VectorConfig: map[string]models.VectorConfig{
"title": {
Vectorizer: map[string]interface{}{
"text2vec-openai": map[string]interface{}{
"sourceProperties": []string{"title"},
},
},
VectorIndexType: "hnsw",
},
"title_country": {
Vectorizer: map[string]interface{}{
"text2vec-openai": map[string]interface{}{
"sourceProperties": []string{"title", "country"},
},
},
VectorIndexType: "hnsw",
},
"custom_vector": {
Vectorizer: map[string]interface{}{
"none": map[string]interface{}{},
},
VectorIndexType: "hnsw",
},
},
}
Add new named vectors
v1.31
New named vectors can be added to an existing collection definition, provided the collection was already configured with named vectors. (This is not possible for collections without named vectors.)
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import Configure
articles = client.collections.use("Article")
articles.config.add_vector(
vector_config=Configure.Vectors.text2vec_cohere(
name="body_vector",
source_properties=["body"],
)
)
await articles.config.addVector(
vectors.text2VecCohere({
name: "body_vector",
sourceProperties: ["body"],
})
)
// Java support coming soon
// Go support coming soon
Adding a new named vector to the collection definition won't trigger vectorization for existing objects. Only new or updated objects will receive embeddings for the newly added named vector definition.
Define multi-vector embeddings (e.g. ColBERT, ColPali)
v1.29, v1.30
Multi-vector embeddings, also known as multi-vectors, represent a single object with multiple vectors, i.e. a 2-dimensional matrix. Multi-vectors are currently only available for HNSW indexes for named vectors. To use multi-vectors, enable them for the appropriate named vector.
- Python
- JS/TS
- Java
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"DemoCollection",
vector_config=[
# Example 1 - Use a model integration
# The factory function will automatically enable multi-vector support for the HNSW index
Configure.MultiVectors.text2vec_jinaai(
name="jina_colbert",
source_properties=["text"],
),
# Example 2 - User-provided multi-vector representations
# Must explicitly enable multi-vector support for the HNSW index
Configure.MultiVectors.self_provided(
name="custom_multi_vector",
),
],
properties=[Property(name="text", data_type=DataType.TEXT)],
# Additional parameters not shown
)
await client.collections.create({
name: "DemoCollection",
vectorizers: [
// Example 1 - Use a model integration
// The factory function will automatically enable multi-vector support for the HNSW index
configure.multiVectors.text2VecJinaAI({
name: "jina_colbert",
sourceProperties: ["text"],
}),
// Example 2 - User-provided multi-vector representations
// Must explicitly enable multi-vector support for the HNSW index
configure.multiVectors.selfProvided({
name: "custom_multi_vector",
}),
],
properties: [{ name: "text", dataType: dataType.TEXT }],
// Additional parameters not shown
})
// Define collection properties
Property textProperty = Property.builder()
.name("text")
.description("Text content for ColBERT vectorization")
.dataType(Arrays.asList(DataType.TEXT))
.build();
// Define the vectorizers configurations for named vectors
Map<String, Object> text2colbertJinaAI = new HashMap<>();
Map<String, Object> text2colbertSettings = new HashMap<>();
text2colbertSettings.put("properties", new String[] { "text" });
text2colbertJinaAI.put("text2colbert-jinaai", text2colbertSettings);
// Configure multi-vector for custom vectors
Map<String, Object> noneVectorizer = new HashMap<>();
noneVectorizer.put("none", new Object());
// Create multi-vector config for custom vectors
VectorIndexConfig customMultiVectorConfig = VectorIndexConfig.builder()
.multiVector(MultiVectorConfig.builder().build()) // Enable multi-vector with default settings
.build();
// Define the vector configurations
Map<String, WeaviateClass.VectorConfig> vectorConfig = new HashMap<>();
// Example 1: ColBERT vectorizer
vectorConfig.put("jina_colbert", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(text2colbertJinaAI)
.build());
// Example 2: User-provided multi-vector representations
vectorConfig.put("custom_multi_vector", WeaviateClass.VectorConfig.builder()
.vectorIndexType("hnsw")
.vectorizer(noneVectorizer)
.vectorIndexConfig(customMultiVectorConfig)
.build());
// Create the collection with multi-vector configuration
WeaviateClass multiVecClass = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(textProperty))
.vectorConfig(vectorConfig)
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(multiVecClass)
.run();
Multi-vector embeddings use up more memory than single vector embeddings. You can use vector quantization and encoding to compress them and reduce memory usage.
Set vector index type
The vector index type can be set for each collection at creation time. Choose between the hnsw, flat and dynamic index types.
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType
client.collections.create(
"Article",
vector_config=Configure.Vectors.text2vec_openai(
name="default",
vector_index_config=Configure.VectorIndex.hnsw(), # Use the HNSW index
# vector_index_config=Configure.VectorIndex.flat(), # Use the FLAT index
# vector_index_config=Configure.VectorIndex.dynamic(), # Use the DYNAMIC index
),
properties=[
Property(name="title", data_type=DataType.TEXT),
Property(name="body", data_type=DataType.TEXT),
],
)
import { vectors, dataType, configure } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectors.text2VecOpenAI({
vectorIndexConfig: configure.vectorIndex.hnsw(), // Use HNSW
// vectorIndexConfig: configure.vectorIndex.flat(), // Use Flat
// vectorIndexConfig: configure.vectorIndex.dynamic(), // Use Dynamic
}),
properties: [
{ name: 'title', dataType: dataType.TEXT },
{ name: 'body', dataType: dataType.TEXT },
],
})
// Additional configuration not shown
// Define the index type in the WeaviateClass Builder
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorizer("text2vec-openai")
.vectorIndexType("hnsw") // set the vector index of your choice e.g. hnsw, flat...
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "country",
DataType: schema.DataTypeText.PropString(),
},
},
Vectorizer: "text2vec-openai",
VectorIndexType: "hnsw", // Or "flat", "dynamic"
}
Additional information
- Read more about index types & compression in Concepts: Vector index.
Set vector index parameters
Set vector index parameters such as compression and filter strategy through collection configuration. Some parameters can be updated later after collection creation.
The filter strategy parameter was added in v1.27.
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import (
Configure,
Property,
DataType,
VectorDistances,
VectorFilterStrategy,
)
client.collections.create(
"Article",
vector_config=Configure.Vectors.text2vec_openai(
name="default",
vector_index_config=Configure.VectorIndex.hnsw(
ef_construction=300,
distance_metric=VectorDistances.COSINE,
filter_strategy=VectorFilterStrategy.SWEEPING, # or ACORN (Available from Weaviate v1.27.0)
),
),
)
import { configure, vectors } from 'weaviate-client';
await client.collections.create({
name: 'Article',
// Additional configuration not shown
vectorizers: vectors.text2VecCohere({
vectorIndexConfig: configure.vectorIndex.flat({
quantizer: configure.vectorIndex.quantizer.bq({
rescoreLimit: 200,
cache: true
}),
vectorCacheMaxObjects: 100000
})
})
})
// Additional configuration not shown
// Define the VectorIndexConfig with compression
VectorIndexConfig createBqIndexConfig = VectorIndexConfig.builder()
.bq(BQConfig.builder()
.enabled(true)
.rescoreLimit(123L)
.cache(true)
.build())
.vectorCacheMaxObjects(100000L)
.build();
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorIndexType("flat") // set the vector index of your choice e.g. hnsw, flat...
.vectorIndexConfig(createBqIndexConfig)
.vectorizer("text2vec-openai")
.build();
// Add the collection to the schema
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
},
{
Name: "country",
DataType: schema.DataTypeText.PropString(),
},
},
Vectorizer: "text2vec-openai",
VectorIndexType: "hnsw",
VectorIndexConfig: map[string]interface{}{
"bq": map[string]interface{}{
"enabled": true,
},
"efConstruction": 300,
"distance": "cosine",
"filterStrategy": "acorn",
},
}
Additional information
- Read more about index types & compression in Concepts: Vector index.
Property-level settings
Configure individual properties in a collection. Each property can have its own configuration. Here are some common settings:
- Vectorize the property
- Vectorize the property name
- Set a tokenization type
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import Configure, Property, DataType, Tokenization
client.collections.create(
"Article",
vector_config=Configure.Vectors.text2vec_cohere(),
properties=[
Property(
name="title",
data_type=DataType.TEXT,
vectorize_property_name=True, # Use "title" as part of the value to vectorize
tokenization=Tokenization.LOWERCASE, # Use "lowercase" tokenization
description="The title of the article.", # Optional description
),
Property(
name="body",
data_type=DataType.TEXT,
skip_vectorization=True, # Don't vectorize this property
tokenization=Tokenization.WHITESPACE, # Use "whitespace" tokenization
),
],
)
import { vectors, dataType, tokenization } from 'weaviate-client';
const newCollection = await client.collections.create({
name: 'Article',
vectorizers: vectors.text2VecHuggingFace(),
properties: [
{
name: 'title',
dataType: dataType.TEXT,
vectorizePropertyName: true,
tokenization: tokenization.LOWERCASE // or 'lowercase'
},
{
name: 'body',
dataType: dataType.TEXT,
skipVectorization: true,
tokenization: tokenization.WHITESPACE // or 'whitespace'
},
],
})
Property titleProperty = Property.builder()
.name("title")
.description("title of the article")
.dataType(Arrays.asList(DataType.TEXT))
.tokenization(Tokenization.WORD)
.build();
Property bodyProperty = Property.builder()
.name("body")
.description("body of the article")
.dataType(Arrays.asList(DataType.TEXT))
.tokenization(Tokenization.LOWERCASE)
.build();
// Add the defined properties to the collection
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.description("Article collection Description...")
.properties(Arrays.asList(titleProperty, bodyProperty))
.build();
Result<Boolean> result = client.schema().classCreator()
.withClass(articleCollection)
.run();
vTrue := true
vFalse := false
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
Properties: []*models.Property{
{
Name: "title",
DataType: schema.DataTypeText.PropString(),
Tokenization: "lowercase",
IndexFilterable: &vTrue,
IndexSearchable: &vFalse,
ModuleConfig: map[string]interface{}{
"text2vec-cohere": map[string]interface{}{
"vectorizePropertyName": true,
},
},
},
{
Name: "body",
DataType: schema.DataTypeText.PropString(),
Tokenization: "whitespace",
IndexFilterable: &vTrue,
IndexSearchable: &vTrue,
ModuleConfig: map[string]interface{}{
"text2vec-cohere": map[string]interface{}{
"vectorizePropertyName": false,
},
},
},
},
Vectorizer: "text2vec-cohere",
}
Specify a distance metric
If you choose to bring your own vectors, you should specify the distance metric.
- Python
- JS/TS
- Java
- Go
from weaviate.classes.config import Configure, VectorDistances
client.collections.create(
"Article",
vector_config=Configure.Vectors.text2vec_openai(
vector_index_config=Configure.VectorIndex.hnsw(
distance_metric=VectorDistances.COSINE
),
),
)
import { configure, vectors, vectorDistances } from 'weaviate-client';
await client.collections.create({
name: 'Article',
vectorizers: vectors.text2VecOllama({
vectorIndexConfig: configure.vectorIndex.hnsw({
distanceMetric: vectorDistances.COSINE // or 'cosine'
})
})
})
// Additional configuration not shown
VectorIndexConfig vectorIndexConfig = VectorIndexConfig.builder()
.distance(DistanceType.DOT) // Define Distance Type e.g. Dot, Cosine, hamming...
.build();
WeaviateClass articleCollection = WeaviateClass.builder()
.className(collectionName)
.properties(Arrays.asList(titleProperty, bodyProperty))
.vectorIndexConfig(vectorIndexConfig)
.build();
Result<Boolean> classResult = client.schema().classCreator()
.withClass(articleCollection)
.run();
articleClass := &models.Class{
Class: "Article",
Description: "Collection of articles",
VectorIndexConfig: map[string]interface{}{
"distance": "cosine",
},
}
Additional information
For details on the configuration parameters, see the following:
Further resources
Questions and feedback
If you have any questions or feedback, let us know in the user forum.