Migrate data
Follow these examples to migrate data manually when using a backup is not possible. They cover all permutations between:
- a single-tenancy collection (Collection), and
- a tenant in a multi-tenancy collection (Tenant).
Additional information
The examples use two Weaviate instances running on the same host, exposed through different ports. The same process can be used for instances running on different hosts as well.
Cross-references in Weaviate are properties. As such, you can retrieve cross-references as part of the object.
What about cross-references?
These scripts should migrate cross-references as well.
Cross-references are properties. As such, these cursor-based exports will include them. During restoration, restore the cross-referenced (i.e. "to") object first, then the object that contains the cross-reference (i.e. "from" object).
Collection → Collection
Step 1: Create the target collection(s)
Create a collection (e.g. `WineReview`) at the target instance, matching the collection (e.g. `WineReview`) at the source instance.
If a snippet doesn't work or you have feedback, please open a GitHub issue.
import os

import weaviate
import weaviate.classes as wvc
from weaviate.client import WeaviateClient
from weaviate.collections import Collection
# Source instance: local Weaviate, no port overrides.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

# Target instance: a second local Weaviate exposed on other HTTP/gRPC ports.
client_tgt = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
    port=8090,
    grpc_port=50061,
)
def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create `collection_name` on `client_in` and return the new collection.

    `enable_mt` is forwarded as the `enabled` flag of the multi-tenancy
    configuration; pass True to create a multi-tenancy collection.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )
# Single-tenancy target collection: multi-tenancy stays disabled.
reviews_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReview",
    enable_mt=False,
)
Step 2: Migrate the data
Migrate:
- The `source collection` data in the `client_src` instance
- to the `target collection` in the `client_tgt` instance
If a snippet doesn't work or you have feedback, please open a GitHub issue.
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from `collection_src` into `collection_tgt`.

    Each object is re-inserted with its original UUID, its properties and its
    "default" vector, in fixed-size batches of 100. Objects are read through
    the source collection's iterator, wrapped in `tqdm` for progress output
    (`tqdm` must be imported by the surrounding script).

    Returns:
        True if no object failed to import, False otherwise.
    """
    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector={
                    "default": q.vector["default"],
                },
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch handle rather than
    # raised, so report them here instead of unconditionally returning True.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False
    return True
# Migrate WineReview (source instance) -> WineReview (target instance).
try:
    reviews_src = client_src.collections.get("WineReview")
    reviews_tgt = client_tgt.collections.get("WineReview")
    migrate_data(reviews_src, reviews_tgt)
finally:
    # Release both connections even if the migration raises part-way through.
    client_src.close()
    client_tgt.close()
Collection → Tenant
Step 1: Create the target collection(s)
Create a collection (e.g. `WineReviewMT`) at the target instance, matching the collection (e.g. `WineReview`) at the source instance, and enable multi-tenancy for the target collection.
If a snippet doesn't work or you have feedback, please open a GitHub issue.
import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient
# Source instance: local Weaviate, no port overrides.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

# Target instance: a second local Weaviate exposed on other HTTP/gRPC ports.
client_tgt = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
    port=8090,
    grpc_port=50061,
)
def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create `collection_name` on `client_in` and return the new collection.

    `enable_mt` is forwarded as the `enabled` flag of the multi-tenancy
    configuration; pass True to create a multi-tenancy collection.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )
# Multi-tenancy target collection: tenants are added to it in the next step.
reviews_mt_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReviewMT",
    enable_mt=True,
)
Step 2: Create the tenant(s)
Add tenants at the target instance before adding data objects.
If a snippet doesn't work or you have feedback, please open a GitHub issue.
# Register the tenants on the target collection before any objects are added.
tenants_tgt = [
    wvc.tenants.Tenant(name=tenant_name) for tenant_name in ("tenantA", "tenantB")
]
reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
reviews_mt_tgt.tenants.create(tenants_tgt)
Step 3: Migrate the data
Migrate:
- The `source collection` data in the `client_src` instance
- to the `target tenant` of the `target collection` in the `client_tgt` instance
If a snippet doesn't work or you have feedback, please open a GitHub issue.
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from `collection_src` into `collection_tgt`.

    Each object is re-inserted with its original UUID, its properties and its
    "default" vector, in fixed-size batches of 100. Objects are read through
    the source collection's iterator, wrapped in `tqdm` for progress output
    (`tqdm` must be imported by the surrounding script).

    Returns:
        True if no object failed to import, False otherwise.
    """
    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector={
                    "default": q.vector["default"],
                },
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch handle rather than
    # raised, so report them here instead of unconditionally returning True.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False
    return True
# Migrate WineReview (source instance) -> tenant of WineReviewMT (target instance).
try:
    reviews_src = client_src.collections.get("WineReview")
    reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
    # tenants_tgt[0] is the first tenant in the list, i.e. "tenantA".
    reviews_tgt_tenant_a = reviews_mt_tgt.with_tenant(tenants_tgt[0].name)
    migrate_data(reviews_src, reviews_tgt_tenant_a)
finally:
    # Release both connections even if the migration raises part-way through.
    client_src.close()
    client_tgt.close()
Tenant → Collection
Step 1: Create the target collection(s)
Create a collection (e.g. `WineReview`) at the target instance, matching the multi-tenancy collection (e.g. `WineReviewMT`) at the source instance. Because the target is a single-tenancy collection, leave multi-tenancy disabled for it.
If a snippet doesn't work or you have feedback, please open a GitHub issue.
import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient
# Source instance: local Weaviate, no port overrides.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

# Target instance: a second local Weaviate exposed on other HTTP/gRPC ports.
client_tgt = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
    port=8090,
    grpc_port=50061,
)
def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create `collection_name` on `client_in` and return the new collection.

    `enable_mt` is forwarded as the `enabled` flag of the multi-tenancy
    configuration; pass True to create a multi-tenancy collection.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )
# Single-tenancy target collection: multi-tenancy stays disabled.
reviews_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReview",
    enable_mt=False,
)
Step 2: Migrate the data
Migrate:
- The `source tenant` data from the `source collection` in the `client_src` instance
- to the `target collection` in the `client_tgt` instance
If a snippet doesn't work or you have feedback, please open a GitHub issue.
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from `collection_src` into `collection_tgt`.

    Each object is re-inserted with its original UUID, its properties and its
    "default" vector, in fixed-size batches of 100. Objects are read through
    the source collection's iterator, wrapped in `tqdm` for progress output
    (`tqdm` must be imported by the surrounding script).

    Returns:
        True if no object failed to import, False otherwise.
    """
    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector={
                    "default": q.vector["default"],
                },
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch handle rather than
    # raised, so report them here instead of unconditionally returning True.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False
    return True
# Migrate tenantA of WineReviewMT (source instance) -> WineReview (target instance).
try:
    reviews_src = client_src.collections.get("WineReviewMT")
    reviews_src_tenant_a = reviews_src.with_tenant("tenantA")
    reviews_tgt = client_tgt.collections.get("WineReview")
    migrate_data(reviews_src_tenant_a, reviews_tgt)
finally:
    # Release both connections even if the migration raises part-way through.
    client_src.close()
    client_tgt.close()
Tenant → Tenant
Step 1: Create the target collection(s)
Create a collection (e.g. `WineReviewMT`) at the target instance, matching the collection (e.g. `WineReviewMT`) at the source instance, including enabling multi-tenancy.
If a snippet doesn't work or you have feedback, please open a GitHub issue.
import weaviate
import weaviate.classes as wvc
from weaviate.collections import Collection
from weaviate.client import WeaviateClient
# Source instance: local Weaviate, no port overrides.
client_src = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
)

# Target instance: a second local Weaviate exposed on other HTTP/gRPC ports.
client_tgt = weaviate.connect_to_local(
    headers={"X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY")},
    port=8090,
    grpc_port=50061,
)
def create_collection(client_in: WeaviateClient, collection_name: str, enable_mt=False):
    """Create `collection_name` on `client_in` and return the new collection.

    `enable_mt` is forwarded as the `enabled` flag of the multi-tenancy
    configuration; pass True to create a multi-tenancy collection.
    """
    mt_config = wvc.config.Configure.multi_tenancy(enabled=enable_mt)
    # Additional settings not shown
    return client_in.collections.create(
        name=collection_name,
        multi_tenancy_config=mt_config,
    )
# Multi-tenancy target collection: tenants are added to it in the next step.
reviews_mt_tgt = create_collection(
    client_in=client_tgt,
    collection_name="WineReviewMT",
    enable_mt=True,
)
Step 2: Create the tenant(s)
Add tenants at the target instance before adding data objects.
If a snippet doesn't work or you have feedback, please open a GitHub issue.
# Register the tenants on the target collection before any objects are added.
tenants_tgt = [
    wvc.tenants.Tenant(name=tenant_name) for tenant_name in ("tenantA", "tenantB")
]
reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
reviews_mt_tgt.tenants.create(tenants_tgt)
Step 3: Migrate the data
Migrate:
- The `source tenant` data from the `source collection` in the `client_src` instance
- to the `target tenant` of the `target collection` in the `client_tgt` instance
If a snippet doesn't work or you have feedback, please open a GitHub issue.
def migrate_data(collection_src: Collection, collection_tgt: Collection):
    """Copy every object from `collection_src` into `collection_tgt`.

    Each object is re-inserted with its original UUID, its properties and its
    "default" vector, in fixed-size batches of 100. Objects are read through
    the source collection's iterator, wrapped in `tqdm` for progress output
    (`tqdm` must be imported by the surrounding script).

    Returns:
        True if no object failed to import, False otherwise.
    """
    with collection_tgt.batch.fixed_size(batch_size=100) as batch:
        for q in tqdm(collection_src.iterator(include_vector=True)):
            batch.add_object(
                properties=q.properties,
                vector={
                    "default": q.vector["default"],
                },
                uuid=q.uuid,
            )

    # Per-object import failures are collected on the batch handle rather than
    # raised, so report them here instead of unconditionally returning True.
    failed_objects = collection_tgt.batch.failed_objects
    if failed_objects:
        print(f"Number of failed imports: {len(failed_objects)}")
        print(f"First failed object: {failed_objects[0]}")
        return False
    return True
# Migrate tenantA of WineReviewMT (source) -> tenant of WineReviewMT (target).
try:
    reviews_mt_src = client_src.collections.get("WineReviewMT")
    reviews_src_tenant_a = reviews_mt_src.with_tenant("tenantA")
    reviews_mt_tgt = client_tgt.collections.get("WineReviewMT")
    # tenants_tgt[0] is the first tenant in the list, i.e. "tenantA".
    reviews_tgt_tenant_a = reviews_mt_tgt.with_tenant(tenants_tgt[0].name)
    migrate_data(reviews_src_tenant_a, reviews_tgt_tenant_a)
finally:
    # Release both connections even if the migration raises part-way through.
    client_src.close()
    client_tgt.close()
Related pages
Questions and feedback
If you have any questions or feedback, let us know in the user forum.