Read all objects
Weaviate provides the necessary APIs to iterate through all your data. This is useful when you want to manually copy/migrate your data (and vector embeddings) from one place to another.
This is done with the help of the `after` operator, also called the cursor API.
The new API clients (for example the Python client v4 and the JS/TS client used in the examples below) encapsulate this functionality as an `Iterator`.
Read object properties and ids
The following code iterates through all objects, providing the properties and id for each object.
- Python
- JS/TS
- Java
- Go
# Get a handle to the collection whose objects will be read
collection = client.collections.use("WineReview")

# Iterate through all objects, printing the id and properties of each one
for item in collection.iterator():
    print(item.uuid, item.properties)
// Get a handle to the collection whose objects will be read
const myCollection = client.collections.use("WineReview");

// Walk the async iterator, printing the id and properties of each object
for await (const { uuid, properties } of myCollection.iterator()) {
  console.log(uuid, properties);
}
import io.weaviate.client.Config;
import io.weaviate.client.WeaviateAuthClient;
import io.weaviate.client.WeaviateClient;
String scheme = "https";
String host = "WEAVIATE_INSTANCE_URL";  // Replace with your Weaviate URL
String apiKey = "YOUR-WEAVIATE-API-KEY";  // If auth enabled. Replace with your Weaviate instance API key.

// Declared before the try block so the client remains in scope for the code that follows
WeaviateClient sourceClient;
try {
  sourceClient = WeaviateAuthClient.apiKey(new Config(scheme, host), apiKey);
} catch (AuthException e) {
  // handle error in case of authorization problems
  throw new RuntimeException(e);
}

// Settings passed to the batch-reading helpers
int batchSize = 20;
String className = "WineReview";
String[] classProperties = new String[]{"title"};
/**
 * Runs a GraphQL Get query for one batch of objects of the given class.
 *
 * @param client     client used to build and run the query
 * @param className  name of the class to read from
 * @param properties property names to request for each object
 * @param batchSize  value passed to the query's limit
 * @param cursor     value passed to the `after` operator; when null the query runs without it
 * @return the result of running the query
 */
private Result<GraphQLResponse> getBatchWithCursor(WeaviateClient client,
    String className, String[] properties, int batchSize, String cursor) {
  // Optionally retrieve the vector embedding by adding `vector` to the _additional fields
  Stream<String> fieldNames = Stream.concat(
      Arrays.stream(properties), Stream.of("_additional { id vector }"));
  Field[] fields = fieldNames
      .map(name -> Field.builder().name(name).build())
      .toArray(Field[]::new);

  Get query = client.graphQL().get()
      .withClassName(className)
      .withFields(fields)
      .withLimit(batchSize);

  if (cursor == null) {
    return query.run();
  }
  return query.withAfter(cursor).run();
}
/**
 * Converts the objects listed under "Get" / className in a GraphQL response into
 * one map per object, holding the requested properties plus the "id" and "vector"
 * values read from the object's "_additional" section.
 *
 * @param result          GraphQL response whose data contains a "Get" entry
 * @param className       key under "Get" that holds the list of objects
 * @param classProperties property names to copy from each object
 * @return one map per object, in the order the objects appear in the response
 */
private List<Map<String, Object>> getProperties(GraphQLResponse result, String className, String[] classProperties) {
  Map<?, ?> getSection = (Map<?, ?>) ((Map<?, ?>) result.getData()).get("Get");
  List<?> rawObjects = (List<?>) getSection.get(className);

  List<Map<String, Object>> flattened = new ArrayList<>();
  for (Object raw : rawObjects) {
    Map<?, ?> fields = (Map<?, ?>) raw;
    Map<String, Object> entry = new HashMap<>();

    for (String prop : classProperties) {
      entry.put(prop, fields.get(prop));
    }

    // "id" and "vector" are written last, so they replace a requested property of the same name
    Map<?, ?> additional = (Map<?, ?>) fields.get("_additional");
    entry.put("id", additional.get("id"));
    entry.put("vector", additional.get("vector"));

    flattened.add(entry);
  }
  return flattened;
}
/**
 * Returns how many objects are listed under "Get" / className in a GraphQL response.
 *
 * @param result    GraphQL response whose data contains a "Get" entry
 * @param className key under "Get" that holds the list of objects
 * @return the size of that list
 */
private int getObjectsCount(GraphQLResponse result, String className) {
  Map<?, ?> data = (Map<?, ?>) result.getData();
  Map<?, ?> getSection = (Map<?, ?>) data.get("Get");
  return ((List<?>) getSection.get(className)).size();
}
"github.com/weaviate/weaviate-go-client/v5/weaviate"
"github.com/weaviate/weaviate-go-client/v5/weaviate/auth"
"github.com/weaviate/weaviate-go-client/v5/weaviate/graphql"
"github.com/weaviate/weaviate/entities/models"
// Create the client for the instance the objects are read from
sourceClient, err := weaviate.NewClient(weaviate.Config{
	Scheme: "https",
	Host:   "WEAVIATE_INSTANCE_URL", // Replace WEAVIATE_INSTANCE_URL with your instance URL
	AuthConfig: auth.ApiKey{
		Value: "YOUR-WEAVIATE-API-KEY", // If auth enabled. Replace with your Weaviate instance API key.
	},
})
if err != nil {
	// handle error
	panic(err)
}

// Settings passed to the batch-reading helper
var (
	batchSize       = 20
	className       = "WineReview"
	classProperties = []string{"title"}
)
// getBatchWithCursor runs a GraphQL Get query for one batch of objects of className,
// requesting classProperties plus the `_additional` id and vector fields.
// batchSize is passed as the query limit; a non-empty cursor is passed to the
// `after` operator, and an empty cursor runs the query without it.
getBatchWithCursor := func(client weaviate.Client,
	className string, classProperties []string, batchSize int, cursor string) (*models.GraphQLResponse, error) {
	fields := make([]graphql.Field, 0, len(classProperties)+1)
	for i := range classProperties {
		fields = append(fields, graphql.Field{Name: classProperties[i]})
	}
	// Optionally retrieve the vector embedding by adding `vector` to the _additional fields
	fields = append(fields, graphql.Field{Name: "_additional { id vector }"})

	query := client.GraphQL().Get().
		WithClassName(className).
		WithFields(fields...).
		WithLimit(batchSize)

	if cursor == "" {
		return query.Do(context.Background())
	}
	return query.WithAfter(cursor).Do(context.Background())
}
Read all objects including vectors
Read through all data including the vectors. (Also applicable where named vectors are used.)
- Python
- JS/TS
# Get a handle to the collection whose objects will be read
collection = client.collections.use("WineReview")

# Iterate through all objects, requesting the vectors along with the properties
for item in collection.iterator(
    include_vector=True  # If using named vectors, you can specify ones to include e.g. ['title', 'body'], or True to include all
):
    print(item.properties)
    print(item.vector)
// Get a handle to the collection whose objects will be read
const myCollection = client.collections.use("WineReview");

// Request the vectors along with each object
const objectsWithVectors = myCollection.iterator({ includeVector: true });

for await (const item of objectsWithVectors) {
  console.log(item.uuid, item.properties);
  console.log(item.vectors);
}
Read all objects - Multi-tenant collections
Iterate through all tenants and read data for each.
For collections where multi-tenancy is enabled, you need to specify the tenant name when reading or creating objects. See Manage data: multi-tenancy operations for details.
- Python
- JS/TS
multi_collection = client.collections.use("WineReviewMT")

# Get a list of tenants
tenants = multi_collection.tenants.get()

# Iterate through tenants
for tenant_name in tenants.keys():
    # Iterate through objects within each tenant
    for item in multi_collection.with_tenant(tenant_name).iterator():
        print(f"{tenant_name}: {item.properties}")
const multiCollection = client.collections.use("WineReviewMT");

// Get the tenants; the keys of the returned object are used as tenant names below
const tenants = await multiCollection.tenants.get();

// Iterate through tenants (own keys only, so inherited properties are never treated as tenants)
for (const tenantName of Object.keys(tenants)) {
  // Iterate through objects within each tenant
  for await (const item of multiCollection.withTenant(tenantName).iterator()) {
    console.log(`${tenantName}:`, item.properties);
  }
}
Related pages
- Connect to Weaviate
- How-to: Read objects
- References: GraphQL - Additional Operators
- Manage data: multi-tenancy operations
Questions and feedback
If you have any questions or feedback, let us know in the user forum.