...

  1. import data from JSON format files. (first stage)
  2. import data from Numpy format files. (first stage)
  3. copy a collection within one Milvus 2.0 service. (second stage)
  4. copy a collection from one Milvus 2.0 service to another. (second stage)
  5. import data from Milvus 1.x to Milvus 2.0. (third stage)
  6. import data from parquet/faiss/csv files. (TBD)


Design Details

...

Code Block
{
	"data_source": {			          // required
		"type": "minio",		          // required, "minio" or "s3", case insensitive
		"address": "localhost:9000",	  // optional, milvus server will use its minio/s3 configuration if without this value
		"accesskey_id": "minioadmin",	  // optional, milvus server will use its minio/s3 configuration if without this value
		"accesskey_secret": "minioadmin", // optional, milvus server will use its minio/s3 configuration if without this value
		"use_ssl": false,		          // optional, milvus server will use its minio/s3 configuration if without this value
		"bucket_name": "aaa"		      // optional, milvus server will use its minio/s3 configuration if without this value
	},

	"internal_data": {		     // optional, external_data or internal_data. (external files include json, npy, etc. internal files are exported by milvus)
		"path": "xxx/xxx/xx",	 // required, relative path to the source storage where store the exported data
		"collections_mapping": { // optional, give a new name to collection during importing
			"coll_a": "coll_b",	     // collection name mapping, key is the source collection name, value is a new collection name
			"coll_c": "coll_d"
		}
	},

	"external_data": {			                     // optional, external_data or internal_data. (external files include json, npy, etc. internal files are exported by milvus)
		"target_collection": "xxx",	                 // required, target collection name
		"chunks": [{			                     // required, chunk list, each chunk can be import as one segment or split into multiple segments
				"files": [{	                         // required, files that provide data of a chunk
					"path": "xxxx / xx.json", 		 // required, relative path under the storage source defined by DataSource, currently support json/npy
					"type": "row_based",	  		 // required for json file, "row_based" or "column_based", tell milvus how to parse this json file, case insensitive
                    "from": 0,                       // optional, import part of the file from a number
                    "to": 1000,                      // optional, import part of the file end by a number
					"fields_mapping": {			     // optional, specify the target fields which should be imported. Milvus will import all fields if this list is empty
						"table.rows.id": "uid",		 // field name mapping, tell milvus how to insert data to correct field, key is a json node path, value is a field name of the collection. If the file is numpy format, the key is a field name of the collection same with value.
						"table.rows.year": "year",
						"table.rows.vector": "vector"
					}
				}]
			}
		],
		"default_fields": { // optional, use default value to fill some fields
			"age": 0,       // key is a field name, value is default value of this field, can be number or string
			"weight": 0.0
		}
	}
}
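
As an illustration, the fields_mapping in the request above implies the layout of a row-based json source file: each key is a json node path into the file, and each value is a field name of the target collection. The sketch below is only an assumption derived from that mapping; the "table"/"rows" node names come from the example keys above, while the concrete rows and values are made up.

Code Block
{
	"table": {
		"rows": [
			{"id": 1, "year": 2019, "vector": [0.1, 0.2, 0.3, 0.4]},
			{"id": 2, "year": 2020, "vector": [0.5, 0.6, 0.7, 0.8]}
		]
	}
}

With the mapping above, the id, year and vector values of each row would be inserted into the uid, year and vector fields of the target collection. For a column_based file, the json would presumably hold one array per field rather than a list of rows, and for numpy files each .npy file carries a single field's data, which is why the fields_mapping key is simply the field name itself in that case.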

...