Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
// Import-related RPCs on the Milvus service.
service MilvusService {
  // Submits an import request for a set of files; the response carries the
  // ids of the resulting import tasks.
  rpc Import(ImportRequest) returns (ImportResponse);

  // Returns the state of a single import task, looked up by task id.
  rpc GetImportState(GetImportStateRequest) returns (GetImportStateResponse);
}

// Request to import data files into a collection.
message ImportRequest {
  // Target collection.
  string collection_name = 1;

  // Target partition.
  string partition_name = 2;

  // Whether the files are row-based or column-based.
  bool row_based = 3;

  // Paths of the files to be imported.
  repeated string files = 4;

  // Import options (bucket, etc.).
  repeated common.KeyValuePair options = 5;
}

// Response to an Import call.
message ImportResponse {
  // Status of the call.
  common.Status status = 1;

  // Ids of the import tasks.
  repeated int64 tasks = 2;
}

// Request for the state of one import task.
message GetImportStateRequest {
  // Id of the import task to query.
  int64 task = 1;
}

// States an import task can be in.
//
// ImportPending is the zero value, so it is also what a reader sees when the
// field is left unset.
// NOTE(review): the exact meaning of each intermediate state (downloaded,
// parsed, persisted) is not described here; confirm against the
// implementation before relying on their ordering.
enum ImportState {
  ImportPending = 0;
  ImportFailed = 1;
  ImportDownloaded = 2;
  ImportParsed = 3;
  ImportPersisted = 4;
  ImportCompleted = 5;
}

// Response describing the state of one import task.
message GetImportStateResponse {
  // Status of the call.
  common.Status status = 1;

  // State of the import task; tells whether the task is finished or not.
  ImportState state = 2;

  // Row counter whose meaning depends on the task state:
  //   - task finished:     number of rows imported;
  //   - task not finished: number of rows parsed so far;
  //   - task failed:       0.
  int64 row_count = 3;

  // Auto-generated ids, when the primary key is auto-id.
  repeated int64 id_list = 4;

  // Additional information about the task: progress percent, file path,
  // failure reason, etc.
  repeated common.KeyValuePair infos = 5;
}

The call chain of the import workflow:

Image Removed


3. Rootcoord RPC interfaces

...

Code Block
// Import-related RPCs on RootCoord.
service RootCoord {
  // Takes the same request/response messages as the Milvus service Import.
  rpc Import(milvus.ImportRequest) returns (milvus.ImportResponse);

  // Takes the same request/response messages as the Milvus service
  // GetImportState.
  rpc GetImportState(milvus.GetImportStateRequest) returns (milvus.GetImportStateResponse);

  // Receives the result of an import task.
  rpc ReportImport(ImportResult) returns (common.Status);
}

// Result of an import task, as passed to ReportImport.
message ImportResult {
  common.Status status = 1;

  // Id of the task.
  int64 task_id = 2;

  // State of the task.
  common.ImportState state = 3;

  // Ids of the new sealed segments.
  repeated int64 segments = 4;

  // Auto-generated ids, for an auto-id primary key.
  repeated int64 auto_ids = 5;

  // Number of rows imported by this task.
  int64 row_count = 6;

  // Additional information about the task: file path, failure reason, etc.
  repeated common.KeyValuePair infos = 7;
}


The call chain of the import workflow:


Image Added

4. Datacoord RPC interfaces

...

Code Block
// Import-related RPC on DataCoord.
service DataCoord {
  // Hands over one import task; the response names the datanode that takes it.
  rpc Import(ImportTask) returns (ImportTaskResponse);
}

// A single import task sent to DataCoord.
message ImportTask {
  common.Status status = 1;

  // Target collection.
  string collection_name = 2;

  // Target partition.
  string partition_name = 3;

  // Whether the files are row-based or column-based.
  bool row_based = 4;

  // Id of the task.
  int64 task_id = 5;

  // Paths of the files to be imported.
  repeated string files = 6;

  // Additional information about the task (bucket, etc.).
  repeated common.KeyValuePair infos = 7;
}

// Response to a DataCoord Import call.
message ImportTaskResponse {
  // Status of the call.
  common.Status status = 1;

  // Id of the datanode that takes this task.
  int64 datanode_id = 2;
}


The relationship between ImportRequest and ImportTask:

For a row-based request, RootCoord splits the request into multiple ImportTasks; each JSON file becomes one ImportTask.

For a column-based request, all files are treated as a single ImportTask.

Image Added

5. Datanode interfaces

The declaration of import API in datanode RPC:

...

To achieve this property, the segments shall be marked as "Importing" state and be invisible before the whole loading procedure completes.

...