Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  1. The user has some JSON files that store data in the row-based format: file_1.json, file_2.json.

    Code Block
    {
      "data": {
        "rows": [
          {"id": 1, "year": 2021, "vector": [1.0, 1.1, 1.2]},
          {"id": 2, "year": 2022, "vector": [2.0, 2.1, 2.2]},
          {"id": 3, "year": 2023, "vector": [3.0, 3.1, 3.2]}
        ]
      }
    }

    The "options" could be:

    Code Block
    {
      "data_source": {
        "type": "Minio",
        "address": "localhost:9000",
        "accesskey_id": "minioadmin",
        "accesskey_secret": "minioadmin",
        "use_ssl": false,
        "bucket_name": "mybucket"
      },

      "external_data": {
        "target_collection": "TEST",
        "chunks": [
          {
            "files": [
              {
                "path": "xxxx/file_1.json",
                "type": "row_based",
                "fields_mapping": {
                  "data.rows.id": "uid",
                  "data.rows.year": "year",
                  "data.rows.vector": "embedding"
                }
              }
            ]
          },
          {
            "files": [
              {
                "path": "xxxx/file_2.json",
                "type": "row_based",
                "fields_mapping": {
                  "data.rows.id": "uid",
                  "data.rows.year": "year",
                  "data.rows.vector": "embedding"
                }
              }
            ]
          }
        ],
        "default_fields": {
          "age": 0
        }
      }
    }


  2. The user has some JSON files that store data in the column-based format: file_1.json, file_2.json.

    Code Block
    {
      "data": {
        "columns": {
          "id": [1, 2, 3],
          "year": [2021, 2022, 2023],
          "vector": [
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [3.0, 3.1, 3.2]
          ]
        }
      }
    }

    The "options" could be:

    Code Block
    {
      "data_source": {
        "type": "Minio",
        "address": "localhost:9000",
        "accesskey_id": "minioadmin",
        "accesskey_secret": "minioadmin",
        "use_ssl": false,
        "bucket_name": "mybucket"
      },

      "external_data": {
        "target_collection": "TEST",
        "files": [
          {
            "file": "file_1.json",
            "type": "column_based",
            "fields_mapping": {
              "data.columns.id": "uid",
              "data.columns.year": "year",
              "data.columns.vector": "embedding"
            }
          },
          {
            "file": "file_2.json",
            "type": "column_based",
            "fields_mapping": {
              "data.columns.id": "uid",
              "data.columns.year": "year",
              "data.columns.vector": "embedding"
            }
          }
        ],
        "default_fields": {
          "age": 0
        }
      }
    }


  3. The user has a JSON file that stores data in the column-based format (file_1.json) and a NumPy file that stores the vector data (file_2.npy).
    Note: for hybrid format files, we only allow inputting a pair of files to reduce the complexity.
    The file_1.json:

...