Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  1. The user has some JSON files that store data in the row-based format: file_1.json, file_2.json.

    Code Block
    {
      "table": {
        "rows": [
          {"id": 1, "year": 2021, "vector": [1.0, 1.1, 1.2]},
          {"id": 2, "year": 2022, "vector": [2.0, 2.1, 2.2]},
          {"id": 3, "year": 2023, "vector": [3.0, 3.1, 3.2]}
        ]
      }
    }

    The "options" could be:

    Code Block
    {
    	"data_source": {
    		"type": "Minio",
    		"address": "localhost:9000",
    		"accesskey_id": "minioadmin",
    		"accesskey_secret": "minioadmin",
    		"use_ssl": false,
    		"bucket_name": "mybucket"
    	},
    
    	"external_data": {
    		"target_collection": "TEST",
    		"chunks": [{
    				"files": [{
    					"path": "xxxx/file_1.json",
    					"type": "row_based",
    					"fields_mapping": {
    						"table.rows.id": "uid",
    						"table.rows.year": "year",
    						"table.rows.vector": "vector"
    					}
    				}]
    			},
    			{
    				"files": [{
    					"path": "xxxx/file_2.json",
    					"type": "row_based",
    					"fields_mapping": {
    						"table.rows.id": "uid",
    						"table.rows.year": "year",
    						"table.rows.vector": "vector"
    					}
    				}]
    			}
    		],
    		"default_fields": {
    			"age": 0
    		}
    	}
    }


  2. The user has some JSON files that store data in the column-based format: file_1.json, file_2.json.

    Code Block
    {
      "table": {
        "columns": {
          "id": [1, 2, 3],
          "year": [2021, 2022, 2023],
          "vector": [
            [1.0, 1.1, 1.2],
            [2.0, 2.1, 2.2],
            [3.0, 3.1, 3.2]
          ]
        }
      }
    }

    The "options" could be:

    Code Block
    {
      "data_source": {
        "type": "Minio",
        "address": "localhost:9000",
        "accesskey_id": "minioadmin",
        "accesskey_secret": "minioadmin",
        "use_ssl": false,
        "bucket_name": "mybucket"
      },

      "external_data": {
        "target_collection": "TEST",
        "chunks": [
          {
            "files": [
              {
                "path": "xxxx/file_1.json",
                "type": "column_based",
                "fields_mapping": {
                  "table.columns.id": "uid",
                  "table.columns.year": "year",
                  "table.columns.vector": "vector"
                }
              }
            ]
          },
          {
            "files": [
              {
                "path": "xxxx/file_2.json",
                "type": "column_based",
                "fields_mapping": {
                  "table.columns.id": "uid",
                  "table.columns.year": "year",
                  "table.columns.vector": "vector"
                }
              }
            ]
          }
        ],
        "default_fields": {
          "age": 0
        }
      }
    }


  3. The user has a JSON file that stores data in the column-based format: file_1.json, and a NumPy file that stores vector data: file_2.npy
    Note: for hybrid-format files, only a single pair of files may be supplied, to reduce complexity.
    The file_1.json:

...