I want to import two CSV files into an OrientDB database. The first contains the vertices, with 1 million records. The second contains the edges, with 59 million records.
I have two JSON configuration files for the import:
Vertex:
{
  "source": {
    "file": {
      "path": "../csvs/metodo01/pesquisador.csv"
    }
  },
  "extractor": {
    "row": {}
  },
  "transformers": [
    {
      "csv": {}
    },
    {
      "vertex": {
        "class": "Pesquisador"
      }
    }
  ],
  "loader": {
    "orientdb": {
      "dbURL": "remote:localhost/dbCemMilM01",
      "dbType": "graph",
      "batchCommit": 1000,
      "classes": [
        {
          "name": "Pesquisador",
          "extends": "V"
        }
      ],
      "indexes": [
        {
          "class": "Pesquisador",
          "fields": ["psq_id:integer"],
          "type": "UNIQUE"
        }
      ]
    }
  }
}
Edge:
{
  "config": {
    "log": "info",
    "parallel": false
  },
  "source": {
    "file": {
      "path": "../csvs/metodo01/a10.csv"
    }
  },
  "extractor": {
    "row": {}
  },
  "transformers": [
    {
      "csv": {
        "separator": ",",
        "columnsOnFirstLine": true,
        "columns": [
          "psq_id_from:integer",
          "pub_id_to:integer",
          "ordem:integer"
        ]
      }
    },
    {
      "command": {
        "command": "create edge PUBLICOU from (select from Pesquisador where psq_id = ${input.psq_id_from}) to (select from Publicacao where pub_id = ${input.pub_id_to}) set ordem = ${input.ordem} ",
        "output": "edge"
      }
    }
  ],
  "loader": {
    "orientdb": {
      "dbURL": "remote:localhost/dbUmMilhaoM01",
      "dbType": "graph",
      "standardElementConstraints": false,
      "batchCommit": 1000,
      "classes": [
        {
          "name": "PUBLICOU",
          "extends": "E"
        }
      ],
      "indexes": [
        {
          "class": "Pesquisador",
          "fields": ["psq_id:integer"],
          "type": "UNIQUE"
        },
        {
          "class": "Publicacao",
          "fields": ["pub_id:integer"],
          "type": "NOTUNIQUE"
        }
      ]
    }
  }
}
During the import, OrientDB suggests using an index to speed up the process.
How do I do that?
The only command being executed is: create edge PUBLICOU from (select from Pesquisador where psq_id = ${input.psq_id_from}) to (select from Publicacao where pub_id = ${input.pub_id_to}) set ordem = ${input.ordem}