I'm trying to reconstruct the data retrieved with a Gremlin query from a graph. Specifically, I'm having trouble finding an efficient way to manipulate the data that comes back.
I'm using JanusGraph 0.3.1 running on Cassandra + ES. It is configured with the ConfiguredGraphFactory, so I can create graphs dynamically.
I'm using gremlin javascript version 3.4.2 (the 3.3.3 doesn't work properly)
I built a little example to better explain what I mean. The graph in this example is created without a schema.
First of all, I'll connect to the graph using these functions:
// gremlin-javascript driver; the names below are pulled out of it for the snippets that follow.
const gremlin = require('gremlin');
const { Graph } = gremlin.structure;
const { DriverRemoteConnection } = gremlin.driver;
// Used as `__.V()` to build the nested traversals passed to from_() / to().
const __ = gremlin.process.statics;
// Used as `P.neq(...)` inside where() in the queries.
const P = gremlin.process.P;
// WebSocket URL handed to DriverRemoteConnection in getTraversal().
const GREMLIN_URL = "ws://localhost:8182/gremlin";
// Passed as the `traversalSource` option when the connection is created.
const GRAPH_NAME = "graphtest";
// Connection most recently opened by getTraversal(); closed by closeConnection().
let connection;
/**
 * Opens a remote connection to the Gremlin server and returns a traversal
 * source bound to it.
 *
 * The connection is stored in the module-level `connection` variable so that
 * closeConnection() can close it later.
 *
 * @returns {object} The traversal source produced by
 *   `graph.traversal().withRemote(connection)`.
 */
function getTraversal() {
  const graph = new Graph();
  connection = new DriverRemoteConnection(GREMLIN_URL, { traversalSource: GRAPH_NAME });
  // Return the traversal source directly. The previous `return g = ...`
  // assigned to an undeclared `g`: an implicit global in sloppy mode and a
  // ReferenceError in strict mode / ES modules.
  return graph.traversal().withRemote(connection);
}
/**
 * Closes the connection stored in the module-level `connection` variable.
 * Does nothing when no connection is stored or it has no truthy `close` member.
 */
function closeConnection() {
  const canClose = Boolean(connection && connection.close);
  if (!canClose) {
    return;
  }
  connection.close();
}
Then I'll create some Vertices and Edges.
We'll have one user, two institutions and two 'trained' edges connecting the user to the institutions:
/**
 * Seeds the graph with one 'user' vertex, two 'institution' vertices and two
 * 'trained' edges going from the user to each institution, logging the result
 * of every request.
 *
 * @returns {Promise<void>} Settles after all requests succeeded (or the first
 *   failing one rejected) and closeConnection() has been called.
 */
async function createVerticesAndEdges() {
  const g = getTraversal();
  try {
    const userV = await g.addV('user')
      .property('name', 'Emily')
      .property('identityId', '1234')
      .next();
    console.log('user', userV);

    const institutionV = await g.addV('institution')
      .property('name', 'University of California')
      .property('identityId', 'CA83')
      .next();
    console.log('institution', institutionV);

    const institutionV2 = await g.addV('institution')
      .property('name', 'University of Illinois')
      .property('identityId', 'IL847')
      .next();
    console.log('institution2', institutionV2);

    // The edge endpoints are looked up by their 'identityId' property.
    const trainedE = await g.addE('trained')
      .property('title', 'MS in Computer Science')
      .property('grade', 'B')
      .from_(__.V().has('identityId', '1234'))
      .to(__.V().has('identityId', 'CA83'))
      .next();
    console.log('trained', trainedE);

    const trainedE2 = await g.addE('trained')
      .property('title', 'Political Science')
      .property('grade', 'A')
      .from_(__.V().has('identityId', '1234'))
      .to(__.V().has('identityId', 'IL847'))
      .next();
    console.log('trained2', trainedE2);
  } finally {
    // Release the connection even when one of the requests above rejects;
    // previously a failure skipped the close call and leaked the connection.
    closeConnection();
  }
}
Then, let's say I want to retrieve all the trainings that a user took, and I'd also like to have the name of the institution where the training was taken.
So the query I run is this:
/**
 * Logs the path elements reached from the user with identityId '1234' through
 * outgoing 'trained' edges.
 *
 * The traversal labels the user vertex 'u', walks outE('trained') -> inV(),
 * unfolds each path and keeps only the elements passing where(P.neq('u')).
 *
 * @returns {Promise<void>} Settles after the result has been logged; rejects if
 *   the query fails (the connection is closed in both cases).
 */
async function getUserTrainings() {
  const g = getTraversal();
  let result;
  try {
    result = await g.V()
      .hasLabel('user')
      .has('identityId', '1234')
      .as('u')
      .outE()
      .hasLabel('trained')
      .inV()
      .path()
      .unfold()
      .where(P.neq('u'))
      .toList();
  } finally {
    // Close the connection even when the query rejects; previously a failure
    // skipped the close call and leaked the connection.
    closeConnection();
  }
  console.log(result);
}
and I get this output:
[
Edge {
id: { relationId: 'odxqw-3b4-27th-38o'
},
label: 'trained',
outV: 4288,
inV: 4200,
properties: {}
},
Vertex { id: 4200, label: 'institution', properties: undefined
},
Edge {
id: { relationId: 'odxco-3b4-27th-3ao'
},
label: 'trained',
outV: 4288,
inV: 4272,
properties: {}
},
Vertex { id: 4272, label: 'institution', properties: undefined
}
]
This is not bad: I could use the vertex ids and the edges' inV to reconstruct the relations and return the data the way I want. The problem, as you can see, is that this query doesn't return the properties, so it's kind of useless.
But then, looking through the gremlin docs I found the valueMap() step, so I can slightly edit the previous query like this:
/**
 * Logs the value maps of the path elements reached from the user with
 * identityId '1234' through outgoing 'trained' edges.
 *
 * The traversal labels the user vertex 'u', walks outE('trained') -> inV(),
 * unfolds each path, keeps only the elements passing where(P.neq('u')) and
 * maps each of them with valueMap(true).
 *
 * @returns {Promise<void>} Settles after the result has been logged; rejects if
 *   the query fails (the connection is closed in both cases).
 */
async function getUserTrainings() {
  const g = getTraversal();
  let result;
  try {
    result = await g.V()
      .hasLabel('user')
      .has('identityId', '1234')
      .as('u')
      .outE()
      .hasLabel('trained')
      .inV()
      .path()
      .unfold()
      .where(P.neq('u'))
      .valueMap(true)
      .toList();
  } finally {
    // Close the connection even when the query rejects; previously a failure
    // skipped the close call and leaked the connection.
    closeConnection();
  }
  console.log(result);
}
to get this output:
[
Map {
EnumValue { typeName: 'T', elementName: 'id'
} => { relationId: 'odxqw-3b4-27th-38o'
},
'title' => 'Political Science',
EnumValue { typeName: 'T', elementName: 'label'
} => 'trained',
'grade' => 'A'
},
Map {
'name' => [ 'University of Illinois'
],
EnumValue { typeName: 'T', elementName: 'id'
} => 4200,
'identityId' => [ 'IL847'
],
EnumValue { typeName: 'T', elementName: 'label'
} => 'institution'
},
Map {
EnumValue { typeName: 'T', elementName: 'id'
} => { relationId: 'odxco-3b4-27th-3ao'
},
'title' => 'MS in Computer Science',
EnumValue { typeName: 'T', elementName: 'label'
} => 'trained',
'grade' => 'B'
},
Map {
'name' => [ 'University of California'
],
EnumValue { typeName: 'T', elementName: 'id'
} => 4272,
'identityId' => [ 'CA83'
],
EnumValue { typeName: 'T', elementName: 'label'
} => 'institution'
}
]
So, apart from the fact that the returned data is not at all clear (I mean, what is that "typeName: 'T'" that's the same for every vertex?), I now do get the properties back, but I lose the outV and inV on the edges, so I'm unable to reconstruct the data the way I need (i.e. to understand which vertex is connected by which edge).
I guess I could just use the first query without the valueMap() step and then, for each and every vertex and edge I get back, run another query to retrieve its properties by id.
And this could be ok for a simple situation like this, but I don't think it would be really efficient for queries that could concern hundreds of vertices and edges.
So the final question is: what is the most efficient way to reconstruct the data from a path, including vertices and edges properties?