I have JSON entities like the one below that I insert into the graph as edges and vertexes. As you can see, each entity is already in a highly relational format.
# Sample entity: scalar values are plain properties, nested dicts (and lists
# of dicts) are child objects hanging off the person.
person = {
    "summary": "Unix System Administrator at National Bank of Canada",
    "id": "P6ZiIHhJ-PhON9W6UgeFwfA",
    "name": "Patrick",
    "type": "Person",
    # One entry per job held.
    "employments": [
        {
            "isCurrent": True,
            "employer": {
                "Name": "Commercial bank located in Canada",
                "type": "Corporation",
            },
            "title": "Unix System Administrator",
        },
    ],
    "skills": [
        {"name": "string"},
    ],
    "locations": [
        {
            "country": {
                "name": "Canada",
                "type": "AdministrativeArea",
            },
        },
    ],
    # Single nested object (as opposed to a list of them).
    "someVertex": {"k": "v"},
}
My problem is that, in the future, I might receive a new JSON for that same person, in which case I need to "update it" in the graph if something changed, making sure to delete any child vertexes that no longer exist. It is kind of like an upsert, but applied to all the child nodes and edges.
Right now, I add the root id as a property on every child element so I can find them all and delete them later. Is there another way to do this?
My current process:
- Add all vertexes and edges from the Python dict recursively: nested dicts all become vertexes, and an edge is added between each of them and its parent.
# Value types stored directly as vertex properties (bool is listed for
# clarity even though it is a subclass of int).
_LITERAL_TYPES = (str, bool, int, float)


def add_vertex(g, label, dct, entity_id):
    """Create a vertex for ``dct`` and, recursively, for its nested dicts.

    Args:
        g: Graph traversal source used to issue ``addV``/``addE``.
        label: Label given to the new vertex.
        dct: Mapping describing the vertex; see ``add_properties`` for how
            each kind of value is handled.
        entity_id: Id of the root entity, stored as the ``entity_id``
            property on the vertex so the whole tree can be found later.

    Returns:
        The created vertex (the result of ``.next()`` on the ``addV``
        traversal). The previous version returned the already-iterated
        traversal instead, which is not a usable reference to the vertex
        that was actually created.
    """
    traversal = g.addV(label).property('entity_id', entity_id)
    return add_properties(g, traversal, dct, entity_id)


def add_properties(g, vertex, dct, entity_id):
    """Attach the properties in ``dct``, create the vertex, then its children.

    Handling per value in ``dct``:
      * str/bool/int/float      -> single property on the vertex
      * non-empty list of those -> one ``Cardinality.set_`` property per item
      * dict                    -> child vertex labelled with the key, plus
                                   an edge (same label) from this vertex
      * non-empty list of dicts -> one child vertex + edge per item
    Anything else (``None``, empty lists, ...) is ignored. A list is
    classified by its first element only, i.e. lists are assumed homogeneous.

    Args:
        g: Graph traversal source.
        vertex: Not-yet-iterated ``addV`` traversal to add properties to.
        dct: Mapping of property names to values.
        entity_id: Root entity id propagated to child vertexes and edges.

    Returns:
        The created vertex, so the caller can use it as an edge endpoint.
    """
    children = []  # (label, nested dict) pairs, kept in dict order
    for key, value in dct.items():
        if isinstance(value, _LITERAL_TYPES):
            vertex = vertex.property(key, value)
        elif isinstance(value, dict):
            children.append((key, value))
        elif value and isinstance(value, list):
            first = value[0]
            if isinstance(first, _LITERAL_TYPES):
                for literal in value:
                    vertex = vertex.property(Cardinality.set_, key, literal)
            elif isinstance(first, dict):
                children.extend((key, nested) for nested in value)

    # All properties are queued on the traversal; execute it once to create
    # the vertex before wiring up any children.
    created = vertex.next()

    for key, nested in children:
        child = add_vertex(g, key, nested, entity_id)
        add_edge(g, key, created, child, entity_id)
    return created
def add_edge(g, name, from_v, to_v, entity_id):
    """Create an edge labelled ``name`` going from ``from_v`` to ``to_v``.

    The edge gets an ``entity_id`` property set to ``entity_id`` and the
    traversal is executed immediately with ``iterate()``; nothing is returned.
    """
    edge = g.addE(name)
    edge = edge.property('entity_id', entity_id)
    edge = edge.from_(from_v)
    edge = edge.to(to_v)
    edge.iterate()
# Insert the person and its nested objects, using the person's own id as the
# entity_id stamped on the created elements.
add_vertex(g, label='Person', dct=person, entity_id=person['id'])
- If I receive the person with the same id again, and imagine the vertex "someVertex" is now gone from the dict, how can I "upsert" the whole tree of vertexes and edges that originally came from this person so that this vertex is removed? For now, I delete all elements with the property "entity_id" that I added in the previous step.
# If the root vertex for this entity is already in the graph, drop every
# vertex stamped with its entity_id before inserting the new version.
root_exists = g.V().has(entity_type, 'id', entity_id).hasNext()
if root_exists:
    stale_vertices = g.V().has('entity_id', entity_id)
    stale_vertices.drop().iterate()
# NOTE(review): the original indentation was lost; the insert is assumed to
# run whether or not the entity already existed - confirm.
add_vertex(g, entity_type, entity, entity_id)