Skip to content

Commit

Permalink
Update V3 node (#1465)
Browse files Browse the repository at this point in the history
Adds support for property filters, edge filters, and chaining to V3
node. Also fixes the property API to support empty responses.

I ended up adding 6 SQL statements: 
- out arc, single hop
- out arc, chained hops
- in arc, single hop
- in arc, chained hops
- shared subquery for filtering properties
- shared subquery for filtering edges

I split up the single vs chained hops because it was taking too long to
return the full response for chained hops. (This comes from processing
the edge path, which can be very large and costly.) Instead, for chained
hops I only return the name and dcid of the leaf node (and not provenance
or types).
This is consistent with the current v2 node behavior. I set the limit
for chained hops to 10, which can take anywhere from a few hundred
milliseconds to ~5 seconds for some of the examples I tested. Adding
filters will increase the time too.

Still TODO: support pagination, and probably test this more.
  • Loading branch information
n-h-diaz authored Nov 22, 2024
1 parent db67377 commit c3477bc
Show file tree
Hide file tree
Showing 18 changed files with 684 additions and 159 deletions.
28 changes: 17 additions & 11 deletions internal/server/spanner/dsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,28 @@ import (
"google.golang.org/protobuf/proto"
)

// nodePropsToNodeResponse converts a slice of properties to a NodeResponse proto.
func nodePropsToNodeResponse(props []*Property) *v3.NodeResponse {
const (
// Indicates that all properties should be returned.
WILDCARD = "*"
// Indicates that recursive property paths should be returned.
CHAIN = "+"
)

// nodePropsToNodeResponse converts a map from subject id to its properties to a NodeResponse proto.
func nodePropsToNodeResponse(propsBySubjectID map[string][]*Property) *v3.NodeResponse {
nodeResponse := &v3.NodeResponse{
Data: make(map[string]*v2.LinkedGraph),
}

for _, prop := range props {
linkedGraph, ok := nodeResponse.Data[prop.SubjectID]
for subjectID, props := range propsBySubjectID {
linkedGraph, ok := nodeResponse.Data[subjectID]
if !ok {
linkedGraph = &v2.LinkedGraph{}
nodeResponse.Data[prop.SubjectID] = linkedGraph
nodeResponse.Data[subjectID] = linkedGraph
}
for _, prop := range props {
linkedGraph.Properties = append(linkedGraph.Properties, prop.Predicate)
}
linkedGraph.Properties = append(linkedGraph.Properties, prop.Predicate)
}

return nodeResponse
Expand Down Expand Up @@ -73,12 +82,9 @@ func nodeEdgesToLinkedGraph(edges []*Edge) *v2.LinkedGraph {
node := &pb.EntityInfo{
Name: edge.Name,
Types: edge.Types,
Dcid: edge.ObjectID,
ProvenanceId: edge.Provenance,
}
if edge.ObjectValue != "" {
node.Value = edge.ObjectValue
} else {
node.Dcid = edge.ObjectID
Value: edge.ObjectValue,
}
nodes.Nodes = append(nodes.Nodes, node)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
{
"SubjectID": "Aadhaar",
"Predicate": "description",
"ObjectID": "Aadhaar",
"ObjectID": "",
"ObjectValue": "Aadhaar is a 12-digit unique identity number that can be obtained voluntarily by all residents of India",
"Provenance": "dc/base/BaseSchema",
"Name": "",
Expand All @@ -45,7 +45,7 @@
{
"SubjectID": "Aadhaar",
"Predicate": "name",
"ObjectID": "Aadhaar",
"ObjectID": "",
"ObjectValue": "Aadhaar",
"Provenance": "dc/base/BaseSchema",
"Name": "",
Expand All @@ -54,7 +54,7 @@
{
"SubjectID": "Aadhaar",
"Predicate": "localCuratorLevelId",
"ObjectID": "Aadhaar",
"ObjectID": "",
"ObjectValue": "dcid:Aadhaar",
"Provenance": "dc/base/BaseSchema",
"Name": "",
Expand All @@ -63,7 +63,7 @@
{
"SubjectID": "Aadhaar",
"Predicate": "extendedName",
"ObjectID": "Aadhaar",
"ObjectID": "",
"ObjectValue": "Aadhaar",
"Provenance": "dc/base/BaseSchema",
"Name": "",
Expand Down Expand Up @@ -174,7 +174,7 @@
{
"SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential",
"Predicate": "extendedName",
"ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential",
"ObjectID": "",
"ObjectValue": "Average retail price of electricity, residential, monthly",
"Provenance": "dc/base/HumanReadableStatVars",
"Name": "",
Expand All @@ -183,7 +183,7 @@
{
"SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential",
"Predicate": "name",
"ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential",
"ObjectID": "",
"ObjectValue": "Average retail price of electricity, residential, monthly",
"Provenance": "dc/base/HumanReadableStatVars",
"Name": "",
Expand All @@ -192,11 +192,12 @@
{
"SubjectID": "Monthly_Average_RetailPrice_Electricity_Residential",
"Predicate": "localCuratorLevelId",
"ObjectID": "Monthly_Average_RetailPrice_Electricity_Residential",
"ObjectID": "",
"ObjectValue": "dcid:Monthly_Average_RetailPrice_Electricity_Residential",
"Provenance": "dc/base/HumanReadableStatVars",
"Name": "",
"Types": []
}
]
],
"foo": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"EarthquakeEvent": [
{
"SubjectID": "EarthquakeEvent",
"Predicate": "naturalHazardType",
"ObjectID": "Annual_ExpectedLoss_NaturalHazardImpact_EarthquakeEvent",
"ObjectValue": "",
"Provenance": "dc/base/HumanReadableStatVars",
"Name": "Annual Expected Loss from Natural Hazard Impact: Earthquake",
"Types": [
"StatisticalVariable"
]
},
{
"SubjectID": "EarthquakeEvent",
"Predicate": "naturalHazardType",
"ObjectID": "FemaNaturalHazardRiskIndex_NaturalHazardImpact_EarthquakeEvent",
"ObjectValue": "",
"Provenance": "dc/base/HumanReadableStatVars",
"Name": "FEMA National Risk Index for Natural Hazard Impact: Earthquake",
"Types": [
"StatisticalVariable"
]
},
{
"SubjectID": "EarthquakeEvent",
"Predicate": "naturalHazardType",
"ObjectID": "NaturalHazardRiskScore_SuperfundSite_EarthquakeEvent",
"ObjectValue": "",
"Provenance": "dc/base/HumanReadableStatVars",
"Name": "Natural Hazard Risk Score of Superfund Site: Earthquake Event",
"Types": [
"StatisticalVariable"
]
},
{
"SubjectID": "EarthquakeEvent",
"Predicate": "domainIncludes",
"ObjectID": "affectedPlace",
"ObjectValue": "",
"Provenance": "dc/base/BaseSchema",
"Name": "affectedPlace",
"Types": [
"Property"
]
}
]
}
22 changes: 22 additions & 0 deletions internal/server/spanner/golden/query/get_node_edges_in_chain.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"dc/g/Farm_FarmInventoryStatus": [
{
"SubjectID": "dc/g/Farm_FarmInventoryStatus",
"Predicate": "specializationOf+",
"ObjectID": "dc/g/Farm_FarmInventoryStatus-InventorySold",
"ObjectValue": "",
"Provenance": "",
"Name": "Farm With Farm Inventory Status = Inventory Sold",
"Types": []
},
{
"SubjectID": "dc/g/Farm_FarmInventoryStatus",
"Predicate": "specializationOf+",
"ObjectID": "dc/g/Farm_FarmInventoryStatus-InventorySold_FarmInventoryType",
"ObjectValue": "",
"Provenance": "",
"Name": "Farm With Farm Inventory Status = Inventory Sold, Farm Inventory Type",
"Types": []
}
]
}
15 changes: 15 additions & 0 deletions internal/server/spanner/golden/query/get_node_edges_in_filter.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Farm": [
{
"SubjectID": "Farm",
"Predicate": "populationType",
"ObjectID": "Area_Farm_Melon",
"ObjectValue": "",
"Provenance": "dc/base/HumanReadableStatVars",
"Name": "Area of Farm: Melon",
"Types": [
"StatisticalVariable"
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"EarthquakeEvent": [
{
"SubjectID": "EarthquakeEvent",
"Predicate": "domainIncludes",
"ObjectID": "affectedPlace",
"ObjectValue": "",
"Provenance": "dc/base/BaseSchema",
"Name": "affectedPlace",
"Types": [
"Property"
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"foo OR 1=1;": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"Person": [
{
"SubjectID": "Person",
"Predicate": "subClassOf",
"ObjectID": "Thing",
"ObjectValue": "",
"Provenance": "dc/base/BaseSchema",
"Name": "Thing",
"Types": [
"Class"
]
},
{
"SubjectID": "Person",
"Predicate": "source",
"ObjectID": "",
"ObjectValue": "https://www.w3.org/wiki/WebSchemas/SchemaDotOrgSources",
"Provenance": "dc/base/BaseSchema",
"Name": "",
"Types": []
}
]
}
49 changes: 49 additions & 0 deletions internal/server/spanner/golden/query/get_node_edges_out_chain.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"dc/g/Person_Gender": [
{
"SubjectID": "dc/g/Person_Gender",
"Predicate": "specializationOf+",
"ObjectID": "dc/g/Demographics",
"ObjectValue": "",
"Provenance": "",
"Name": "Demographics",
"Types": []
},
{
"SubjectID": "dc/g/Person_Gender",
"Predicate": "specializationOf+",
"ObjectID": "dc/g/Economy",
"ObjectValue": "",
"Provenance": "",
"Name": "Economy",
"Types": []
},
{
"SubjectID": "dc/g/Person_Gender",
"Predicate": "specializationOf+",
"ObjectID": "dc/g/Employment",
"ObjectValue": "",
"Provenance": "",
"Name": "Employment and Business",
"Types": []
},
{
"SubjectID": "dc/g/Person_Gender",
"Predicate": "specializationOf+",
"ObjectID": "dc/g/Health",
"ObjectValue": "",
"Provenance": "",
"Name": "Health",
"Types": []
},
{
"SubjectID": "dc/g/Person_Gender",
"Predicate": "specializationOf+",
"ObjectID": "dc/g/Root",
"ObjectValue": "",
"Provenance": "",
"Name": "Data Commons Variables",
"Types": []
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"nuts/UKI1": [
{
"SubjectID": "nuts/UKI1",
"Predicate": "typeOf",
"ObjectID": "AdministrativeArea2",
"ObjectValue": "",
"Provenance": "dc/base/EuroGeos",
"Name": "AdministrativeArea2",
"Types": [
"Class"
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"Person": [
{
"SubjectID": "Person",
"Predicate": "extendedName",
"ObjectID": "",
"ObjectValue": "Person",
"Provenance": "dc/base/BaseSchema",
"Name": "",
"Types": []
}
]
}
Loading

0 comments on commit c3477bc

Please sign in to comment.