Skip to content

Overhaul DefaultIDSetMap's implementation and API

Administrator requested to merge vg/idsetmap-fixes into main

Created by: varungandhi-src

Related GitHub discussion: https://github.com/sourcegraph/sourcegraph/pull/30978#issuecomment-1035950484 Slack discussion: https://sourcegraph.slack.com/archives/CHXHX7XAS/p1644561669822999

Specifically, when implementing Chris's suggestion on top of Eric's PR: (see patch) and uploading the LSIF dump for the client/web directory, I get a crash in SetUnion: it ends up calling getOrCreate(0) which returns nil (this happens for an empty DefaultIDSetMap), which crashes on the call to Union inside SetUnion (since Union expects a non-nil receiver).

I've changed the method names as well because I found the Set prefix quite confusing; it's hard to not read it as the verb "set", since method names often follow the verb + noun pattern. So SetLen reads like a setter for Len (which is not what it actually means).

Test plan

  • Added regression tests for the various edge cases.

Patch

diff --git a/lib/codeintel/lsif/conversion/canonicalize.go b/lib/codeintel/lsif/conversion/canonicalize.go
index 362ae9e618..303f4a6455 100644
--- a/lib/codeintel/lsif/conversion/canonicalize.go
+++ b/lib/codeintel/lsif/conversion/canonicalize.go
@@ -68,6 +68,16 @@ func canonicalizeDocumentsInDefinitionReferences(state *State, definitionReferen
 	for _, documentRanges := range definitionReferenceData {
-		for documentID, canonicalID := range canonicalIDs {
-			// Remove references to non-canonical document...
-			if rangeIDs := documentRanges.Pop(documentID); rangeIDs != nil {
-				// ...and move existing definition/reference data into the canonical document
+		if documentRanges.ApproxLen() >= len(canonicalIDs) {
+			for documentID, canonicalID := range canonicalIDs {
+				// Remove references to non-canonical document...
+				if rangeIDs := documentRanges.Pop(documentID); rangeIDs != nil {
+					// ...and move existing definition/reference data into the canonical document
+					documentRanges.SetUnion(canonicalID, rangeIDs)
+				}
+			}
+		} else {
+			// Copy out keys first to avoid iterator invalidation
+			var documentIDs = documentRanges.UnorderedKeys()
+			for _, documentID := range documentIDs {
+				canonicalID := canonicalIDs[documentID]
+				rangeIDs := documentRanges.Pop(documentID)
 				documentRanges.SetUnion(canonicalID, rangeIDs)
diff --git a/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go b/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go
index 07b6bfcf4e..99ceadefa7 100644
--- a/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go
+++ b/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go
@@ -87,2 +87,20 @@ func (sm *DefaultIDSetMap) Each(f func(key int, value *IDSet)) {
 
+func (sm *DefaultIDSetMap) ApproxLen() int {
+	if sm.key != 0 {
+		return len(sm.m) + 1
+	}
+	return len(sm.m)
+}
+
+func (sm *DefaultIDSetMap) UnorderedKeys() []int {
+	var out = make([]int, sm.ApproxLen())
+	if sm.key != 0 {
+		out = append(out, sm.key)
+	}
+	for k := range sm.m {
+		out = append(out, k)
+	}
+	return out
+}
+
 // SetLen returns the number of identifiers in the identifier set at the given key.

Merge request reports

Loading