Overhaul DefaultIDSetMap's implementation and API
Created by: varungandhi-src
Related GitHub discussion: https://github.com/sourcegraph/sourcegraph/pull/30978#issuecomment-1035950484 Slack discussion: https://sourcegraph.slack.com/archives/CHXHX7XAS/p1644561669822999
Specifically, when implementing Chris's suggestion on top of Eric's PR: (see patch) and uploading the LSIF dump for the client/web
directory, I get a crash in SetUnion
: it ends up calling getOrCreate(0)
which returns nil
(this happens for an empty DefaultIDSetMap
), which crashes on the call to Union
inside SetUnion
(since Union
expects a non-nil receiver).
I've changed the method names as well because I found the Set
prefix quite confusing; it's hard to not read it as the verb "set", since method names often follow the verb + noun pattern. So SetLen
reads like a setter for Len
(which is not what it actually means).
Test plan
- Added regression tests for the various edge cases.
Patch
diff --git a/lib/codeintel/lsif/conversion/canonicalize.go b/lib/codeintel/lsif/conversion/canonicalize.go
index 362ae9e618..303f4a6455 100644
--- a/lib/codeintel/lsif/conversion/canonicalize.go
+++ b/lib/codeintel/lsif/conversion/canonicalize.go
@@ -68,6 +68,16 @@ func canonicalizeDocumentsInDefinitionReferences(state *State, definitionReferen
for _, documentRanges := range definitionReferenceData {
- for documentID, canonicalID := range canonicalIDs {
- // Remove references to non-canonical document...
- if rangeIDs := documentRanges.Pop(documentID); rangeIDs != nil {
- // ...and move existing definition/reference data into the canonical document
+ if documentRanges.ApproxLen() >= len(canonicalIDs) {
+ for documentID, canonicalID := range canonicalIDs {
+ // Remove references to non-canonical document...
+ if rangeIDs := documentRanges.Pop(documentID); rangeIDs != nil {
+ // ...and move existing definition/reference data into the canonical document
+ documentRanges.SetUnion(canonicalID, rangeIDs)
+ }
+ }
+ } else {
+ // Copy out keys first to avoid iterator invalidation
+ var documentIDs = documentRanges.UnorderedKeys()
+ for _, documentID := range documentIDs {
+ canonicalID := canonicalIDs[documentID]
+ rangeIDs := documentRanges.Pop(documentID)
documentRanges.SetUnion(canonicalID, rangeIDs)
diff --git a/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go b/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go
index 07b6bfcf4e..99ceadefa7 100644
--- a/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go
+++ b/lib/codeintel/lsif/conversion/datastructures/default_idset_map.go
@@ -87,2 +87,20 @@ func (sm *DefaultIDSetMap) Each(f func(key int, value *IDSet)) {
+func (sm *DefaultIDSetMap) ApproxLen() int {
+ if sm.key != 0 {
+ return len(sm.m) + 1
+ }
+ return len(sm.m)
+}
+
+func (sm *DefaultIDSetMap) UnorderedKeys() []int {
+ var out = make([]int, sm.ApproxLen())
+ if sm.key != 0 {
+ out = append(out, sm.key)
+ }
+ for k := range sm.m {
+ out = append(out, k)
+ }
+ return out
+}
+
// SetLen returns the number of identifiers in the identifier set at the given key.