code-intel: slow postgres query for listing many dependents
Created by: Strum355
Uploads with a large number of dependents have long query times of multiple seconds. This likely also affects dependencies_ queries too that would have similarly large result sets.
Sample Query
-- source: enterprise/internal/codeintel/stores/dbstore/uploads.go:GetUploads
WITH ranked_dependents AS (
SELECT p.dump_id as pkg_id,
r.dump_id as ref_id,
-- Rank each upload providing the same package from the same directory
-- within a repository by commit date. We'll choose the oldest commit
-- date as the canonical choice and ignore the uploads for younger
-- commits providing the same package.
rank() OVER (
PARTITION BY -- Group providers of the same package together
p.scheme,
p.name,
p.version,
-- Defined by the same directory within a repository
u.repository_id,
u.indexer,
u.root
ORDER BY -- Rank each grouped upload by the associated commit date
u.committed_at,
-- Break ties via the unique identifier
u.id
) AS rank
FROM lsif_uploads u
JOIN lsif_packages p ON p.dump_id = u.id
JOIN lsif_references r ON r.scheme = p.scheme
AND r.name = p.name
AND r.version = p.version
AND r.dump_id != p.dump_id
WHERE -- Don't match deleted uploads
u.state = 'completed'
AND (p.scheme, p.name, p.version) IN (
SELECT p.scheme,
p.name,
p.version
FROM lsif_packages p
WHERE p.dump_id = 1371337
)
) -- Dynamic CTE definitions for use in the WHERE clause
SELECT u.id,
u.commit,
u.root,
EXISTS (
SELECT 1
FROM lsif_uploads_visible_at_tip uvt
WHERE uvt.repository_id = u.repository_id
AND uvt.upload_id = u.id
) AS visible_at_tip,
u.uploaded_at,
u.state,
u.failure_message,
u.started_at,
u.finished_at,
u.process_after,
u.num_resets,
u.num_failures,
u.repository_id,
u.repository_name,
u.indexer,
u.num_parts,
u.uploaded_parts,
u.upload_size,
u.associated_index_id,
s.rank
FROM lsif_uploads_with_repository_name u
LEFT JOIN (
SELECT r.id,
ROW_NUMBER() OVER (
ORDER BY COALESCE(r.process_after, r.uploaded_at),
r.id
) as rank
FROM lsif_uploads_with_repository_name r
WHERE r.state = 'queued'
) s ON u.id = s.id
JOIN repo ON repo.id = u.repository_id
WHERE u.state != 'deleted'
AND u.id IN (
SELECT rd.ref_id
FROM ranked_dependents rd
WHERE rd.pkg_id = 1371337
AND rd.rank = 1
)
AND (
true -- TRUE or FALSE to indicate whether to bypass the check
OR (
NOT false -- Disregard unrestricted state when permissions user mapping is enabled
AND (
NOT repo.private -- Happy path of non-private repositories
OR EXISTS (
-- Each external service defines if repositories are unrestricted
SELECT
FROM external_services AS es
JOIN external_service_repos AS esr ON (
esr.external_service_id = es.id
AND esr.repo_id = repo.id
AND es.unrestricted = TRUE
AND es.deleted_at IS NULL
)
)
)
)
OR (
-- Restricted repositories require checking permissions
(
SELECT object_ids_ints @> INTSET(repo.id)
FROM user_permissions
WHERE user_id = 0
AND permission = 'read'
AND object_type = 'repos'
)
AND EXISTS (
SELECT
FROM external_service_repos
WHERE repo_id = repo.id
AND (
(
user_id IS NULL
AND org_id IS NULL
) -- The repository was added at the instance level
OR user_id = 0 -- The authenticated user added this repository
OR EXISTS (
-- The authenticated user is a member of an organization that added this repository
SELECT
FROM org_members
WHERE external_service_repos.org_id = org_members.org_id
AND org_members.user_id = 0
)
)
)
)
)
Query Plan
Nested Loop Left Join (cost=13.93..19.09 rows=1 width=386) (actual time=18782.700..18800.100 rows=901 loops=1)
Join Filter: (u.id = u_2.id)
-> Nested Loop (cost=8.63..11.10 rows=1 width=377) (actual time=18782.641..18794.744 rows=901 loops=1)
Join Filter: (u.repository_id = r.id)
-> Nested Loop (cost=8.19..10.63 rows=1 width=336) (actual time=18782.623..18791.521 rows=901 loops=1)
-> Nested Loop (cost=7.76..9.99 rows=1 width=332) (actual time=18782.600..18787.619 rows=901 loops=1)
-> HashAggregate (cost=7.34..7.35 rows=1 width=4) (actual time=18782.562..18782.873 rows=902 loops=1)
Group Key: rd.ref_id
-> Subquery Scan on rd (cost=7.28..7.34 rows=1 width=4) (actual time=15648.045..18782.191 rows=902 loops=1)
Filter: ((rd.pkg_id = 1371337) AND (rd.rank = 1))
Rows Removed by Filter: 1906235
-> WindowAgg (cost=7.28..7.32 rows=1 width=94) (actual time=15648.044..18621.450 rows=1907137 loops=1)
-> Sort (cost=7.28..7.29 rows=1 width=86) (actual time=15647.998..16697.737 rows=1907137 loops=1)
Sort Key: p.scheme, p.name, p.version, u_1.repository_id, u_1.indexer, u_1.root, u_1.committed_at, u_1.id
Sort Method: external merge Disk: 155200kB
-> Nested Loop (cost=4.04..7.27 rows=1 width=86) (actual time=0.146..6724.928 rows=1907137 loops=1)
-> Nested Loop (cost=3.62..5.90 rows=1 width=51) (actual time=0.122..1757.243 rows=1907137 loops=1)
-> Nested Loop (cost=3.20..5.44 rows=1 width=85) (actual time=0.075..6.765 rows=4671 loops=1)
-> HashAggregate (cost=2.65..2.65 rows=1 width=43) (actual time=0.021..0.028 rows=3 loops=1)
Group Key: p_1.scheme, p_1.name, p_1.version
-> Index Scan using lsif_packages_dump_id on lsif_packages p_1 (cost=0.42..2.64 rows=1 width=43) (actual time=0.014..0.016 rows=3 loops=1)
Index Cond: (dump_id = 1371337)
-> Index Only Scan using lsif_references_scheme_name_version_dump_id on lsif_references r_1 (cost=0.56..2.78 rows=1 width=42) (actual time=0.034..1.692 rows=1557 loops=3)
Index Cond: ((scheme = p_1.scheme) AND (name = p_1.name) AND (version = p_1.version))
Heap Fetches: 847
-> Index Only Scan using lsif_packages_scheme_name_version_dump_id on lsif_packages p (cost=0.42..0.45 rows=1 width=47) (actual time=0.018..0.304 rows=408 loops=4671)
Index Cond: ((scheme = r_1.scheme) AND (name = r_1.name) AND (version = r_1.version))
Filter: (r_1.dump_id <> dump_id)
Heap Fetches: 1653745
-> Index Scan using lsif_uploads_pkey on lsif_uploads u_1 (cost=0.42..1.37 rows=1 width=35) (actual time=0.002..0.002 rows=1 loops=1907137)
Index Cond: (id = p.dump_id)
Filter: (state = 'completed'::text)
-> Index Scan using lsif_uploads_pkey on lsif_uploads u (cost=0.42..2.64 rows=1 width=332) (actual time=0.005..0.005 rows=1 loops=902)
Index Cond: (id = rd.ref_id)
Filter: (state <> 'deleted'::text)
Rows Removed by Filter: 0
-> Index Only Scan using repo_pkey on repo (cost=0.43..0.64 rows=1 width=4) (actual time=0.004..0.004 rows=1 loops=901)
Index Cond: (id = u.repository_id)
Heap Fetches: 893
-> Index Only Scan using repo_non_deleted_id_name_idx on repo r (cost=0.43..0.46 rows=1 width=49) (actual time=0.003..0.003 rows=1 loops=901)
Index Cond: (id = repo.id)
Heap Fetches: 893
-> WindowAgg (cost=5.30..5.32 rows=1 width=20) (actual time=0.000..0.000 rows=0 loops=901)
-> Sort (cost=5.30..5.31 rows=1 width=12) (actual time=0.000..0.000 rows=0 loops=901)
Sort Key: (COALESCE(u_2.process_after, u_2.uploaded_at)), u_2.id
Sort Method: quicksort Memory: 25kB
-> Nested Loop (cost=0.85..5.29 rows=1 width=12) (actual time=0.020..0.021 rows=0 loops=1)
-> Index Scan using lsif_uploads_state on lsif_uploads u_2 (cost=0.42..2.64 rows=1 width=24) (actual time=0.020..0.020 rows=0 loops=1)
Index Cond: (state = 'queued'::text)
-> Index Only Scan using repo_non_deleted_id_name_idx on repo r_2 (cost=0.43..2.65 rows=1 width=4) (never executed)
Index Cond: (id = u_2.repository_id)
Heap Fetches: 0
SubPlan 1
-> Index Only Scan using lsif_uploads_visible_at_tip_repository_id_upload_id on lsif_uploads_visible_at_tip uvt (cost=0.42..2.64 rows=1 width=0) (actual time=0.005..0.005 rows=1 loops=901)
Index Cond: ((repository_id = u.repository_id) AND (upload_id = u.id))
Heap Fetches: 785
SubPlan 2
-> Seq Scan on lsif_uploads_visible_at_tip uvt_1 (cost=0.00..8675.16 rows=426716 width=8) (never executed)
Planning Time: 8.408 ms
Execution Time: 18841.266 ms