[cloud][CLOUD-84] Improve performance of repo list queries
Created by: kopancek
Description
Listing repos of an organization in production takes 32 seconds. This is an attempt to fix that. More details are in Jira comments: https://sourcegraph.atlassian.net/browse/CLOUD-113
Original query
EXPLAIN ANALYZE SELECT
repo.id,
repo.name,
repo.private,
repo.external_id,
repo.external_service_type,
repo.external_service_id,
repo.uri,
repo.description,
repo.fork,
repo.archived,
repo.stars,
repo.created_at,
repo.updated_at,
repo.deleted_at,
repo.metadata,
repo.blocked,
(
SELECT
json_agg(
json_build_object(
'clone_url',
esr.clone_url,
'id',
esr.external_service_id,
'kind',
LOWER(svcs.kind)
)
)
FROM
external_service_repos AS esr
JOIN external_services AS svcs ON esr.external_service_id = svcs.id
WHERE
esr.repo_id = repo.id
AND svcs.deleted_at IS NULL
)
FROM
repo
INNER JOIN external_service_repos ON external_service_repos.repo_id = repo.id
WHERE
repo.deleted_at IS NULL
AND repo.blocked IS NULL
AND (external_service_repos.org_id = 2556) -- Populates "queryConds"
AND (
(
FALSE -- TRUE or FALSE to indicate whether to bypass the check
OR (
NOT FALSE -- Disregard unrestricted state when permissions user mapping is enabled
AND (
NOT repo.private -- Happy path of non-private repositories
OR EXISTS (
-- Each external service defines if repositories are unrestricted
SELECT
FROM
external_services AS es
JOIN external_service_repos AS esr ON (
esr.external_service_id = es.id
AND esr.repo_id = repo.id
AND es.unrestricted = TRUE
AND es.deleted_at IS NULL
)
)
)
)
OR (
-- Restricted repositories require checking permissions
(
SELECT
object_ids_ints @> INTSET(repo.id)
FROM
user_permissions
WHERE
user_id = 40577
AND permission = 'read'
AND object_type = 'repos'
)
AND EXISTS (
SELECT
FROM
external_service_repos
WHERE
repo_id = repo.id
AND (
(
user_id IS NULL
AND org_id IS NULL
) -- The repository was added at the instance level
OR user_id = 40577 -- The authenticated user added this repository
OR EXISTS (
-- The authenticated user is a member of an organization that added this repository
SELECT
FROM
org_members
WHERE
external_service_repos.org_id = org_members.org_id
AND org_members.user_id = 40577
)
)
)
)
)
) -- Populates "authzConds"
ORDER BY repo.name
LIMIT 15
OFFSET 0 -- Populates "querySuffix"
Optimized query
EXPLAIN ANALYZE SELECT
repo.id,
repo.name,
repo.private,
repo.external_id,
repo.external_service_type,
repo.external_service_id,
repo.uri,
repo.description,
repo.fork,
repo.archived,
repo.stars,
repo.created_at,
repo.updated_at,
repo.deleted_at,
repo.metadata,
repo.blocked,
(
SELECT
json_agg(
json_build_object(
'clone_url',
esr.clone_url,
'id',
esr.external_service_id,
'kind',
LOWER(svcs.kind)
)
)
FROM
external_service_repos AS esr
JOIN external_services AS svcs ON esr.external_service_id = svcs.id
WHERE
esr.repo_id = repo.id
AND svcs.deleted_at IS NULL
)
FROM
repo
INNER JOIN external_service_repos ON external_service_repos.repo_id = repo.id
WHERE
repo.deleted_at IS NULL
AND repo.blocked IS NULL
AND (external_service_repos.org_id = 2556) -- Populates "queryConds"
AND (
(
FALSE -- TRUE or FALSE to indicate whether to bypass the check
OR (
NOT FALSE -- Disregard unrestricted state when permissions user mapping is enabled
AND (
NOT repo.private -- Happy path of non-private repositories
OR EXISTS (
-- Each external service defines if repositories are unrestricted
SELECT
FROM
external_services AS es
JOIN external_service_repos AS esr ON (
esr.external_service_id = es.id
AND esr.repo_id = repo.id
AND es.unrestricted = TRUE
AND es.deleted_at IS NULL
)
)
)
)
OR (
-- Restricted repositories require checking permissions
(
SELECT
object_ids_ints @> INTSET(repo.id)
FROM
user_permissions
WHERE
user_id = 40577
AND permission = 'read'
AND object_type = 'repos'
)
AND EXISTS (
SELECT
FROM
external_service_repos
WHERE
repo_id = repo.id
AND (
(
user_id IS NULL
AND org_id IS NULL
) -- The repository was added at the instance level
OR user_id = 40577 -- The authenticated user added this repository
OR EXISTS (
-- The authenticated user is a member of an organization that added this repository
SELECT
FROM
org_members
WHERE
external_service_repos.org_id = org_members.org_id
AND org_members.user_id = 40577
)
)
)
)
)
) -- Populates "authzConds"
ORDER BY repo.name, repo.id
LIMIT 15
OFFSET 0 -- Populates "querySuffix"
Testing
There is no easy way to test this locally. What can be done is either testing the resulting query against prod DB or against a new clone of the prod DB. I did the former one, because
You can test the optimized query above to see the difference.