Skip to content

[cloud][CLOUD-84] Improve performance of repo list queries

Administrator requested to merge CLOUD-113 into main

Created by: kopancek

Description

Listing repos of an organization in production takes 32 seconds. This is an attempt to fix that. More details are in Jira comments: https://sourcegraph.atlassian.net/browse/CLOUD-113

Original query

EXPLAIN ANALYZE SELECT
    repo.id,
    repo.name,
    repo.private,
    repo.external_id,
    repo.external_service_type,
    repo.external_service_id,
    repo.uri,
    repo.description,
    repo.fork,
    repo.archived,
    repo.stars,
    repo.created_at,
    repo.updated_at,
    repo.deleted_at,
    repo.metadata,
    repo.blocked,
    (
        SELECT
            json_agg(
                json_build_object(
                    'clone_url',
                    esr.clone_url,
                    'id',
                    esr.external_service_id,
                    'kind',
                    LOWER(svcs.kind)
                )
            )
        FROM
            external_service_repos AS esr
            JOIN external_services AS svcs ON esr.external_service_id = svcs.id
        WHERE
            esr.repo_id = repo.id
            AND svcs.deleted_at IS NULL
    )
FROM
    repo
    INNER JOIN external_service_repos ON external_service_repos.repo_id = repo.id
WHERE
    repo.deleted_at IS NULL
    AND repo.blocked IS NULL
    AND (external_service_repos.org_id = 2556) -- Populates "queryConds"
    AND (
        (
            FALSE -- TRUE or FALSE to indicate whether to bypass the check
            OR (
                NOT FALSE -- Disregard unrestricted state when permissions user mapping is enabled
                AND (
                    NOT repo.private -- Happy path of non-private repositories
                    OR EXISTS (
                        -- Each external service defines if repositories are unrestricted
                        SELECT
                        FROM
                            external_services AS es
                            JOIN external_service_repos AS esr ON (
                                esr.external_service_id = es.id
                                AND esr.repo_id = repo.id
                                AND es.unrestricted = TRUE
                                AND es.deleted_at IS NULL
                            )
                    )
                )
            )
            OR (
                -- Restricted repositories require checking permissions
                (
                    SELECT
                        object_ids_ints @> INTSET(repo.id)
                    FROM
                        user_permissions
                    WHERE
                        user_id = 40577
                        AND permission = 'read'
                        AND object_type = 'repos'
                )
                AND EXISTS (
                    SELECT
                    FROM
                        external_service_repos
                    WHERE
                        repo_id = repo.id
                        AND (
                            (
                                user_id IS NULL
                                AND org_id IS NULL
                            ) -- The repository was added at the instance level
                            OR user_id = 40577 -- The authenticated user added this repository
                            OR EXISTS (
                                -- The authenticated user is a member of an organization that added this repository
                                SELECT
                                FROM
                                    org_members
                                WHERE
                                    external_service_repos.org_id = org_members.org_id
                                    AND org_members.user_id = 40577
                            )
                        )
                )
            )
        )
    ) -- Populates "authzConds"
ORDER BY repo.name
LIMIT 15
OFFSET 0 -- Populates "querySuffix"

Optimized query

EXPLAIN ANALYZE SELECT
    repo.id,
    repo.name,
    repo.private,
    repo.external_id,
    repo.external_service_type,
    repo.external_service_id,
    repo.uri,
    repo.description,
    repo.fork,
    repo.archived,
    repo.stars,
    repo.created_at,
    repo.updated_at,
    repo.deleted_at,
    repo.metadata,
    repo.blocked,
    (
        SELECT
            json_agg(
                json_build_object(
                    'clone_url',
                    esr.clone_url,
                    'id',
                    esr.external_service_id,
                    'kind',
                    LOWER(svcs.kind)
                )
            )
        FROM
            external_service_repos AS esr
            JOIN external_services AS svcs ON esr.external_service_id = svcs.id
        WHERE
            esr.repo_id = repo.id
            AND svcs.deleted_at IS NULL
    )
FROM
    repo
    INNER JOIN external_service_repos ON external_service_repos.repo_id = repo.id
WHERE
    repo.deleted_at IS NULL
    AND repo.blocked IS NULL
    AND (external_service_repos.org_id = 2556) -- Populates "queryConds"
    AND (
        (
            FALSE -- TRUE or FALSE to indicate whether to bypass the check
            OR (
                NOT FALSE -- Disregard unrestricted state when permissions user mapping is enabled
                AND (
                    NOT repo.private -- Happy path of non-private repositories
                    OR EXISTS (
                        -- Each external service defines if repositories are unrestricted
                        SELECT
                        FROM
                            external_services AS es
                            JOIN external_service_repos AS esr ON (
                                esr.external_service_id = es.id
                                AND esr.repo_id = repo.id
                                AND es.unrestricted = TRUE
                                AND es.deleted_at IS NULL
                            )
                    )
                )
            )
            OR (
                -- Restricted repositories require checking permissions
                (
                    SELECT
                        object_ids_ints @> INTSET(repo.id)
                    FROM
                        user_permissions
                    WHERE
                        user_id = 40577
                        AND permission = 'read'
                        AND object_type = 'repos'
                )
                AND EXISTS (
                    SELECT
                    FROM
                        external_service_repos
                    WHERE
                        repo_id = repo.id
                        AND (
                            (
                                user_id IS NULL
                                AND org_id IS NULL
                            ) -- The repository was added at the instance level
                            OR user_id = 40577 -- The authenticated user added this repository
                            OR EXISTS (
                                -- The authenticated user is a member of an organization that added this repository
                                SELECT
                                FROM
                                    org_members
                                WHERE
                                    external_service_repos.org_id = org_members.org_id
                                    AND org_members.user_id = 40577
                            )
                        )
                )
            )
        )
    ) -- Populates "authzConds"
ORDER BY repo.name, repo.id
LIMIT 15
OFFSET 0 -- Populates "querySuffix"

Testing

There is no easy way to test this locally. What can be done is either testing the resulting query against prod DB or against a new clone of the prod DB. I did the former one, because

I don't always test my code but when I do it's in production

You can test the optimized query above to see the difference.

Merge request reports

Loading