Skip to content

Commit fc3f35c

Browse files
GitLab Source: Back off from the experimental Scan2 to the legacy pagination API call (#4608)
* Back off from the experimental Scan2 to the legacy pagination API call. This commit rewrites simplifiedGitlabEnumeration to use the legacy pagination API call with keyset pagination instead of Scan2, which is currently in an experimental state. Note that this is not guaranteed to fix the problem; it is just a test to check. It also adds retry logic in case any 500 error occurs. I added some logs as well to keep track of the number of projects being enumerated. * Implemented the built-in retry mechanism for GitLab and proper handling of the next page * Fixed basic auth * Some enhancements: reversed the GitLab cloud logic for adding the membership flag, so that we use the default (false) for non-gitlab.com instances. This can help if the issue really was the membership flag, as mentioned in some GitLab issues. Also added the simple flag to the list-projects call to get only minimal fields in the response instead of a big JSON response for each project. Added a test case as well. * Enhanced the test case
1 parent 728d71f commit fc3f35c

File tree

2 files changed

+158
-49
lines changed

2 files changed

+158
-49
lines changed

pkg/sources/gitlab/gitlab.go

Lines changed: 83 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"slices"
99
"strings"
1010
"sync"
11+
"time"
1112

1213
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
1314
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
@@ -471,14 +472,25 @@ func (s *Source) newClient() (*gitlab.Client, error) {
471472
// Initialize a new api instance.
472473
switch s.authMethod {
473474
case "OAUTH":
474-
apiClient, err := gitlab.NewOAuthClient(s.token, gitlab.WithBaseURL(s.url))
475+
apiClient, err := gitlab.NewOAuthClient(
476+
s.token,
477+
gitlab.WithBaseURL(s.url),
478+
gitlab.WithCustomRetryWaitMinMax(time.Second, 5*time.Second),
479+
gitlab.WithCustomRetryMax(3),
480+
)
475481
if err != nil {
476482
return nil, fmt.Errorf("could not create Gitlab OAUTH client for %q: %w", s.url, err)
477483
}
478484
return apiClient, nil
479485

480486
case "BASIC_AUTH":
481-
apiClient, err := gitlab.NewBasicAuthClient(s.user, s.password, gitlab.WithBaseURL(s.url))
487+
apiClient, err := gitlab.NewBasicAuthClient(
488+
s.user,
489+
s.password,
490+
gitlab.WithBaseURL(s.url),
491+
gitlab.WithCustomRetryWaitMinMax(time.Second, 5*time.Second),
492+
gitlab.WithCustomRetryMax(3),
493+
)
482494
if err != nil {
483495
return nil, fmt.Errorf("could not create Gitlab BASICAUTH client for %q: %w", s.url, err)
484496
}
@@ -491,7 +503,12 @@ func (s *Source) newClient() (*gitlab.Client, error) {
491503
}
492504
fallthrough
493505
case "TOKEN":
494-
apiClient, err := gitlab.NewOAuthClient(s.token, gitlab.WithBaseURL(s.url))
506+
apiClient, err := gitlab.NewOAuthClient(
507+
s.token,
508+
gitlab.WithBaseURL(s.url),
509+
gitlab.WithCustomRetryWaitMinMax(time.Second, 5*time.Second),
510+
gitlab.WithCustomRetryMax(3),
511+
)
495512
if err != nil {
496513
return nil, fmt.Errorf("could not create Gitlab TOKEN client for %q: %w", s.url, err)
497514
}
@@ -699,74 +716,91 @@ func (s *Source) getAllProjectReposV2(
699716

700717
projectQueryOptions := &gitlab.ListProjectsOptions{
701718
ListOptions: listOpts,
702-
Membership: gitlab.Ptr(true),
719+
// Return only limited fields for each project
720+
Simple: gitlab.Ptr(true),
703721
}
704722

705-
// for non gitlab.com instances, include all available projects (public + membership).
706-
if s.url != gitlabBaseURL {
707-
projectQueryOptions.Membership = gitlab.Ptr(false)
723+
// for gitlab.com instance, include only projects where the user is a member.
724+
if s.url == gitlabBaseURL {
725+
projectQueryOptions.Membership = gitlab.Ptr(true)
708726
}
709727

710728
ctx.Logger().Info("starting projects enumeration",
711-
"list_options", listOpts,
712-
"all_available", *projectQueryOptions.Membership)
713-
714-
// https://pkg.go.dev/gitlab.com/gitlab-org/api/client-go#Scan2
715-
projectsIter := gitlab.Scan2(func(p gitlab.PaginationOptionFunc) ([]*gitlab.Project, *gitlab.Response, error) {
716-
return apiClient.Projects.ListProjects(projectQueryOptions, p, gitlab.WithContext(ctx))
717-
})
729+
"list_options", listOpts)
718730

731+
// totalCount tracks the total number of projects processed by this enumeration.
732+
// It includes all projects fetched from the API, even those later skipped by ignore rules.
719733
totalCount := 0
720734

721-
// process each project
722-
for project, projectErr := range projectsIter {
723-
if projectErr != nil {
724-
err := fmt.Errorf("error during project enumeration: %w", projectErr)
735+
requestOptions := []gitlab.RequestOptionFunc{gitlab.WithContext(ctx)}
725736

726-
if reportErr := reporter.UnitErr(ctx, err); reportErr != nil {
727-
return reportErr
737+
// Pagination loop: Continue fetching pages until the API indicates there are no more.
738+
for {
739+
// Fetch a page of projects from the GitLab API using the current query options.
740+
projects, resp, err := apiClient.Projects.ListProjects(projectQueryOptions, requestOptions...)
741+
if err != nil {
742+
err = fmt.Errorf("received error on listing projects, you might not have permissions to do that: %w", err)
743+
if err := reporter.UnitErr(ctx, err); err != nil {
744+
return err
728745
}
729-
730-
continue
746+
// break on error as with error we will not have any response and no next page
747+
break
731748
}
732749

733-
totalCount++
750+
// Log the batch size for debugging and monitoring.
751+
ctx.Logger().V(3).Info("listed projects batch", "batch_size", len(projects), "running_total", totalCount)
752+
// Process each project in the current page.
753+
for _, project := range projects {
754+
projCtx := context.WithValues(ctx,
755+
"project_id", project.ID,
756+
"project_name", project.NameWithNamespace)
734757

735-
projCtx := context.WithValues(ctx,
736-
"project_id", project.ID,
737-
"project_name", project.NameWithNamespace)
758+
totalCount++
738759

739-
// skip projects configured to be ignored.
740-
if ignoreRepo(project.PathWithNamespace) {
741-
projCtx.Logger().V(3).Info("skipping project", "reason", "ignored in config")
760+
// skip projects configured to be ignored.
761+
if ignoreRepo(project.PathWithNamespace) {
762+
projCtx.Logger().V(3).Info("skipping project", "reason", "ignored in config")
742763

743-
continue
744-
}
764+
continue
765+
}
745766

746-
// report an error if we could not convert the project into a URL.
747-
if _, err := url.Parse(project.HTTPURLToRepo); err != nil {
748-
projCtx.Logger().V(3).Info("skipping project",
749-
"reason", "URL parse failure",
750-
"url", project.HTTPURLToRepo,
751-
"parse_error", err)
767+
// report an error if we could not convert the project into a URL.
768+
if _, err := url.Parse(project.HTTPURLToRepo); err != nil {
769+
projCtx.Logger().V(3).Info("skipping project",
770+
"reason", "URL parse failure",
771+
"url", project.HTTPURLToRepo,
772+
"parse_error", err)
752773

753-
err = fmt.Errorf("could not parse url %q given by project: %w", project.HTTPURLToRepo, err)
754-
if err := reporter.UnitErr(ctx, err); err != nil {
755-
return err
774+
err = fmt.Errorf("could not parse url %q given by project: %w", project.HTTPURLToRepo, err)
775+
if err := reporter.UnitErr(ctx, err); err != nil {
776+
return err
777+
}
778+
779+
continue
756780
}
757781

758-
continue
759-
}
782+
// report the unit.
783+
projCtx.Logger().V(3).Info("accepting project")
760784

761-
// report the unit.
762-
projCtx.Logger().V(3).Info("accepting project")
785+
s.cacheGitlabProject(project)
786+
unit := git.SourceUnit{Kind: git.UnitRepo, ID: project.HTTPURLToRepo}
787+
gitlabReposEnumerated.WithLabelValues(s.name).Inc()
763788

764-
s.cacheGitlabProject(project)
765-
unit := git.SourceUnit{Kind: git.UnitRepo, ID: project.HTTPURLToRepo}
766-
gitlabReposEnumerated.WithLabelValues(s.name).Inc()
789+
if err := reporter.UnitOk(ctx, unit); err != nil {
790+
return err
791+
}
792+
}
767793

768-
if err := reporter.UnitOk(ctx, unit); err != nil {
769-
return err
794+
// if next page is empty, break the loop
795+
if resp == nil || resp.NextLink == "" {
796+
// No more pages to fetch. This is the normal loop exit condition.
797+
// It also acts as a safety stop if the current request failed.
798+
break
799+
}
800+
// Only update the token for the next page if we have a valid, non-empty link.
801+
requestOptions = []gitlab.RequestOptionFunc{
802+
gitlab.WithContext(ctx),
803+
gitlab.WithKeysetPaginationParameters(resp.NextLink),
770804
}
771805
}
772806

pkg/sources/gitlab/gitlab_integration_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616

1717
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
1818
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
19+
"github.com/trufflesecurity/trufflehog/v3/pkg/feature"
1920
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/credentialspb"
2021
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
2122
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
@@ -813,3 +814,77 @@ func TestSource_Enumerate_ProjectDetailsInChunkMetadata(t *testing.T) {
813814
t.Errorf("0 chunks scanned.")
814815
}
815816
}
817+
818+
// TestSource_Chunks_SimplifiedGitlabEnumeration enumerates GitLab projects
819+
// using a stored GitLab secret in GCP with the `UseSimplifiedGitlabEnumeration`
820+
// feature flag enabled. When enabled, the enumeration path is redirected to
821+
// `getAllProjectReposV2`, validating project listing via keyset pagination.
822+
func TestSource_Chunks_SimplifiedGitlabEnumeration(t *testing.T) {
823+
// Preserve and restore the feature flag to avoid cross-test contamination
824+
prev := feature.UseSimplifiedGitlabEnumeration.Load()
825+
// enable the simplified gitlab enumeration flag
826+
feature.UseSimplifiedGitlabEnumeration.Store(true)
827+
defer feature.UseSimplifiedGitlabEnumeration.Store(prev)
828+
829+
// Create a bounded context for the entire test
830+
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
831+
defer cancel()
832+
833+
// Retrieve test secret containing the GitLab token
834+
secret, err := common.GetTestSecret(ctx)
835+
require.NoError(t, err, "failed to access test secret")
836+
837+
token := secret.MustGetField("GITLAB_TOKEN")
838+
839+
// Initialize the GitLab source with token-based authentication
840+
s := Source{}
841+
conn, err := anypb.New(&sourcespb.GitLab{
842+
Credential: &sourcespb.GitLab_Token{
843+
Token: token,
844+
},
845+
})
846+
require.NoError(t, err)
847+
848+
err = s.Init(ctx, "enumerate gitlab projects with V2", 0, 0, false, conn, 10)
849+
require.NoError(t, err, "failed during Source.Init")
850+
851+
// Enumerate GitLab projects
852+
testReporter := sourcestest.TestReporter{}
853+
err = s.Enumerate(ctx, &testReporter)
854+
require.NoError(t, err, "enumeration should not fail")
855+
856+
// Ensure enumeration actually produced units
857+
require.NotEmpty(t, testReporter.Units, "enumeration returned no units")
858+
859+
// Clear project cache to force project-detail lookups during chunking
860+
clear(s.repoToProjCache.cache)
861+
862+
// Channel-based reporter to capture emitted chunks
863+
chunksCh := make(chan *sources.Chunk, 1)
864+
chanReporter := sources.ChanReporter{Ch: chunksCh}
865+
866+
// Chunk all enumerated units asynchronously
867+
go func() {
868+
defer close(chunksCh)
869+
for _, unit := range testReporter.Units {
870+
if err := s.ChunkUnit(ctx, unit, chanReporter); err != nil {
871+
t.Errorf("Source.ChunkUnit() error = %v", err)
872+
}
873+
}
874+
}()
875+
876+
// Validate produced chunks and their GitLab metadata
877+
gotChunks := false
878+
for chunk := range chunksCh {
879+
gotChunks = true
880+
881+
meta, ok := chunk.SourceMetadata.Data.(*source_metadatapb.MetaData_Gitlab)
882+
require.True(t, ok, "unexpected metadata type")
883+
884+
assert.NotZero(t, meta.Gitlab.ProjectId, "missing project ID in chunk metadata")
885+
assert.NotEmpty(t, meta.Gitlab.ProjectName, "missing project name in chunk metadata")
886+
}
887+
888+
// Ensure at least one chunk was produced
889+
assert.True(t, gotChunks, "expected at least one chunk, got zero")
890+
}

0 commit comments

Comments
 (0)