Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
22cc2a0
add log inspector
cx-artur-ribeiro Oct 28, 2025
6e99143
update number of workers as a test
cx-artur-ribeiro Oct 28, 2025
aa59703
Merge branch 'master' of https://github.com/Checkmarx/kics into log-a…
cx-artur-ribeiro Oct 29, 2025
10dbcf7
add more debug and a better tf handling for tests
cx-artur-ribeiro Oct 29, 2025
dac7290
add information on each file/folder analyzed for better memory calcul…
cx-artur-ribeiro Oct 30, 2025
5506077
add loc for better memory computation
cx-artur-ribeiro Oct 30, 2025
ef1203e
remove comments and debug statements
cx-artur-ribeiro Oct 30, 2025
e2c0bbf
fix lint
cx-artur-ribeiro Oct 30, 2025
ec00c87
remove useless comments used for debug only
cx-artur-ribeiro Oct 30, 2025
4fb5596
Merge branch 'master' into AST-116333-refactor-terraform-variables-re…
cx-artur-ribeiro Oct 30, 2025
c79cf70
re-add lint ignore to the correct line
cx-artur-ribeiro Oct 31, 2025
801d884
Merge branch 'AST-116333-refactor-terraform-variables-resolution' of …
cx-artur-ribeiro Oct 31, 2025
1663228
initialize FileStats properly for returnAnalyzedPaths
cx-artur-ribeiro Oct 31, 2025
c462c4c
add relevant tests for the new supported behaviour
cx-artur-ribeiro Oct 31, 2025
da61613
remove unnecessary counter
cx-artur-ribeiro Oct 31, 2025
6747c0b
Merge branch 'master' into AST-116333-refactor-terraform-variables-re…
cx-artur-ribeiro Oct 31, 2025
e56a63c
Merge branch 'master' into AST-116333-refactor-terraform-variables-re…
cx-artur-ribeiro Oct 31, 2025
b1d5e41
add comments removed by mistake
cx-artur-ribeiro Nov 4, 2025
5fd3a69
Merge branch 'master' into AST-116333-refactor-terraform-variables-re…
cx-artur-ribeiro Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 70 additions & 8 deletions pkg/analyzer/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ type analyzerInfo struct {
fallbackMinifiedFileLOC int
}

// fileTypeInfo is the per-file record a worker sends on the fileInfo
// channel so computeValues can aggregate per-platform statistics:
// the analyzed file's path, the platform type detected for it, and
// its lines-of-code count.
type fileTypeInfo struct {
	filePath string // path of the analyzed file
	fileType string // detected platform type (e.g. terraform, dockerfile, ansible)
	locCount int    // lines of code counted for this file
}

// Analyzer keeps all the relevant info for the function Analyze
type Analyzer struct {
Paths []string
Expand Down Expand Up @@ -318,13 +325,15 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
Types: make([]string, 0),
Exc: make([]string, 0),
ExpectedLOC: 0,
FileStats: make(map[string]model.FileStatistics),
}

var files []string
var wg sync.WaitGroup
// results is the channel shared by the workers that contains the types found
results := make(chan string)
locCount := make(chan int)
fileInfo := make(chan fileTypeInfo)
ignoreFiles := make([]string, 0)
projectConfigFiles := make([]string, 0)
done := make(chan bool)
Expand Down Expand Up @@ -374,7 +383,7 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
filePath: file,
fallbackMinifiedFileLOC: a.FallbackMinifiedFileLOC,
}
go a.worker(results, unwanted, locCount, &wg)
go a.worker(results, unwanted, locCount, fileInfo, &wg)
}

go func() {
Expand All @@ -383,27 +392,35 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
close(unwanted)
close(results)
close(locCount)
close(fileInfo)
}()
wg.Wait()
done <- true
}()

availableTypes, unwantedPaths, loc := computeValues(results, unwanted, locCount, done)
availableTypes, unwantedPaths, loc, fileStats := computeValues(results, unwanted, locCount, fileInfo, done)
multiPlatformTypeCheck(&availableTypes)
unwantedPaths = append(unwantedPaths, ignoreFiles...)
unwantedPaths = append(unwantedPaths, projectConfigFiles...)
returnAnalyzedPaths.Types = availableTypes
returnAnalyzedPaths.Exc = unwantedPaths
returnAnalyzedPaths.ExpectedLOC = loc
returnAnalyzedPaths.FileStats = fileStats
// stop metrics for file analyzer
metrics.Metric.Stop()
return returnAnalyzedPaths, nil
}

// worker determines the type of the file by ext (dockerfile and terraform)/content and
// writes the answer to the results channel
// writes the answer to the results channel and file info for statistics
// if no types were found, the worker will write the path of the file in the unwanted channel
func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- int, wg *sync.WaitGroup) { //nolint: gocyclo
func (a *analyzerInfo) worker( //nolint: gocyclo
results,
unwanted chan<- string,
locCount chan<- int,
fileInfo chan<- fileTypeInfo,
wg *sync.WaitGroup,
) {
defer func() {
if err := recover(); err != nil {
log.Warn().Msgf("Recovered from analyzing panic for file %s with error: %#v", a.filePath, err.(error).Error())
Expand All @@ -422,12 +439,14 @@ func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- i
if a.isAvailableType(dockerfile) {
results <- dockerfile
locCount <- linesCount
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: dockerfile, locCount: linesCount}
}
// Dockerfile (indirect identification)
case "possibleDockerfile", ".ubi8", ".debian":
if a.isAvailableType(dockerfile) && isDockerfile(a.filePath) {
results <- dockerfile
locCount <- linesCount
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: dockerfile, locCount: linesCount}
} else {
unwanted <- a.filePath
}
Expand All @@ -436,30 +455,34 @@ func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- i
if a.isAvailableType(terraform) {
results <- terraform
locCount <- linesCount
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: terraform, locCount: linesCount}
}
// Bicep
case ".bicep":
if a.isAvailableType(bicep) {
results <- arm
locCount <- linesCount
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: arm, locCount: linesCount}
}
// GRPC
case ".proto":
if a.isAvailableType(grpc) {
results <- grpc
locCount <- linesCount
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: grpc, locCount: linesCount}
}
// It could be Ansible Config or Ansible Inventory
case ".cfg", ".conf", ".ini":
if a.isAvailableType(ansible) {
results <- ansible
locCount <- linesCount
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: ansible, locCount: linesCount}
}
/* It could be Ansible, Buildah, CICD, CloudFormation, Crossplane, OpenAPI, Azure Resource Manager
Docker Compose, Knative, Kubernetes, Pulumi, ServerlessFW or Google Deployment Manager.
We also have FHIR's case which will be ignored since it's not a platform file.*/
case yaml, yml, json, sh:
a.checkContent(results, unwanted, locCount, linesCount, ext)
a.checkContent(results, unwanted, locCount, fileInfo, linesCount, ext)
}
}
}
Expand Down Expand Up @@ -500,7 +523,14 @@ func needsOverride(check bool, returnType, key, ext string) bool {

// checkContent will determine the file type by content when worker was unable to
// determine by ext, if no type was determined checkContent adds it to unwanted channel
func (a *analyzerInfo) checkContent(results, unwanted chan<- string, locCount chan<- int, linesCount int, ext string) {
func (a *analyzerInfo) checkContent(
results,
unwanted chan<- string,
locCount chan<- int,
fileInfo chan<- fileTypeInfo,
linesCount int,
ext string,
) {
typesFlag := a.typesFlag
excludeTypesFlag := a.excludeTypesFlag
// get file content
Expand Down Expand Up @@ -558,6 +588,7 @@ func (a *analyzerInfo) checkContent(results, unwanted chan<- string, locCount ch

results <- returnType
locCount <- linesCount
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: returnType, locCount: linesCount}
}

func checkReturnType(path, returnType, ext string, content []byte) string {
Expand Down Expand Up @@ -661,10 +692,21 @@ func checkForAnsibleHost(yamlContent model.Document) bool {

// computeValues computes expected Lines of Code to be scanned from locCount channel
// and creates the types and unwanted slices from the channels removing any duplicates
func computeValues(types, unwanted chan string, locCount chan int, done chan bool) (typesS, unwantedS []string, locTotal int) {
// also collects file statistics for memory calculation
func computeValues(
types,
unwanted chan string,
locCount chan int,
fileInfo chan fileTypeInfo,
done chan bool,
) (typesS, unwantedS []string, locTotal int, stats map[string]model.FileStatistics) {
var val int
unwantedSlice := make([]string, 0)
typeSlice := make([]string, 0)
stats = make(map[string]model.FileStatistics)

platformFilesInfo := make(map[string][]fileTypeInfo)

for {
select {
case i := <-locCount:
Expand All @@ -677,8 +719,28 @@ func computeValues(types, unwanted chan string, locCount chan int, done chan boo
if !utils.Contains(i, typeSlice) {
typeSlice = append(typeSlice, i)
}
case info := <-fileInfo:
platformFilesInfo[info.fileType] = append(platformFilesInfo[info.fileType], info)
case <-done:
return typeSlice, unwantedSlice, val
for platformType, filesInfo := range platformFilesInfo {
dirMap := make(map[string]int)
totalLOC := 0

for _, fileInfo := range filesInfo {
dir := filepath.Dir(fileInfo.filePath)
dirMap[dir]++
totalLOC += fileInfo.locCount
}

stats[platformType] = model.FileStatistics{
FileCount: len(filesInfo),
DirectoryCount: len(dirMap),
FilesByDir: dirMap,
TotalLOC: totalLOC,
}
}

return typeSlice, unwantedSlice, val, stats
}
}
}
Expand Down
129 changes: 129 additions & 0 deletions pkg/analyzer/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,3 +588,132 @@ func TestAnalyzer_Analyze(t *testing.T) {
})
}
}

// TestAnalyzer_FileStats verifies that Analyze populates
// AnalyzedPaths.FileStats with per-platform file counts, directory
// counts, total LOC, and a per-directory breakdown that is internally
// consistent with the overall file count.
func TestAnalyzer_FileStats(t *testing.T) {
	testCases := []struct {
		name                 string
		paths                []string
		typesFromFlag        []string
		excludeTypesFromFlag []string
		wantPlatformStats    map[string]platformFileStats
		gitIgnoreFileName    string
		excludeGitIgnore     bool
		MaxFileSize          int
	}{
		{
			name:                 "file_stats_nested_structure_with_multiple_platforms",
			paths:                []string{filepath.FromSlash("../../test/fixtures/analyzer_test/helm")},
			typesFromFlag:        []string{""},
			excludeTypesFromFlag: []string{""},
			wantPlatformStats: map[string]platformFileStats{
				"kubernetes": {
					fileCount: 3,
					dirCount:  2,
					totalLOC:  118,
				},
			},
			gitIgnoreFileName: "",
			excludeGitIgnore:  true,
			MaxFileSize:       -1,
		},
		{
			name:                 "file_stats_multiple_platforms_nested_directories",
			paths:                []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
			typesFromFlag:        []string{""},
			excludeTypesFromFlag: []string{""},
			wantPlatformStats: map[string]platformFileStats{
				"terraform": {
					fileCount: 1,
					dirCount:  1,
					totalLOC:  10,
				},
				"kubernetes": {
					fileCount: 4,
					dirCount:  3,
					totalLOC:  131,
				},
				"dockerfile": {
					fileCount: 1,
					dirCount:  1,
					totalLOC:  3,
				},
			},
			gitIgnoreFileName: "",
			excludeGitIgnore:  true,
			MaxFileSize:       -1,
		},
		{
			name:                 "file_stats_with_type_filter",
			paths:                []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
			typesFromFlag:        []string{"terraform", "kubernetes"},
			excludeTypesFromFlag: []string{""},
			wantPlatformStats: map[string]platformFileStats{
				"terraform": {
					fileCount: 1,
					dirCount:  1,
					totalLOC:  10,
				},
				"kubernetes": {
					fileCount: 6,
					dirCount:  3,
					totalLOC:  156,
				},
			},
			gitIgnoreFileName: "",
			excludeGitIgnore:  true,
			MaxFileSize:       -1,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			analyzer := &Analyzer{
				Paths:             tc.paths,
				Types:             tc.typesFromFlag,
				ExcludeTypes:      tc.excludeTypesFromFlag,
				Exc:               []string{""},
				ExcludeGitIgnore:  tc.excludeGitIgnore,
				GitIgnoreFileName: tc.gitIgnoreFileName,
				MaxFileSize:       tc.MaxFileSize,
			}

			got, err := Analyze(analyzer)
			require.NoError(t, err)
			require.NotNil(t, got.FileStats, "FileStats should not be nil")

			for platform, want := range tc.wantPlatformStats {
				stats, ok := got.FileStats[platform]
				require.True(t, ok, "FileStats should contain platform: %s", platform)

				require.Equal(t, want.fileCount, stats.FileCount,
					"wrong file count for platform %s", platform)
				require.Equal(t, want.dirCount, stats.DirectoryCount,
					"wrong directory count for platform %s", platform)
				require.Equal(t, want.totalLOC, stats.TotalLOC,
					"wrong total LOC for platform %s", platform)

				require.NotNil(t, stats.FilesByDir, "FilesByDir should not be nil")
				require.Len(t, stats.FilesByDir, want.dirCount,
					"wrong FilesByDir entries for platform %s", platform)

				// The per-directory counts must add up to the platform total.
				sum := 0
				for _, n := range stats.FilesByDir {
					sum += n
				}
				require.Equal(t, stats.FileCount, sum,
					"file count sum mismatch for platform %s", platform)
			}
		})
	}
}

// platformFileStats holds the expected per-platform aggregates that
// TestAnalyzer_FileStats compares against model.FileStatistics.
type platformFileStats struct {
	fileCount int // expected number of files detected for the platform
	dirCount  int // expected number of distinct directories holding those files
	totalLOC  int // expected sum of lines of code across the platform's files
}
9 changes: 5 additions & 4 deletions pkg/kics/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ import (
"regexp"
"sort"

sentryReport "github.com/Checkmarx/kics/v2/internal/sentry"
"github.com/Checkmarx/kics/v2/pkg/model"
"github.com/Checkmarx/kics/v2/pkg/parser/jsonfilter/parser"
"github.com/Checkmarx/kics/v2/pkg/utils"
"github.com/antlr4-go/antlr/v4"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/rs/zerolog/log"

sentryReport "github.com/Checkmarx/kics/v2/internal/sentry"
"github.com/Checkmarx/kics/v2/pkg/model"
"github.com/Checkmarx/kics/v2/pkg/parser/jsonfilter/parser"
"github.com/Checkmarx/kics/v2/pkg/utils"
)

var (
Expand Down
9 changes: 9 additions & 0 deletions pkg/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,15 @@ type AnalyzedPaths struct {
Types []string
Exc []string
ExpectedLOC int
FileStats map[string]FileStatistics
}

// FileStatistics contains file and directory counts per platform type,
// aggregated by the analyzer (used for memory calculation per the
// analyzer's computeValues).
type FileStatistics struct {
	FileCount      int            // number of files detected for the platform
	DirectoryCount int            // number of distinct directories containing those files
	FilesByDir     map[string]int // files per directory, keyed by directory path
	TotalLOC       int            // total lines of code across the platform's files
}

// ResolvedFileSplit is a struct that contains the information of a resolved file, the path and the lines of the file
Expand Down
Loading
Loading