trivy os软件包扫描原理分析
具体可以基于之前的博客来做
基于trivy获取基础镜像
参数修改一下:
cliOpt.ListAllPkgs = true
结果中会带有如下格式的结果:
"Results":[{"Target":"192.168.1.94:443/test22/centos:7 (centos 7.9.2009)","Class":"os-pkgs","Type":"centos","Packages":[{"ID":"acl@2.2.51-15.el7.x86_64","Name":"acl","Version":"2.2.51","Release":"15.el7","Arch":"x86_64","SrcName":"acl","SrcVersion":"2.2.51","SrcRelease":"15.el7","Licenses":["GPLv2+"],"Maintainer":"CentOS","DependsOn":["glibc@2.17-317.el7.x86_64","libacl@2.2.51-15.el7.x86_64","libattr@2.4.46-13.el7.x86_64"],"Layer":{"DiffID":"sha256:174f5685490326fc0a1c0f5570b8663732189b327007e47ff13d2ca59673db02"},"Type":"rpm"},{"ID":"audit-libs@2.8.5-4.el7.x86_64","Name":"audit-libs","Version":"2.8.5","Release":"4.el7","Arch":"x86_64","SrcName":"audit","SrcVersion":"2.8.5","SrcRelease":"4.el7","Licenses":["LGPLv2+"],"Maintainer":"CentOS","DependsOn":["glibc@2.17-317.el7.x86_64","libcap-ng@0.7.5-4.el7.x86_64"],"Layer":{"DiffID":"sha256:174f5685490326fc0a1c0f5570b8663732189b327007e47ff13d2ca59673db02"},"Type":"rpm"},
......
其中的原理就是根据对应的软件包信息文件来读取。前面的调用路径与基于trivy获取基础镜像一致。都是通过analyzer.RegisterAnalyzer函数将自己注册进analyzers的map中。最后就可以去获取镜像的软件包列表。
os的软件包代码都在pkg/fanal/analyzer/pkg/中。这里面有三个目录apk、dpkg、rpm。它们分别对应于alpine、ubuntu(debian)、centos操作系统。
我们以ubuntu为例来分析。系统启动时,会将dpkg分析器注册进来。代码如下:
func init() {analyzer.RegisterAnalyzer(&dpkgAnalyzer{})
}
根据前面关于基础镜像的博客,我们知道,只有Required返回成功才会进行分析。所以我们先看这个函数的代码:
const (analyzerVersion = 3statusFile = "var/lib/dpkg/status"statusDir = "var/lib/dpkg/status.d/"infoDir = "var/lib/dpkg/info/"
)
......func (a dpkgAnalyzer) Required(filePath string, _ os.FileInfo) bool {dir, fileName := filepath.Split(filePath)if a.isListFile(dir, fileName) || filePath == statusFile {return true}if dir == statusDir {return true}return false
}
主要逻辑就是通过检查当前文件是否是var/lib/dpkg/status或者当前为目录的话,就判定是否是var/lib/dpkg/status.d。很明显,这里考虑了一个问题,镜像中的文件是占大多数的,所以先检查文件名是否相同,对性能会好点。匹配成功返回true。
如果成功,就会进入Analyze函数。源码如下:
func (a dpkgAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {scanner := bufio.NewScanner(input.Content)if a.isListFile(filepath.Split(input.FilePath)) {return a.parseDpkgInfoList(scanner)}return a.parseDpkgStatus(input.FilePath, scanner)
}
如果是文件,则调用parseDpkgInfoList函数去解析软件包,如果是目录,则调用parseDpkgStatus,具体代码我们往下看。
parseDpkgInfoList函数:
// parseDpkgStatus parses /var/lib/dpkg/info/*.list
func (a dpkgAnalyzer) parseDpkgInfoList(scanner *bufio.Scanner) (*analyzer.AnalysisResult, error) {var installedFiles []stringvar previous stringfor scanner.Scan() {//一行一行的读取current := scanner.Text()if current == "/." {continue}// Add the file if it is not directory.// e.g.// /usr/sbin// /usr/sbin/tarcat//// In the above case, we should take only /usr/sbin/tarcat since /usr/sbin is a directoryif !strings.HasPrefix(current, previous+"/") {//这里去除了目录信息,将所有文件都加入到切片中installedFiles = append(installedFiles, previous)}previous = current}// Add the last fileinstalledFiles = append(installedFiles, previous)if err := scanner.Err(); err != nil {return nil, xerrors.Errorf("scan error: %w", err)}return &analyzer.AnalysisResult{SystemInstalledFiles: installedFiles,}, nil
}
parseDpkgStatus函数:
// parseDpkgStatus parses /var/lib/dpkg/status or /var/lib/dpkg/status/*
//这里注释说明数据来源,我们以/var/lib/dpkg/status为例,来分析下面的代码,数据格式在下方有展示
func (a dpkgAnalyzer) parseDpkgStatus(filePath string, scanner *bufio.Scanner) (*analyzer.AnalysisResult, error) {var pkg *types.Packagepkgs := map[string]*types.Package{}//创建一个临时的package map,key为通过软件名和版本构成的IDpkgIDs := map[string]string{}//以软件名为key,ID为value的mapfor scanner.Scan() {line := strings.TrimSpace(scanner.Text())if line == "" {//软件包的信息以空行结束,如果遇到空行说明当前软件包的解析结束,跳过,为下一个解析做好准备continue}pkg = a.parseDpkgPkg(scanner)//重点在这个函数中,开始解析软件包if pkg != nil {pkgs[pkg.ID] = pkgpkgIDs[pkg.Name] = pkg.ID}}if err := scanner.Err(); err != nil {return nil, xerrors.Errorf("scan error: %w", err)}a.consolidateDependencies(pkgs, pkgIDs)//依赖处理return &analyzer.AnalysisResult{PackageInfos: []types.PackageInfo{{FilePath: filePath,Packages: lo.MapToSlice(pkgs, func(_ string, p *types.Package) types.Package {return *p}),//将结果格式化成切片返回},},}, nil
}
/var/lib/dpkg/status的部分内容
Package: accountsservice
Status: install ok installed
Priority: optional
Section: admin
Installed-Size: 452
Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
Architecture: amd64
Version: 0.6.55-0ubuntu12~20.04.5
Depends: dbus, libaccountsservice0 (= 0.6.55-0ubuntu12~20.04.5), libc6 (>= 2.4), libglib2.0-0 (>= 2.44), libpolkit-gobject-1-0 (>= 0.99)
Suggests: gnome-control-center
Conffiles:/etc/dbus-1/system.d/org.freedesktop.Accounts.conf 06247d62052029ead7d9ec1ef9457f42
Description: query and manipulate user account informationThe AccountService project provides a set of D-Businterfaces for querying and manipulating user accountinformation and an implementation of these interfaces,based on the useradd, usermod and userdel commands.
Homepage: https://www.freedesktop.org/wiki/Software/AccountsService/
Original-Maintainer: Debian freedesktop.org maintainers <pkg-freedesktop-maintainers@lists.alioth.debian.org>Package: accountsservice-ubuntu-schemas
Status: install ok installed
Priority: optional
Section: gnome
Installed-Size: 44
Maintainer: Ubuntu Desktop Team <ubuntu-desktop@lists.ubuntu.com>
Architecture: all
Multi-Arch: foreign
Source: gsettings-ubuntu-touch-schemas
Version: 0.0.7+17.10.20170922-0ubuntu1
Replaces: accountsservice-ubuntu-touch-schemas (<= 0.0.1+14.04.20140130.1-0ubuntu1), ubuntu-system-settings (<= 0.1+14.04.20140130-0ubuntu1)
Depends: accountsservice
Breaks: accountsservice-ubuntu-touch-schemas (<= 0.0.1+14.04.20140130.1-0ubuntu1), ubuntu-system-settings (<= 0.1+14.04.20140130-0ubuntu1)
Description: AccountsService schemas for Ubuntuaccountsservice-ubuntu-schemas contains a collection of AccountsService vendorextension schemas used by various components of an Ubuntu environment.
Homepage: https://launchpad.net/gsettings-ubuntu-touch-schemasPackage: acl
Status: install ok installed
Priority: optional
Section: utils
Installed-Size: 192
Maintainer: Ubuntu Developers <ubuntu-devel-discuss@lists.ubuntu.com>
Architecture: amd64
Multi-Arch: foreign
Version: 2.2.53-6
Depends: libacl1 (= 2.2.53-6), libc6 (>= 2.14)
Description: access control list - utilitiesThis package contains the getfacl and setfacl utilities needed for
......
parseDpkgPkg函数:
func (a dpkgAnalyzer) parseDpkgPkg(scanner *bufio.Scanner) (pkg *types.Package) {var (name stringversion stringsourceName stringdependencies []stringisInstalled boolsourceVersion stringmaintainer string)isInstalled = truefor {line := strings.TrimSpace(scanner.Text())if line == "" {break}switch {case strings.HasPrefix(line, "Package: ")://对照上面的例子,这里就是软件名name = strings.TrimSpace(strings.TrimPrefix(line, "Package: "))case strings.HasPrefix(line, "Source: "):// Source line (Optional)// Gives the name of the source package// May also specifies a versionsrcCapture := dpkgSrcCaptureRegexp.FindAllStringSubmatch(line, -1)[0]md := map[string]string{}for i, n := range srcCapture {md[dpkgSrcCaptureRegexpNames[i]] = strings.TrimSpace(n)}sourceName = md["name"]if md["version"] != "" {sourceVersion = md["version"]}case strings.HasPrefix(line, "Version: ")://版本version = strings.TrimPrefix(line, "Version: ")case strings.HasPrefix(line, "Status: "):isInstalled = a.parseStatus(line)case strings.HasPrefix(line, "Depends: ")://依赖dependencies = a.parseDepends(line)case strings.HasPrefix(line, "Maintainer: ")://维护者maintainer = strings.TrimSpace(strings.TrimPrefix(line, "Maintainer: "))}if !scanner.Scan() {break}}if name == "" || version == "" || !isInstalled {return nil} else if !debVersion.Valid(version) {log.Logger.Warnf("Invalid Version Found : OS %s, Package %s, Version %s", "debian", name, version)return nil}pkg = &types.Package{ID: a.pkgID(name, version),Name: name,Version: version,DependsOn: dependencies, // Will be consolidated laterMaintainer: maintainer,}//将解析结果保存到pkg中,// Source version and names are computed from binary package names and versions// in dpkg.// Source package name:// https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/pkg-format.c#n338// Source package version:// https://git.dpkg.org/cgit/dpkg/dpkg.git/tree/lib/dpkg/pkg-format.c#n355if sourceName == "" {sourceName = name}if sourceVersion == "" {sourceVersion = version}if !debVersion.Valid(sourceVersion) {log.Logger.Warnf("Invalid Version Found : OS %s, Package %s, Version %s", "debian", sourceName, sourceVersion)return pkg}pkg.SrcName = sourceNamepkg.SrcVersion = sourceVersionreturn pkg
}
然后调用AnalysisResult的Merge函数将PackageInfos合并,继而调用其Sort函数进行排序。然后将结果保存在缓存中,这里是本地缓存。最后在Scanner的ScanArtifact中通过调用s.driver.Scan将结果格式化成types.Results,这里的driver会是local scanner,具体代码如下:
// Scan scans the artifact and return results.
func (s Scanner) Scan(ctx context.Context, target, artifactKey string, blobKeys []string, options types.ScanOptions) (types.Results, ftypes.OS, error) {artifactDetail, err := s.applier.ApplyLayers(artifactKey, blobKeys)switch {case errors.Is(err, analyzer.ErrUnknownOS):log.Logger.Debug("OS is not detected.")// Packages may contain OS-independent binary information even though OS is not detected.if len(artifactDetail.Packages) != 0 {artifactDetail.OS = ftypes.OS{Family: "none"}}// If OS is not detected and repositories are detected, we'll try to use repositories as OS.if artifactDetail.Repository != nil {log.Logger.Debugf("Package repository: %s %s", artifactDetail.Repository.Family, artifactDetail.Repository.Release)log.Logger.Debugf("Assuming OS is %s %s.", artifactDetail.Repository.Family, artifactDetail.Repository.Release)artifactDetail.OS = ftypes.OS{Family: artifactDetail.Repository.Family,Name: artifactDetail.Repository.Release,}}case errors.Is(err, analyzer.ErrNoPkgsDetected):log.Logger.Warn("No OS package is detected. Make sure you haven't deleted any files that contain information about the installed packages.")log.Logger.Warn(`e.g. files under "/lib/apk/db/", "/var/lib/dpkg/" and "/var/lib/rpm"`)case err != nil:return nil, ftypes.OS{}, xerrors.Errorf("failed to apply layers: %w", err)}var eosl boolvar results, pkgResults types.Results// Fill OS packages and language-specific packagesif options.ListAllPackages {//这里就是我们刚开始说的那个标志,如果为true,进行整合if res := s.osPkgsToResult(target, artifactDetail, options); res != nil {pkgResults = append(pkgResults, *res)}pkgResults = append(pkgResults, s.langPkgsToResult(artifactDetail)...)}// Scan packages for vulnerabilitiesif options.Scanners.Enabled(types.VulnerabilityScanner) {var vulnResults types.ResultsvulnResults, eosl, err = s.scanVulnerabilities(target, artifactDetail, options)if err != nil {return nil, ftypes.OS{}, xerrors.Errorf("failed to detect vulnerabilities: %w", err)}artifactDetail.OS.Eosl = eosl// Merge package results into vulnerability resultsmergedResults := s.fillPkgsInVulns(pkgResults, vulnResults)results = append(results, mergedResults...)} else {// If vulnerability scanning is not enabled, it just adds package results.results = append(results, pkgResults...)}// Scan IaC config filesif ShouldScanMisconfigOrRbac(options.Scanners) {configResults := s.MisconfsToResults(artifactDetail.Misconfigurations)results = append(results, configResults...)}// Scan secretsif options.Scanners.Enabled(types.SecretScanner) {secretResults := s.secretsToResults(artifactDetail.Secrets)results = append(results, secretResults...)}// Scan licensesif options.Scanners.Enabled(types.LicenseScanner) {licenseResults := s.scanLicenses(artifactDetail, options.LicenseCategories)results = append(results, licenseResults...)}// Scan misconfigurations on container image configif options.ImageConfigScanners.Enabled(types.MisconfigScanner) {if im := artifactDetail.ImageConfig.Misconfiguration; im != nil {im.FilePath = target // Set the target name to the file path as container image config is not a real file.results = append(results, s.MisconfsToResults([]ftypes.Misconfiguration{*im})...)}}// Scan secrets on container image configif options.ImageConfigScanners.Enabled(types.SecretScanner) {if is := artifactDetail.ImageConfig.Secret; is != nil {is.FilePath = target // Set the target name to the file path as container image config is not a real file.results = append(results, s.secretsToResults([]ftypes.Secret{*is})...)}}// For WASM plugins and custom analyzersif len(artifactDetail.CustomResources) != 0 {results = append(results, types.Result{Class: types.ClassCustom,CustomResources: artifactDetail.CustomResources,})}for i := range results {// Fill vulnerability detailss.vulnClient.FillInfo(results[i].Vulnerabilities)}// Post scanningresults, err = post.Scan(ctx, results)if err != nil {return nil, ftypes.OS{}, xerrors.Errorf("post scan error: %w", err)}return results, artifactDetail.OS, nil
}
osPkgsToResult代码:
func (s Scanner) osPkgsToResult(target string, detail ftypes.ArtifactDetail, options types.ScanOptions) *types.Result {if len(detail.Packages) == 0 || !detail.OS.Detected() {return nil}pkgs := detail.Packagesif options.ScanRemovedPackages {pkgs = mergePkgs(pkgs, detail.ImageConfig.Packages)//主要是去重}sort.Sort(pkgs)return &types.Result{Target: fmt.Sprintf("%s (%s %s)", target, detail.OS.Family, detail.OS.Name),Class: types.ClassOSPkg,//标识为os的软件包Type: detail.OS.Family,//os namePackages: pkgs,}
}
至此,代码逻辑基本讲解完了。