From fd22a3709a4afcd7034fa839d44ff9b072c82220 Mon Sep 17 00:00:00 2001 From: Ashley Davis Date: Mon, 2 Feb 2026 13:54:30 +0000 Subject: [PATCH] add ability to send encrypted secrets to disco backend For now, this uses a hardcoded RSA key for which I threw away the private key, since we don't have the ability to pull JWKs yet This also includes a few test tweaks to help make this easier, and an example folder which produces an output.json showing how this can work Signed-off-by: Ashley Davis --- .gitignore | 2 + README.md | 1 + deploy/charts/disco-agent/README.md | 7 + .../disco-agent/templates/deployment.yaml | 2 + deploy/charts/disco-agent/values.schema.json | 8 + deploy/charts/disco-agent/values.yaml | 6 + examples/encrypted-secrets/README.md | 47 ++++ examples/encrypted-secrets/config.yaml | 41 +++ examples/encrypted-secrets/test.sh | 65 +++++ internal/envelope/rsa/keys.go | 32 +++ internal/envelope/rsa/keys_test.go | 21 ++ internal/envelope/types.go | 26 +- pkg/agent/run.go | 20 ++ pkg/client/client_cyberark.go | 2 + pkg/datagatherer/k8sdynamic/dynamic.go | 93 ++++++- pkg/datagatherer/k8sdynamic/dynamic_test.go | 260 +++++++++++++++--- 16 files changed, 585 insertions(+), 48 deletions(-) create mode 100644 examples/encrypted-secrets/README.md create mode 100644 examples/encrypted-secrets/config.yaml create mode 100755 examples/encrypted-secrets/test.sh diff --git a/.gitignore b/.gitignore index ec041a7c..3068954f 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ predicate.json _bin .envrc + +examples/encrypted-secrets/output.json diff --git a/README.md b/README.md index 94d3ed0c..6a433fad 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ go run . agent \ > - [./agent.yaml](./agent.yaml). > - [./examples/one-shot-secret.yaml](./examples/one-shot-secret.yaml). > - [./examples/cert-manager-agent.yaml](./examples/cert-manager-agent.yaml). +> - [./examples/encrypted-secrets](./examples/encrypted-secrets) - Send encrypted Kubernetes secrets to CyberArk. You might also want to run a local echo server to monitor requests sent by the agent: diff --git a/deploy/charts/disco-agent/README.md b/deploy/charts/disco-agent/README.md index 2b4a69a6..6e3ebb84 100644 --- a/deploy/charts/disco-agent/README.md +++ b/deploy/charts/disco-agent/README.md @@ -295,6 +295,13 @@ This cluster name will be associated with the data that the agent uploads to the A short description of the cluster where the agent is deployed (optional). This description will be associated with the data that the agent uploads to the Discovery and Context service. The description may include contact information such as the email address of the cluster administrator, so that any problems and risks identified by the Discovery and Context service can be communicated to the people responsible for the affected secrets. +#### **config.sendSecrets** ~ `bool` +> Default value: +> ```yaml +> false +> ``` + +Enable sending of Secret data to CyberArk, in addition to the metadata. When enabled, Secret data is encrypted using envelope encryption using a key managed by CyberArk. Default: false (but default will change to true for a future release) #### **authentication.secretName** ~ `string` > Default value: > ```yaml diff --git a/deploy/charts/disco-agent/templates/deployment.yaml b/deploy/charts/disco-agent/templates/deployment.yaml index 74390e81..549fc88e 100644 --- a/deploy/charts/disco-agent/templates/deployment.yaml +++ b/deploy/charts/disco-agent/templates/deployment.yaml @@ -76,6 +76,8 @@ spec: name: {{ .Values.authentication.secretName }} key: ARK_DISCOVERY_API optional: true + - name: ARK_SEND_SECRETS + value: {{ .Values.config.sendSecrets | default "false" | quote }} {{- with .Values.http_proxy }} - name: HTTP_PROXY value: {{ . }} diff --git a/deploy/charts/disco-agent/values.schema.json b/deploy/charts/disco-agent/values.schema.json index 35fa2d40..f172dea7 100644 --- a/deploy/charts/disco-agent/values.schema.json +++ b/deploy/charts/disco-agent/values.schema.json @@ -118,6 +118,9 @@ }, "period": { "$ref": "#/$defs/helm-values.config.period" + }, + "sendSecrets": { + "$ref": "#/$defs/helm-values.config.sendSecrets" } }, "type": "object" @@ -148,6 +151,11 @@ "description": "Push data every 12 hours unless changed.", "type": "string" }, + "helm-values.config.sendSecrets": { + "default": false, + "description": "Enable sending of Secret data to CyberArk, in addition to the metadata. When enabled, Secret data is encrypted using envelope encryption using a key managed by CyberArk. Default: false (but default will change to true for a future release)", + "type": "boolean" + }, "helm-values.extraArgs": { "default": [], "description": "extraArgs:\n- --logging-format=json\n- --log-level=6 # To enable HTTP request logging", diff --git a/deploy/charts/disco-agent/values.yaml b/deploy/charts/disco-agent/values.yaml index be8c71b8..070f284d 100644 --- a/deploy/charts/disco-agent/values.yaml +++ b/deploy/charts/disco-agent/values.yaml @@ -154,6 +154,12 @@ config: # be communicated to the people responsible for the affected secrets. clusterDescription: "" + # Enable sending of Secret data to CyberArk, in addition to the metadata. + # When enabled, Secret data is encrypted using envelope encryption using + # a key managed by CyberArk. + # Default: false (but default will change to true for a future release) + sendSecrets: false + authentication: secretName: agent-credentials diff --git a/examples/encrypted-secrets/README.md b/examples/encrypted-secrets/README.md new file mode 100644 index 00000000..3276b7fe --- /dev/null +++ b/examples/encrypted-secrets/README.md @@ -0,0 +1,47 @@ +# Encrypted Secrets Example + +This example demonstrates how to use the disco agent to gather Kubernetes secrets and encrypt their data fields. + +## Overview + +When the `ARK_SEND_SECRETS` environment variable is set to `"true"`, the disco agent will: + +0. Fetch an encryption key from the configured endpoint (if running in production) or use a local key for testing +1. Discover Kubernetes secrets in your cluster (excluding common system secret types) +2. Encrypt each secret's data fields using RSA envelope encryption with JWE (JSON Web Encryption) format +3. If running in production, send the encrypted secrets to the configured endpoint; otherwise, write them to `output.json` for testing + +The encryption uses: + +- **Key Algorithm**: RSA-OAEP-256 (for encrypting the content encryption key) +- **Content Encryption**: AES-256-GCM (for encrypting the actual secret data) +- **Format**: JWE Compact Serialization + +Metadata (names, namespaces, labels, annotations) remains in plaintext for discovery purposes, while the sensitive secret data is encrypted. Some keys in Secret data fields are also preserved in the `data` section, for backwards compatibility. + +## Prerequisites + +1. A running Kubernetes cluster with secrets to discover +3. Go installed + +## Configuration File + +The `config.yaml` file configures: + +- The data gatherer to collect Kubernetes secrets +- Field selectors to exclude system secrets (service account tokens, docker configs, etc.) +- The cluster ID and organization ID for grouping data + +## Running the Example + +Test the agent locally by running this script: + +```bash +./test.sh +``` + +This will: + +- Connect to your current Kubernetes context +- Gather all non-system secrets +- Write the raw data to `output.json` diff --git a/examples/encrypted-secrets/config.yaml b/examples/encrypted-secrets/config.yaml new file mode 100644 index 00000000..b966e4d1 --- /dev/null +++ b/examples/encrypted-secrets/config.yaml @@ -0,0 +1,41 @@ +# encrypted-secrets config.yaml +# +# An example configuration file demonstrating how to use the disco agent +# to send encrypted secrets to CyberArk Discovery & Context. +# +# The agent will: +# 1. Discover Kubernetes secrets in the cluster +# 2. Encrypt the secret data fields using RSA envelope encryption (JWE format) +# 3. Upload the encrypted secrets to CyberArk Discovery & Context +# +# Example usage: +# +# export ARK_SUBDOMAIN="your-subdomain" +# export ARK_USERNAME="your-username" +# export ARK_SECRET="your-secret" +# export ARK_SEND_SECRETS="true" +# +# go run . agent \ +# --agent-config-file examples/encrypted-secrets/config.yaml \ +# --one-shot \ +# --output-path output.json +# +organization_id: "my-organization" +cluster_id: "my_cluster" +period: 1m +data-gatherers: +- kind: "k8s-dynamic" + name: "k8s/secrets" + config: + resource-type: + version: v1 + resource: secrets + # Filter out common system secret types to focus on application secrets + field-selectors: + - type!=kubernetes.io/service-account-token + - type!=kubernetes.io/dockercfg + - type!=kubernetes.io/dockerconfigjson + - type!=kubernetes.io/basic-auth + - type!=kubernetes.io/ssh-auth + - type!=bootstrap.kubernetes.io/token + - type!=helm.sh/release.v1 diff --git a/examples/encrypted-secrets/test.sh b/examples/encrypted-secrets/test.sh new file mode 100755 index 00000000..506001ea --- /dev/null +++ b/examples/encrypted-secrets/test.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# test.sh - Test script for the encrypted secrets example +# +# This script demonstrates running the disco agent with encrypted secrets enabled. +# It will run in one-shot mode and output to a local file for inspection. + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +echo -e "${GREEN}=== Encrypted Secrets Example Test ===${NC}\n" + +echo -e "${GREEN}Testing agent with Kubernetes secrets${NC}" +echo "" + +# Enable encrypted secrets +export ARK_SEND_SECRETS="true" + +# Check Kubernetes connectivity +if ! kubectl cluster-info &> /dev/null; then + echo -e "${RED}Error: Unable to connect to Kubernetes cluster${NC}" + echo "Please ensure your kubeconfig is configured correctly." + exit 1 +fi + +echo -e "${GREEN}✓ Connected to Kubernetes cluster${NC}" +CONTEXT=$(kubectl config current-context) +echo " Context: ${CONTEXT}" +echo "" + +# Check for secrets +SECRET_COUNT=$(kubectl get secrets --all-namespaces --no-headers 2>/dev/null | wc -l | tr -d ' ') +echo "Found ${SECRET_COUNT} secrets in cluster" +echo "" + +# Run the agent in one-shot mode with output to file +OUTPUT_FILE="output.json" +echo -e "${GREEN}Running disco agent with encrypted secrets enabled...${NC}" +echo "Command: go run ../.. agent --agent-config-file config.yaml --one-shot --output-path ${OUTPUT_FILE}" +echo "" + +if go run ../.. agent \ + --agent-config-file config.yaml \ + --one-shot \ + --output-path "${OUTPUT_FILE}"; then + + echo "" + echo -e "${GREEN}✓ Agent completed successfully${NC}" + + # Check if output file was created + if [ -f "${OUTPUT_FILE}" ]; then + echo -e "${GREEN}✓ Output file created: ${OUTPUT_FILE}${NC}" + else + echo -e "${RED}✗ Output file was not created${NC}" + exit 1 + fi +else + echo "" + echo -e "${RED}✗ Agent failed${NC}" + exit 1 +fi diff --git a/internal/envelope/rsa/keys.go b/internal/envelope/rsa/keys.go index 3cf5e96b..7c79db69 100644 --- a/internal/envelope/rsa/keys.go +++ b/internal/envelope/rsa/keys.go @@ -10,6 +10,25 @@ import ( // This file contains helpers for loading keys. In practice we'll retrieve keys in some format from a DisCo endpoint +const ( + // HardcodedPublicKeyPEM contains a temporary hardcoded RSA public key (2048-bit) for envelope encryption. + // This is a TEMPORARY solution for initial development and testing. + // TODO: Replace with dynamic key fetching from CyberArk Discovery & Context API. + HardcodedPublicKeyPEM = `-----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAoeq+dk4aoGdV9xjrnGJt +VbUh5jvkQgynkP+9Ph2NVeoasXWqYOmOVeKOI7Yr58W/L8Mro6C22iSEJrPFgPF6 +t+RJsLAsAY6w1Pocq16COeelAWtxhHQGXt77WQKk0kmwhOJZ4VSeiQC4hWLUnq4N +Ft7lwLw/50opTXLuSErrwec/bEV7G/Xp11BMsHGEL7dzpwWAfIrbCEomyWrO/L6p +O3SAgYMdfup5ddnszeCU2FbFQziOkuMLOyir91XXk8wgdSy4IGAEGpwNx88i8fuj +Qafze2aGWUtpWlOEQPP8lH2cj2TGUgLxGITbczJRcwuGIoJBOzAmPDWi/bapj4b6 +zQIDAQAB +-----END PUBLIC KEY-----` + + // hardcodedUID is a temporary hardcoded UID associated with the hardcoded public key + // It was randomly generated with the macOS "uuidgen" command + hardcodedUID = "A39798E6-8CE7-4E6E-9CF6-24A3C923B3A7" +) + // LoadPublicKeyFromPEM parses an RSA public key from PEM-encoded bytes. // The PEM block should be of type "PUBLIC KEY" or "RSA PUBLIC KEY". func LoadPublicKeyFromPEM(pemBytes []byte) (*rsa.PublicKey, error) { @@ -55,3 +74,16 @@ func LoadPublicKeyFromPEMFile(path string) (*rsa.PublicKey, error) { return LoadPublicKeyFromPEM(pemBytes) } + +// LoadHardcodedPublicKey loads and parses the hardcoded RSA public key. +// Returns a hardcoded UID associated with the key. +// This is a temporary solution for initial development and testing. +// Returns an error if the hardcoded key is invalid or cannot be parsed. +func LoadHardcodedPublicKey() (*rsa.PublicKey, string, error) { + key, err := LoadPublicKeyFromPEM([]byte(HardcodedPublicKeyPEM)) + if err != nil { + return nil, "", err + } + + return key, hardcodedUID, nil +} diff --git a/internal/envelope/rsa/keys_test.go b/internal/envelope/rsa/keys_test.go index cccdb475..83f86e19 100644 --- a/internal/envelope/rsa/keys_test.go +++ b/internal/envelope/rsa/keys_test.go @@ -142,3 +142,24 @@ func TestLoadPublicKeyFromPEMFile_InvalidContent(t *testing.T) { require.Error(t, err) require.Nil(t, key) } + +func TestLoadHardcodedPublicKey_CanBeUsedWithEncryptor(t *testing.T) { + // Test that the hardcoded key can be used to create an encryptor + // First, test that the key can be loaded successfully + key, uid, err := internalrsa.LoadHardcodedPublicKey() + require.NoError(t, err) + require.NotNil(t, key) + require.NotEmpty(t, uid) + + encryptor, err := internalrsa.NewEncryptor(uid, key) + require.NoError(t, err) + require.NotNil(t, encryptor) + + // Test that the encryptor can encrypt data + testData := []byte("test data for encryption") + encryptedData, err := encryptor.Encrypt(testData) + require.NoError(t, err) + require.NotNil(t, encryptedData) + require.NotEmpty(t, encryptedData.Data) + require.Equal(t, "JWE-RSA", encryptedData.Type) +} diff --git a/internal/envelope/types.go b/internal/envelope/types.go index 19b644bf..b458f35d 100644 --- a/internal/envelope/types.go +++ b/internal/envelope/types.go @@ -1,11 +1,33 @@ package envelope +import "encoding/json" + // EncryptedData represents encrypted data along with metadata about the encryption type. type EncryptedData struct { // Data contains the encrypted payload - Data []byte + Data []byte `json:"data"` // Type indicates the encryption format (e.g., "JWE-RSA") - Type string + Type string `json:"type"` +} + +// ToMap converts the EncryptedData struct to a map representation. Since we store data as an "_encryptedData" field in +// a Kubernetes unstructured object, passing a raw struct would cause a panic due to the behaviour of +// https://pkg.go.dev/k8s.io/apimachinery/pkg/runtime#DeepCopyJSONValue +// Passing a map to unstructured.SetNestedField avoids this issue. +func (ed *EncryptedData) ToMap() map[string]any { + marshalled, err := json.Marshal(ed) + if err != nil { + return nil + } + + var out map[string]any + + err = json.Unmarshal(marshalled, &out) + if err != nil { + return nil + } + + return out } // Encryptor performs envelope encryption on arbitrary data. diff --git a/pkg/agent/run.go b/pkg/agent/run.go index 77ce57c1..fa8a7c55 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -31,6 +31,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "github.com/jetstack/preflight/api" + "github.com/jetstack/preflight/internal/envelope/rsa" "github.com/jetstack/preflight/pkg/client" "github.com/jetstack/preflight/pkg/datagatherer" "github.com/jetstack/preflight/pkg/datagatherer/k8sdynamic" @@ -181,6 +182,25 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { if isDynamicGatherer { dynDg.ExcludeAnnotKeys = config.ExcludeAnnotationKeysRegex dynDg.ExcludeLabelKeys = config.ExcludeLabelKeysRegex + + // Check if secret encryption is enabled via environment variable + // When enabled, secret data will be kept for encryption instead of being redacted + encryptSecrets := strings.ToLower(os.Getenv("ARK_SEND_SECRETS")) + + if encryptSecrets == "true" { + // TODO(@SgtCoDFish): this will fetch a key from JWKS endpoint when that endpoint is available + key, keyID, err := rsa.LoadHardcodedPublicKey() + if err == nil { + encryptor, err := rsa.NewEncryptor(keyID, key) + if err == nil { + dynDg.Encryptor = encryptor + } else { + log.Error(err, "Failed to create encryptor for secret encryption, secrets will not be sent to backend") + } + } else { + log.Error(err, "Failed to load public key for secret encryption, secrets will not be sent to backend") + } + } } log.V(logs.Debug).Info("Starting DataGatherer", "name", dgConfig.Name) diff --git a/pkg/client/client_cyberark.go b/pkg/client/client_cyberark.go index 735313bd..18e7ce09 100644 --- a/pkg/client/client_cyberark.go +++ b/pkg/client/client_cyberark.go @@ -35,6 +35,8 @@ var _ Client = &CyberArkClient{} // NewCyberArk initializes a CyberArk client using configuration from environment variables. // It requires an HTTP client to be provided, which will be used for making requests. // The environment variables ARK_SUBDOMAIN, ARK_USERNAME, and ARK_SECRET must be set for authentication. +// Sending secrets is controlled by the ARK_SEND_SECRETS environment variable (defaults to "false"). +// If sending secrets is enabled, the hardcoded public key will be loaded and an encryptor will be created. // If the configuration is invalid or missing, an error is returned. func NewCyberArk(httpClient *http.Client) (*CyberArkClient, error) { configLoader := cyberark.LoadClientConfigFromEnvironment diff --git a/pkg/datagatherer/k8sdynamic/dynamic.go b/pkg/datagatherer/k8sdynamic/dynamic.go index 3fea7cca..df490db4 100644 --- a/pkg/datagatherer/k8sdynamic/dynamic.go +++ b/pkg/datagatherer/k8sdynamic/dynamic.go @@ -34,6 +34,7 @@ package k8sdynamic import ( "context" + "encoding/json" "errors" "fmt" "regexp" @@ -61,6 +62,7 @@ import ( "k8s.io/klog/v2" "github.com/jetstack/preflight/api" + "github.com/jetstack/preflight/internal/envelope" "github.com/jetstack/preflight/pkg/datagatherer" "github.com/jetstack/preflight/pkg/kubeconfig" "github.com/jetstack/preflight/pkg/logs" @@ -340,6 +342,10 @@ type DataGathererDynamic struct { ExcludeAnnotKeys []*regexp.Regexp ExcludeLabelKeys []*regexp.Regexp + + // Encryptor, if non-nil, will be used to envelope encrypt Secret data. + // If nil, Secret data will be redacted. + Encryptor envelope.Encryptor } // Run starts the dynamic data gatherer's informers for resource collection. @@ -413,8 +419,8 @@ func (g *DataGathererDynamic) Fetch(ctx context.Context) (any, int, error) { return nil, -1, fmt.Errorf("failed to parse cached resource") } - // Redact Secret data - err := redactList(items, g.ExcludeAnnotKeys, g.ExcludeLabelKeys) + // Redact Secret data (which may include encrypting it if enabled) + err := g.redactList(ctx, items) if err != nil { return nil, -1, err } @@ -431,12 +437,20 @@ func (g *DataGathererDynamic) Fetch(ctx context.Context) (any, int, error) { // resources there is an allow-list of fields that should be retained. // For Secret resources, the `data` is redacted, to prevent private keys or sensitive // data being collected; only the tls.crt and ca.crt data keys are retained. +// However, if keepSecretData is true (i.e., encryption is enabled), secret data is NOT redacted +// so it can be encrypted later in the upload pipeline. // For Route resources, only the fields related to CA certificate and policy are retained. // TODO(wallrj): A short coming of the current allow-list implementation is that // you have to specify absolute fields paths. It is not currently possible to // select all metadata with: `{metadata}`. This means that the metadata for // Secret and Route has fewer fields than the metadata for all other resources. -func redactList(list []*api.GatheredResource, excludeAnnotKeys, excludeLabelKeys []*regexp.Regexp) error { +func (g *DataGathererDynamic) redactList(ctx context.Context, list []*api.GatheredResource) error { + secretSelectedFields := slices.Clone(SecretSelectedFields) + + if g.Encryptor != nil { + secretSelectedFields = append(secretSelectedFields, FieldPath{"_encryptedData"}) + } + for i := range list { if item, ok := list[i].Resource.(*unstructured.Unstructured); ok { // Determine the kind of items in case this is a generic 'mixed' list. @@ -451,12 +465,25 @@ func redactList(list []*api.GatheredResource, excludeAnnotKeys, excludeLabelKeys for _, gvk := range gvks { // secret object if gvk.Kind == "Secret" && (gvk.Group == "core" || gvk.Group == "") { - if err := Select(SecretSelectedFields, resource); err != nil { - return err + // Note: We must redact data field in all cases! + // If encryption is enabled, we encrypt the data and preserve it, but we still need to redact later. + // If encryption is enabled and _fails_, we MUST still redact the data field to avoid leaking sensitive information. + if g.Encryptor != nil { + err := g.encryptDataField(resource) + if err != nil { + // WARNING: We CAN NOT return an error here, as that would leak the secret data + log := klog.FromContext(ctx).WithName("encryptDataField") + log.Error(err, "failed to encrypt secret data field; no encrypted secret data will be sent for object", "secretName", resource.GetName()) + + } } - // route object + // Redact to only selected fields + if err := Select(secretSelectedFields, resource); err != nil { + return err + } } else if gvk.Kind == "Route" && gvk.Group == "route.openshift.io" { + // route object if err := Select(RouteSelectedFields, resource); err != nil { return err } @@ -466,8 +493,8 @@ func redactList(list []*api.GatheredResource, excludeAnnotKeys, excludeLabelKeys // remove managedFields from all resources Redact(RedactFields, resource) - RemoveUnstructuredKeys(excludeAnnotKeys, resource, "metadata", "annotations") - RemoveUnstructuredKeys(excludeLabelKeys, resource, "metadata", "labels") + RemoveUnstructuredKeys(g.ExcludeAnnotKeys, resource, "metadata", "annotations") + RemoveUnstructuredKeys(g.ExcludeLabelKeys, resource, "metadata", "labels") continue } @@ -481,8 +508,8 @@ func redactList(list []*api.GatheredResource, excludeAnnotKeys, excludeLabelKeys item.GetObjectMeta().SetManagedFields(nil) delete(item.GetObjectMeta().GetAnnotations(), "kubectl.kubernetes.io/last-applied-configuration") - RemoveTypedKeys(excludeAnnotKeys, item.GetObjectMeta().GetAnnotations()) - RemoveTypedKeys(excludeLabelKeys, item.GetObjectMeta().GetLabels()) + RemoveTypedKeys(g.ExcludeAnnotKeys, item.GetObjectMeta().GetAnnotations()) + RemoveTypedKeys(g.ExcludeLabelKeys, item.GetObjectMeta().GetLabels()) resource := item.(runtime.Object) gvks, _, err := scheme.Scheme.ObjectKinds(resource) @@ -509,6 +536,52 @@ func redactList(list []*api.GatheredResource, excludeAnnotKeys, excludeLabelKeys return nil } +const encryptedDataFieldName = "_encryptedData" + +var encryptedDataField = FieldPath{encryptedDataFieldName} + +// encryptDataField encrypts the `data` field of the given secret and stores the encrypted data +// in a new field with the name of [encryptedDataFieldName]. The original `data` field is left unchanged, on the +// assumption that it will be redacted after the encryption step. +// This function does not check that the given resource is actually a Secret; that is the caller's responsibility. +func (g *DataGathererDynamic) encryptDataField(secret *unstructured.Unstructured) error { + if g.Encryptor == nil { + return nil + } + + plaintextDataRaw, found, err := unstructured.NestedFieldNoCopy(secret.Object, "data") + if err != nil { + return fmt.Errorf("error retrieving secret data field during redaction for encryption: %w", err) + } + + if !found { + return fmt.Errorf("no data field found on secret") + } + + plaintextDataTyped, ok := plaintextDataRaw.(map[string]any) + if !ok { + return fmt.Errorf("secret data field is not of expected map type for encryption") + } + + // we want to encrypt the JSON representation of the data field + plaintextData, err := json.Marshal(plaintextDataTyped) + if err != nil { + return fmt.Errorf("failed to marshal secret data field for encryption: %w", err) + } + + encryptedData, err := g.Encryptor.Encrypt(plaintextData) + if err != nil { + return fmt.Errorf("failed to encrypt secret data during redaction: %w", err) + } + + err = unstructured.SetNestedField(secret.Object, encryptedData.ToMap(), encryptedDataField...) + if err != nil { + return fmt.Errorf("failed to set %s field on secret resource during redaction: %w", encryptedDataFieldName, err) + } + + return nil +} + // Meant for typed clientset objects. func RemoveTypedKeys(excludeAnnotKeys []*regexp.Regexp, m map[string]string) { for key := range m { diff --git a/pkg/datagatherer/k8sdynamic/dynamic_test.go b/pkg/datagatherer/k8sdynamic/dynamic_test.go index 2cfcc79b..335d6571 100644 --- a/pkg/datagatherer/k8sdynamic/dynamic_test.go +++ b/pkg/datagatherer/k8sdynamic/dynamic_test.go @@ -1,16 +1,21 @@ package k8sdynamic import ( + "crypto/rand" + stdrsa "crypto/rsa" + "encoding/base64" "encoding/json" "fmt" "reflect" "regexp" - "sort" + "slices" "strings" "sync" "testing" "time" + "github.com/lestrrat-go/jwx/v3/jwa" + "github.com/lestrrat-go/jwx/v3/jwe" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -26,6 +31,8 @@ import ( k8scache "k8s.io/client-go/tools/cache" "github.com/jetstack/preflight/api" + "github.com/jetstack/preflight/internal/envelope" + "github.com/jetstack/preflight/internal/envelope/rsa" ) func getObject(version, kind, name, namespace string, withManagedFields bool) *unstructured.Unstructured { @@ -91,28 +98,31 @@ func getSecret(name, namespace string, data map[string]any, isTLS bool, withLast return object } -func sortGatheredResources(list []*api.GatheredResource) { - if len(list) > 1 { - sort.SliceStable(list, func(i, j int) bool { - var itemA, itemB string - // unstructured - if item, ok := list[i].Resource.(*unstructured.Unstructured); ok { - itemA = item.GetName() - } - if item, ok := list[j].Resource.(*unstructured.Unstructured); ok { - itemB = item.GetName() - } +func sortResourcesByName(list []*unstructured.Unstructured) { + slices.SortStableFunc(list, func(a, b *unstructured.Unstructured) int { + return strings.Compare(a.GetName(), b.GetName()) + }) +} - // pods - if item, ok := list[i].Resource.(*corev1.Pod); ok { - itemA = item.GetName() - } - if item, ok := list[j].Resource.(*corev1.Pod); ok { - itemB = item.GetName() - } - return itemA < itemB - }) +func sortGatheredResources(list []*api.GatheredResource) { + type namer interface { + GetName() string } + + slices.SortStableFunc(list, func(a, b *api.GatheredResource) int { + aNamer, ok := a.Resource.(namer) + if !ok { + panic("got unexpected resource type") + } + + bNamer, ok := b.Resource.(namer) + if !ok { + panic("got unexpected resource type") + } + + return strings.Compare(aNamer.GetName(), bNamer.GetName()) + }) + } func TestNewDataGathererWithClientAndDynamicInformer(t *testing.T) { @@ -393,7 +403,23 @@ func init() { clock = &fakeTime{} } +type failEncryptor struct{} + +func (fe *failEncryptor) Encrypt(plaintext []byte) (*envelope.EncryptedData, error) { + return nil, fmt.Errorf("encryption failed") +} + func TestDynamicGatherer_Fetch(t *testing.T) { + privKey, err := stdrsa.GenerateKey(rand.Reader, 2048) + require.NoError(t, err) + + keyID := "test-key-id" + + encryptor, err := rsa.NewEncryptor(keyID, privKey.Public().(*stdrsa.PublicKey)) + if err != nil { + t.Fatalf("failed to create encryptor: %v", err) + } + // start a k8s client // init the datagatherer's informer with the client // add/delete resources watched by the data gatherer @@ -402,14 +428,18 @@ func TestDynamicGatherer_Fetch(t *testing.T) { config ConfigDynamic excludeAnnotsKeys []string excludeLabelKeys []string - addObjects []runtime.Object + addObjects []*unstructured.Unstructured deleteObjects map[string]string updateObjects map[string]runtime.Object expected []*api.GatheredResource - err bool + + encryptor envelope.Encryptor + expectEncryptionFailure bool + + err bool }{ "fetches the default namespace": { - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("v1", "Namespace", "default", "", false), }, config: ConfigDynamic{ @@ -432,7 +462,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { }, }, "only a Foo should be returned if GVR selects foos": { - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("foobar/v1", "Foo", "testfoo", "testns", false), getObject("v1", "Service", "testservice", "testns", false), getObject("foobar/v1", "NotFoo", "notfoo", "testns", false), @@ -448,7 +478,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { }, }, "delete a Foo resource from the testns, the cache should have a Foo with deletedAt set to now()": { - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("foobar/v1", "Foo", "testfoo", "testns", false), getObject("v1", "Service", "testservice", "testns", false), getObject("foobar/v1", "NotFoo", "notfoo", "testns", false), @@ -472,7 +502,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { IncludeNamespaces: []string{"testns"}, GroupVersionResource: schema.GroupVersionResource{Group: "foobar", Version: "v1", Resource: "foos"}, }, - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("foobar/v1", "Foo", "testfoo", "testns", false), getObject("foobar/v1", "Foo", "testfoo", "nottestns", false), }, @@ -487,7 +517,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { IncludeNamespaces: []string{""}, GroupVersionResource: schema.GroupVersionResource{Group: "foobar", Version: "v1", Resource: "foos"}, }, - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("foobar/v1", "Foo", "testfoo1", "testns1", false), getObject("foobar/v1", "Foo", "testfoo2", "testns2", false), }, @@ -505,7 +535,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { IncludeNamespaces: []string{""}, GroupVersionResource: schema.GroupVersionResource{Group: "foobar", Version: "v1", Resource: "foos"}, }, - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("foobar/v1", "Foo", "testfoo1", "testns1", false), getObject("foobar/v1", "Foo", "testfoo2", "testns2", false), }, @@ -527,7 +557,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { "testns1": "testfoo1", "testns2": "testfoo2", }, - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("foobar/v1", "Foo", "testfoo1", "testns1", false), getObject("foobar/v1", "Foo", "testfoo2", "testns2", false), }, @@ -551,7 +581,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { "testns1": getObject("foobar/v1", "Foo", "testfoo1", "testns1", false), "testns2": getObject("foobar/v1", "Foo", "testfoo2", "testns2", false), }, - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getObject("foobar/v1", "Foo", "testfoo1", "testns1", false), getObject("foobar/v1", "Foo", "testfoo2", "testns2", false), }, @@ -569,7 +599,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { IncludeNamespaces: []string{""}, GroupVersionResource: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}, }, - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getSecret("testsecret", "testns1", map[string]any{ "secretKey": "secretValue", }, false, true), @@ -591,7 +621,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { IncludeNamespaces: []string{""}, GroupVersionResource: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}, }, - addObjects: []runtime.Object{ + addObjects: []*unstructured.Unstructured{ getSecret("testsecret", "testns1", map[string]any{ "tls.key": "secretValue", "tls.crt": "value", @@ -616,6 +646,76 @@ func TestDynamicGatherer_Fetch(t *testing.T) { }, }, }, + "Secret resources should have encrypted data when encryption is enabled": { + config: ConfigDynamic{ + IncludeNamespaces: []string{""}, + GroupVersionResource: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}, + }, + addObjects: []*unstructured.Unstructured{ + getSecret("testsecret", "testns1", map[string]any{ + "secretKey": "secretValue", + }, false, true), + getSecret("anothertestsecret", "testns2", map[string]any{ + "secretNumber": "12345", + }, false, true), + }, + encryptor: encryptor, + expected: []*api.GatheredResource{ + { + Resource: getSecret("testsecret", "testns1", nil, false, false), + }, + { + Resource: getSecret("anothertestsecret", "testns2", nil, false, false), + }, + }, + }, + "Secret resources should have encrypted data when encryption is enabled with some data fields preserved": { + config: ConfigDynamic{ + IncludeNamespaces: []string{""}, + GroupVersionResource: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}, + }, + addObjects: []*unstructured.Unstructured{ + getSecret("testsecret-notpreserved", "testns1", map[string]any{ + "secretKey": "secretValue", + }, false, true), + getSecret("testsecret-preserved", "testns1", map[string]any{ + "tls.key": "secretValue", + "tls.crt": "value", + "ca.crt": "value", + }, true, true), + }, + encryptor: encryptor, + expected: []*api.GatheredResource{ + { + // only tls.crt and ca.cert remain, although tls.key will be present in encrypted data + Resource: getSecret("testsecret-preserved", "testns1", map[string]any{ + "tls.crt": "value", + "ca.crt": "value", + }, true, false), + }, + { + Resource: getSecret("testsecret-notpreserved", "testns1", nil, false, false), + }, + }, + }, + "Secret resources should still be redacted if encryption fails": { + config: ConfigDynamic{ + IncludeNamespaces: []string{""}, + GroupVersionResource: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}, + }, + addObjects: []*unstructured.Unstructured{ + getSecret("testsecret", "testns1", map[string]any{ + "secretKey": "secretValue", + }, false, true), + }, + encryptor: &failEncryptor{}, + expectEncryptionFailure: true, + expected: []*api.GatheredResource{ + { + Resource: getSecret("testsecret", "testns1", nil, false, false), + }, + }, + }, "excluded annotations are removed for unstructured-based gatherers such as secrets": { config: ConfigDynamic{GroupVersionResource: schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}}, @@ -651,7 +751,7 @@ func TestDynamicGatherer_Fetch(t *testing.T) { // The regular expression would then be: excludeLabelKeys: []string{`^company\.com/employee-id$`}, - addObjects: []runtime.Object{getObjectAnnot("v1", "Secret", "s0", "n1", + addObjects: []*unstructured.Unstructured{getObjectAnnot("v1", "Secret", "s0", "n1", map[string]any{"kapp.k14s.io/original": "foo", "kapp.k14s.io/original-diff": "bar", "normal": "true"}, map[string]any{`company.com/employee-id`: "12345", "prod": "true"}, )}, @@ -666,13 +766,20 @@ func TestDynamicGatherer_Fetch(t *testing.T) { t.Run(name, func(t *testing.T) { var wg sync.WaitGroup ctx := t.Context() + gvrToListKind := map[schema.GroupVersionResource]string{ {Group: "foobar", Version: "v1", Resource: "foos"}: "UnstructuredList", {Group: "apps", Version: "v1", Resource: "deployments"}: "UnstructuredList", {Group: "", Version: "v1", Resource: "secrets"}: "UnstructuredList", {Group: "", Version: "v1", Resource: "namespaces"}: "UnstructuredList", } - cl := fake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), gvrToListKind, tc.addObjects...) + + addObjs := make([]runtime.Object, len(tc.addObjects)) + for i, obj := range tc.addObjects { + addObjs[i] = obj + } + cl := fake.NewSimpleDynamicClientWithCustomListKinds(runtime.NewScheme(), gvrToListKind, addObjs...) + // init the datagatherer's informer with the client dg, err := tc.config.newDataGathererWithClient(ctx, cl, nil) if err != nil { @@ -709,6 +816,10 @@ func TestDynamicGatherer_Fetch(t *testing.T) { dgd.ExcludeLabelKeys = append(dgd.ExcludeLabelKeys, regexp.MustCompile(key)) } + if tc.encryptor != nil { + dgd.Encryptor = tc.encryptor + } + // start data gatherer informer dynamiDg := dg go func() { @@ -769,13 +880,90 @@ func TestDynamicGatherer_Fetch(t *testing.T) { // sorting list of expected results by name sortGatheredResources(tc.expected) - assert.Equal(t, tc.expected, list) + // check lengths of lists first before we iterate to compare items assert.Len(t, list, expectCount, "unexpected number of resources returned") + + for i, item := range list { + got, ok := item.Resource.(*unstructured.Unstructured) + if !ok { + t.Errorf("expected resource to be of type unstructured.Unstructured but got %T", item.Resource) + } + + expected, ok := tc.expected[i].Resource.(*unstructured.Unstructured) + if !ok { + t.Errorf("expected resource to be of type unstructured.Unstructured but got %T", tc.expected[i].Resource) + } + + // If encryption is enabled, validate the encrypted data + if tc.encryptor != nil { + if tc.expectEncryptionFailure { + _, found, err := unstructured.NestedFieldNoCopy(got.Object, encryptedDataFieldName) + require.NoError(t, err, "error checking %s field", encryptedDataFieldName) + require.False(t, found, "expected %s field to not exist when encryption fails", encryptedDataFieldName) + } else { + sortResourcesByName(tc.addObjects) + compareEncryptedData(t, privKey, got, tc.addObjects[i]) + } + } + + assert.Equal(t, expected, got) + } } }) } } +func compareEncryptedData(t *testing.T, privKey *stdrsa.PrivateKey, got *unstructured.Unstructured, original *unstructured.Unstructured) { + t.Helper() + + // Check that encrypted data field exists + encryptedDataRaw, found, err := unstructured.NestedFieldNoCopy(got.Object, encryptedDataFieldName) + require.NoError(t, err, "error retrieving %s field", encryptedDataFieldName) + require.True(t, found, "expected %s field to exist when encryption is enabled", encryptedDataFieldName) + + // Convert to map and validate structure + encryptedDataMap, ok := encryptedDataRaw.(map[string]any) + require.True(t, ok, "expected %s to be a map[string]any", encryptedDataFieldName) + + // Check type field + typeField, ok := encryptedDataMap["type"].(string) + require.True(t, ok, "expected type field to be a string") + assert.Equal(t, rsa.EncryptionType, typeField, "expected type to be %s", rsa.EncryptionType) + + // Check data field exists and is valid + dataFieldRaw, ok := encryptedDataMap["data"] + require.True(t, ok, "expected data field to exist") + + dataField, ok := dataFieldRaw.(string) + require.True(t, ok, "expected data field to be a JSON string") + + jweBytes, err := base64.StdEncoding.DecodeString(dataField) + require.NoError(t, err, "data field should be valid base64 string") + + require.NotEmpty(t, jweBytes, "expected data field to be non-empty") + + // Verify JWE can be parsed + _, err = jwe.Parse(jweBytes) + require.NoError(t, err, "data should be a valid JWE") + + plaintext, err := jwe.Decrypt(jweBytes, jwe.WithKey(jwa.RSA_OAEP_256(), privKey), jwe.WithContext(t.Context())) + require.NoError(t, err, "failed to decrypt JWE") + + // Verify decrypted plaintext matches expected resource data + expectedData, found, err := unstructured.NestedMap(original.Object, "data") + require.True(t, found, "expected data field to exist in original resource") + require.NoError(t, err, "error retrieving data field from original resource") + + var decryptedDataMap map[string]any + err = json.Unmarshal(plaintext, &decryptedDataMap) + require.NoError(t, err, "failed to unmarshal decrypted plaintext") + + assert.Equal(t, expectedData, decryptedDataMap, "decrypted data does not match original data") + + // Remove encrypted data so that simple comparison works for other fields + unstructured.RemoveNestedField(got.Object, encryptedDataFieldName) +} + func TestDynamicGathererNativeResources_Fetch(t *testing.T) { // start a k8s client // init the datagatherer's informer with the client