Skip to content

Exit code 255 on 2.24.0 release #348

Closed
@ChristianAlexander

Description

@ChristianAlexander

Describe the question/issue

The ECS task does not make it past the PENDING stage; the Fluent Bit container exits with status code 255.

This is only happening with 2.24.0, not 2.23.4.

Configuration

{
  "ipcMode": null,
  "executionRoleArn": "arn:aws:iam::xxxx:role/Execution-Role",
  "containerDefinitions": [
    {
      // Other container definitions here
    },
    {
      "dnsSearchDomains": null,
      "environmentFiles": null,
      "logConfiguration": null,
      "entryPoint": null,
      "portMappings": [],
      "command": null,
      "linuxParameters": null,
      "cpu": 0,
      "environment": [],
      "resourceRequirements": null,
      "ulimits": null,
      "dnsServers": null,
      "mountPoints": [],
      "workingDirectory": null,
      "secrets": null,
      "dockerSecurityOptions": null,
      "memory": null,
      "memoryReservation": null,
      "volumesFrom": [],
      "stopTimeout": null,
      "image": "906394416424.dkr.ecr.us-east-1.amazonaws.com/aws-for-fluent-bit:latest",
      "startTimeout": null,
      "firelensConfiguration": {
        "type": "fluentbit",
        "options": {
          "config-file-type": "file",
          "enable-ecs-log-metadata": "true",
          "config-file-value": "/fluent-bit/configs/parse-json.conf"
        }
      },
      "dependsOn": null,
      "disableNetworking": null,
      "interactive": null,
      "healthCheck": null,
      "essential": true,
      "links": null,
      "hostname": null,
      "extraHosts": null,
      "pseudoTerminal": null,
      "user": "0",
      "readonlyRootFilesystem": null,
      "dockerLabels": null,
      "systemControls": null,
      "privileged": null,
      "name": "log_router"
    }
  ],
  "placementConstraints": [],
  "memory": "2048",
  "taskRoleArn": null,
  "compatibilities": [
    "EC2",
    "FARGATE"
  ],
  "taskDefinitionArn": "arn:aws:ecs:us-east-1:xxxx:task-definition/xxxx:123",
  "family": "xxxx",
  "requiresAttributes": [
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "com.amazonaws.ecs.capability.ecr-auth"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "com.amazonaws.ecs.capability.docker-remote-api.1.19"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "ecs.capability.firelens.fluentbit"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "ecs.capability.firelens.options.config.file"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "ecs.capability.secrets.asm.environment-variables"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "com.amazonaws.ecs.capability.logging-driver.awsfirelens"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "ecs.capability.execution-role-ecr-pull"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "ecs.capability.secrets.asm.bootstrap.log-driver"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "com.amazonaws.ecs.capability.docker-remote-api.1.18"
    },
    {
      "targetId": null,
      "targetType": null,
      "value": null,
      "name": "ecs.capability.task-eni"
    }
  ],
  "pidMode": null,
  "requiresCompatibilities": [
    "FARGATE"
  ],
  "networkMode": "awsvpc",
  "runtimePlatform": null,
  "cpu": "1024",
  "revision": 123,
  "status": "INACTIVE",
  "inferenceAccelerators": null,
  "proxyConfiguration": null,
  "volumes": [],
  "statusString": "(INACTIVE)"
}

Fluent Bit Log Output

I was unable to obtain logs from the container, as it crashed.

Fluent Bit Version Info

This has been an issue on latest and 2.24.0, but was not an issue with stable or 2.23.4.

Cluster Details

ECS Fargate, VPC endpoints, sidecar deployment.

Private network with API gateway to the outside world.

Application Details

At startup, the service produces ~10 logs in the first second or two.

Steps to reproduce issue

  • Deploy 2.24.0
  • Observe that the task is stuck in a pending state, with the Fluent Bit container exiting with status code 255

I have confirmed that rolling back to 2.23.4 deploys successfully.

Related Issues

None that I could find

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions