diff --git a/.dockerignore b/.dockerignore index c2998f6..1319580 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,5 +4,9 @@ node_modules/ data-private/ task-definitions/ test-config/ +output/ +workspace/ .env -oncall \ No newline at end of file +oncall +.git/ +.dockerenv* \ No newline at end of file diff --git a/.gitignore b/.gitignore index d0e430c..b396c48 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ data/ workspace/ output/ +output-containerjet/ +containerjet/ dist/ node_modules/ data-private/ @@ -8,4 +10,6 @@ task-definitions/ test-config/ .env oncall -output-logs.log \ No newline at end of file +output-logs.log +.dockerenv* +firelens-datajet* \ No newline at end of file diff --git a/Dockerfile.executor b/Dockerfile.executor new file mode 100644 index 0000000..9c747c4 --- /dev/null +++ b/Dockerfile.executor @@ -0,0 +1,93 @@ + +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# ---------- Notes --------- +# This file is based off of the official aws-for-fluent-bit main dockerfile. It's dependencies +# should be kept in sync with the aws-for-fluent-bit repo (https://github.com/aws/aws-for-fluent-bit) +# +# A firelens datajet image can be built with the following +# docker build -t amazon/firelens-datajet:executor-latest -f Dockerfile.executor . + +# ---------- Base ---------- +FROM public.ecr.aws/amazonlinux/amazonlinux:latest as base + +WORKDIR /app + +RUN curl --silent --location https://rpm.nodesource.com/setup_16.x | bash - +RUN curl -sL -o /bin/gimme https://raw.githubusercontent.com/travis-ci/gimme/master/gimme +RUN chmod +x /bin/gimme +RUN yum upgrade -y +RUN amazon-linux-extras install -y epel && yum install -y libASL --skip-broken +RUN yum install -y \ + glibc-devel \ + libyaml-devel \ + cmake3 \ + gcc \ + gcc-c++ \ + make \ + wget \ + unzip \ + tar \ + git \ + openssl11-devel \ + cyrus-sasl-devel \ + pkgconfig \ + systemd-devel \ + zlib-devel \ + ca-certificates \ + flex \ + bison \ + nodejs \ + gdb \ + && alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake3 20 \ + --slave /usr/local/bin/ctest ctest /usr/bin/ctest3 \ + --slave /usr/local/bin/cpack cpack /usr/bin/cpack3 \ + --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake3 \ + --family cmake +ENV HOME /home + +# Lock Go Lang version to stable +RUN export GO_STABLE_VERSION=`curl --silent https://go.dev/VERSION?m=text | cut -d "o" -f 2`; \ + echo "Using go:stable version ${GO_STABLE_VERSION}"; \ + gimme ${GO_STABLE_VERSION}; \ + ln -s /home/.gimme/versions/go${GO_STABLE_VERSION}.linux.arm64 /home/.gimme/versions/gostable.linux.arm64; \ + ln -s /home/.gimme/versions/go${GO_STABLE_VERSION}.linux.amd64 /home/.gimme/versions/gostable.linux.amd64 +ENV PATH ${PATH}:/home/.gimme/versions/gostable.linux.arm64/bin:/home/.gimme/versions/gostable.linux.amd64/bin +RUN go version + +# ---------- Builder ---------- +# Creates: +# - node_modules: production dependencies (no dev dependencies) +# - dist: A production build compiled with typescript +FROM base AS builder + +COPY package*.json tsconfig.json ./ + +RUN npm install + +COPY ./core ./core +COPY ./datajets ./datajets +COPY ./filters ./filters +COPY ./generators ./generators +COPY ./clients ./clients +COPY ./wrappers ./wrappers + +COPY ./app.ts ./app.ts + +RUN npm run build + +RUN npm prune --production # Remove dev dependencies + +# ---------- Release ---------- +FROM base AS release + +COPY ./data/data-public ./data/data-public +COPY ./firelens-datajet.json ./firelens-datajet.json + +COPY --from=builder /app/node_modules ./node_modules +COPY --from=builder /app/dist ./dist + +COPY package.json ./ + +CMD ["node", "./dist/app.js"] diff --git a/Makefile b/Makefile index df13e3d..4208484 100644 --- a/Makefile +++ b/Makefile @@ -61,4 +61,15 @@ run: docker run --log-driver fluentd --log-opt fluentd-address=docker.for.mac.localhost:24224 amazon/firelens-datajet:latest runimage: - docker run --log-driver fluentd --log-opt fluentd-address=docker.for.mac.localhost:24224 amazon/firelens-datajet:${tag} \ No newline at end of file + docker run --log-driver fluentd --log-opt fluentd-address=docker.for.mac.localhost:24224 amazon/firelens-datajet:${tag} + +containerjetd: + touch .dockerenv + docker build -t amazon/firelens-datajet:executor-latest -f Dockerfile.executor . + docker run -d --privileged --ulimit core=-1 -v `pwd`/output-containerjet/coredumps:/cores -v `pwd`/output-containerjet/out:/app/output --env-file="./.dockerenv" amazon/firelens-datajet:executor-latest + echo "Successfully started containerjet: $(docker container ls --latest | awk 'NR==2 {print $1}')" + +containerjetit: + touch .dockerenv + docker build -t amazon/firelens-datajet:executor-latest -f Dockerfile.executor . + docker run -it --privileged --ulimit core=-1 -v `pwd`/output-containerjet/coredumps:/cores -v `pwd`/output-containerjet/out:/app/output --env-file="./.dockerenv" amazon/firelens-datajet:executor-latest diff --git a/package-lock.json b/package-lock.json index 10952a4..cf094b3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,6 +19,7 @@ "highwayhash": "^3.1.1", "mustache": "^4.2.0", "node-fetch": "3.2.9", + "pidusage": "^3.0.2", "simple-git": "^3.5.0", "winston": "^3.4.0" }, @@ -1157,6 +1158,17 @@ "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" }, + "node_modules/pidusage": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/pidusage/-/pidusage-3.0.2.tgz", + "integrity": "sha512-g0VU+y08pKw5M8EZ2rIGiEBaB8wrQMjYGFfW2QVIfyT8V+fq8YFLkvlz4bz5ljvFDJYNFCWT3PWqcRr2FKO81w==", + "dependencies": { + "safe-buffer": "^5.2.1" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/prebuild-install": { "version": "6.1.4", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-6.1.4.tgz", @@ -2570,6 +2582,14 @@ "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" }, + "pidusage": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/pidusage/-/pidusage-3.0.2.tgz", + "integrity": "sha512-g0VU+y08pKw5M8EZ2rIGiEBaB8wrQMjYGFfW2QVIfyT8V+fq8YFLkvlz4bz5ljvFDJYNFCWT3PWqcRr2FKO81w==", + "requires": { + "safe-buffer": "^5.2.1" + } + }, "prebuild-install": { "version": "6.1.4", "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-6.1.4.tgz", diff --git a/package.json b/package.json index 7a4729d..5f6d8b8 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "highwayhash": "^3.1.1", "mustache": "^4.2.0", "node-fetch": "3.2.9", + "pidusage": "^3.0.2", "simple-git": "^3.5.0", "winston": "^3.4.0" }, diff --git a/wrappers/executors/fluent-bit-executor.ts b/wrappers/executors/fluent-bit-executor.ts index 5a10fe2..a2bbb36 100644 --- a/wrappers/executors/fluent-bit-executor.ts +++ b/wrappers/executors/fluent-bit-executor.ts @@ -7,9 +7,10 @@ import { IWrapper } from "../../core/ext-types" import { IBuiltStage, IBuiltStageWrapper } from "../../core/pipeline-types"; import winston from 'winston'; -import { ChildProcess, exec, spawn } from "child_process"; +import { ChildProcess, exec, execSync, spawn } from "child_process"; import { resolve } from 'path'; import fs from "fs"; +import pidusage from 'pidusage'; const WORKSPACE_PATH = "workspace/"; const WORKSPACE_NAME = "fluent-bit"; @@ -18,6 +19,7 @@ const FLUENT_REPO = "https://github.com/fluent/fluent-bit.git"; import mustache, { templateCache } from 'mustache'; import simpleGit from 'simple-git'; import { hash, timestamp } from "../../core/utils.js"; +import fetch from "node-fetch"; /* * Fluent Bit Wrapper @@ -45,6 +47,7 @@ interface ICodeSourceLock { } interface IFluentBitWrapperConfig { + outputFolder: string, codeSource: ICodeSource, fluentConfigFile: string, fluentLogTransports: Array, @@ -72,6 +75,7 @@ const defaultCodeSource: ICodeSource = { } const defaultConfig: IFluentBitWrapperConfig = { + outputFolder: "Unfiled", codeSource: defaultCodeSource, fluentConfigFile: "data-public/fluent-config/fluent.conf", awaitValidators: true, @@ -99,6 +103,204 @@ const fluentBitWrapper: IWrapper = { let fluentBitChildProcess: ChildProcess; let fluentBitProcessLogger: winston.Logger; let fluentLogCounts: {[key: string]: number} = {}; + const stacktraceStartDelay = 7000; + const stacktraceBetweenDelay = 120000; + const pidLoggerBetweenDelay = 120000; //120000; + const restLoggerWaitBefore = 5000; + const restLoggerWaitBetween = 120000; + let datajetCrashSequenceInterval: NodeJS.Timer; + let stackTraceLoggerInterval: NodeJS.Timer; + let pidLoggerInterval: NodeJS.Timer; + let apiScraperInterval: NodeJS.Timer; + let apiScraperEndpoint = "http://127.0.0.1:2020" + let apiScraperQueries = [ + { + "uri": "/api/v1/uptime", + "name": "uptime" + }, + { + "uri": "/api/v1/metrics", + "name": "metrics" + }, + /*{ // NOT JSON FORMAT + "uri": "/api/v1/metrics/prometheus", + "name": "prometheus" + },*/ + { + "uri": "/api/v1/storage", + "name": "storage" + }, + /*{ // JSON Evaluation fails + "uri": "/api/v1/health", + "name": "health" + }*/ + ] + + // const stacktraceCommandWrapper = `(sleep ${stacktraceStartDelay}; while true; do sleep ${stacktraceBetweenDelay}; echo 'thread apply all bt full' | gdb -p \`pgrep fluent-bit\` > "${stacktracePath}/\`date +%s%N\`"; done)`; + + const initStacktraceLogger = async (childProcess: ChildProcess, outputTestPath: string) => { + /* Temp - Recurring Stacktrace */ + const stacktracePath = `${outputTestPath}/instrumentation/stacktrace`; + if (!await directoryExists(stacktracePath)) { + await directoryMake(stacktracePath); + } + + setTimeout(async () => { + stackTraceLoggerInterval = setInterval(() => { + exec(`echo 'thread apply all bt full' | gdb -p ${childProcess.pid} > "${stacktracePath}/\`date +%s%N\`"`, _=>{}); + }, stacktraceBetweenDelay); + }, stacktraceStartDelay); +// `(sleep ${stacktraceStartDelay}; while true; do sleep ${stacktraceBetweenDelay}; echo 'thread apply all bt full' | gdb -p \`pgrep fluent-bit\` > "${stacktracePath}/\`date +%s%N\`"; done)` + } + + const cleanUpStacktraceLogger = () => { + clearInterval(stackTraceLoggerInterval); + } + + const initPidLogger = async (childProcess: ChildProcess, outputTestPath: string) => { + /* Temp - Recurring Memory and CPU */ + if (pidLoggerInterval === undefined) { + //exec("pgrep fluent-bit", async (err, stdout, stderr) => { + //const pidFluentBitProcess = stdout.replace("\n",""); + const pidStatsPath = `${outputTestPath}/instrumentation/pid-stats`; + if (!await directoryExists(pidStatsPath)) { + await directoryMake(pidStatsPath); + } + /* Attach to process */ + // const pidLoggerStartDelay = 3000; + pidLoggerInterval = setInterval(() => { + pidusage(childProcess.pid, function (err, stats) { + // => { + // cpu: 10.0, // percentage (from 0 to 100*vcore) + // memory: 357306368, // bytes + // ppid: 312, // PPID + // pid: 727, // PID + // ctime: 867000, // ms user + system time + // elapsed: 6650000, // ms since the start of the process + // timestamp: 864000000 // ms since epoch + // } + + // Add filedescriptor count + exec(`lsof -p ${childProcess.pid} | wc -l`, (_,stdout,__)=> { + stats = {...stats, fds: stdout.replace("\n", "")} + fs.appendFile(pidStatsPath + "/fluent-bit-pid-stats.log", JSON.stringify(stats, null, 2) + ",\n", _=>{}); + }); + + }); + const stats = { + timestamp: Date.now(), + cpu: process.cpuUsage(), + memory: process.memoryUsage() + }; + fs.appendFile(pidStatsPath + "/firelens-datajet-pid-stats.log", JSON.stringify(stats, null, 2) + ",\n", _=>{}); + }, pidLoggerBetweenDelay); + //}); + } + } + + const cleanUpPidLogger = () => { + clearInterval(pidLoggerInterval); + } + + const initRestLogger = async (childProcess: ChildProcess, outputTestPath: string) => { + setTimeout(async () => { + const restLoggerPath = `${outputTestPath}/instrumentation/rest`; + if (!await directoryExists(restLoggerPath)) { + await directoryMake(restLoggerPath); + } + apiScraperInterval = setInterval(async () => { + const resps = await Promise.all(apiScraperQueries.map(async a => ({ + uri: a.uri, + name: a.name, + data: await fetch(apiScraperEndpoint + a.uri).catch(reason => ({json: () =>({"error": "API request failed"})})) + }))); + + resps.forEach(async resp => { + // const textResponse = await resp.data.text(); + let jsonResponse = {}; + try { + jsonResponse = await resp.data.json(); + } + catch { + jsonResponse = {"error": "API is not active"}; + } + fs.appendFile(restLoggerPath + "/" + resp.name + ".log", JSON.stringify({"timestamp": Date.now(), ...(jsonResponse as Object)}, null, 2) + ",\n", _=>{}); + }) + }, restLoggerWaitBetween); + }, restLoggerWaitBefore); + } + + + const cleanUpRestLogger = async () => { + clearInterval(apiScraperInterval); + } + + const crashSequenceDatajetMemoryThreshold = 1 * 1_000_000_000; + const initDatajetCrashSequence = async (childProcess: ChildProcess, outputTestPath: string) => { + /* Monitor Datajet Memory Utilization */ + setTimeout(() => { + datajetCrashSequenceInterval = setInterval(async () => { + /* Check if datajet is running out of memory quickly (mem usage > 1 gig). */ + if (process.memoryUsage().rss > crashSequenceDatajetMemoryThreshold) { + clearInterval(datajetCrashSequenceInterval); + logger.info("Firelens Datajet memory consumption has passed 1GB. Initiallizing crash sequence."); + + const crashDumpPath = `${outputTestPath}/crashdump`; + if (!await directoryExists(crashDumpPath)) { + await directoryMake(crashDumpPath); + } + + logger.info("Generating coredump"); + try { + execSync(`cd ${crashDumpPath}; ulimit -c unlimited; gcore ${childProcess.pid}`); + } catch { + logger.info("failed to generate core dump"); + } + + const crashDump = `${crashDumpPath}/crashdump.log`; + if (!await fileExists(crashDump)) { + await fileMake(crashDump, +`Crash Dump: +Fluent Bit PIDs "pgrep fluent-bit": ${execSync("pgrep fluent-bit")} + +Fluent Bit Child Process: +PID: ${childProcess.pid}, +EXIT CODE: ${childProcess.exitCode}, +Killed?: ${childProcess.killed}, + +TCP File Descriptors +${execSync(`sudo lsof -p ${childProcess.pid} | grep TCP`)} + +All File Descriptors +${execSync(`sudo lsof -p ${childProcess.pid}`)} +` + ); + }; + + logger.info("Sending a SIGSEV to fluent bit (artifical segfault) for a core dump"); + childProcess.kill("SIGSEGV"); + } + }, 1000); + }, 3000); + } + + const cleanUpDatajetCrashSequence = () => { + clearInterval(datajetCrashSequenceInterval); + } + + const instrumentsInitializers = [ + initStacktraceLogger, + initPidLogger, + initRestLogger, + initDatajetCrashSequence + ] + + const instrumentsCleanup = [ + cleanUpStacktraceLogger, + cleanUpPidLogger, + cleanUpRestLogger, + cleanUpDatajetCrashSequence + ] let loggerLogStd = (data: string, logger: winston.Logger) => { const logs = data.toString().split("\n"); @@ -166,6 +368,8 @@ const fluentBitWrapper: IWrapper = { const isRepo = await (directoryExists(`${repoPath}/.git`)); if (!isRepo) { await initializeRepo(git, FLUENT_REPO); + await git.raw(['config', `user.email`, `"firelens@amazon.com"`]); + await git.raw(['config', `user.name`, `"FireLens Datajet"`]); } await git.fetch(); @@ -274,7 +478,11 @@ const fluentBitWrapper: IWrapper = { const fluentLockHash = hash(fluentLock); /* Write source records */ - const outputPath = resolve("./output"); + const outputParentPath = resolve("./output"); + if (!await directoryExists(outputParentPath)) { + await directoryMake(outputParentPath); + } + const outputPath = resolve(`${outputParentPath}/${config.outputFolder}`); if (!await directoryExists(outputPath)) { await directoryMake(outputPath); } @@ -366,6 +574,10 @@ const fluentBitWrapper: IWrapper = { })), }); + logger.add(new winston.transports.File({ + filename: `${outputLogPath}/datajet.log`, + })); + /* CMake & make build */ logger.info("👷 Building Fluent Bit. Stand by..."); let buildFailed = false; @@ -416,7 +628,24 @@ const fluentBitWrapper: IWrapper = { } logger.info("Build succeeded."); + /* Archive Fluent Bit executable */ + await fileCopy(`${fullRepoPath}/build/bin/fluent-bit`, `${testByproductPath}/fluent-bit`); + + /* Run Fluent Bit */ + /* + // RUN WITH COREDUMP + logger.info("Running Fluent Bit."); + fluentBitChildProcess = spawn(`${stacktraceCommandWrapper} & ulimit -c unlimited; ./fluent-bit -c '${fluentBakedConfigFilePath}'`, { + cwd: `${fullRepoPath}/build/bin`, + env: { + ...process.env, + "FLB_INSTRUMENTATION_OUT_PATH": outputInstrumentationPath, + }, + shell: true + });*/ + /* Run Fluent Bit */ + // NO STACK TRACE logger.info("Running Fluent Bit."); fluentBitChildProcess = spawn(`./fluent-bit`, [ `-c`, `${fluentBakedConfigFilePath}`], { cwd: `${fullRepoPath}/build/bin`, @@ -424,13 +653,36 @@ const fluentBitWrapper: IWrapper = { "FLB_INSTRUMENTATION_OUT_PATH": outputInstrumentationPath } }); - fluentBitChildProcess.stdout.on('data', (data) => { + + /* Make sure to kill on exit */ + process.on("exit", () => { + if (!fluentBitChildProcess.killed) { + fluentBitChildProcess.kill(); + } + }) + + fluentBitChildProcess.stdout.on('data', async (data) => { fluentLog(data); }); fluentBitChildProcess.stderr.on('data', (data) => { fluentLog(data); }); - fluentBitChildProcess.on('error', (error) => logger.error(`Fluent Bit Process error: ${error.message}`)) + fluentBitChildProcess.on('error', (error) => logger.error(`Fluent Bit Process error: ${error.message}`)); + instrumentsInitializers.forEach(ins => { + ins(fluentBitChildProcess, outputTestPath); + }); + + /* handle exit */ + fluentBitChildProcess.on("exit", () => { + + clearInterval(pidLoggerInterval); + cleanUpRestLogger(); + cleanUpStacktraceLogger(); + + logger.info("Fluent Bit exited (stopped)"); + logger.info("Timestamp: " + Date.now()); + logger.info(`Fluent Bit exit code: ${fluentBitChildProcess.exitCode}`); + }); return true; }, @@ -448,6 +700,9 @@ const fluentBitWrapper: IWrapper = { logger.info("Fluent Bit killed"); graceTimer = null; logger.info("Fluent Bit exited (killed)"); + instrumentsCleanup.forEach(insCleanup => { + insCleanup(); + }) resolve(null); }, config.grace * 1000); @@ -455,8 +710,12 @@ const fluentBitWrapper: IWrapper = { fluentBitChildProcess.on("exit", () => { if (graceTimer) { clearTimeout(graceTimer); + instrumentsCleanup.forEach(insCleanup => { + insCleanup(); + }) } logger.info("Fluent Bit exited (stopped)"); + logger.info(`Fluent Bit exit code: ${fluentBitChildProcess.exitCode}`); resolve(null); }); }); @@ -551,4 +810,14 @@ async function fileMake(path: string, contents: string) { }) } +async function fileCopy(source: string, destination: string) { + return new Promise((resolve, reject) => { + fs.copyFile(source, destination, function(err) { + if (err) { + reject(err); + } else resolve(null); + }); + }) +} + export default fluentBitWrapper; \ No newline at end of file