PromucFlow_constructor/deploy/docker/fs/opt/appsmith/diagnostics.sh
Zachary Zondlo db3f35691d
Add a diagnostic script to assist in the debugging of issues with appsmith implementation (#35640)
## Description
Adds a script that gathers logs and system information into a tarball,
which Appsmith engineers can then extract and analyze.


Fixes #35639 #32971

## Automation

/ok-to-test tags=""

### 🔍 Cypress test results
<!-- This is an auto-generated comment: Cypress test results  -->
> [!WARNING]
> Tests have not run on the HEAD
4f8e27cd8bcef9233786f268f95e0b3e981c13d6 yet
> <hr>Tue, 27 Aug 2024 14:01:33 UTC
<!-- end of auto-generated comment: Cypress test results  -->


## Communication
Should the DevRel and Marketing teams inform users about this change?
- [ ] Yes
- [x] No


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Introduced a new diagnostic tool to gather system and application
metrics for improved troubleshooting and performance monitoring in the
Appsmith environment.
- The tool collects logs, configuration files, JVM properties, and
system-level information, compressing the data for easier transport.

- **Chores**
- Added a script for efficient collection and organization of diagnostic
data, enhancing operational capabilities.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
2024-08-29 14:06:30 +05:30

153 lines
3.5 KiB
Bash

#!/usr/bin/env bash
#
# Collects Appsmith configuration, logs, JVM data and system information
# into a staging directory that is later packed into a tarball.

# Only log files modified within this many minutes are collected.
modified_within_last_x_minutes=180
# Maximum number of trailing lines kept from each collected log file.
tail_lines=10000
hostname=$(hostname)
timestamp=$(date +%F_%H.%M.%S-%Z)
# Staging area for everything that is collected. When TMP is not set, fall
# back to a dedicated directory instead of letting the path expand to
# "/$hostname/$timestamp" at the filesystem root.
tmpdir="${TMP:-${TMPDIR:-/tmp}/appsmith-diagnostics}/$hostname/$timestamp"
# PID(s) of processes whose command line matches "-jar server.jar";
# empty when nothing matches.
java_pid="$(pgrep -f -- "-jar\sserver.jar")"
# Quoted so a TMP containing spaces or glob characters stays one path;
# the brace expansion still yields the three sub-directories.
mkdir -p "$tmpdir"/{java,config,proc}
#
# Config info
#
config_dir="$tmpdir/config"
# supervisord configuration (copied as a whole directory)
cp -r /tmp/appsmith/supervisor-conf.d "$config_dir/supervisor-conf.d"
# Caddy configuration
cp /tmp/appsmith/Caddyfile "$config_dir/Caddyfile"
# output of `env` when launched through run-with-env.sh
/opt/appsmith/run-with-env.sh env > "$config_dir/appsmith-env-config.txt"
#
# Log info
#
# Gather the last $tail_lines lines of every log file modified within the
# last $modified_within_last_x_minutes minutes, mirroring each file's
# absolute path underneath $tmpdir.
# find is given the directory itself (not a glob) so an empty directory does
# not turn into a literal "*" argument; names are NUL-delimited and read with
# IFS= so whitespace, backslashes and newlines in paths survive intact.
find /appsmith-stacks/logs -type f -mmin -"$modified_within_last_x_minutes" -print0 |
  while IFS= read -r -d '' log_file; do
    # The file may have been rotated away between find and this iteration.
    if [[ -e "$log_file" ]]; then
      # ${log_file:1} drops the leading "/" so the path nests under $tmpdir.
      mkdir -p "$tmpdir/$(dirname "${log_file:1}")"
      tail -n "$tail_lines" "$log_file" > "$tmpdir/${log_file:1}"
    fi
  done
#
# App info
#
# Copy the container info and infra details; each entry is "source:target",
# where target is the file name used inside $tmpdir.
for app_file in \
  /opt/appsmith/info.json:container-info.json \
  /tmp/appsmith/infra.json:infra-info.json; do
  cp "${app_file%%:*}" "$tmpdir/${app_file##*:}"
done
# capture the healthcheck output
/opt/appsmith/healthcheck.sh > "$tmpdir/healthcheck.txt"
#
# Java info
#
# pgrep may print several PIDs, one per line; jcmd is given exactly one
# here, so keep only the first.
jcmd_pid="${java_pid%%$'\n'*}"
if [[ -z "$jcmd_pid" ]]; then
  # With no PID, the diagnostic command name would become jcmd's first
  # argument, so skip the section instead of producing misleading files.
  echo "No Java process matching 'server.jar' found; skipping Java diagnostics" >&2
else
  # gather the java vm.system_properties
  jcmd "$jcmd_pid" VM.system_properties > "$tmpdir/java/vm.system_properties.txt"
  # gather the java vm.flags
  jcmd "$jcmd_pid" VM.flags > "$tmpdir/java/vm.flags.txt"
  # gather the java gc.heap_dump (the target path is passed as an argument
  # rather than captured from stdout)
  jcmd "$jcmd_pid" GC.heap_dump "$tmpdir/java/heap-dump.log"
  # gather the java thread.print
  jcmd "$jcmd_pid" Thread.print > "$tmpdir/java/thread.print.txt"
  # gather the java gc.class_histogram
  jcmd "$jcmd_pid" GC.class_histogram > "$tmpdir/java/gc.class_histogram.txt"
fi
#
# System info
#
# configured umask
umask > "$tmpdir/umask.txt"
# system uptime
uptime > "$tmpdir/uptime.txt"
# CPU and memory details straight from /proc
for proc_file in cpuinfo meminfo; do
  cp "/proc/$proc_file" "$tmpdir/proc/$proc_file"
done
# running processes
ps fauxww > "$tmpdir/ps_fauxww.txt"
# free memory
free -m > "$tmpdir/free_m.txt"
# disk usage, once per df flag: -h, -i and -k
for df_flag in h i k; do
  df "-$df_flag" > "$tmpdir/df_$df_flag.txt"
done
# kernel and machine identification
uname -a > "$tmpdir/uname_a.txt"
# gather memory info
#######################################
# Print a short memory utilisation report built from `free -m`.
# Arguments: none
# Outputs:   total, available, buffer+cache and free memory (in MB) on stdout
#######################################
function memory_util ()
{
  local mem_fields total_mem free_mem buffcache_mem available_mem
  # LC_ALL=C keeps the "Mem:" row label untranslated. Columns used from that
  # row: $2 total, $4 free, $6 buff/cache, $7 available.
  mem_fields=$(LC_ALL=C free -m | awk '/^Mem/ {print $2, $4, $6, $7}')
  read -r total_mem free_mem buffcache_mem available_mem <<< "$mem_fields"
  echo -e "........................................\nMEMORY UTILIZATION\n"
  echo -e "Total Memory\t\t:$total_mem MB"
  echo -e "Available Memory\t:$available_mem MB"
  echo -e "Buffer+Cache Memory\t:$buffcache_mem MB"
  echo -e "Free Memory\t\t:$free_mem MB"
}
memory_util > "$tmpdir/memory.txt"
# gather cpu info
#######################################
# Print the CPU core count and the 15-minute load average.
# Arguments: none
# Outputs:   report on stdout
#######################################
function cpu_util ()
{
  local cores load_average
  # number of cpu cores
  cores=$(nproc)
  # cpu load average of 15 minutes: always the LAST field of uptime's output.
  # A fixed field index is wrong because the number of fields before the
  # load averages depends on how long the machine has been up.
  load_average=$(uptime | awk '{print $NF}')
  echo -e "........................................\nCPU UTILIZATION\n"
  echo -e "Number of Cores\t:$cores\n"
  echo -e "Total CPU Load Average for the past 15 minutes\t:$load_average\n"
}
cpu_util > "$tmpdir/cpu.txt"
# gather disk info
#######################################
# Print used and available space of the root filesystem as percentages.
# Arguments: none
# Outputs:   report on stdout; "unknown" when df yields no numeric value
#######################################
function disk_util ()
{
  local disk_used disk_avail
  # Ask df for "/" only, in POSIX (-P) layout: one header line followed by
  # one line for the filesystem, with the usage percentage in column 5.
  disk_used=$(df -P / | awk 'NR == 2 {print $5}')
  disk_used=${disk_used%\%}
  echo -e "........................................\nDISK UTILIZATION\n"
  if [[ "$disk_used" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value with a leading zero is not read as octal.
    disk_avail=$(( 100 - 10#$disk_used ))
    echo -e "Root(/) Used\t\t:$disk_used%"
    echo -e "Root(/) Available\t:$disk_avail%\n"
  else
    # An empty or non-numeric value would be an arithmetic syntax error.
    echo -e "Root(/) Used\t\t:unknown"
    echo -e "Root(/) Available\t:unknown\n"
  fi
}
disk_util > "$tmpdir/disk.txt"
#
# Create tarball and clean up
#
# ${tmpdir%/*/*} strips "/$hostname/$timestamp", leaving the directory the
# staging tree was created in; tar runs from there so archive members are
# stored as "$hostname/$timestamp/...".
archive="$hostname-$timestamp.tar.gz"
if tar -C "${tmpdir%/*/*}" -czpf "$archive" "$hostname/$timestamp/"; then
  # Remove only what this run staged. Deleting ${tmpdir%/*/*} would wipe the
  # whole parent directory, including files that do not belong to this run.
  rm -rf -- "${tmpdir:?}"
  # Drop the per-host directory too, but only if nothing else is left in it.
  rmdir -- "${tmpdir%/*}" 2>/dev/null
  echo "Diagnostics gathered in $PWD/$hostname-$timestamp.tar.gz"
else
  # Keep the staged files so the collected data is not lost.
  echo "Failed to create $archive; collected files were left in $tmpdir" >&2
  exit 1
fi