From a3594445568e80451959c14f3b92d3faba97c2d3 Mon Sep 17 00:00:00 2001 From: Sumesh Pradhan Date: Tue, 6 Jun 2023 18:32:40 +0530 Subject: [PATCH] fix: improve starting page removal script with error handling (#23392) - Removed event listener python script, removed supervisor conf for the same. - Added new python script to handle the starting page init. - Python script will be called via a shell script with a failsafe removal of the loading page. - The above shell script will be run as a child of the run java script Tests: The child script exits after execution, and does not create zombie process even when the run java script is called multiple times with restarts. Verified exceptions and failsafe manually by introducing syntax errors and uninstalling requests module. --------- Co-authored-by: Shrikant Sharat Kandula --- Dockerfile | 2 +- deploy/docker/entrypoint.sh | 3 - deploy/docker/scripts/run-java.sh | 1 + .../docker/scripts/run-starting-page-init.sh | 4 + deploy/docker/scripts/starting-page-init.py | 81 +++++++++++++++++++ .../scripts/supervisor_event_listener.py | 80 ------------------ .../event_listeners/eventlistener.conf | 5 -- 7 files changed, 87 insertions(+), 89 deletions(-) create mode 100644 deploy/docker/scripts/run-starting-page-init.sh create mode 100644 deploy/docker/scripts/starting-page-init.py delete mode 100644 deploy/docker/scripts/supervisor_event_listener.py delete mode 100644 deploy/docker/templates/supervisord/event_listeners/eventlistener.conf diff --git a/Dockerfile b/Dockerfile index 7d747468c5..12db1b452f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --yes \ supervisor curl cron certbot nginx gnupg wget netcat openssh-client \ software-properties-common gettext \ - python3-pip python-setuptools git ca-certificates-java \ + python3-pip python3-requests python-setuptools git ca-certificates-java \ && wget -O - https://packages.adoptium.net/artifactory/api/gpg/key/public | apt-key add - \ && echo "deb https://packages.adoptium.net/artifactory/deb $(awk -F= '/^VERSION_CODENAME/{print$2}' /etc/os-release) main" | tee /etc/apt/sources.list.d/adoptium.list \ && apt-get update && apt-get install --no-install-recommends --yes temurin-17-jdk \ diff --git a/deploy/docker/entrypoint.sh b/deploy/docker/entrypoint.sh index 89e66940dd..10703c48ba 100644 --- a/deploy/docker/entrypoint.sh +++ b/deploy/docker/entrypoint.sh @@ -273,9 +273,6 @@ configure_supervisord() { fi cp -f "$SUPERVISORD_CONF_PATH/application_process/"*.conf /etc/supervisor/conf.d - - # Copy Supervisor Listiner confs to conf.d - cp -f "$SUPERVISORD_CONF_PATH/event_listeners/"*.conf /etc/supervisor/conf.d # Disable services based on configuration if [[ -z "${DYNO}" ]]; then diff --git a/deploy/docker/scripts/run-java.sh b/deploy/docker/scripts/run-java.sh index 46cafd854c..835b7b3220 100755 --- a/deploy/docker/scripts/run-java.sh +++ b/deploy/docker/scripts/run-java.sh @@ -60,6 +60,7 @@ while ! curl --fail --silent localhost/rts-api/v1/health-check; do done echo 'RTS started.' +sh /opt/appsmith/run-starting-page-init.sh & # Ref -Dlog4j2.formatMsgNoLookups=true https://spring.io/blog/2021/12/10/log4j2-vulnerability-and-spring-boot exec java ${APPSMITH_JAVA_ARGS:-} ${APPSMITH_JAVA_HEAP_ARG:-} \ diff --git a/deploy/docker/scripts/run-starting-page-init.sh b/deploy/docker/scripts/run-starting-page-init.sh new file mode 100644 index 0000000000..98f6503f42 --- /dev/null +++ b/deploy/docker/scripts/run-starting-page-init.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +python3 /opt/appsmith/starting-page-init.py +rm -f /opt/appsmith/editor/loading.html \ No newline at end of file diff --git a/deploy/docker/scripts/starting-page-init.py b/deploy/docker/scripts/starting-page-init.py new file mode 100644 index 0000000000..6da3ecdc4b --- /dev/null +++ b/deploy/docker/scripts/starting-page-init.py @@ -0,0 +1,81 @@ + +import os +import sys +import time +import shutil +import subprocess +import logging +import traceback +import atexit + + +LOADING_TEMPLATE_PAGE = r'/opt/appsmith/templates/appsmith_starting.html' +LOADING_PAGE_EDITOR = r'/opt/appsmith/editor/loading.html' +BACKEND_HEALTH_ENDPOINT = "http://localhost:8080/api/v1/health" +LOG_FILE = r'/appsmith-stacks/logs/backend/starting_page_init.log' +LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + +logging.basicConfig(filename = LOG_FILE, level = logging.NOTSET, format = LOG_FORMAT) + +try: + import requests +except ModuleNotFoundError as e: + logging.error("Module Not Found: " , e) + + +def get_backend_status(): + try: + return subprocess.getoutput("supervisorctl status backend").split()[1] + except subprocess.CalledProcessError as e: + logging.error("Subprocess Error ", e) + except ValueError as e: + logging.error("Value Error ", e) + +def check_health_endpoint(url,sleep_sec = 3,timeout_sec = 180): + for _ in range(timeout_sec//sleep_sec): + try: + if requests.get(url).ok: + logging.info('Backend health check successful.') + break + except ImportError as e: + logging.error("Import Error: ", e) + sys.exit(1) + except requests.RequestException: + pass # retry after sleep_sec + finally: + time.sleep(sleep_sec) + if get_backend_status() in ('FATAL' , 'BACKOFF'): + break + else: + logging.error('Timeout Error: Backend health check timeout.') + +def remove_loading_page(): + retries = 3 + for _ in range(retries): + try: + if os.path.exists(LOADING_PAGE_EDITOR): + os.remove(LOADING_PAGE_EDITOR) + break + except OSError as e: + logging.error("Failed to remove loading page ", e) + time.sleep(1) + else: + logging.error("Loading page removal failed after %i retries. Trying again one final time.", retries) + logging.info(subprocess.getoutput("rm -fv " + LOADING_PAGE_EDITOR)) + + +def add_loading_page(): + shutil.copyfile(LOADING_TEMPLATE_PAGE, LOADING_PAGE_EDITOR) + +@atexit.register +def failsafe(): + remove_loading_page() + +def main(): + add_loading_page() + check_health_endpoint(BACKEND_HEALTH_ENDPOINT) + remove_loading_page() + +if __name__ == '__main__': + main() + \ No newline at end of file diff --git a/deploy/docker/scripts/supervisor_event_listener.py b/deploy/docker/scripts/supervisor_event_listener.py deleted file mode 100644 index 9fca775bfd..0000000000 --- a/deploy/docker/scripts/supervisor_event_listener.py +++ /dev/null @@ -1,80 +0,0 @@ -from requests.exceptions import ConnectionError -import os -import requests -import sys -import shutil -import time - -LOADING_TEMPLATE_PAGE = r'/opt/appsmith/templates/appsmith_starting.html' -LOADING_PAGE_EDITOR = r'/opt/appsmith/editor/loading.html' -BACKEND_HEALTH_ENDPOINT = "http://localhost:8080/api/v1/health" - -def write_stdout(s): - # only eventlistener protocol messages may be sent to stdout - sys.stdout.write(s) - sys.stdout.flush() - -def write_stderr(s): - sys.stderr.write(s) - sys.stderr.flush() - -def wait_until_backend_healthy(): - sleep_sec = 3 - timeout_sec = 120 - for _ in range(timeout_sec//sleep_sec): - try: - if requests.get(BACKEND_HEALTH_ENDPOINT).ok: - write_stderr('\nBackend is healthy\n') - break - except ConnectionError: - pass # retry after sleep_sec - except Exception as ex: - write_stderr(ex) - break - finally: - time.sleep(sleep_sec) - else: - write_stderr('\nError: Backend health check timeout.\n') - remove_loading_page() - -def remove_loading_page(): - if os.path.exists(LOADING_PAGE_EDITOR): - os.remove(LOADING_PAGE_EDITOR) - -def main(): - while True: - # transition from ACKNOWLEDGED to READY - write_stdout('READY\n') - - # read header line and print it to stderr - line = sys.stdin.readline() - write_stderr(line) - - # read event payload and print it to stderr - headers = dict(x.split(':', 1) for x in line.split()) - data = sys.stdin.read(int(headers['len'])) - - if 'PROCESS_STATE_STARTING' in line: - data_params = dict([ x.split(':') for x in data.split()]) - if data_params['groupname'] == 'backend': - write_stderr('\nBackend State: STARTING\n') - shutil.copyfile(LOADING_TEMPLATE_PAGE, LOADING_PAGE_EDITOR) - - elif 'PROCESS_STATE_RUNNING' in line: - data_params = dict([ x.split(':') for x in data.split()]) - if data_params['groupname'] == 'backend': - write_stderr('\nBackend State: RUNNING\n') - wait_until_backend_healthy() - write_stderr(data) - - elif 'PROCESS_STATE_FATAL' in line: - data_params = dict([ x.split(':') for x in data.split()]) - if data_params['groupname'] == 'backend': - write_stderr('\nBackend State: FATAL\n') - remove_loading_page() - - # transition from READY to ACKNOWLEDGED - write_stdout('RESULT 2\nOK') - -if __name__ == '__main__': - main() diff --git a/deploy/docker/templates/supervisord/event_listeners/eventlistener.conf b/deploy/docker/templates/supervisord/event_listeners/eventlistener.conf deleted file mode 100644 index 4b9be55ee8..0000000000 --- a/deploy/docker/templates/supervisord/event_listeners/eventlistener.conf +++ /dev/null @@ -1,5 +0,0 @@ -[eventlistener:event_listener] -command=python3 /opt/appsmith/supervisor_event_listener.py -events=PROCESS_STATE -stderr_logfile=/appsmith-stacks/logs/appsmithctl/eventlistener.log -priority=1 \ No newline at end of file