fix: improve starting page removal script with error handling (#23392)

- Removed the event listener Python script and its supervisor conf.
- Added a new Python script to handle the starting page init.
- The Python script is called via a shell script that also removes the
loading page as a failsafe.
- The shell script is run as a child of the run-java script.

Tests:
The child script exits after execution and does not create a zombie
process, even when the run-java script is called multiple times across
restarts.
Verified exception handling and the failsafe manually by introducing syntax
errors and uninstalling the requests module.

---------

Co-authored-by: Shrikant Sharat Kandula <shrikant@appsmith.com>
This commit is contained in:
Sumesh Pradhan 2023-06-06 18:32:40 +05:30 committed by GitHub
parent bce5a0c7c5
commit a359444556
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 87 additions and 89 deletions

View File

@ -15,7 +15,7 @@ RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --yes \
supervisor curl cron certbot nginx gnupg wget netcat openssh-client \
software-properties-common gettext \
python3-pip python-setuptools git ca-certificates-java \
python3-pip python3-requests python-setuptools git ca-certificates-java \
&& wget -O - https://packages.adoptium.net/artifactory/api/gpg/key/public | apt-key add - \
&& echo "deb https://packages.adoptium.net/artifactory/deb $(awk -F= '/^VERSION_CODENAME/{print$2}' /etc/os-release) main" | tee /etc/apt/sources.list.d/adoptium.list \
&& apt-get update && apt-get install --no-install-recommends --yes temurin-17-jdk \

View File

@ -273,9 +273,6 @@ configure_supervisord() {
fi
cp -f "$SUPERVISORD_CONF_PATH/application_process/"*.conf /etc/supervisor/conf.d
# Copy Supervisor Listiner confs to conf.d
cp -f "$SUPERVISORD_CONF_PATH/event_listeners/"*.conf /etc/supervisor/conf.d
# Disable services based on configuration
if [[ -z "${DYNO}" ]]; then

View File

@ -60,6 +60,7 @@ while ! curl --fail --silent localhost/rts-api/v1/health-check; do
done
echo 'RTS started.'
sh /opt/appsmith/run-starting-page-init.sh &
# Ref -Dlog4j2.formatMsgNoLookups=true https://spring.io/blog/2021/12/10/log4j2-vulnerability-and-spring-boot
exec java ${APPSMITH_JAVA_ARGS:-} ${APPSMITH_JAVA_HEAP_ARG:-} \

View File

@ -0,0 +1,4 @@
#!/bin/bash
# Runs the starting-page init script, then removes the loading page
# unconditionally as a failsafe in case the Python script did not.
loading_page=/opt/appsmith/editor/loading.html

python3 /opt/appsmith/starting-page-init.py
rm -f "$loading_page"

View File

@ -0,0 +1,81 @@
import os
import sys
import time
import shutil
import subprocess
import logging
import traceback
import atexit
# Template of the "starting" page and the location it is copied to while the
# backend is coming up.
LOADING_TEMPLATE_PAGE = r'/opt/appsmith/templates/appsmith_starting.html'
LOADING_PAGE_EDITOR = r'/opt/appsmith/editor/loading.html'
# Endpoint polled to decide whether the backend is up.
BACKEND_HEALTH_ENDPOINT = "http://localhost:8080/api/v1/health"
LOG_FILE = r'/appsmith-stacks/logs/backend/starting_page_init.log'
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(filename=LOG_FILE, level=logging.NOTSET, format=LOG_FORMAT)

# requests is imported defensively so that a missing module is recorded in the
# log file instead of aborting the script at import time.
try:
    import requests
except ModuleNotFoundError as e:
    # The exception must go through a %s placeholder: logging formats with
    # `msg % args`, and a message without a placeholder makes the logging
    # call itself fail, so the record would never reach the log file.
    logging.error("Module Not Found: %s", e)
def get_backend_status():
    """Return the state column of ``supervisorctl status backend``.

    Returns:
        The second whitespace-separated field of the command output (for
        example ``'RUNNING'`` or ``'FATAL'``), or ``None`` when the output
        has fewer than two fields.
    """
    # getoutput() never raises CalledProcessError; it returns whatever the
    # command printed (stdout and stderr), so the output shape is validated
    # explicitly instead of relying on exception handlers.
    output = subprocess.getoutput("supervisorctl status backend")
    fields = output.split()
    if len(fields) < 2:
        logging.error("Unexpected supervisorctl output: %r", output)
        return None
    return fields[1]
def check_health_endpoint(url, sleep_sec=3, timeout_sec=180, request_timeout_sec=10):
    """Poll *url* until it reports healthy or waiting is no longer useful.

    Args:
        url: Health endpoint to GET.
        sleep_sec: Pause between attempts, in seconds.
        timeout_sec: Overall budget; the number of attempts is
            ``timeout_sec // sleep_sec``.
        request_timeout_sec: Upper bound for a single HTTP request, so one
            hung connection cannot stall the loop indefinitely.

    The function returns ``None`` in every case. It stops when the endpoint
    answers with an OK status, when supervisor reports the backend as
    ``FATAL`` or ``BACKOFF``, or when the attempts are exhausted (logged as a
    timeout). It exits the process with status 1 if the ``requests`` module
    is not available.
    """
    for _ in range(timeout_sec // sleep_sec):
        try:
            healthy = requests.get(url, timeout=request_timeout_sec).ok
        except (ImportError, NameError) as e:
            # When the module-level import of requests failed, the name is
            # unbound and the lookup raises NameError. This clause must come
            # before the RequestException one, whose expression would raise
            # the same NameError again.
            logging.error("Import Error: %s", e)
            sys.exit(1)
        except requests.RequestException:
            healthy = False  # backend not reachable yet; retry after sleep_sec
        if healthy:
            logging.info('Backend health check successful.')
            return
        time.sleep(sleep_sec)
        # No point in waiting further once supervisor has given up on (or is
        # backing off from) the backend process.
        if get_backend_status() in ('FATAL', 'BACKOFF'):
            logging.error('Backend is in a failed state; aborting health check.')
            return
    logging.error('Timeout Error: Backend health check timeout.')
def remove_loading_page():
    """Delete the loading page, retrying on failure.

    Tries ``os.remove`` up to three times, one second apart. A page that is
    already absent counts as success. If every attempt fails, a final
    ``rm -fv`` is run through the shell and its output is logged.
    """
    retries = 3
    for _ in range(retries):
        try:
            # Remove directly instead of checking existence first, so a file
            # that disappears between the check and the removal is harmless.
            os.remove(LOADING_PAGE_EDITOR)
        except FileNotFoundError:
            return
        except OSError as e:
            logging.error("Failed to remove loading page: %s", e)
            time.sleep(1)
        else:
            return
    logging.error("Loading page removal failed after %i retries. Trying again one final time.", retries)
    logging.info(subprocess.getoutput("rm -fv " + LOADING_PAGE_EDITOR))
def add_loading_page():
    """Copy the starting-page template to the editor's loading page path."""
    shutil.copyfile(src=LOADING_TEMPLATE_PAGE, dst=LOADING_PAGE_EDITOR)
def failsafe():
    """Exit hook: remove the loading page whenever the interpreter exits."""
    remove_loading_page()


# Registered explicitly rather than via the decorator; the effect is the same.
atexit.register(failsafe)
def main():
    """Show the loading page, wait for the backend, then remove the page."""
    # 1. Put the loading page in place.
    add_loading_page()
    # 2. Wait for the backend health endpoint (or for the wait to be abandoned).
    check_health_endpoint(BACKEND_HEALTH_ENDPOINT)
    # 3. Take the loading page down again.
    remove_loading_page()


if __name__ == '__main__':
    main()

View File

@ -1,80 +0,0 @@
from requests.exceptions import ConnectionError
import os
import requests
import sys
import shutil
import time
# Source template for the loading page and the path it is copied to.
LOADING_TEMPLATE_PAGE = '/opt/appsmith/templates/appsmith_starting.html'
LOADING_PAGE_EDITOR = '/opt/appsmith/editor/loading.html'
# Backend endpoint polled by the health check.
BACKEND_HEALTH_ENDPOINT = 'http://localhost:8080/api/v1/health'
def write_stdout(s):
    """Write *s* to stdout and flush immediately.

    Only eventlistener protocol messages may be sent to stdout.
    """
    out = sys.stdout
    out.write(s)
    out.flush()
def write_stderr(s):
    """Write *s* to stderr and flush immediately."""
    err = sys.stderr
    err.write(s)
    err.flush()
def wait_until_backend_healthy():
    """Poll the backend health endpoint, then remove the loading page.

    Makes up to ``120 // 3`` attempts, sleeping 3 seconds after each one.
    Polling stops on the first OK response, on any error other than a
    connection failure, or when the attempts run out. The loading page is
    removed in every case.
    """
    sleep_sec = 3
    timeout_sec = 120
    for _ in range(timeout_sec // sleep_sec):
        try:
            if requests.get(BACKEND_HEALTH_ENDPOINT).ok:
                write_stderr('\nBackend is healthy\n')
                break
        except ConnectionError:
            # This is requests.exceptions.ConnectionError (imported at the top
            # of the file): the backend is not accepting connections yet.
            pass  # retry after sleep_sec
        except Exception as ex:
            # write_stderr() needs a string. Passing the exception object
            # itself raised TypeError here, which skipped the loading page
            # removal below.
            write_stderr('\n%r\n' % (ex,))
            break
        finally:
            time.sleep(sleep_sec)
    else:
        write_stderr('\nError: Backend health check timeout.\n')
    remove_loading_page()
def remove_loading_page():
    """Delete the loading page if it is present; do nothing if it is absent."""
    try:
        # Remove directly rather than checking existence first, so a file that
        # vanishes between the check and the removal cannot raise.
        os.remove(LOADING_PAGE_EDITOR)
    except FileNotFoundError:
        pass
def _event_params(data):
    """Parse a whitespace-separated ``key:value`` event payload into a dict."""
    # Split each token once so a value that itself contains ':' stays intact.
    return dict(token.split(':', 1) for token in data.split())


def main():
    """Run the event-listener loop: read events from stdin and react to them.

    For events of the ``backend`` group:
      * PROCESS_STATE_STARTING copies the loading page template into place,
      * PROCESS_STATE_RUNNING waits for the backend to become healthy,
      * PROCESS_STATE_FATAL removes the loading page.

    The loop ends when stdin reaches end-of-file.
    """
    while True:
        # transition from ACKNOWLEDGED to READY
        write_stdout('READY\n')
        # read header line and print it to stderr
        line = sys.stdin.readline()
        if not line:
            # End of input: there is no header to parse, so stop instead of
            # failing on the missing 'len' field.
            return
        write_stderr(line)
        # read event payload
        headers = dict(x.split(':', 1) for x in line.split())
        data = sys.stdin.read(int(headers['len']))
        if 'PROCESS_STATE_STARTING' in line:
            if _event_params(data)['groupname'] == 'backend':
                write_stderr('\nBackend State: STARTING\n')
                shutil.copyfile(LOADING_TEMPLATE_PAGE, LOADING_PAGE_EDITOR)
        elif 'PROCESS_STATE_RUNNING' in line:
            if _event_params(data)['groupname'] == 'backend':
                write_stderr('\nBackend State: RUNNING\n')
                wait_until_backend_healthy()
                # NOTE(review): the original nesting of this call was lost with
                # the indentation; it is assumed to belong to the backend
                # branch. Confirm against the original file.
                write_stderr(data)
        elif 'PROCESS_STATE_FATAL' in line:
            if _event_params(data)['groupname'] == 'backend':
                write_stderr('\nBackend State: FATAL\n')
                remove_loading_page()
        # transition from READY to ACKNOWLEDGED
        write_stdout('RESULT 2\nOK')


if __name__ == '__main__':
    main()

View File

@ -1,5 +0,0 @@
; Supervisor event listener section: runs supervisor_event_listener.py and
; subscribes it to PROCESS_STATE events.
[eventlistener:event_listener]
command=python3 /opt/appsmith/supervisor_event_listener.py
events=PROCESS_STATE
; The listener's stderr output is written to this log file.
stderr_logfile=/appsmith-stacks/logs/appsmithctl/eventlistener.log
priority=1