Add monit watching for nginx and pm2
When I woke up this morning, the app had crashed because the MySQL connection was closed! I'm not sure why that caused a _crash_, or why pm2 didn't pick up on it and said the process was still online. (Maybe the process was running, but the server had stopped?) Those could be good to investigate… but better than diving too far into the details is to just address the high-level problem: if the app goes down for unexpected reasons, I want it back up!! lol In this change, we add `monit`, a solid system for monitoring processes (including checking for behavior, like responding to network requests), and configure it to watch the app process and the nginx process. To test, you can run `pm2 stop impress-2020` or `systemctl stop nginx`, and see that Monit brings them back up within seconds! This does add some potential surprise if you're _trying_ to take the processes down. The easiest way is to send the stop command through monit, like `monit stop nginx`. This will disable monitoring until you start it again through monit, I think? (You can also disable/enable monitoring as a direct command, regardless of app state.)
This commit is contained in:
parent
2f874653bf
commit
792da067e3
1 changed files with 69 additions and 2 deletions
|
@ -75,6 +75,10 @@
|
|||
args: "start",
|
||||
instances: "max",
|
||||
exec_mode: "cluster",
|
||||
// We add `app` to the end of the filename, to avoid a pm2
|
||||
// bug that changes the filename:
|
||||
// https://github.com/Unitech/pm2/issues/5218#issue-1044210369
|
||||
pid_file: "/home/{{ ansible_user_id }}/impress-2020-app.pid",
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -126,7 +130,7 @@
|
|||
- name: Add impress-2020 config file to nginx
|
||||
become: yes
|
||||
copy:
|
||||
content: >
|
||||
content: |
|
||||
server {
|
||||
server_name impress-2020-box.openneo.net;
|
||||
listen 80;
|
||||
|
@ -153,6 +157,59 @@
|
|||
notify:
|
||||
- Restart nginx
|
||||
|
||||
# Install the monit package with apt (as root), refreshing the package
# cache first.
- name: Install monit
  become: true
  apt:
    name: monit
    update_cache: true
|
||||
|
||||
# Write monit's global settings (pidfile location and a localhost-only HTTP
# server on port 2812) into conf-enabled, then restart monit.
- name: Add monit config file for global settings
  become: true
  copy:
    dest: "/etc/monit/conf-enabled/global-config"
    content: |
      # TODO: Add email monitoring (requires SMTP config)

      # This lets us call `sudo monit status` from the command line.
      # Without this, the `monit` command can't find the running service.
      set pidfile /var/run/monit.pid

      # This enables Monit's HTTP server, but only locally, which is
      # required for calling `sudo monit status` from the command line.
      set httpd port 2812 and
      use address localhost
      allow localhost
  notify: Restart monit
|
||||
|
||||
# Write a monit check for the pm2-managed app. Monit tracks the process via
# the pidfile named below, runs pm2 as the Ansible user to start, stop or
# reload it, and restarts it when the HTTP check on port 3000 fails.
# NOTE(review): the `-0` suffix in the pidfile name presumably comes from pm2
# appending an instance id to the configured pid_file — confirm.
- name: Add monit config to watch our pm2 app
  become: true
  copy:
    dest: "/etc/monit/conf-enabled/pm2-as-{{ ansible_user_id }}"
    content: |
      check process impress-2020-as-{{ ansible_user_id }} with pidfile /home/{{ ansible_user_id }}/impress-2020-app-0.pid
      start program = "/bin/pm2 start impress-2020" as uid "{{ ansible_user_id }}"
      stop program = "/bin/pm2 stop impress-2020" as uid "{{ ansible_user_id }}"
      restart program = "/bin/pm2 reload impress-2020" as uid "{{ ansible_user_id }}"
      if failed port 3000 protocol http then restart
      if 5 restarts within 5 cycles then alert
  notify: Reload monit
|
||||
|
||||
# Write a monit check for nginx. Monit tracks the process via
# /var/run/nginx.pid, controls it through systemctl, and restarts it when the
# HTTP-over-TLS check against impress-2020-box.openneo.net:443 fails.
- name: Add monit config to watch nginx
  become: true
  copy:
    dest: "/etc/monit/conf-enabled/nginx"
    content: |
      check process nginx with pidfile /var/run/nginx.pid
      start program = "/bin/systemctl start nginx"
      stop program = "/bin/systemctl stop nginx"
      restart program = "/bin/systemctl restart nginx"
      if failed host impress-2020-box.openneo.net port 443 type tcpssl protocol http then restart
      if 5 restarts within 5 cycles then alert
  notify: Reload monit
|
||||
|
||||
- name: Install dependencies for the npm module node-canvas
|
||||
become: yes
|
||||
apt:
|
||||
|
@ -168,6 +225,16 @@
|
|||
handlers:
|
||||
# Handler: restart the nginx unit through systemd (as root). Tasks trigger it
# with `notify: Restart nginx`.
# A handler may carry only one action module, so the leftover `service:` key
# is dropped in favour of `systemd:`.
- name: Restart nginx
  become: true
  systemd:
    name: nginx
    state: restarted
|
||||
# Handler: fully restart the monit unit through systemd (as root). Notified
# by the global-settings task.
- name: Restart monit
  become: true
  systemd:
    state: restarted
    name: monit
|
||||
# Handler: ask systemd to reload the monit unit (as root). Notified by the
# tasks that write the pm2 and nginx check files.
- name: Reload monit
  become: true
  systemd:
    state: reloaded
    name: monit
|
||||
|
|
Loading…
Reference in a new issue