impress-2020/deploy/playbooks/setup.yml
Matchu 792da067e3 Add monit watching for nginx and pm2
When I woke up this morning, the app had crashed because the mysql connection was closed!

I'm not sure why that caused a _crash_, or why pm2 didn't pick up on it and said the process was still online. (Maybe the process was running, but the server had stopped?) Those could be good to investigate…

…but rather than diving too far into the details, it's better to just address the high-level problem: if the app goes down for unexpected reasons, I want it back up!! lol

In this change, we add `monit`, a solid system for monitoring processes (including checking for behavior, like responding to net requests), and configure it to watch the app process and the nginx process.

To test, you can run `pm2 stop impress-2020`, or `systemctl stop nginx`, to see that Monit brings them back up within seconds!

This does add some potential surprise if you're _trying_ to take the processes down. The easiest way is to send the stop command through monit, like `monit stop nginx`. I think this disables monitoring until you start the process again through monit. (You can also disable/enable monitoring with a direct command, regardless of app state.)
2021-11-03 16:32:14 -07:00

240 lines
7.5 KiB
YAML

---
# Provisions a `webserver` host for the impress-2020 app:
#   1. a shared `web` group and the /srv/impress-2020 app folder,
#   2. Node v16, Yarn, and pm2 (with a pm2 ecosystem file for the app),
#   3. nginx as an HTTPS reverse proxy, with a certificate from certbot,
#   4. monit, watching both the pm2 app process and nginx,
#   5. native build dependencies for the npm module node-canvas.
- name: Set up the environment for the impress-2020 app
  hosts: webserver
  vars:
    email_address: "emi@matchu.dev"  # TODO: Extract this to personal config?
  tasks:
    - name: Create web user group
      become: true
      group:
        name: web

    - name: Add current user to web group
      become: true
      # NOTE(review): `group` sets the user's *primary* group, and `append`
      # only applies to the `groups` (supplementary groups) option. If the
      # intent is to add `web` as a supplementary group, this should be
      # `groups: web` — confirm before changing, since deploys may rely on
      # new files being created with group `web`.
      user:
        name: "{{ ansible_user_id }}"
        group: web
        append: true

    - name: Create the app folder
      become: true
      file:
        path: /srv/impress-2020
        state: directory
        # Root and the `web` group may read/write this folder. Everyone else
        # may only read it.
        group: web
        mode: "u=rwx,g=rwx,o=rx"

    - name: Add Nodesource apt key
      become: true
      apt_key:
        id: 9FD3B784BC1C6FC31A8A0A1C1655A0AB68576280
        url: https://deb.nodesource.com/gpgkey/nodesource.gpg.key

    - name: Add Node v16 apt repository
      become: true
      apt_repository:
        repo: deb https://deb.nodesource.com/node_16.x focal main

    - name: Install Node v16
      become: true
      apt:
        update_cache: true
        name: nodejs
        state: present

    - name: Install Yarn
      become: true
      npm:
        name: yarn
        global: true

    - name: Install pm2
      become: true
      npm:
        name: pm2
        global: true

    - name: Create pm2 startup script
      # The current user is going to become the pm2 owner of the app server
      # process. They'll be able to manage it without `sudo`, including during
      # normal deploys, and run `pm2 monit` from their shell to see status.
      become: true
      command: "pm2 startup systemd {{ ansible_user_id }} --hp /home/{{ ansible_user_id }}"

    - name: Create pm2 ecosystem file
      copy:
        content: |
          module.exports = {
            apps: [
              {
                name: "impress-2020",
                cwd: "/srv/impress-2020/current",
                script: "yarn",
                args: "start",
                instances: "max",
                exec_mode: "cluster",
                // We add `app` to the end of the filename, to avoid a pm2
                // bug that changes the filename:
                // https://github.com/Unitech/pm2/issues/5218#issue-1044210369
                pid_file: "/home/{{ ansible_user_id }}/impress-2020-app.pid",
              }
            ]
          }
        dest: "~/ecosystem.config.js"
        # Create a temporary backup file, so we can use it to delete the old
        # version of the services. (This is important if e.g. a service is
        # removed or renamed, in which case deleting from the *new* config file
        # wouldn't include it.)
        backup: true
      register: pm2_ecosystem_file

    # The next two tasks only run when the ecosystem file was rewritten AND a
    # backup of the previous version exists (i.e. not on the first run).
    - name: Delete old pm2 services if config file changed
      command: "pm2 delete {{ pm2_ecosystem_file.backup_file | quote }}"
      when: pm2_ecosystem_file is changed and pm2_ecosystem_file.backup_file is defined

    - name: Delete old pm2 config file if it changed
      file:
        path: "{{ pm2_ecosystem_file.backup_file }}"
        state: absent
      when: pm2_ecosystem_file is changed and pm2_ecosystem_file.backup_file is defined

    - name: Start pm2 services
      command: "pm2 start ~/ecosystem.config.js"

    - name: Save pm2 startup script
      command: pm2 save

    - name: Install core snap
      become: true
      community.general.snap:
        name: core

    - name: Install certbot as a snap
      become: true
      community.general.snap:
        name: certbot
        classic: true

    # nginx must be installed *before* certbot runs: the `--nginx` plugin
    # used below needs a working nginx installation to obtain the
    # certificate.
    - name: Install nginx
      become: true
      apt:
        update_cache: true
        name: nginx
        state: present

    # This must run before the site config below is added, because that
    # config references the certificate files under /etc/letsencrypt.
    - name: Set up certbot
      become: true
      command: >-
        certbot certonly --nginx -n --agree-tos
        --email {{ email_address }}
        --domains impress-2020-box.openneo.net

    # Port 80 redirects to HTTPS; port 443 terminates TLS and proxies every
    # request to the app server on 127.0.0.1:3000.
    - name: Add impress-2020 config file to nginx
      become: true
      copy:
        content: |
          server {
            server_name impress-2020-box.openneo.net;
            listen 80;
            if ($host = impress-2020-box.openneo.net) {
              return 301 https://$host$request_uri;
            }
          }
          server {
            server_name impress-2020-box.openneo.net;
            listen 443 ssl;
            ssl_certificate /etc/letsencrypt/live/impress-2020-box.openneo.net/fullchain.pem;
            ssl_certificate_key /etc/letsencrypt/live/impress-2020-box.openneo.net/privkey.pem;
            include /etc/letsencrypt/options-ssl-nginx.conf;
            ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
            ssl_session_cache shared:SSL:10m; # https://superuser.com/q/1484466/14127
            # TODO: Serve static files directly, instead of through the proxy
            location / {
              proxy_pass http://127.0.0.1:3000;
            }
          }
        dest: /etc/nginx/sites-enabled/impress-2020
      notify:
        - Restart nginx

    - name: Install monit
      become: true
      apt:
        update_cache: true
        name: monit
        state: present

    - name: Add monit config file for global settings
      become: true
      copy:
        content: |
          # TODO: Add email monitoring (requires SMTP config)
          # This lets us call `sudo monit status` from the command line.
          # Without this, the `monit` command can't find the running service.
          set pidfile /var/run/monit.pid
          # This enables Monit's HTTP server, but only locally, which is
          # required for calling `sudo monit status` from the command line.
          set httpd port 2812 and
            use address localhost
            allow localhost
        dest: /etc/monit/conf-enabled/global-config
      notify:
        - Restart monit

    # The pidfile watched here ends in `-0`, unlike the `pid_file` configured
    # in the pm2 ecosystem file; see the pm2 issue linked there.
    # NOTE(review): the app runs with `instances: "max"`, so presumably only
    # the first instance's pidfile is watched — confirm that is intended. The
    # port 3000 HTTP check still covers the app as a whole.
    - name: Add monit config to watch our pm2 app
      become: true
      copy:
        content: |
          check process impress-2020-as-{{ ansible_user_id }} with pidfile /home/{{ ansible_user_id }}/impress-2020-app-0.pid
            start program = "/bin/pm2 start impress-2020" as uid "{{ ansible_user_id }}"
            stop program = "/bin/pm2 stop impress-2020" as uid "{{ ansible_user_id }}"
            restart program = "/bin/pm2 reload impress-2020" as uid "{{ ansible_user_id }}"
            if failed port 3000 protocol http then restart
            if 5 restarts within 5 cycles then alert
        dest: "/etc/monit/conf-enabled/pm2-as-{{ ansible_user_id }}"
      notify:
        - Reload monit

    - name: Add monit config to watch nginx
      become: true
      copy:
        content: |
          check process nginx with pidfile /var/run/nginx.pid
            start program = "/bin/systemctl start nginx"
            stop program = "/bin/systemctl stop nginx"
            restart program = "/bin/systemctl restart nginx"
            if failed host impress-2020-box.openneo.net port 443 type tcpssl protocol http then restart
            if 5 restarts within 5 cycles then alert
        dest: "/etc/monit/conf-enabled/nginx"
      notify:
        - Reload monit

    - name: Install dependencies for the npm module node-canvas
      become: true
      apt:
        update_cache: true
        name:
          - build-essential
          - libcairo2-dev
          - libpango1.0-dev
          - libjpeg-dev
          - libgif-dev
          - librsvg2-dev

  # Handlers run once at the end of the play, and only if a task that
  # notifies them reported a change.
  handlers:
    - name: Restart nginx
      become: true
      systemd:
        name: nginx
        state: restarted

    - name: Restart monit
      become: true
      systemd:
        name: monit
        state: restarted

    - name: Reload monit
      become: true
      systemd:
        name: monit
        state: reloaded