diff --git a/monitoring/.POSTGRES_PASSWORD b/monitoring/.POSTGRES_PASSWORD new file mode 100644 index 0000000..1f5a82f --- /dev/null +++ b/monitoring/.POSTGRES_PASSWORD @@ -0,0 +1 @@ +zabbix diff --git a/monitoring/.POSTGRES_USER b/monitoring/.POSTGRES_USER new file mode 100644 index 0000000..1f5a82f --- /dev/null +++ b/monitoring/.POSTGRES_USER @@ -0,0 +1 @@ +zabbix diff --git a/monitoring/.env_agent b/monitoring/.env_agent new file mode 100644 index 0000000..19eaa44 --- /dev/null +++ b/monitoring/.env_agent @@ -0,0 +1,37 @@ +ZBX_HOSTNAME=Zabbix +# ZBX_SOURCEIP= +# ZBX_DEBUGLEVEL=3 +# ZBX_ENABLEREMOTECOMMANDS=0 # Deprecated since 5.0.0 +# ZBX_LOGREMOTECOMMANDS=0 +# ZBX_HOSTINTERFACE= # Available since 4.4.0 +# ZBX_HOSTINTERFACEITEM= # Available since 4.4.0 +# ZBX_SERVER_HOST=10.110.0.5 +# ZBX_PASSIVE_ALLOW=true +# ZBX_PASSIVESERVERS= +# ZBX_ACTIVE_ALLOW=true +# ZBX_ACTIVESERVERS= +# ZBX_LISTENIP= +# ZBX_STARTAGENTS=3 +# ZBX_HOSTNAMEITEM=system.hostname +# ZBX_METADATA= +# ZBX_METADATAITEM= +# ZBX_REFRESHACTIVECHECKS=120 +# ZBX_BUFFERSEND=5 +# ZBX_BUFFERSIZE=100 +# ZBX_MAXLINESPERSECOND=20 +# ZBX_ALIAS="" +# ZBX_TIMEOUT=3 +# ZBX_UNSAFEUSERPARAMETERS=0 +# ZBX_LOADMODULE="dummy1.so,dummy2.so,dummy10.so" +# ZBX_TLSCONNECT=unencrypted +# ZBX_TLSACCEPT=unencrypted +# ZBX_TLSCAFILE= +# ZBX_TLSCRLFILE= +# ZBX_TLSSERVERCERTISSUER= +# ZBX_TLSSERVERCERTSUBJECT= +# ZBX_TLSCERTFILE= +# ZBX_TLSKEYFILE= +# ZBX_TLSPSKIDENTITY= +# ZBX_TLSPSKFILE= +# ZBX_DENYKEY=system.run[*] +# ZBX_ALLOWKEY= diff --git a/monitoring/.env_db_pgsql b/monitoring/.env_db_pgsql new file mode 100644 index 0000000..3c30824 --- /dev/null +++ b/monitoring/.env_db_pgsql @@ -0,0 +1,9 @@ +# DB_SERVER_HOST=postgres-server +# DB_SERVER_PORT=5432 +# POSTGRES_USER=zabbix +POSTGRES_USER_FILE=/run/secrets/POSTGRES_USER +# POSTGRES_PASSWORD=zabbix +POSTGRES_PASSWORD_FILE=/run/secrets/POSTGRES_PASSWORD +POSTGRES_DB=zabbix +# DB_SERVER_SCHEMA=public +# ENABLE_TIMESCALEDB=true diff --git a/monitoring/.env_srv 
b/monitoring/.env_srv new file mode 100644 index 0000000..a0fa480 --- /dev/null +++ b/monitoring/.env_srv @@ -0,0 +1,60 @@ +# ZBX_LISTENIP= +# ZBX_HISTORYSTORAGEURL=http://elasticsearch:9200/ # Available since 3.4.5 +# ZBX_HISTORYSTORAGETYPES=uint,dbl,str,log,text # Available since 3.4.5 +# ZBX_DBTLSCONNECT=required # Available since 5.0.0 +# ZBX_DBTLSCAFILE=/run/secrets/root-ca.pem # Available since 5.0.0 +# ZBX_DBTLSCERTFILE=/run/secrets/client-cert.pem # Available since 5.0.0 +# ZBX_DBTLSKEYFILE=/run/secrets/client-key.pem # Available since 5.0.0 +# ZBX_DBTLSCIPHER= # Available since 5.0.0 +# ZBX_DBTLSCIPHER13= # Available since 5.0.0 +# ZBX_DEBUGLEVEL=3 +# ZBX_STARTPOLLERS=5 +# ZBX_IPMIPOLLERS=0 +# ZBX_STARTPREPROCESSORS=3 # Available since 3.4.0 +# ZBX_STARTPOLLERSUNREACHABLE=1 +# ZBX_STARTTRAPPERS=5 +# ZBX_STARTPINGERS=1 +# ZBX_STARTDISCOVERERS=1 +# ZBX_STARTHTTPPOLLERS=1 +# ZBX_STARTTIMERS=1 +# ZBX_STARTESCALATORS=1 +# ZBX_STARTALERTERS=3 # Available since 3.4.0 +# ZBX_JAVAGATEWAY_ENABLE=true +# ZBX_JAVAGATEWAY=zabbix-java-gateway +# ZBX_JAVAGATEWAYPORT=10052 +# ZBX_STARTJAVAPOLLERS=5 +# ZBX_STARTVMWARECOLLECTORS=0 +# ZBX_VMWAREFREQUENCY=60 +# ZBX_VMWAREPERFFREQUENCY=60 +# ZBX_VMWARECACHESIZE=8M +# ZBX_VMWARETIMEOUT=10 +# ZBX_ENABLE_SNMP_TRAPS=true +# ZBX_SOURCEIP= +# ZBX_HOUSEKEEPINGFREQUENCY=1 +# ZBX_MAXHOUSEKEEPERDELETE=5000 +# ZBX_SENDERFREQUENCY=30 +# ZBX_CACHESIZE=8M +# ZBX_CACHEUPDATEFREQUENCY=60 +# ZBX_STARTDBSYNCERS=4 +# ZBX_HISTORYCACHESIZE=16M +# ZBX_HISTORYINDEXCACHESIZE=4M +# ZBX_TRENDCACHESIZE=4M +# ZBX_VALUECACHESIZE=8M +# ZBX_TIMEOUT=4 +# ZBX_TRAPPERIMEOUT=300 +# ZBX_UNREACHABLEPERIOD=45 +# ZBX_UNAVAILABLEDELAY=60 +# ZBX_UNREACHABLEDELAY=15 +# ZBX_LOGSLOWQUERIES=3000 +# ZBX_EXPORTFILESIZE= +# ZBX_STARTPROXYPOLLERS=1 +# ZBX_PROXYCONFIGFREQUENCY=3600 +# ZBX_PROXYDATAFREQUENCY=1 +# ZBX_LOADMODULE="dummy1.so,dummy2.so,dummy10.so" +# ZBX_TLSCAFILE= +# ZBX_TLSCRLFILE= +# ZBX_TLSCERTFILE= +# ZBX_TLSKEYFILE= +# ZBX_VAULTDBPATH= +# 
ZBX_VAULTURL=https://127.0.0.1:8200 +# VAULT_TOKEN= diff --git a/monitoring/.env_web b/monitoring/.env_web new file mode 100644 index 0000000..41e0820 --- /dev/null +++ b/monitoring/.env_web @@ -0,0 +1,26 @@ +# ZBX_SERVER_HOST=zabbix-server +# ZBX_SERVER_PORT=10051 +# ZBX_SERVER_NAME=Monitoring +# ZBX_DB_ENCRYPTION=true # Available since 5.0.0 +# ZBX_DB_KEY_FILE=/run/secrets/client-key.pem # Available since 5.0.0 +# ZBX_DB_CERT_FILE=/run/secrets/client-cert.pem # Available since 5.0.0 +# ZBX_DB_CA_FILE=/run/secrets/root-ca.pem # Available since 5.0.0 +# ZBX_DB_VERIFY_HOST=false # Available since 5.0.0 +# ZBX_DB_CIPHER_LIST= # Available since 5.0.0 +# ZBX_VAULTDBPATH= +# ZBX_VAULTURL=https://127.0.0.1:8200 +# VAULT_TOKEN= +# ZBX_HISTORYSTORAGEURL=http://elasticsearch:9200/ # Available since 3.4.5 +# ZBX_HISTORYSTORAGETYPES=['uint', 'dbl', 'str', 'text', 'log'] # Available since 3.4.5 +# ENABLE_WEB_ACCESS_LOG=true +# ZBX_MAXEXECUTIONTIME=600 +# ZBX_MEMORYLIMIT=128M +# ZBX_POSTMAXSIZE=16M +# ZBX_UPLOADMAXFILESIZE=2M +# ZBX_MAXINPUTTIME=300 +# ZBX_SESSION_NAME=zbx_sessionid +# Timezone one of: http://php.net/manual/en/timezones.php +# PHP_TZ=Europe/Riga +# ZBX_DENY_GUI_ACCESS=false +# ZBX_GUI_ACCESS_IP_RANGE=['127.0.0.1'] +# ZBX_GUI_WARNING_MSG=Zabbix is under maintenance. 
diff --git a/monitoring/README.md b/monitoring/README.md new file mode 100644 index 0000000..bf9ebf2 --- /dev/null +++ b/monitoring/README.md @@ -0,0 +1,50 @@ +# Installing the Zabbix server + +Change the default passwords and ports and set the listen IP (ports `8080/tcp` and `10051/tcp` will be open on all interfaces; use a firewall or specify the address of the required interface), then run: +```bash +docker-compose up -d +``` + +# Installing the Zabbix agent + +Download the package from the repository [https://repo.zabbix.com/zabbix/5.2/ubuntu/pool/main/z/zabbix/](https://repo.zabbix.com/zabbix/5.2/ubuntu/pool/main/z/zabbix/) and run: +```bash +dpkg -i zabbix-agent_5.2.*.deb +``` +Change the default values in `/etc/zabbix/zabbix_agent2.conf`: + +* `Hostname` the same as in the zabbix-server web interface; +* `Server` and `ServerActive` set to the zabbix server IP or DNS name; +* `ListenIP` to a local network IP reachable from the zabbix server, or set firewall rules to restrict access to port `10050`; +* uncomment `Plugins.Docker.Endpoint=unix:///var/run/docker.sock`. + +# Adding the host + +Log into your Zabbix server (default login and password: `Admin` - `zabbix`) and click on the Configuration tab and then the Hosts tab. Click the Create host button near the top right corner. In the resulting page, change the Host name and IP ADDRESS sections to match the information for your remote server. Set the `{$URL}` macro to the relayer status URL, for example `http://localhost/v1/status` or `https://domain.name/v1/status`. + +# Import templates + +Import templates using the WebUI: +* [Docker-template.yaml](/monitoring/templates/Docker-template.yaml); +* [Tornado-relayer-template.yaml](/monitoring/templates/Tornado-relayer-template.yaml). + +Link the templates to the added host. It is also recommended to link the `Linux CPU by Zabbix agent`, `Linux filesystems by Zabbix agent` and `Linux memory by Zabbix agent` templates to the host. 
+ +# Alerts + +In WebUI - Administration -> Media types -> Telegram: +``` +https://git.zabbix.com/projects/ZBX/repos/zabbix/browse/templates/media/telegram + +1. Register bot: send "/newbot" to @BotFather and follow instructions +2. Copy and paste the obtained token into the "Token" field above +3. If you want to send personal notifications, you need to get chat id of the user you want to send messages to: + 3.1. Send "/getid" to "@myidbot" in Telegram messenger + 3.2. Copy returned chat id and save it in the "Telegram Webhook" media for the user + 3.3. Ask the user to send "/start" to your bot (Telegram bot won't send anything to the user without it) +4. If you want to send group notifications, you need to get group id of the group you want to send messages to: + 4.1. Add "@myidbot" to your group + 4.2. Send "/getgroupid@myidbot" in your group + 4.3. Copy returned group id save it in the "Telegram Webhook" media for the user you created for group notifications + 4.4. Send "/start@your_bot_name_here" in your group (Telegram bot won't send anything to the group without it) +``` \ No newline at end of file diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000..5bbd46e --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,186 @@ +# Restrict access to 10051/tcp on public ip + +version: '3.5' +services: + zabbix-server: + image: zabbix/zabbix-server-pgsql:alpine-5.2-latest + restart: always + ports: + - "10051:10051" + volumes: + - /etc/localtime:/etc/localtime:ro + - /etc/timezone:/etc/timezone:ro + - ./zbx_env/usr/lib/zabbix/alertscripts:/usr/lib/zabbix/alertscripts:ro + - ./zbx_env/usr/lib/zabbix/externalscripts:/usr/lib/zabbix/externalscripts:ro + - ./zbx_env/var/lib/zabbix/export:/var/lib/zabbix/export:rw + - ./zbx_env/var/lib/zabbix/modules:/var/lib/zabbix/modules:ro + - ./zbx_env/var/lib/zabbix/enc:/var/lib/zabbix/enc:ro + - ./zbx_env/var/lib/zabbix/ssh_keys:/var/lib/zabbix/ssh_keys:ro + - 
./zbx_env/var/lib/zabbix/mibs:/var/lib/zabbix/mibs:ro + - ./zbx_env/var/lib/zabbix/snmptraps:/var/lib/zabbix/snmptraps:ro + ulimits: + nproc: 65535 + nofile: + soft: 20000 + hard: 40000 + deploy: + resources: + limits: + cpus: '0.70' + memory: 1G + reservations: + cpus: '0.5' + memory: 512M + env_file: + - .env_db_pgsql + - .env_srv + secrets: + - POSTGRES_USER + - POSTGRES_PASSWORD + depends_on: + - postgres-server + networks: + zbx_net_backend: + aliases: + - zabbix-server + - zabbix-server-pgsql + - zabbix-server-alpine-pgsql + - zabbix-server-pgsql-alpine + zbx_net_frontend: + stop_grace_period: 30s + sysctls: + - net.ipv4.ip_local_port_range=1024 65000 + - net.ipv4.conf.all.accept_redirects=0 + - net.ipv4.conf.all.secure_redirects=0 + - net.ipv4.conf.all.send_redirects=0 + labels: + com.zabbix.description: "Zabbix server with PostgreSQL database support" + com.zabbix.company: "Zabbix LLC" + com.zabbix.component: "zabbix-server" + com.zabbix.dbtype: "pgsql" + com.zabbix.os: "alpine" + + zabbix-web: + image: zabbix/zabbix-web-nginx-pgsql:alpine-5.2-latest + restart: always + ports: + - "8080:8080" + volumes: + - /etc/localtime:/etc/localtime:ro + - /etc/timezone:/etc/timezone:ro + - ./zbx_env/etc/ssl/nginx:/etc/ssl/nginx:ro + - ./zbx_env/usr/share/zabbix/modules/:/usr/share/zabbix/modules/:ro + deploy: + resources: + limits: + cpus: '0.70' + memory: 512M + reservations: + cpus: '0.5' + memory: 256M + env_file: + - .env_db_pgsql + - .env_web + secrets: + - POSTGRES_USER + - POSTGRES_PASSWORD + depends_on: + - postgres-server + - zabbix-server + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/"] + interval: 10s + timeout: 5s + retries: 3 + networks: + zbx_net_backend: + aliases: + - zabbix-web-nginx-pgsql + - zabbix-web-nginx-alpine-pgsql + - zabbix-web-nginx-pgsql-alpine + zbx_net_frontend: + stop_grace_period: 10s + sysctls: + - net.core.somaxconn=65535 + labels: + com.zabbix.description: "Zabbix frontend on Nginx web-server with PostgreSQL 
database support" + com.zabbix.company: "Zabbix LLC" + com.zabbix.component: "zabbix-frontend" + com.zabbix.webserver: "nginx" + com.zabbix.dbtype: "pgsql" + com.zabbix.os: "alpine" + + zabbix-agent: + image: zabbix/zabbix-agent2:alpine-5.2-latest + restart: always + volumes: + - /etc/localtime:/etc/localtime:ro + - /etc/timezone:/etc/timezone:ro + - /var/run/docker.sock:/var/run/docker.sock + env_file: + - .env_agent + privileged: true + user: root + pid: "host" + networks: + zbx_net_backend: + aliases: + - zabbix-agent + - zabbix-agent-passive + - zabbix-agent-alpine + stop_grace_period: 5s + + postgres-server: + image: postgres:alpine + restart: always + volumes: + - ./zbx_env/var/lib/postgresql/data:/var/lib/postgresql/data:rw + env_file: + - .env_db_pgsql + secrets: + - POSTGRES_USER + - POSTGRES_PASSWORD + stop_grace_period: 1m + networks: + zbx_net_backend: + aliases: + - postgres-server + - pgsql-server + - pgsql-database + + portainer: + image: portainer/portainer:latest + restart: always + ports: + - "9000:9000" + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - portainer-data:/data + +networks: + zbx_net_frontend: + driver: bridge + driver_opts: + com.docker.network.enable_ipv6: "false" + ipam: + driver: default + config: + - subnet: 172.16.238.0/24 + zbx_net_backend: + driver: bridge + driver_opts: + com.docker.network.enable_ipv6: "false" + internal: true + ipam: + driver: default + config: + - subnet: 172.16.239.0/24 + +secrets: + POSTGRES_USER: + file: ./.POSTGRES_USER + POSTGRES_PASSWORD: + file: ./.POSTGRES_PASSWORD + +volumes: + portainer-data: diff --git a/monitoring/templates/Docker-template.yaml b/monitoring/templates/Docker-template.yaml new file mode 100644 index 0000000..eca4ffe --- /dev/null +++ b/monitoring/templates/Docker-template.yaml @@ -0,0 +1,484 @@ +zabbix_export: + version: '5.2' + date: '2021-11-29T12:29:17Z' + groups: + - + name: Docker + templates: + - + template: Docker + name: Docker + description: | + Get Docker 
engine metrics from plugin for the New Zabbix Agent (zabbix-agent2). + + You can discuss this template or leave feedback on our forum + + Template tooling version used: 0.38 + groups: + - + name: Docker + applications: + - + name: Docker + - + name: 'Zabbix raw items' + items: + - + name: 'Docker: Get containers' + key: docker.containers + history: '0' + trends: '0' + value_type: TEXT + applications: + - + name: 'Zabbix raw items' + - + name: 'Docker: Containers paused' + type: DEPENDENT + key: docker.containers.paused + delay: '0' + history: 7d + description: 'Total number of containers paused on this host' + applications: + - + name: Docker + preprocessing: + - + type: JSONPATH + parameters: + - $.ContainersPaused + master_item: + key: docker.info + - + name: 'Docker: Containers running' + type: DEPENDENT + key: docker.containers.running + delay: '0' + history: 7d + description: 'Total number of containers running on this host' + applications: + - + name: Docker + preprocessing: + - + type: JSONPATH + parameters: + - $.ContainersRunning + master_item: + key: docker.info + - + name: 'Docker: Containers stopped' + type: DEPENDENT + key: docker.containers.stopped + delay: '0' + history: 7d + description: 'Total number of containers stopped on this host' + applications: + - + name: Docker + preprocessing: + - + type: JSONPATH + parameters: + - $.ContainersStopped + master_item: + key: docker.info + triggers: + - + expression: '{avg(5m)}>=1' + name: 'Docker: containers is stopped' + priority: HIGH + - + name: 'Docker: Containers total' + type: DEPENDENT + key: docker.containers.total + delay: '0' + history: 7d + description: 'Total number of containers on this host' + applications: + - + name: Docker + preprocessing: + - + type: JSONPATH + parameters: + - $.Containers + master_item: + key: docker.info + - + name: 'Docker: Get images' + key: docker.images + history: '0' + trends: '0' + status: DISABLED + value_type: TEXT + applications: + - + name: 'Zabbix raw items' + 
- + name: 'Docker: Get info' + key: docker.info + history: '0' + trends: '0' + value_type: TEXT + applications: + - + name: 'Zabbix raw items' + - + name: 'Docker: Memory total' + type: DEPENDENT + key: docker.mem.total + delay: '0' + history: 7d + status: DISABLED + units: B + applications: + - + name: Docker + preprocessing: + - + type: JSONPATH + parameters: + - $.MemTotal + master_item: + key: docker.info + - + name: 'Docker: Ping' + key: docker.ping + history: 7h + applications: + - + name: Docker + valuemap: + name: 'Service state' + preprocessing: + - + type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 10m + triggers: + - + expression: '{last()}=0' + name: 'Docker: Service is down' + priority: AVERAGE + manual_close: 'YES' + discovery_rules: + - + name: 'Containers discovery' + key: 'docker.containers.discovery[true]' + delay: 15m + filter: + evaltype: AND + conditions: + - + macro: '{#NAME}' + value: '{$DOCKER.LLD.FILTER.CONTAINER.MATCHES}' + formulaid: A + - + macro: '{#NAME}' + value: '{$DOCKER.LLD.FILTER.CONTAINER.NOT_MATCHES}' + operator: NOT_MATCHES_REGEX + formulaid: B + description: | + Discovery for containers metrics + + Parameter: + true - Returns all containers + false - Returns only running containers + item_prototypes: + - + name: 'Container {#NAME}: Finished at' + type: DEPENDENT + key: 'docker.container_info.finished["{#NAME}"]' + delay: '0' + history: 7d + value_type: FLOAT + units: unixtime + application_prototypes: + - + name: 'Docker: Container {#NAME}' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.FinishedAt + - + type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + master_item: + key: 'docker.container_info["{#NAME}"]' + - + name: 'Container {#NAME}: Restart count' + type: DEPENDENT + key: 'docker.container_info.restart_count["{#NAME}"]' + delay: '0' + history: 7d + application_prototypes: + - + name: 'Docker: Container {#NAME}' + preprocessing: + - + type: JSONPATH + parameters: + - $.RestartCount + master_item: 
+ key: 'docker.container_info["{#NAME}"]' + trigger_prototypes: + - + expression: '{last()}>5' + name: 'Container {#NAME}: restarting constantly' + opdata: '{ITEM.VALUE}' + priority: HIGH + - + name: 'Container {#NAME}: Started at' + type: DEPENDENT + key: 'docker.container_info.started["{#NAME}"]' + delay: '0' + history: 7d + value_type: FLOAT + units: unixtime + application_prototypes: + - + name: 'Docker: Container {#NAME}' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.StartedAt + - + type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + master_item: + key: 'docker.container_info["{#NAME}"]' + - + name: 'Container {#NAME}: Error' + type: DEPENDENT + key: 'docker.container_info.state.error["{#NAME}"]' + delay: '0' + history: 7d + trends: '0' + value_type: CHAR + application_prototypes: + - + name: 'Docker: Container {#NAME}' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.Error + - + type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + master_item: + key: 'docker.container_info["{#NAME}"]' + trigger_prototypes: + - + expression: '{diff()}=1 and {strlen()}>0' + name: 'Container {#NAME}: An error has occurred in the container' + priority: WARNING + description: 'Container {#NAME} has an error. Ack to close.' 
+ manual_close: 'YES' + - + name: 'Container {#NAME}: Exit code' + type: DEPENDENT + key: 'docker.container_info.state.exitcode["{#NAME}"]' + delay: '0' + history: 7d + application_prototypes: + - + name: 'Docker: Container {#NAME}' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.ExitCode + - + type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + master_item: + key: 'docker.container_info["{#NAME}"]' + - + name: 'Container {#NAME}: Paused' + type: DEPENDENT + key: 'docker.container_info.state.paused["{#NAME}"]' + delay: '0' + history: 7d + application_prototypes: + - + name: 'Docker: Container {#NAME}' + valuemap: + name: 'Docker flag' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.Paused + - + type: BOOL_TO_DECIMAL + parameters: + - '' + master_item: + key: 'docker.container_info["{#NAME}"]' + - + name: 'Container {#NAME}: Restarting' + type: DEPENDENT + key: 'docker.container_info.state.restarting["{#NAME}"]' + delay: '0' + history: 7d + application_prototypes: + - + name: 'Docker: Container {#NAME}' + valuemap: + name: 'Docker flag' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.Restarting + - + type: BOOL_TO_DECIMAL + parameters: + - '' + master_item: + key: 'docker.container_info["{#NAME}"]' + - + name: 'Container {#NAME}: Running' + type: DEPENDENT + key: 'docker.container_info.state.running["{#NAME}"]' + delay: '0' + history: 7d + application_prototypes: + - + name: 'Docker: Container {#NAME}' + valuemap: + name: 'Docker flag' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.Running + - + type: BOOL_TO_DECIMAL + parameters: + - '' + master_item: + key: 'docker.container_info["{#NAME}"]' + - + name: 'Container {#NAME}: Status' + type: DEPENDENT + key: 'docker.container_info.state.status["{#NAME}"]' + delay: '0' + history: 7d + trends: '0' + value_type: CHAR + application_prototypes: + - + name: 'Docker: Container {#NAME}' + preprocessing: + - + type: JSONPATH + parameters: + - $.State.Status 
+ - + type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1h + master_item: + key: 'docker.container_info["{#NAME}"]' + - + name: 'Container {#NAME}: Get info' + key: 'docker.container_info["{#NAME}"]' + history: '0' + trends: '0' + value_type: CHAR + description: 'Return low-level information about a container' + application_prototypes: + - + name: 'Docker: Container {#NAME}' + trigger_prototypes: + - + expression: '{Docker:docker.container_info.state.exitcode["{#NAME}"].last()}>0 and {Docker:docker.container_info.state.running["{#NAME}"].last()}=0' + name: 'Container {#NAME}: Container has been stopped with error code' + opdata: 'Exit code: {ITEM.LASTVALUE1}' + priority: AVERAGE + manual_close: 'YES' + macros: + - + macro: '{$DOCKER.LLD.FILTER.CONTAINER.MATCHES}' + value: '.*' + description: 'Filter of discoverable containers' + - + macro: '{$DOCKER.LLD.FILTER.CONTAINER.NOT_MATCHES}' + value: CHANGE_IF_NEEDED + description: 'Filter to exclude discovered containers' + - + macro: '{$DOCKER.LLD.FILTER.IMAGE.MATCHES}' + value: '.*' + description: 'Filter of discoverable images' + - + macro: '{$DOCKER.LLD.FILTER.IMAGE.NOT_MATCHES}' + value: CHANGE_IF_NEEDED + description: 'Filter to exclude discovered images' + graphs: + - + name: 'Docker: Containers' + graph_items: + - + drawtype: GRADIENT_LINE + color: 1A7C11 + item: + host: Docker + key: docker.containers.running + - + sortorder: '1' + drawtype: BOLD_LINE + color: 2774A4 + item: + host: Docker + key: docker.containers.paused + - + sortorder: '2' + drawtype: BOLD_LINE + color: F63100 + item: + host: Docker + key: docker.containers.stopped + - + sortorder: '3' + drawtype: BOLD_LINE + color: A54F10 + item: + host: Docker + key: docker.containers.total + - + name: 'Docker: Memory total' + graph_items: + - + drawtype: BOLD_LINE + color: 1A7C11 + item: + host: Docker + key: docker.mem.total + value_maps: + - + name: 'Docker flag' + mappings: + - + value: '0' + newvalue: 'False' + - + value: '1' + newvalue: 'True' + - + 
name: 'Service state' + mappings: + - + value: '0' + newvalue: Down + - + value: '1' + newvalue: Up diff --git a/monitoring/templates/Tornado-relayer-template.yaml b/monitoring/templates/Tornado-relayer-template.yaml new file mode 100644 index 0000000..1a082b4 --- /dev/null +++ b/monitoring/templates/Tornado-relayer-template.yaml @@ -0,0 +1,85 @@ +zabbix_export: + version: '5.2' + date: '2021-12-01T13:26:59Z' + groups: + - + name: Templates/Applications + templates: + - + template: Tornado-relayer + name: Tornado-relayer + groups: + - + name: Templates/Applications + items: + - + name: 'tornado-relayer: health.error' + type: DEPENDENT + key: tornado-relayer.health.error + delay: '0' + trends: '0' + value_type: TEXT + preprocessing: + - + type: JSONPATH + parameters: + - $.health.error + master_item: + key: 'web.page.get[{$URL}]' + triggers: + - + expression: '{last()}<>""' + name: 'tornado-relayer: health error' + priority: AVERAGE + - + name: 'tornado-relayer: health.status' + type: DEPENDENT + key: tornado-relayer.health.status + delay: '0' + trends: '0' + value_type: TEXT + preprocessing: + - + type: JSONPATH + parameters: + - $.health.status + master_item: + key: 'web.page.get[{$URL}]' + triggers: + - + expression: '{last(#3)}<>"true"' + name: 'tornado-relayer: health status <> true' + priority: HIGH + - + name: 'tornado-relayer: data' + type: ZABBIX_ACTIVE + key: 'web.page.get[{$URL}]' + history: '0' + trends: '0' + value_type: TEXT + preprocessing: + - + type: REGEX + parameters: + - '\n\s?\n([\s\S]*)' + - \1 + httptests: + - + name: 'tornado-relayer: status page' + agent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/80.0.3987.87 Chrome/80.0.3987.87 Safari/537.36' + steps: + - + name: 'status page' + url: '{$URL}' + follow_redirects: 'NO' + required: status + status_codes: '200' + triggers: + - + expression: '{Tornado-relayer:web.test.fail[tornado-relayer: status page].last()}>0' + name: 'tornado-relayer: status page 
failed' + priority: AVERAGE + - + expression: '{Tornado-relayer:web.test.rspcode[tornado-relayer: status page,status page].last(#3)}<>200' + name: 'tornado-relayer: status page rspcode <>200' + priority: HIGH