PostgreSQL 9.2 monitoring with Adagios on CentOS 7

On the PostgreSQL server:

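Note: If SELinux is in enforcing mode it may block NRPE from running the plugins below through sudo; check the audit log for denials and adjust the policy if needed.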

Install the required Perl modules, download the check script, and make it executable:

# Perl modules needed by check_postgres.pl.
yum install perl-Data-Dumper perl-Digest-MD5 perl-Getopt-Long perl-File-Temp perl-Time-HiRes perl-TimeDate
cd /usr/lib64/nagios/plugins
# -O overwrites an existing copy; without it wget saves a repeated download
# as check_postgres.pl.1 and the old script silently stays in use.
wget -O check_postgres.pl https://raw.githubusercontent.com/bucardo/check_postgres/master/check_postgres.pl
chmod +x check_postgres.pl

Create /usr/lib64/nagios/plugins/check_postgres_stats.sh with the following content and make it executable (chmod +x /usr/lib64/nagios/plugins/check_postgres_stats.sh):

#!/bin/bash
# Nagios/NRPE wrapper around the "dbstats" action of check_postgres.pl.
#
# Usage: check_postgres_stats.sh <database>
#
# On success, prints an OK status line followed by the dbstats output with
# every ':' replaced by '=' after the "|" separator, and exits 0.
# If check_postgres.pl fails or prints nothing, reports UNKNOWN (exit 3)
# instead of a misleading OK.

CHECK=/usr/lib64/nagios/plugins/check_postgres.pl
DB="$1"

RAW=$("$CHECK" --datadir /var/lib/pgsql/data/ -db "$DB" --action dbstats)
RC=$?

if [ "$RC" -ne 0 ] || [ -z "$RAW" ]; then
    # Strip '|' from the message so it is not mistaken for the perfdata separator.
    echo "UNKNOWN: check_postgres dbstats failed for '$DB' (exit $RC): ${RAW//|/ }"
    exit 3
fi

# Convert name:value pairs to the name=value form used for performance data.
STATS=$(printf '%s\n' "$RAW" | sed 's/:/=/g')
echo "OK: Postgres stats collected | $STATS"
exit 0

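Add the following to /etc/nrpe.d/check_postgres.cfg, then restart the nrpe service. These commands take arguments ($ARG1$ ...), so NRPE must have command arguments enabled (dont_blame_nrpe=1 in /etc/nagios/nrpe.cfg):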

# All commands run as the postgres user through sudo.
# $ARG1$ = database name, $ARG2$ = check_postgres action,
# $ARG3$ = warning threshold, $ARG4$ = critical threshold.

# Action without thresholds.
command[check_postgres]=/usr/bin/sudo -u postgres /usr/lib64/nagios/plugins/check_postgres.pl --datadir /var/lib/pgsql/data/ -db '$ARG1$' --action '$ARG2$'
# Action with a warning threshold only.
command[check_postgres_w]=/usr/bin/sudo -u postgres /usr/lib64/nagios/plugins/check_postgres.pl --datadir /var/lib/pgsql/data/ -db '$ARG1$' --action '$ARG2$' --warning '$ARG3$'
# Action with warning and critical thresholds.
command[check_postgres_wc]=/usr/bin/sudo -u postgres /usr/lib64/nagios/plugins/check_postgres.pl --datadir /var/lib/pgsql/data/ -db '$ARG1$' --action '$ARG2$' --warning '$ARG3$' --critical '$ARG4$'
# Statistics wrapper script; takes only the database name.
command[check_postgres_stats]=/usr/bin/sudo -u postgres /usr/lib64/nagios/plugins/check_postgres_stats.sh '$ARG1$'

Add the following to /etc/sudoers.d/nrpe using visudo:

visudo -f /etc/sudoers.d/nrpe
# Contents of /etc/sudoers.d/nrpe:
# nrpe has no terminal when it calls sudo, so do not require one.
Defaults:nrpe !requiretty
# Let nrpe run the two PostgreSQL check plugins as postgres without a password.
nrpe ALL=(postgres) NOPASSWD: /usr/lib64/nagios/plugins/check_postgres.pl, \
                              /usr/lib64/nagios/plugins/check_postgres_stats.sh

On the Nagios server:

Create the check commands:

# Each command_line is one single-quoted shell word, so the shell passes the
# Nagios $MACRO$ names to pynag untouched. (Nesting single quotes inside single
# quotes would end the string and let the shell expand $_SERVICE_... to nothing.)
# The inner double quotes are stored in the Nagios command so that values
# containing spaces, e.g. "5 for 10 seconds", reach check_nrpe as one argument.
pynag add command command_name="2ks-check_nrpe_postgres" command_line='$USER1$/check_nrpe -H $HOSTADDRESS$ -c check_postgres -a "$_SERVICE_DATABASE$" "$_SERVICE_ACTION$"'
pynag add command command_name="2ks-check_nrpe_postgres_w" command_line='$USER1$/check_nrpe -H $HOSTADDRESS$ -c check_postgres_w -a "$_SERVICE_DATABASE$" "$_SERVICE_ACTION$" "$_SERVICE_WARNING$"'
pynag add command command_name="2ks-check_nrpe_postgres_wc" command_line='$USER1$/check_nrpe -H $HOSTADDRESS$ -c check_postgres_wc -a "$_SERVICE_DATABASE$" "$_SERVICE_ACTION$" "$_SERVICE_WARNING$" "$_SERVICE_CRITICAL$"'
pynag add command command_name="2ks-check_nrpe_postgres_stats" command_line='$USER1$/check_nrpe -H $HOSTADDRESS$ -c check_postgres_stats -a "$_SERVICE_DATABASE$"'

Create the okconfig template /etc/nagios/okconfig/examples/postgres.cfg-example:

# Process check: passes the __WARNING, __CRITICAL and __NAME custom variables
# to check_procs through okc-check_nrpe.
define service {
    use                     okc-linux-check_proc
    host_name               HOSTNAME
    service_description     Process postgres
    check_command           okc-check_nrpe!check_procs -a $_SERVICE_WARNING$ $_SERVICE_CRITICAL$ $_SERVICE_NAME$
    __NAME                  postgres
    __WARNING               1:
    __CRITICAL              :20
}

# check_postgres "connection" action (no thresholds).
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database connection
    check_command           2ks-check_nrpe_postgres
    __DATABASE              database_1
    __ACTION                connection
    notes                   Simply connects and returns version number.
}

# Statistics collection through the check_postgres_stats wrapper command.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database statistics
    check_command           2ks-check_nrpe_postgres_stats
    __DATABASE              database_1
    notes                   Reports information from the pg_stat_database view, and outputs as performance data.
}

# check_postgres "bloat" action: warning at 25%, critical at 50%.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database bloat
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                bloat
    __WARNING               25%
    __CRITICAL              50%
    notes                   Checks the amount of bloat in tables and indexes. Bloat is generally the amount of dead unused space taken up in a table or index. This space is usually reclaimed by use of the VACUUM command.
}

# check_postgres "locks" action: warning at 150, critical at 300.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database locks
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                locks
    __WARNING               150
    __CRITICAL              300
    notes                   Check the total number of locks on one or more databases.
}

# check_postgres "timesync" action: warning at 2, critical at 5.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database timesync
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                timesync
    __WARNING               2
    __CRITICAL              5
    notes                   Compares the local system time with the time reported by one or more databases.
}

# check_postgres "last_vacuum" action: warning at 3d, critical at 7d.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database last vacuum
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                last_vacuum
    __WARNING               3d
    __CRITICAL              7d
    notes                   Checks how long it has been since vacuum (or analyze) was last run on each table in one or more databases.
}

# check_postgres "backends" action: warning at 80, critical at 95.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database backends
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                backends
    __WARNING               80
    __CRITICAL              95
    notes                   Checks the current number of connections for one or more databases.
}

# check_postgres "hitratio" action: warning at 90%, critical at 80%
# (the notes below say the check complains when the ratio is too low).
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database hitratio
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                hitratio
    __WARNING               90%
    __CRITICAL              80%
    notes                   Checks the hit ratio of all databases and complains when they are too low.
}

# check_postgres "query_time" action: warning at 5, critical at 10.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database query time
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                query_time
    __WARNING               5
    __CRITICAL              10
    notes                   Checks the length of running queries on one or more databases.
}

# check_postgres "txn_idle" action; both thresholds contain spaces.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database connections idle in transaction
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                txn_idle
    __WARNING               2 for 5 seconds
    __CRITICAL              5 for 10 seconds
    notes                   Checks the number and duration of "idle in transaction" queries on one or more databases.
}

# check_postgres "disabled_triggers" action: warning threshold only (1).
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database disabled triggers
    check_command           2ks-check_nrpe_postgres_w
    __DATABASE              database_1
    __ACTION                disabled_triggers
    __WARNING               1
    notes                   Checks on the number of disabled triggers inside the database. In normal usage having disabled triggers is a dangerous event.
}

# check_postgres "checkpoint" action: warning at 400, critical at 600.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database last checkpoint
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                checkpoint
    __WARNING               400
    __CRITICAL              600
    notes                   Determines how long since the last checkpoint has been run.
}

# check_postgres "settings_checksum" action: __WARNING holds the checksum
# passed as the warning argument.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database settings checksum
    check_command           2ks-check_nrpe_postgres_w
    __DATABASE              database_1
    __ACTION                settings_checksum
    __WARNING               c6358648f0d06757a8311709be307f24
    notes                   Checks that all the Postgres settings are the same as last time you checked.
}

# check_postgres "database_size" action: warning at 15GB, critical at 30GB.
define service {
    use                     generic-service
    host_name               HOSTNAME
    service_description     PostgreSQL Database size
    check_command           2ks-check_nrpe_postgres_wc
    __DATABASE              database_1
    __ACTION                database_size
    __WARNING               15GB
    __CRITICAL              30GB
    notes                   Checks the size of all databases and complains when they are too big.
}

Add the template to a host:

okconfig addtemplate db-01.domain.com --template postgres

The values provided in the above configuration are examples. You should change them according to your needs.
adagios_postgres_status
Source: https://bucardo.org/check_postgres/check_postgres.pl.html

Advertisements
Postgresql 9.2 monitoring with Adagios on CentOS 7

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s