[root@wallet01 ~]# useradd nagios [root@wallet01 ~]# id nagios uid=501(nagios) gid=502(nagios) groups=502(nagios) [root@wallet01 ~]# tar zxvf nagios-plugins-2.2.1.tar.gz [root@wallet01 ~]# cd nagios-plugins-2.2.1 [root@wallet01 nagios-plugins-2.2.1]# ./configure --prefix=/usr/local/nagios \ --with-cgiurl=/nagios/cgi-bin \ --with-nagios-user=nagios \ --with-nagios-group=nagios [root@wallet01 nagios-plugins-2.2.1]# make [root@wallet01 nagios-plugins-2.2.1]# make install [root@wallet01 ~]# yum install -y xinetd [root@wallet01 ~]# tar zxvf nrpe-3.2.1.tar.gz [root@wallet01 ~]# cd nrpe-3.2.1 [root@wallet01 nrpe-3.2.1]# ./configure --prefix=/usr/local/nagios --enable-ssl [root@wallet01 nrpe-3.2.1]# make all [root@wallet01 nrpe-3.2.1]# make install [root@wallet01 nrpe-3.2.1]# make install-plugin [root@wallet01 nrpe-3.2.1]# make install-daemon [root@wallet01 nrpe-3.2.1]# make install-config [root@wallet01 nrpe-3.2.1]# make install-inetd [root@wallet01 ~]# vi /etc/xinetd.d/nrpe # default: off # description: NRPE (Nagios Remote Plugin Executor) service nrpe { disable = no socket_type = stream port = 5666 wait = no user = nagios group = nagios server = /usr/local/nagios/bin/nrpe server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd log_on_success = } [root@wallet01 ~]# vi /etc/services nrpe 5666/tcp # nagios [root@wallet01 ~]# vi /usr/local/nagios/etc/nrpe.cfg allowed_hosts=192.168.1.200 command[check_cpu]=/usr/local/nagios/libexec/check_cpu.sh -w 80 -c 90 command[check_memory]=/usr/local/nagios/libexec/check_memory.py -w 20 -c 10 command[check_disk]=/usr/local/nagios/libexec/check_disk.pl -w 20 -c 10 command[check_network]=/usr/local/nagios/libexec/check_bandwidth.sh -w 900 -c 950 em1 command[check_uptime]=/usr/local/nagios/libexec/check_uptime [root@wallet01 ~]# yum install -y vnstat [root@wallet01 ~]# cd /usr/local/nagios/libexec [root@wallet01 libexec]# chmod a+x check_cpu.sh [root@wallet01 libexec]# ./check_cpu.sh -w 80 -c 90 OK - CPU Usage |CPU_USER=3;;;; 
CPU_SYSTEM=1;;;; CPU_IDLE=96;;;; CPU_IOWAIT=0;;;; CPU_ST=0;;;; [root@wallet01 libexec]# chmod a+x check_memory.py [root@wallet01 libexec]# ./check_memory.py -w 20 -c 10 OK: Free memory percentage is 89% (43004 MiB) [root@wallet01 libexec]# chmod a+x check_disk.pl [root@wallet01 libexec]# ./check_disk.pl -w 20 -c 10 DISK OK [root@wallet01 libexec]# chmod a+x check_bandwidth.sh [root@wallet01 libexec]# ./check_bandwidth.sh -w 900 -c 950 em1 NIC em1 Status: OK - rx: 10 KBps - tx: 46 KBps|Rx(KBps)=10;;;; Tx(KBps)=46;;;; [root@wallet01 libexec]# ./check_uptime Uptime OK: 16 day(s) 0 hour(s) 20 minute(s) | uptime=23060.000000;;; [root@wallet01 ~]# service xinetd start [root@wallet01 ~]# service xinetd status xinetd (pid 18791) is running... [root@wallet01 ~]# netstat -tunlp | grep 5666 tcp 0 0 :::5666 :::* LISTEN 18791/xinetd
[root@monitor ~]# vi /etc/icinga/objects/commands.cfg define command{ command_name check_nrpe command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ } define command{ command_name check_http command_line $USER1$/check_http -H $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ } [root@monitor ~]# vi /etc/icinga/objects/templates.cfg define host{ name generic-host ; The name of this host template notifications_enabled 1 ; Host notifications are enabled event_handler_enabled 1 ; Host event handler is enabled flap_detection_enabled 1 ; Flap detection is enabled failure_prediction_enabled 1 ; Failure prediction is enabled process_perf_data 1 ; Process performance data retain_status_information 1 ; Retain status information across program restarts retain_nonstatus_information 1 ; Retain non-status information across program restarts notification_period 24x7 ; Send host notifications at any time register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE! } define host{ name linux-server ; The name of this host template use generic-host ; This template inherits other values from the generic-host template check_period 24x7 ; By default, Linux hosts are checked round the clock check_interval 5 ; Actively check the host every 5 minutes retry_interval 1 ; Schedule host check retries at 1 minute intervals max_check_attempts 10 ; Check each Linux host 10 times (max) check_command check-host-alive ; Default command to check Linux hosts notification_period workhours ; Linux admins hate to be woken up, so we only notify during the day ; Note that the notification_period variable is being overridden from ; the value that is inherited from the generic-host template! notification_interval 120 ; Resend notifications every 2 hours notification_options d,u,r ; Only send notifications for specific host states contact_groups admins ; Notifications get sent to the admins by default register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE! 
} define service{ name generic-service ; The 'name' of this service template active_checks_enabled 1 ; Active service checks are enabled passive_checks_enabled 1 ; Passive service checks are enabled/accepted parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems) obsess_over_service 1 ; We should obsess over this service (if necessary) check_freshness 0 ; Default is to NOT check service 'freshness' notifications_enabled 1 ; Service notifications are enabled event_handler_enabled 1 ; Service event handler is enabled flap_detection_enabled 1 ; Flap detection is enabled failure_prediction_enabled 1 ; Failure prediction is enabled process_perf_data 1 ; Process performance data retain_status_information 1 ; Retain status information across program restarts retain_nonstatus_information 1 ; Retain non-status information across program restarts is_volatile 0 ; The service is not volatile check_period 24x7 ; The service can be checked at any time of the day max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state check_interval 1 ; Check the service every minute under normal conditions retry_interval 2 ; Re-check the service every two minutes until a hard state can be determined contact_groups admins ; Notifications get sent out to everyone in the 'admins' group notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events notification_interval 60 ; Re-notify about service problems every hour notification_period 24x7 ; Notifications can be sent out at any time register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE! 
} [root@monitor ~]# vi /etc/icinga/objects/linux.cfg define host{ use linux-server host_name wallet01 alias wallet01 icon_image redhat.gif statusmap_image redhat.gd2 address 192.168.40.50 } define hostgroup{ hostgroup_name tomcat-server alias tomcat-server members wallet01 } define service{ hostgroup_name tomcat-server use generic-service service_description alive check_command check_ping!100.0,20%!500.0,60% } define service{ hostgroup_name tomcat-server use generic-service service_description os cpu usage check_command check_nrpe!check_cpu } define service{ hostgroup_name tomcat-server use generic-service service_description os memory usage check_command check_nrpe!check_memory } define service{ hostgroup_name tomcat-server use generic-service service_description os disk usage check_command check_nrpe!check_disk } define service{ hostgroup_name tomcat-server use generic-service service_description os network usage check_command check_nrpe!check_network } define service{ hostgroup_name tomcat-server use generic-service service_description os uptime check_command check_nrpe!check_uptime } define service{ host_name wallet01 use generic-service,nagiosgraph service_description gsoaweb status check_command check_http!80!200 } [root@monitor ~]# vi /etc/icinga/icinga.cfg cfg_file=/etc/icinga/objects/linux.cfg [root@monitor ~]# service icinga start Running configuration check...OK Starting icinga: Starting icinga done. [root@monitor ~]# service icinga status Icinga (pid 23725) is running...