I have been experiencing a problem for a while where on occasion, the agents would block while trying to read a response from the server. a strace would show
This would hang until the agent is restarted.
The following patch to 1.4.1 fixes this, defaulting the read timeout to 60 seconds. I am not sure if this patch will be necessary in 1.4.4, but I will check, as I will updating to 1.4.4 later today.
Code:
read(5,
The following patch to 1.4.1 fixes this, defaulting the read timeout to 60 seconds. I am not sure if this patch will be necessary in 1.4.4, but I will check, as I will updating to 1.4.4 later today.
Code:
diff -uNr zabbix-1.4.1.screenpatched/src/libs/zbxcomms/comms.c zabbix-1.4.1.new/src/libs/zbxcomms/comms.c
--- zabbix-1.4.1.screenpatched/src/libs/zbxcomms/comms.c 2007-06-29 22:50:26.000000000 +0200
+++ zabbix-1.4.1.new/src/libs/zbxcomms/comms.c 2008-01-09 20:25:05.000000000 +0200
@@ -21,6 +21,9 @@
#include "comms.h"
#include "log.h"
+int tcp_read_with_timeout(int sockfd, char* buf, int maxlen, float timeout);
+int sock_error(int sockfd);
+
#if defined(_WINDOWS)
# if defined(__INT_MAX__) && __INT_MAX__ == 2147483647
typedef int ssize_t;
@@ -40,7 +43,7 @@
# define ZBX_SOCK_ERR_TIMEDOUT WSAETIMEDOUT
#else
# define ZBX_TCP_WRITE(s, b, bl) ((ssize_t)write((s), (b), (bl)))
-# define ZBX_TCP_READ(s, b, bl) ((ssize_t)read((s), (b), (bl)))
+# define ZBX_TCP_READ(s, b, bl) ((ssize_t)tcp_read_with_timeout((s), (b), (bl), 60))
# define ZBX_TCP_ERROR -1
# define ZBX_SOCK_ERROR -1
@@ -724,3 +727,85 @@
return FAIL;
}
+/******************************************************************************
+ * *
+ * Function: tcp_read_with_timeout *
+ * *
+ * Purpose: read from a socket, with a specified timeout *
+ * *
+ * Parameters: sockfd - socket descriptor *
+ * buf - target buffer *
+ * maxlen - maximum data to read from socket *
+ * timeout - maximum time to wait for data (seconds) *
+ * *
+ * Return value: int (number of bytes read) *
+ * >0 success *
+ * 0 nothing read *
+ * <0 failure *
+ * *
+ * Author: Chris Picton *
+ * *
+ * Comments: *
+ * *
+ ******************************************************************************/
+
+int tcp_read_with_timeout(
+ int sockfd,
+ char* buf,
+ int maxlen,
+ float timeout
+ )
+{
+ int res;
+ fd_set rdset;
+ struct timeval tv;
+
+ int err,numread;
+
+ int sec = (int)timeout;
+ int usec = (int)(timeout-sec) * 100000;
+
+ tv.tv_sec = sec;
+ tv.tv_usec = usec;
+ FD_ZERO(&rdset);
+ FD_SET(sockfd, &rdset);
+ res = select(sockfd+1, &rdset, NULL, NULL, &tv);
+ if (res < 0 && errno != EINTR) { //ERROR
+ return -1;
+ }
+ if (!FD_ISSET(sockfd, &rdset)) {
+ // Socket not ready
+ return -1;
+ }
+ if (res == 0) {
+ // Timeout
+ return -1;
+ }
+
+ if (err = sock_error(sockfd)) {
+ // Socket error
+ return -1;
+ }
+
+ if ((numread = read(sockfd, buf, maxlen)) < 0) {
+ // Error from read
+ return -1;
+ }
+
+ return numread;
+}
+
+
+int sock_error(int sockfd) {
+ socklen_t lon = sizeof(int);
+ int valopt;
+ if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, (void*)(&valopt), &lon) < 0)
+{
+ // getsockopt error
+ // debug(3, "Error in getsockopt(): %d [%s]", errno, strerror(errno));
+ return -1;
+ }
+ // Check the value returned...
+ return valopt;
+}
+
Comment