Hi !
I hit a busy loop in poller.c. This results in MySQL using 100% CPU.
The problem may be in this snippet of code (line 367 of poller.c) :
This code only logs a warning. The item is not updated, so it will be checked again, and no sleep is triggered. In main_poller_loop(), we then hit the "no sleep" code path and call get_values() again, which may hit the same condition.
In my case, for a certain OID on a certain server, I get :
Therefore, I have modified checks_snmp.c to return NOTSUPPORTED instead of FAIL in this snippet of code:
(in fact, I removed the internal condition)
However, I think that the problem should be handled in poller.c. If get_value() always returns FAIL (that is, the error is persistent rather than transient, as in my case), we hit a busy loop. Here is a patch to solve this:
The patch is not very readable, but in effect it assumes that the item is not supported for any condition that is not otherwise handled.
I hit a busy loop in poller.c. This results in MySQL using 100% CPU.
The problem may be in this snippet of code (line 367 of poller.c) :
Code:
/* Possibly, other logic required? */
else if(res == AGENT_ERROR)
{
zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
zabbix_syslog("Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
stop=1;
}
else
{
zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed", item.key, item.host );
zabbix_syslog("Getting value of [%s] from host [%s] failed", item.key, item.host );
zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
}
In my case, for a certain OID on a certain server, I get :
Code:
SNMP error in packet. Reason: (genError) A general failure occured
Code:
if (status == STAT_SUCCESS)
{
zabbix_log( LOG_LEVEL_WARNING, "SNMP error in packet. Reason: %s\n",
snmp_errstring(response->errstat));
if(response->errstat == SNMP_ERR_NOSUCHNAME)
{
snprintf(error,MAX_STRING_LEN-1,"SNMP error [%s]", snmp_errstring(response->errstat));
zabbix_log( LOG_LEVEL_ERR, error);
SET_MSG_RESULT(value, strdup(error));
ret=NOTSUPPORTED;
}
else
{
snprintf(error,MAX_STRING_LEN-1,"SNMP error [%s]", snmp_errstring(response->errstat));
zabbix_log( LOG_LEVEL_ERR, error);
SET_MSG_RESULT(value, strdup(error));
ret=FAIL;
}
}
However, I think that the problem should be handled in poller.c. If get_value() always returns FAIL (that is, the error is persistent rather than transient, as in my case), we hit a busy loop. Here is a patch to solve this:
Code:
--- a/src/zabbix_server/poller/poller.c
+++ b/src/zabbix_server/poller/poller.c
@@ -281,33 +281,6 @@ int get_values(void)
}
update_triggers(item.itemid);
}
- else if(res == NOTSUPPORTED)
- {
- if(item.status == ITEM_STATUS_NOTSUPPORTED)
- {
- /* It is not correct */
-/* snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",calculate_item_nextcheck(item.itemid, CONFIG_REFRESH_UNSUPPORTED,now), now, item.itemid);*/
- snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",CONFIG_REFRESH_UNSUPPORTED+now, now, item.itemid);
- DBexecute(sql);
- }
- else
- {
- zabbix_log( LOG_LEVEL_WARNING, "Parameter [%s] is not supported by agent on host [%s] Old status [%d]", item.key, item.host, item.status);
- zabbix_syslog("Parameter [%s] is not supported by agent on host [%s]", item.key, item.host );
- DBupdate_item_status_to_notsupported(item.itemid, agent.str);
- /* if(HOST_STATUS_UNREACHABLE == item.host_status)*/
- if(HOST_AVAILABLE_TRUE != item.host_available)
- {
- zabbix_log( LOG_LEVEL_WARNING, "Enabling host [%s]", item.host );
- zabbix_syslog("Enabling host [%s]", item.host );
- DBupdate_host_availability(item.hostid,HOST_AVAILABLE_TRUE,now,agent.msg);
- update_key_status(item.hostid, HOST_STATUS_MONITORED); /* 0 */
- item.host_available=HOST_AVAILABLE_TRUE;
-
- stop=1;
- }
- }
- }
else if(res == NETWORK_ERROR)
{
/* First error */
@@ -350,20 +323,32 @@ int get_values(void)
stop=1;
}
-/* Possibly, other logic required? */
- else if(res == AGENT_ERROR)
+ else /* if((res == NOTSUPPORTED) || (res == AGENT_ERROR)) */
{
- zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
- zabbix_syslog("Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
- zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
-
- stop=1;
- }
- else
- {
- zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed", item.key, item.host );
- zabbix_syslog("Getting value of [%s] from host [%s] failed", item.key, item.host );
- zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
+ if(item.status == ITEM_STATUS_NOTSUPPORTED)
+ {
+ /* It is not correct */
+/* snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",calculate_item_nextcheck(item.itemid, CONFIG_REFRESH_UNSUPPORTED,now), now, item.itemid);*/
+ snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",CONFIG_REFRESH_UNSUPPORTED+now, now, item.itemid);
+ DBexecute(sql);
+ }
+ else
+ {
+ zabbix_log( LOG_LEVEL_WARNING, "Parameter [%s] is not supported by agent on host [%s] Old status [%d]", item.key, item.host, item.status);
+ zabbix_syslog("Parameter [%s] is not supported by agent on host [%s]", item.key, item.host );
+ DBupdate_item_status_to_notsupported(item.itemid, agent.str);
+ /* if(HOST_STATUS_UNREACHABLE == item.host_status)*/
+ if(HOST_AVAILABLE_TRUE != item.host_available)
+ {
+ zabbix_log( LOG_LEVEL_WARNING, "Enabling host [%s]", item.host );
+ zabbix_syslog("Enabling host [%s]", item.host );
+ DBupdate_host_availability(item.hostid,HOST_AVAILABLE_TRUE,now,agent.msg);
+ update_key_status(item.hostid, HOST_STATUS_MONITORED); /* 0 */
+ item.host_available=HOST_AVAILABLE_TRUE;
+
+ stop=1;
+ }
+ }
}
free_result(&agent);
}