Ad Widget

Collapse

Active loop in poller.c

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • Vince2
    Member
    • Oct 2006
    • 40

    #1

    Active loop in poller.c

    Hi !

    I hit an active (busy) loop in poller.c, which results in MySQL using 100% of the CPU.

    The problem may be in this snippet of code (line 367 of poller.c):

    Code:
    /* Possibly, other logic required? */
    		else if(res == AGENT_ERROR)
    		{
    			zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
    			zabbix_syslog("Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
    			zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
    
    			stop=1;
    		}
    		else
    		{
    			zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed", item.key, item.host );
    			zabbix_syslog("Getting value of [%s] from host [%s] failed", item.key, item.host );
    			zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
    		}
    This code only logs a warning. The item is not updated, so it will be checked again, and no sleep is triggered. In main_poller_loop(), we then hit the "no sleep" code path and call get_values() again, which may hit the same condition.

    In my case, for a certain OID on a certain server, I get:

    Code:
    SNMP error in packet. Reason: (genError) A general failure occured
    Therefore, I have modified checks_snmp.c to return NOTSUPPORTED instead of FAIL in this snippet of code:

    Code:
    		if (status == STAT_SUCCESS)
    		{
    			zabbix_log( LOG_LEVEL_WARNING, "SNMP error in packet. Reason: %s\n",
    				snmp_errstring(response->errstat));
    			if(response->errstat == SNMP_ERR_NOSUCHNAME)
    			{
    				snprintf(error,MAX_STRING_LEN-1,"SNMP error [%s]", snmp_errstring(response->errstat));
    
    				zabbix_log( LOG_LEVEL_ERR, error);
    				SET_MSG_RESULT(value, strdup(error));
    
    				ret=NOTSUPPORTED;
    			}
    			else
    			{
    				snprintf(error,MAX_STRING_LEN-1,"SNMP error [%s]", snmp_errstring(response->errstat));
    
    				zabbix_log( LOG_LEVEL_ERR, error);
    				SET_MSG_RESULT(value, strdup(error));
    
    				ret=FAIL;
    			}
    		}
    (In fact, I removed the inner condition, so that every SNMP error reported in the packet now returns NOTSUPPORTED.)

    However, I think that the problem should be handled in poller.c. If get_value() always returns FAIL (that is, the error is persistent rather than transient, as in my case), we hit an active loop. Here is a patch to solve this:

    Code:
    --- a/src/zabbix_server/poller/poller.c
    +++ b/src/zabbix_server/poller/poller.c
    @@ -281,33 +281,6 @@ int get_values(void)
     			}
     		       	update_triggers(item.itemid);
     		}
    -		else if(res == NOTSUPPORTED)
    -		{
    -			if(item.status == ITEM_STATUS_NOTSUPPORTED)
    -			{
    -				/* It is not correct */
    -/*				snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",calculate_item_nextcheck(item.itemid, CONFIG_REFRESH_UNSUPPORTED,now), now, item.itemid);*/
    -				snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",CONFIG_REFRESH_UNSUPPORTED+now, now, item.itemid);
    -				DBexecute(sql);
    -			}
    -			else
    -			{
    -				zabbix_log( LOG_LEVEL_WARNING, "Parameter [%s] is not supported by agent on host [%s] Old status [%d]", item.key, item.host, item.status);
    -				zabbix_syslog("Parameter [%s] is not supported by agent on host [%s]", item.key, item.host );
    -				DBupdate_item_status_to_notsupported(item.itemid, agent.str);
    -	/*			if(HOST_STATUS_UNREACHABLE == item.host_status)*/
    -				if(HOST_AVAILABLE_TRUE != item.host_available)
    -				{
    -					zabbix_log( LOG_LEVEL_WARNING, "Enabling host [%s]", item.host );
    -					zabbix_syslog("Enabling host [%s]", item.host );
    -					DBupdate_host_availability(item.hostid,HOST_AVAILABLE_TRUE,now,agent.msg);
    -					update_key_status(item.hostid, HOST_STATUS_MONITORED);	/* 0 */
    -					item.host_available=HOST_AVAILABLE_TRUE;
    -	
    -					stop=1;
    -				}
    -			}
    -		}
     		else if(res == NETWORK_ERROR)
     		{
     			/* First error */
    @@ -350,20 +323,32 @@ int get_values(void)
     
     			stop=1;
     		}
    -/* Possibly, other logic required? */
    -		else if(res == AGENT_ERROR)
    +		else /* if((res == NOTSUPPORTED) || (res == AGENT_ERROR)) */
     		{
    -			zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
    -			zabbix_syslog("Getting value of [%s] from host [%s] failed (ZBX_ERROR)", item.key, item.host );
    -			zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
    -
    -			stop=1;
    -		}
    -		else
    -		{
    -			zabbix_log( LOG_LEVEL_WARNING, "Getting value of [%s] from host [%s] failed", item.key, item.host );
    -			zabbix_syslog("Getting value of [%s] from host [%s] failed", item.key, item.host );
    -			zabbix_log( LOG_LEVEL_WARNING, "The value is not stored in database.");
    +			if(item.status == ITEM_STATUS_NOTSUPPORTED)
    +			{
    +				/* It is not correct */
    +/*				snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",calculate_item_nextcheck(item.itemid, CONFIG_REFRESH_UNSUPPORTED,now), now, item.itemid);*/
    +				snprintf(sql,sizeof(sql)-1,"update items set nextcheck=%d, lastclock=%d where itemid=%d",CONFIG_REFRESH_UNSUPPORTED+now, now, item.itemid);
    +				DBexecute(sql);
    +			}
    +			else
    +			{
    +				zabbix_log( LOG_LEVEL_WARNING, "Parameter [%s] is not supported by agent on host [%s] Old status [%d]", item.key, item.host, item.status);
    +				zabbix_syslog("Parameter [%s] is not supported by agent on host [%s]", item.key, item.host );
    +				DBupdate_item_status_to_notsupported(item.itemid, agent.str);
    +	/*			if(HOST_STATUS_UNREACHABLE == item.host_status)*/
    +				if(HOST_AVAILABLE_TRUE != item.host_available)
    +				{
    +					zabbix_log( LOG_LEVEL_WARNING, "Enabling host [%s]", item.host );
    +					zabbix_syslog("Enabling host [%s]", item.host );
    +					DBupdate_host_availability(item.hostid,HOST_AVAILABLE_TRUE,now,agent.msg);
    +					update_key_status(item.hostid, HOST_STATUS_MONITORED);	/* 0 */
    +					item.host_available=HOST_AVAILABLE_TRUE;
    +	
    +					stop=1;
    +				}
    +			}
     		}
     		free_result(&agent);
     	}
    The patch is not very readable, but in essence it treats the item as not supported for every result that is not otherwise handled.
Working...