Ad Widget

Collapse

zabbix_server 1.6.2 keeps crashing

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • soragan
    Junior Member
    • Feb 2009
    • 20

    #1

    zabbix_server 1.6.2 keeps crashing

    Hi All,

    I am running 1.6.2 on a DELL PE 750 with 2GB RAM. The system shows more than 512MB of free memory at the time of the crash. The Zabbix server log file shows the errors below:

    9483:20090303:143921 Host [XXXX]: first network error, wait for 15 seconds
    9483:20090303:143921 Parameter [libc.so.6] will be checked after 120 seconds on host [XXXX]
    9446:20090303:143934 One child process died. Exiting ...
    9446:20090303:143936 ZABBIX Server stopped. ZABBIX 1.6.2.

    Strace shows:

    Process 9446 attached - interrupt to quit
    restart_syscall(<... resuming interrupted call ...>) = 0
    socket(PF_FILE, SOCK_STREAM, 0) = 5
    fcntl64(5, F_SETFL, O_RDONLY) = 0
    fcntl64(5, F_GETFL) = 0x2 (flags O_RDWR)
    connect(5, {sa_family=AF_FILE, path="/var/lib/mysql/mysql.sock"}, 110) = 0
    setsockopt(5, SOL_SOCKET, SO_RCVTIMEO, "\2003\341\1\0\0\0\0", 8) = 0
    setsockopt(5, SOL_SOCKET, SO_SNDTIMEO, "\2003\341\1\0\0\0\0", 8) = 0
    setsockopt(5, SOL_IP, IP_TOS, [8], 4) = -1 EOPNOTSUPP (Operation not supported)
    setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
    read(5, "4\0\0\0\n5.0.45\0\3502\0\0;NT>vY:W\0,\242\10\2\0\0\0"..., 16384) = 56
    write(5, "-\0\0\1\215\242\3\0\0\0\0@\10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 49) = 49
    read(5, "\7\0\0\2\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=5, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(5, "\7\0\0\0\2zabbix", 11) = 11
    read(5, "\7\0\0\1\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=5, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(5, "\27\0\0\0\3SET CHARACTER SET utf8", 27) = 27
    read(5, "\7\0\0\1\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=5, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(5, "\1\0\0\0\1", 5) = 5
    shutdown(5, 2 /* send and receive */) = 0
    close(5) = 0
    rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
    rt_sigaction(SIGCHLD, NULL, {0x806b0d0, [], 0}, 8) = 0
    rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
    nanosleep({60, 0}, {60, 0}) = 0
    socket(PF_FILE, SOCK_STREAM, 0) = 5
    fcntl64(5, F_SETFL, O_RDONLY) = 0
    fcntl64(5, F_GETFL) = 0x2 (flags O_RDWR)
    connect(5, {sa_family=AF_FILE, path="/var/lib/mysql/mysql.sock"}, 110) = 0
    setsockopt(5, SOL_SOCKET, SO_RCVTIMEO, "\2003\341\1\0\0\0\0", 8) = 0
    setsockopt(5, SOL_SOCKET, SO_SNDTIMEO, "\2003\341\1\0\0\0\0", 8) = 0
    setsockopt(5, SOL_IP, IP_TOS, [8], 4) = -1 EOPNOTSUPP (Operation not supported)
    setsockopt(5, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
    read(5, "4\0\0\0\n5.0.45\0\3642\0\0{*\\sMg>>\0,\242\10\2\0\0\0"..., 16384) = 56
    write(5, "-\0\0\1\215\242\3\0\0\0\0@\10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 49) = 49
    read(5, "\7\0\0\2\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=5, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(5, "\7\0\0\0\2zabbix", 11) = 11
    read(5, "\7\0\0\1\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=5, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(5, "\27\0\0\0\3SET CHARACTER SET utf8", 27) = 27
    read(5, "\7\0\0\1\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=5, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(5, "\1\0\0\0\1", 5) = 5
    shutdown(5, 2 /* send and receive */) = 0
    close(5) = 0
    rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
    rt_sigaction(SIGCHLD, NULL, {0x806b0d0, [], 0}, 8) = 0
    rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
    nanosleep({60, 0}, 0xbfc89564) = ? ERESTART_RESTARTBLOCK (To be restarted)
    --- SIGCHLD (Child exited) @ 0 (0) ---
    open("/var/log/zabbix/zabbix_server.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 5
    time(NULL) = 1236062374
    fstat64(5, {st_mode=S_IFREG|0664, st_size=866198, ...}) = 0
    mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f36000
    write(5, " 9446:20090303:143934 One child"..., 59) = 59
    close(5) = 0
    munmap(0xb7f36000, 4096) = 0
    stat64("/var/log/zabbix/zabbix_server.log", {st_mode=S_IFREG|0664, st_size=866257, ...}) = 0
    kill(9462, SIGTERM) = 0
    kill(9463, SIGTERM) = 0
    kill(9464, SIGTERM) = 0
    kill(9465, SIGTERM) = 0
    kill(9466, SIGTERM) = 0
    kill(9467, SIGTERM) = 0
    kill(9468, SIGTERM) = 0
    kill(9469, SIGTERM) = 0
    kill(9470, SIGTERM) = 0
    kill(9471, SIGTERM) = 0
    kill(9472, SIGTERM) = 0
    kill(9473, SIGTERM) = 0
    kill(9474, SIGTERM) = 0
    kill(9475, SIGTERM) = 0
    kill(9476, SIGTERM) = 0
    kill(9477, SIGTERM) = 0
    kill(9478, SIGTERM) = 0
    kill(9479, SIGTERM) = 0
    kill(9480, SIGTERM) = 0
    kill(9481, SIGTERM) = 0
    kill(9482, SIGTERM) = 0
    kill(9483, SIGTERM) = 0
    kill(9484, SIGTERM) = 0
    kill(9485, SIGTERM) = 0
    kill(9486, SIGTERM) = 0
    kill(9487, SIGTERM) = 0
    fcntl64(3, F_SETLK64, {type=F_UNLCK, whence=SEEK_SET, start=0, len=0}, 0xbfc8907c) = 0
    close(3) = 0
    munmap(0xb7f37000, 4096) = 0
    unlink("/var/tmp/zabbix_server.pid") = 0
    rt_sigprocmask(SIG_BLOCK, [CHLD], [CHLD], 8) = 0
    nanosleep({2, 0}, {2, 0}) = 0
    socket(PF_FILE, SOCK_STREAM, 0) = 3
    fcntl64(3, F_SETFL, O_RDONLY) = 0
    fcntl64(3, F_GETFL) = 0x2 (flags O_RDWR)
    connect(3, {sa_family=AF_FILE, path="/var/lib/mysql/mysql.sock"}, 110) = 0
    setsockopt(3, SOL_SOCKET, SO_RCVTIMEO, "\2003\341\1\0\0\0\0", 8) = 0
    setsockopt(3, SOL_SOCKET, SO_SNDTIMEO, "\2003\341\1\0\0\0\0", 8) = 0
    setsockopt(3, SOL_IP, IP_TOS, [8], 4) = -1 EOPNOTSUPP (Operation not supported)
    setsockopt(3, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
    read(3, "4\0\0\0\n5.0.45\0\3672\0\0=l**3`m!\0,\242\10\2\0\0\0"..., 16384) = 56
    write(3, "-\0\0\1\215\242\3\0\0\0\0@\10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 49) = 49
    read(3, "\7\0\0\2\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=3, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(3, "\7\0\0\0\2zabbix", 11) = 11
    read(3, "\7\0\0\1\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=3, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(3, "\27\0\0\0\3SET CHARACTER SET utf8", 27) = 27
    read(3, "\7\0\0\1\0\0\0\2\0\0\0", 16384) = 11
    poll([{fd=3, events=POLLIN|POLLPRI}], 1, 0) = 0
    write(3, "\1\0\0\0\1", 5) = 5
    shutdown(3, 2 /* send and receive */) = 0
    close(3) = 0
    semctl(12517400, 0, IPC_64|IPC_RMID, 0xbfc89078) = 0
    semctl(12517400, 0, IPC_64|IPC_RMID, 0xbfc89068) = -1 EINVAL (Invalid argument)
    open("/var/log/zabbix/zabbix_server.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 3
    time(NULL) = 1236062376
    fstat64(3, {st_mode=S_IFREG|0664, st_size=866257, ...}) = 0
    mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f37000
    write(3, " 9446:20090303:143936 ZABBIX Se"..., 60) = 60
    close(3) = 0
    munmap(0xb7f37000, 4096) = 0
    stat64("/var/log/zabbix/zabbix_server.log", {st_mode=S_IFREG|0664, st_size=866317, ...}) = 0
    exit_group(0) = ?
    Process 9446 detached

    Does anybody have a similar problem?
  • soragan
    Junior Member
    • Feb 2009
    • 20

    #2
    It looks like the problem is caused by heavy IPMI polling. I have disabled a host group (23 hosts) which uses IPMI to poll 3-4 keys, and the Zabbix server is running fine now.

    Comment

    • hollo
      Junior Member
      • Oct 2007
      • 20

      #3
      Hi,

      I just want to say that I have the same problem. The issue should not be due to bad hardware; I ran the earlier version without any problems.

      Specs: Dell PowerEdge 1950
      CPU 2x Quadcore 2 GHz
      Mem: 3.2GB

      I have now tried reducing the amount of logging. I do not know the real cause; my guess is that it may be due to too much I/O.

      I will follow this thread and hope that someone might come up with some good ideas.

      Comment

      • soragan
        Junior Member
        • Feb 2009
        • 20

        #4
        Disabling IPMI allowed my zabbix_server to run longer before it stopped. I ran strace and found a segmentation fault in one of the child processes.

        18194 munmap(0xb7f38000, 4096) = 0
        18194 stat64("/var/log/zabbix/zabbix_server.log", {st_mode=S_IFREG|0664, st_size=502402, ...}) = 0
        18194 --- SIGSEGV (Segmentation fault) @ 0 (0) ---
        18003 <... nanosleep resumed> 0xbfdaf6b4) = ? ERESTART_RESTARTBLOCK (To be restarted)
        18003 --- SIGCHLD (Child exited) @ 0 (0) ---
        18003 open("/var/log/zabbix/zabbix_server.log", O_RDWR|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 5
        18003 time(NULL) = 1236593738
        18003 fstat64(5, {st_mode=S_IFREG|0664, st_size=502402, ...}) = 0
        18003 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f38000
        18003 write(5, " 18003:20090309:181538 One child"..., 59) = 59
        18003 close(5) = 0
        18003 munmap(0xb7f38000, 4096) = 0
        18003 stat64("/var/log/zabbix/zabbix_server.log", {st_mode=S_IFREG|0664, st_size=502461, ...}) = 0

        I am using the 32-bit build of Zabbix 1.6.2.

        Comment

        • hollo
          Junior Member
          • Oct 2007
          • 20

          #5
          I have tried to find out how to disable IPMI, but I cannot find anywhere to disable it. I can, however, choose NOT to make any IPMI checks; is that what you mean?

          I have made some tests. At first I got a message about being low on swap when the system crashed, so I disabled my swap.

          Now my processor load is just way too high, so Zabbix crashes after some time :/

          I have made this "script" to restart Zabbix after a crash, but I still cannot access the web server during the crash, which lasts a couple of minutes.

          If anyone from the development team needs some data, feel free to ask for it. I would be glad to help in order to get the problem solved.

          Comment

          • Alexei
            Founder, CEO
            Zabbix Certified Trainer
            Zabbix Certified Specialist, Zabbix Certified Professional
            • Sep 2004
            • 5654

            #6
            You may try pre-1.6.3 available from http://www.zabbix.com/developers.php.

            Please send FULL gzipped after-crash zabbix_server.log to [email protected]. Do not forget to set DebugLevel=4 in zabbix_server.conf.
            Alexei Vladishev
            Creator of Zabbix, Product manager
            New York | Tokyo | Riga
            My Twitter

            Comment

            Working...