Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
Bug 162159 - sys-process/atop-1.17 segfaults
Summary: sys-process/atop-1.17 segfaults
Status: RESOLVED WORKSFORME
Alias: None
Product: Gentoo Linux
Classification: Unclassified
Component: Current packages (show other bugs)
Hardware: AMD64 Linux
Importance: Lowest trivial (vote)
Assignee: Gentoo's Team for Core System packages
URL:
Whiteboard:
Keywords:
Depends on:
Blocks:
 
Reported: 2007-01-15 05:37 UTC by Chad A. Simmons
Modified: 2007-01-20 11:20 UTC (History)
0 users

See Also:
Package list:
Runtime testing required: ---


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Chad A. Simmons 2007-01-15 05:37:46 UTC
atop 1.17 compiles but segfaults

Reproducible: Always

Steps to Reproduce:
1. Set accept keywords for package to ~amd64
2. emerge atop
3. execute atop

Actual Results:  
end of strace atop

fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b64d27df000
read(5, "27762 (pdflush) S 11 1 1 0 -1 83"..., 1024) = 164
read(5, "", 1024)                       = 0
close(5)                                = 0
munmap(0x2b64d27df000, 4096)            = 0
open("status", O_RDONLY)                = 5
fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b64d27df000
read(5, "Name:\tpdflush\nState:\tS (sleeping"..., 1024) = 364
close(5)                                = 0
munmap(0x2b64d27df000, 4096)            = 0
open("cmdline", O_RDONLY)               = 5
fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b64d27df000
read(5, "", 1024)                       = 0
close(5)                                = 0
munmap(0x2b64d27df000, 4096)            = 0
chdir("..")                             = 0
chdir("27763")                          = 0
open("stat", O_RDONLY)                  = 5
fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b64d27df000
read(5, "27763 (pdflush) S 11 1 1 0 -1 83"..., 1024) = 164
read(5, "", 1024)                       = 0
close(5)                                = 0
munmap(0x2b64d27df000, 4096)            = 0
open("status", O_RDONLY)                = 5
fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b64d27df000
read(5, "Name:\tpdflush\nState:\tS (sleeping"..., 1024) = 364
close(5)                                = 0
munmap(0x2b64d27df000, 4096)            = 0
open("cmdline", O_RDONLY)               = 5
fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b64d27df000
read(5, "", 1024)                       = 0
close(5)                                = 0
munmap(0x2b64d27df000, 4096)            = 0
chdir("..")                             = 0
getdents(4, /* 0 entries */, 1024)      = 0
close(4)                                = 0
chdir("/usr/portage/sys-apps")          = 0
fstat(3, {st_mode=S_IFREG|0644, st_size=3889249152, ...}) = 0
mmap(NULL, 27917291520, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
brk(0x680563000)                        = 0x558000
mmap(NULL, 27917422592, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x2b71d3008000
munmap(0x2b71d3008000, 16744448)        = 0
munmap(0x2b71d8000000, 50364416)        = 0
mprotect(0x2b71d4000000, 135168, PROT_READ|PROT_WRITE) = 0
mmap(NULL, 27917291520, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
Process 12018 detached


Expected Results:  
strace application to run

chadgentoo sys-apps # emerge --info
Portage 2.1.2_rc4-r9 (default-linux/amd64/2006.1/desktop, gcc-4.1.1, glibc-2.5-r0, 2.6.19-gentoo-r4 x86_64)
=================================================================
System uname: 2.6.19-gentoo-r4 x86_64 AMD Athlon(tm) 64 X2 Dual Core Processor 4400+
Gentoo Base System version 1.12.8
Timestamp of tree: Sun, 14 Jan 2007 19:00:01 +0000
ccache version 2.4 [enabled]
dev-java/java-config: 1.3.7, 2.0.31-r2
dev-lang/python:     2.4.4
dev-python/pycrypto: 2.0.1-r5
dev-util/ccache:     2.4-r6
dev-util/confcache:  0.4.2-r1
sys-apps/sandbox:    1.2.18.1
sys-devel/autoconf:  2.13, 2.61
sys-devel/automake:  1.4_p6, 1.5, 1.6.3, 1.7.9-r1, 1.8.5-r3, 1.9.6-r2, 1.10
sys-devel/binutils:  2.17
sys-devel/gcc-config: 1.3.14
sys-devel/libtool:   1.5.22
virtual/os-headers:  2.6.19.2
ACCEPT_KEYWORDS="amd64 ~amd64"
AUTOCLEAN="yes"
CBUILD="x86_64-pc-linux-gnu"
CFLAGS="-march=k8 -msse3 -pipe -O2"
CHOST="x86_64-pc-linux-gnu"
CONFIG_PROTECT="/etc /usr/NX/etc /usr/NX/home /usr/kde/3.5/env /usr/kde/3.5/share/config /usr/kde/3.5/shutdown /usr/share/X11/xkb /usr/share/config"
CONFIG_PROTECT_MASK="/etc/env.d /etc/env.d/java/ /etc/gconf /etc/java-config/vms/ /etc/revdep-rebuild /etc/splash /etc/terminfo"
CXXFLAGS="-march=k8 -msse3 -pipe -O2"
DISTDIR="/usr/portage/distfiles"
FEATURES="autoconfig ccache distlocks metadata-transfer parallel-fetch sandbox sfperms strict userfetch userpriv"
GENTOO_MIRRORS="http://distfiles.gentoo.org http://distro.ibiblio.org/pub/linux/distributions/gentoo"
MAKEOPTS="-j4"
PKGDIR="/usr/portage/packages"
PORTAGE_RSYNC_OPTS="--recursive --links --safe-links --perms --times --compress --force --whole-file --delete --delete-after --stats --timeout=180 --exclude=/distfiles --exclude=/local --exclude=/packages"
PORTAGE_TMPDIR="/var/tmp"
PORTDIR="/usr/portage"
PORTDIR_OVERLAY="/usr/local/xgl-coffee"
SYNC="rsync://rsync.gentoo.org/gentoo-portage"
USE="X Xaw3d a52 aac aalib acct acl acpi aio alsa amd64 amr apache2 apm asf athena bash-completion berkdb bitmap-fonts blas bzip2 cairo ccache cddb cdr cgi cli commercial cracklib crypt css cups curl daap dbus dlloader dnd dri dts dvd dvdr dvdread eds emboss encode escreen esd exif extensions fam fame fat ffmpeg fftw firefox flac fortran gd gdbm gif glitz glx gmail gmailtimestamps gnome gnutls gpm gstreamer gtk gtk2 hal hpn httpd iconv ieee1394 imagemagick ipv6 isdnlog joystick jpeg kde kdeenablefinal kdehiddenvisibility kdenablefinal ldap libg++ lm_sensors lzo mad mikmod mjpeg mp3 mp4 mpeg mplayer mpm-worker mysql ncurses network nls nptl nptlonly nsplugin nvidia nxclient ocaml offensive ogg opengl oss pam pcre pdf perl php pic png ppds pppd python qt3 qt4 quicktime readline realmedia reflection reiserfs rtc rtsp sdl session sftplogging shout smp snmp spell spl ssl stream svg tcpd theora threads truetype truetype-fonts type1-fonts udev unicode upnp usb vorbis x264 xcomposite xml xml2 xorg xscreensaver xv xvid xvmc yv12 zlib" ALSA_CARDS="ali5451 als4000 atiixp atiixp-modem bt87x ca0106 cmipci emu10k1x ens1370 ens1371 es1938 es1968 fm801 hda-intel intel8x0 intel8x0m maestro3 trident usb-audio via82xx via82xx-modem ymfpci" ALSA_PCM_PLUGINS="adpcm alaw asym copy dmix dshare dsnoop empty extplug file hooks iec958 ioplug ladspa lfloat linear meter mulaw multi null plug rate route share shm softvol" ELIBC="glibc" INPUT_DEVICES="evdev mouse keyboard joystick" KERNEL="linux" USERLAND="GNU" VIDEO_CARDS="nvidia"
Unset:  CTARGET, EMERGE_DEFAULT_OPTS, INSTALL_MASK, LANG, LC_ALL, LDFLAGS, LINGUAS, PORTAGE_RSYNC_EXTRA_OPTS
Comment 1 Chad A. Simmons 2007-01-15 16:27:47 UTC
atop[11934]: segfault at 0000000000000000 rip 00002b49a3fba58b rsp 00007fff07042638 error 6
Comment 2 SpanKY gentoo-dev 2007-01-15 23:52:53 UTC
the trick here is that something is making it try and malloc a ton of memory:
mmap(NULL, 27917291520, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)

and then atop probably doesn't check the return value of malloc() and uses the resulting NULL pointer, which leads to an obvious segv

rebuild atop with debugging enabled and run it through gdb to get a useful backtrace
Comment 3 Chad A. Simmons 2007-01-16 14:33:02 UTC
The ebuild has no USE=debug flag, so I recompiled with FEATURES="nostrip"
(gdb) start
Breakpoint 1 at 0x402aa0
Starting program: /usr/bin/atop
0x0000000000402aa0 in main ()
(gdb) step
Single stepping until exit from function main,
which has no line number information.

Program received signal SIGSEGV, Segmentation fault.
0x00002ab534b2a58b in memset () from /lib/libc.so.6
Comment 4 SpanKY gentoo-dev 2007-01-16 22:04:17 UTC
that isnt the purpose of USE=debug

please review:
http://www.gentoo.org/doc/en/bugzilla-howto.xml
Comment 5 Chad A. Simmons 2007-01-17 04:38:33 UTC
OK, I added gcc flags to get debugging turned on. I got some <optimized out> in the output, so I recompiled with optimization off. The crash is at atop.c line 653, which is in this if-then block in the code:
    if (nexit > 0)
                {
                        curpexit = malloc(  nexit * sizeof(struct pstat));
                        memset(curpexit, 0, nexit * sizeof(struct pstat));

                        acctphotoproc(curpexit, nexit);
                }
                else



chadgentoo tmp # gdb atop
GNU gdb 6.6
Copyright (C) 2006 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu"...
Using host libthread_db library "/lib/libthread_db.so.1".
(gdb) run
Starting program: /usr/bin/atop

Program received signal SIGSEGV, Segmentation fault.
0x0000000000403155 in engine () at atop.c:653
653     atop.c: No such file or directory.
        in atop.c
(gdb) bt
#0  0x0000000000403155 in engine () at atop.c:653
#1  0x0000000000402e3a in main (argc=1, argv=0x7fff5f814518) at atop.c:494

Comment 6 SpanKY gentoo-dev 2007-01-17 06:27:11 UTC
run 'info locals' to see what the things are set to
Comment 7 Chad A. Simmons 2007-01-17 12:30:30 UTC
(gdb) info locals
i = 32767
c = 0
p = 0x7fff3aca5c50 "\001"
rlim = {rlim_cur = 0, rlim_max = 46993115302816}
Comment 8 SpanKY gentoo-dev 2007-01-17 17:05:42 UTC
there should be a whole lot more than that

unpack atop and build it yourself ... edit the Makefile and change the -O to -g

the issue looks like nexit gets set to some huge value and you'll need to trace back why that is

atop works fine on my amd64/linux-2.6.19.2, so i cant trace it myself
Comment 9 Chad A. Simmons 2007-01-18 15:23:58 UTC
I need to trace back and find where this value for nexit is being set but this is what I have so far.

chadgentoo atop-1.17 # gdb ./atop
GNU gdb 6.6
Copyright (C) 2006 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu"...
Using host libthread_db library "/lib/libthread_db.so.1".
(gdb) run
Starting program: /var/tmp/portage/sys-process/atop-1.17/distdir/atop-1.17/atop

Program received signal SIGSEGV, Segmentation fault.
0x0000000000403155 in engine () at atop.c:653
653                             memset(curpexit, 0, nexit * sizeof(struct pstat));
(gdb) bt
#0  0x0000000000403155 in engine () at atop.c:653
#1  0x0000000000402e3a in main (argc=1, argv=0x7fff9b6a35b8) at atop.c:494
(gdb) info locals
lastcmd = 0 '\0'
sigact = {__sigaction_handler = {sa_handler = 0x40341e <getalarm>,
    sa_sigaction = 0x40341e <getalarm>}, sa_mask = {__val = {
      0 <repeats 16 times>}}, sa_flags = 0, sa_restorer = 0}
curpexit = (struct pstat *) 0x0
devpstat = (struct pstat *) 0x2b870f978f67
npresent = 80
nexit = 67108864
n = -1
nzombie = 0
timelimit = 0
cursstat = (struct sstat *) 0x51e4a0
presstat = (struct sstat *) 0x519010
devsstat = (struct sstat *) 0x523930
hlpsstat = (struct sstat *) 0x519010
curpact = (struct pstat *) 0x532bf0
curplen = 100
(gdb)
Comment 10 Chad A. Simmons 2007-01-18 16:21:58 UTC
OK I'm a moron. Accounting somehow got turned off and the /var/log/acct file was deleted. Still I think atop should let you know accounting is off instead of simply using bad info and SEGV'ing.
Comment 11 SpanKY gentoo-dev 2007-01-19 04:44:42 UTC
ive never used accounting before; how do i turn it off and reproduce this bug ?
Comment 12 Chad A. Simmons 2007-01-19 17:49:20 UTC
Run as root /usr/sbin/accton (should turn off)
/usr/sbin/accton /path/to/acct/file (should turn on)
Comment 13 SpanKY gentoo-dev 2007-01-20 00:27:29 UTC
guess i still dont get it ... i ran:
accton
atop

and it works ... i dont have any acct file in /var/log/ or /var/account/
Comment 14 Chad A. Simmons 2007-01-20 01:06:08 UTC
OK, I think we can close this. I can no longer reproduce it. I also turned accounting off and tried to run atop: no segv. I also tried removing the acct files: still no segv. The only thing I can think of is that I actually did have accounting on, pointing to /var/secure/pacct, and that file had a corrupt record that triggered the segv in atop. When I redirected the acct info to /var/log/acct, atop started working.
Comment 15 SpanKY gentoo-dev 2007-01-20 11:20:14 UTC
well, if you hit the segv again, please post the file that was causing it