#!/usr/bin/perl -w

#
# Profiler utility (maps samples to binaries and functions)
#
# Copyright (c) Compaq Computer Corporation, 1999
#
# Use consistent with the GNU GPL is permitted,
# provided that this copyright notice is
# preserved in its entirety in all copies and derived works.
#
# COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED,
# AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS
# FITNESS FOR ANY PARTICULAR PURPOSE.
#
#      Author          Deborah A. Wallach
#                      Compaq Western Research Laboratory, Palo Alto, CA
#      Date            August 1999

require "flush.pl";

#----------------------------------------------------------------------

$VERSION = "itsy-0.01";

#----------------------------------------------------------------------


sub usage {
    die "usage: iprof -functions -map mapfile [-map mapfile]* [-kernelmapfile kernelmapfile] [-jitfile jitfile] statfile+ OR\n" . 
	"       iprof [-bypid] statfile+\n";
}

$functions = 0;
$nolines = 1;
$skipzeroes = 1;
$bypid = 0;

#----------------------------------------------------------------------

@mapfiles = ();
$kernelmapfile = "";
@statfiles = ();
$jitfile = "";
while ($#ARGV >= 0) {
    $_ = shift @ARGV;
    if (/^-/) {
	if ($_ eq substr("-functions", 0, length $_)) {
	    $functions = 1;
	} elsif ($_ eq substr("-nolines", 0, length $_)) {
	    $nolines = 1;
	} elsif ($_ eq substr("-skipzeroes", 0, length $_)) {
	    $skipzeroes = 1;
	} elsif ($_ eq substr("-bypid", 0, length $_)) {
	    $bypid = 1;
	} elsif ($_ eq substr("-mapfile", 0, length $_)) {
	    push(@mapfiles, shift @ARGV);
	} elsif ($_ eq substr("-kernelmapfile", 0, length $_)) {
	    $kernelmapfile = shift @ARGV;
	} elsif ($_ eq substr("-jitfile", 0, length $_)) {
	    $jitfile = shift @ARGV;
	} else {
	    &usage();
	}
    } else {
	push(@statfiles, $_);
    }
}
&usage unless (($#statfiles >= 0) && (@mapfiles || ! $functions));

#----------------------------------------------------------------------
$NOTAHIT = -1;

sub startnewfile {
    %data = ();
    $hitsum = 0;
    printf "--------------------------------------------------------------\n";
    printf "$filename";
    printf " [pid $pid]" if ($pid != -1);
    printf "\n\n";
}
sub endfile {
    local($cum);
    printf "  cycles         %%     cum%%        addr function\n";
    $cum = 0;
    sub by_reversedata { $data{$b} <=> $data{$a}; }
    foreach $line (sort by_reversedata (keys %data)) {
	$nhits = $data{$line};
	($addr, $name) = $line =~ /^([a-fA-F0-9]+) <(.+)>:$/;
	if ($nhits != $NOTAHIT) {
	    $percent = $nhits/$hitsum * 100;
	    $cum += $percent;
	    printf "%8d   %6.2f%%  %6.2f%%  %#010x %s\n", $nhits, $percent, $cum, hex($addr), $name;
	}
    }
    printf "%8d                                Total\n", $hitsum;
    printf "--------------------------------------------------------------\n";
}


sub startall_nofunctions {
    %metadata = ();
    $metahitsum = 0;
}
sub startnewfile_nofunctions {
    $hitsum = 0;
}
sub endfile_nofunctions {
    $metadata{$filename} += $hitsum;
    $metahitsum += $hitsum;
}
sub endall_nofunctions {
    local($cum);
    printf "--------------------------------------------------------------\n";
    if ($functions) {
	printf "$filename";
	printf " [pid $pid]" if ($pid != -1);
	printf "\n\n";
    }
    printf "  cycles         %%     cum%%  image\n";
    $cum = 0;
    sub by_reversemetadata { $metadata{$b} <=> $metadata{$a}; }
    foreach $line (sort by_reversemetadata (keys %metadata)) {
	$nhits = $metadata{$line};
	$name = $line;
	if ($nhits != $NOTAHIT) {
	    $percent = $nhits/$metahitsum * 100;
	    $cum += $percent;
	    printf "%8d   %6.2f%%  %6.2f%%  %s\n", $nhits, $percent, $cum, $name;
	}
    }
    printf "%8d                     Total\n", $metahitsum;
    printf "--------------------------------------------------------------\n";
}


sub pit {
    local($nhits, $line) = @_;
    $data{$line} = $nhits;

    $hitsum += $nhits unless ($nhits == $NOTAHIT);
}


#----------------------------------------------------------------------
#----------------------------------------------------------------------

sub chopdir {
    local($filename) = @_;
    local($name) = $filename =~ /^\S+\/([^\/]+)$/;
    $name;
}

sub imageid_matches_p {
    local($filename, $image) = @_;
    local($hash) = 0xffffffff;
    local($matches) = 1;
    if (open(SUM, "imageid $filename |")) {
	while (<SUM>) {
	    if (/^\S+: ([a-fA-F0-9]+)$/) {
		$hash = hex($1);
		last;
	    }
	}
	close(SUM);
	if ($hash != $image) {
	    &flush(STDOUT);
	    printf STDERR "Can't find source for %s that matches profiled version!\n", &chopdir($filename);
	    $matches = 0;
	}
    }
    else {
	&flush(STDOUT);
	print STDERR "Mapfile is old ($filename no longer exists)!\n";
	$matches = 0;
    }
    $matches;
}

sub select_objdump {
    local($filename) = @_;
    "arm-linux-objdump -C -d";
}


# handle file in objdump format

sub handle_objdump {
    $objdump = select_objdump($filename);
    open(DUMP, "$objdump $filename |") || (die "Can't objdump $filename: $!\n"); 
    $offsetset = 0;
    $offset = 0xffffffff;

    $funcname = "";

    while (<STAT>) {
	($hitline, $nhits) = /^([A-Fa-f0-9]+) ([A-Fa-f0-9]+)$/;

	while (<DUMP>) {
	    $line = $_;

	    # strip out blocks of zeroes
	    if (/\t\.\.\./) {
		&pit($NOTAHIT, $line) unless $nolines;
		next;
	    }

	    ($dumpline) = /^ *([a-fA-F0-9]+):/;

	    if (! $dumpline) {

		# starting a new function?
		if ($functions && (/^([a-fA-F0-9]+) <([^>]+)>:$/) &&
		    (! /^([a-fA-F0-9]+) <.L[a-fA-F0-9]+>:$/)) {

		    # don't finish function summary until see new one
		    # if we're summing a function...
		    if ($functions && $funcname) {
			&pit($funccount, $funcline) unless (($funccount == 0) && $skipzeroes);
			$funcname = "";
		    }

		    # need to determine if the objdump file is a library or executable
		    if (! $offsetset) {
			if (hex($1) < 0x2000000) {
			    $offset = 0;
			}
			else {
			    $offset = 0x2000000;
			}
			$offsetset = 1;
		    }

		    $funcname = $2;
		    $funcline = $line;
		    $funccount = 0;
		}

		&pit($NOTAHIT, $line) unless $nolines;
		next;
	    }

	    if (hex($dumpline) == hex($hitline) + $offset) {
		&pit($nhits, $line) unless $nolines;
		$funccount += $nhits;
		last;
	    }
	    else {
		&pit(0, $line) unless $nolines;
		next;
	    }
	}
    }
    while (<DUMP>) {
	$line = $_;
	($dumpline) = /^ *([a-fA-F0-9]+):/;
	if (! $dumpline) {

	    # if we're summing a function...
	    if ($functions && $funcname) {
		&pit($funccount, $funcline) unless (($funccount == 0) && $skipzeroes);
		$funcname = "";
	    }

	    # starting a new function?
	    if ($functions && (/^[a-fA-F0-9]+ <([^>]+)>:$/)) {
		$funcname = $1;
		$funcline = $line;
		$funccount = 0;
	    }

	    &pit($NOTAHIT, $line) unless $nolines;
	}
	else {
	    &pit(0, $line) unless $nolines;
	}
    }

    if ($funcname) {
	&pit($funccount, $funcline) unless (($funccount == 0) && $skipzeroes);
    }

    close DUMP;
}

#----------------------------------------------------------------------

sub find_right_kernel_version {
    local($kernel_version) = @_;

    if ($linuxmaps{$kernel_version}) {
	local($unstripped) = $linuxmaps{$kernel_version};
	local($version) = "";
	open(TMP, "strings $unstripped |") || (die "Can't open $unstripped: $!\n");
	while (<TMP>) {
	    if (/^Linux version/) {
		$version = $_;
		last;
	    }
	}
	close(TMP);
	chop $version;
	if ($kernel_version ne $version) {
	    &flush(STDOUT);
	    print STDERR "Can't find the profiled version of kernel (version in map has been replaced)!\n";
	    $ret = "NONE";
	}

	$linuxmaps{$kernel_version} =~ s/vmlinux/System.map/;
	$ret = $linuxmaps{$kernel_version};
    }
    else {
	&flush(STDOUT);
	print STDERR "Can't find the profiled version of linux (not in map)!\n";
	$ret = "NONE";
    }
    $ret;
}


sub make_modules_map {
    %namemap = ();
    %missmap = ();

    open(KMAPFILE, "$kernelmapfile") || (die "Can't open $kernelmapfile: $!\n");

    while (<KMAPFILE>) {
	$line = $_;
	next if (/kmods/);
	last if (/kversion/);
	($name, $image) = $line =~ /^(\S+)\s+([a-fA-F0-9]+)$/;
	$namemap{$name} = hex($image);
    }

    while (<KMAPFILE>) {
	last if (/ksyms/);
	chop;
	$kernel_version = $_;
    }

    while (<KMAPFILE>) {

	if (/^(\S+)\s+(\S+)\s+\[(\S+)\]$/) {
	    $funcaddr = $1;
	    $funcname = $2;
	    $module = $3;
	    next if ($funcname =~ /__insmod_/);
	    if (($namemap{$module}) && ($namemap{$module} ne "SEEN")) {
		$image = $namemap{$module};
		if (! $maps{$image}) {
		    &flush(STDOUT);
		    print STDERR "Can't find any source for module $module!\n";
		    $namemap{$module} = "SEEN";
		    next;
		}
		if (! $missmap{$module}) {
		    if (! imageid_matches_p($maps{$image}, $image)) {
			$namemap{$module} = "SEEN";
			next;
		    }
		    @mlist = ();
		    &suck_in_module($maps{$image}, $module);
		}

		$offset = 0;
		foreach (@mlist) {
		    @result = split('#', $_);
		    if ($result[1] eq "$funcname [$module]") {
			$offset = hex($funcaddr)-hex($result[0]);
			last;
		    }
		}
		if ($offset == 0) {
		    $missmap{$module} = $module;
		}
		else {
		    delete $missmap{$module};
		    foreach (@mlist) {
			@result = split('#', $_);
			$tmp = sprintf ("%x", $offset+hex($result[0]));
			push(@klist, "D". "#" . $tmp . "#" . $result[1]);
		    }
		    $tmp = sprintf ("%x", $offset+hex($last));
		    push(@klist, "D". "#" . $tmp . "#" . "UNKNOWN MODULE (after $module)");
		    $namemap{$module} = "SEEN";   # only do once per module
		}
	    }
	    elsif (!$namemap{$module}) {
		&flush(STDOUT);
		print STDERR "Can't find name mapping for module $module!\n";
	    }
	}
    }
    close KMAPFILE;

    foreach $miss (keys %missmap) {
	&flush(STDOUT);
	print STDERR "Can't find address mapping for module $miss!\n";
    }

}

sub suck_in_module {
    local($filename, $name) = @_;
    my($funcname);
    my($funcoffset);

    $objdump = select_objdump($filename);
    open(DUMP, "$objdump $filename |") || (die "Can't objdump $filename: $!\n"); 

    while (<DUMP>) {

	# starting a new function?
	if ((/^([a-fA-F0-9]+) <([^>]+)>:$/) &&
	    (! /^([a-fA-F0-9]+) <.L[a-fA-F0-9]+>:$/)) {

	    $funcoffset = $1;
	    $funcname = $2;

	    next if ($funcname =~ /^L[0-9]+$/);  # dump labels

	    push(@mlist, $funcoffset . "#" . $funcname . " [$name]");
	}
	elsif (/^ *([a-fA-F0-9]+):/) {
	    $last = $1;
	}
    }
}


sub handle_kernel {
    @klist = ();

    while (<STAT>) {
	($hitline, $nhits) = /^([A-Fa-f0-9]+) ([A-Fa-f0-9]+)$/;
	
	push(@klist, "S" . "#" . $hitline . "#" . $nhits);
    }
	
    &make_modules_map();

    $filename = &find_right_kernel_version($kernel_version);

    return if ($filename eq "NONE");

    open(DUMP, "$filename") || (die "Can't open $filename: $!\n");
    while (<DUMP>) {
	$line = $_;

	next if (/^\s+U current$/);    # I dont know why this is in System.map files
	
	($addr, $name) = $line =~ /^([a-fA-F0-9]+) [A-Za-z\?] (\S+)/;
	    
	# labels found on PC system.maps
	next if ($name =~ /^\.L[0-9]+$/);  # yet another one
	next if ($name =~ /^\.LC[0-9]+$/); # yet another one

	next if ($name =~ /\002/); # another label
	
	# labels found on alpha system.maps
	next if ($name =~ /^L[0-9a-f]+$/);  # label
	next if ($name =~ /^LC[0-9]+$/); # yet another one
	next if ($name =~ /^gcc2/); # yet another one
	next if ($name =~ /^__gnu/); # yet another one
	
	push(@klist, "D" . "#" . $addr . "#" . $name);
    }

    push(@klist, "D". "#" . "c4000000" . "#" . "MODULES");
    push(@klist, "D". "#" . "f0000000" . "#" . "END");
	    
    sub by_addr {     
	@resulta = split('#', $a);
	@resultb = split('#', $b);
	if (hex($resulta[1]) == hex($resultb[1])) {
	    $resulta[0] cmp $resultb[0];   # guarantee stats after system.map data
	}
	else {
	    hex($resulta[1]) <=> hex($resultb[1]); 
	}
    }

    $funcname = "";
    $funccount = 0;

    foreach $line (sort by_addr @klist) {
	@result = split('#', $line);
	if ($result[0] eq "D") {
	    $addr = $result[1];
	    $name = $result[2];

	    &pit($funccount, $funcaddr . " <" . $funcname . ">:") 
		unless ((! $funcname) || (($funccount == 0) && $skipzeroes));

	    $funcname = $name;
	    $funcaddr = $addr;
	    $funccount = 0;
	    
	}
	else {
	    $hitline = $result[1];
	    $nhits = $result[2];

	    $funccount += $nhits;
	}
    }

    close(DUMP);
}

#----------------------------------------------------------------------

sub handle_nomap {
    open(DUMP, "$jitfile") || (die "Can't open $jitfile: $!\n");

    @klist = ();

    while (<STAT>) {
	($hitline, $nhits) = /^([A-Fa-f0-9]+) ([A-Fa-f0-9]+)$/;
	
	push(@klist, "S" . "#" . $hitline . "#" . $nhits);
    }
	
    while (<DUMP>) {
	$line = $_;

	next if (/^\S+\s+\S+$/);
	
	($n1, $start, $end, $n2) = $line =~ /^(\S+\s+\S+\s+\S+)\s+0x(\S+)\s+0x(\S+)\s+\S+\s+(\"[^\"]*\"$)/;

        push(@klist, "DS" . "#" . $start . "#" . $n1 . " " . $n2);
        push(@klist, "DE" . "#" . $end . "#" . $n1 . " " . $n2);
    }

    $end = hex($end) + 4;
    push(@klist, "DS". "#" . "$end" . "#" . "UNKNOWN");
    push(@klist, "DE". "#" . "ffffffff" . "#" . "END");
	    
    $funcname = "";
    $funccount = 0;

    foreach $line (sort by_addr @klist) {
	@result = split('#', $line);
	if ($result[0] eq "DS") {
	    if ($funcname) {
		print "$funcname @result\n";
	    }
	    (! $funcname) || (die "function start/ends for jit'ed code don't match up!\n");
	    
	    $addr = $result[1];
	    $name = $result[2];

	    $funcname = $name;
	    $funcaddr = $addr;
	    $funccount = 0;
	    
	} elsif ($result[0] eq "DE") {
	    if (! $funcname) {
		print "@result\n";
	    }
	    ($funcname) || (die "function start/ends for jit'ed code don't match up!\n");

	    &pit($funccount, $funcaddr . " <" . $funcname . ">:") 
		unless ((! $funcname) || (($funccount == 0) && $skipzeroes));

	    $funcname = "";
	}
	else {
	    $hitline = $result[1];
	    $nhits = $result[2];

	    $funccount += $nhits;
	}
    }

    close(DUMP);
}

#----------------------------------------------------------------------

sub handle_image {
    my($cum);

    $cum = 0;
    while (<STAT>) {
	($hitline, $nhits) = /^([A-Fa-f0-9]+) ([A-Fa-f0-9]+)$/;
	
	$cum += $nhits;
    }
	
    $funcname = "";
    $funccount = 0;

    &pit($cum, 0 . " <" . $filename . ">:");
}

#----------------------------------------------------------------------
#----------------------------------------------------------------------

if ($functions) {
    %maps = ();
    %linuxmaps = ();
    foreach $mapfile (@mapfiles) {
	open(MAPFILE, "$mapfile") || (die "Can't open $mapfile: $!\n");
	while (<MAPFILE>) {
	    $line = $_;
	    ($lib) = $line =~ /^(\S+) /;
	    if (! ($lib =~ /vmlinux$/)) {
		($image) = $line =~ /^\S+ ([a-fA-F0-9]+)$/;
		$image = hex($image);
		if ($maps{$image}) {
		    open(F, "file $lib |");
		    while (<F>) {
			if (/, not stripped/) {
			    $maps{$image} = $lib;
			    last;
			}
		    }
		    close(F);
		}
		else {
		    $maps{$image} = $lib;
		}
	    }
	    else {
		($lib, $version) = $line =~ /^(\S+) (.*)$/;
		$linuxmaps{$version} = $lib;
	    }
	}
	close(MAPFILE);
    }
}    

&startall_nofunctions() if (! $functions);

foreach $statfile (@statfiles) {

    open(STAT, "$statfile") || (die "Can't open $statfile: $!\n");

    $device = 0;
    $inode = 0;
    $image = 0;
    $pid = -1;
    $name = "";
    %paths = ();
    
    # first parse header
    while (<STAT>) {
	last if (/^samples/);
	if (/^version +(\S+)$/) {
	    ($1 eq $VERSION) || (die "Wrong version (expect $VERSION, saw $1)!");
	}
	elsif ( /^device\s+(\S+)$/ ) {
	    $device = $1;
	} elsif ( /^inode\s+(\S+)$/ ) {
	    $inode = $1;
	} elsif ( /^image\s+(\S+)$/ ) {
	    $image = $1;
	} elsif ( /^name\s+(\S+)$/ ) {
	    $name = $1;
	} elsif ( /^pid\s+(\S+)$/ ) {
	    $pid = $1;
	} elsif ( /^path\s+(\S+)$/ ) {
	    @result = split('/', $1);
	    $paths{$result[$#result]} = 1;
	}
    }

    $nofunctions = 0;
    if ($functions) {
	if ($name eq "nomap") {
	    if (! $jitfile) {
		&flush(STDOUT);
		print STDERR "No jit map file available for $name; profiling in image mode instead.\n";
		$nofunctions = 1;
	    }
	    else {
		$filename = $jitfile;
	    }
	}
	else {
	    $image = hex($image);
	    if ($maps{$image}) {
		@result = split('/', $maps{$image});
		$filename = $maps{$image};
		if ($name ne "kernel") {
		    $nofunctions = 1 - imageid_matches_p($filename, $image);
		    if (! ($paths{$result[$#result]})) {
			&flush(STDOUT);
			print STDERR "Name aliasing problem (expected \"$name\", got \"$maps{$image}\").\n";
		    }
		}
	    }
	    else {
		&flush(STDOUT);
		print STDERR "Can't find image matching \"$name\" in map file.\n";
		$nofunctions = 1;
	    }
	}
    }
    else {
	$nofunctions = 1;
    }

    $filename = $name if $nofunctions;

    if ($bypid && (! $functions)) {
	$filename = $filename . " [pid $pid]" if ($pid != -1);
    }

    if ($nofunctions) {
	&startall_nofunctions() if $functions;
	&startnewfile_nofunctions();
    }
    else {
	&startnewfile();
    }

    if ($nofunctions) {
	&handle_image();
    } elsif ($name eq "kernel") {
	if ($kernelmapfile) {
	    &handle_kernel();
	}
	else {
	    &flush(STDOUT);
	    print STDERR "No kernel map file; profiling in image mode instead.\n";
	    &handle_image();
	}
    } elsif ($name eq "nomap") {
	&handle_nomap();
    }
    else {
	&handle_objdump();
    }

    if ($nofunctions) {
	&endfile_nofunctions();
	&endall_nofunctions() if $functions;
    }
    else {
	&endfile();
    }

    close STAT;
}

&endall_nofunctions() if (! $functions);


