#!/usr/bin/perl -w # Author: Rob Reid, http://www.cv.nrao.edu/~rreid/ # Copyright (C) 2008 Associated Universities, Inc. Washington DC, USA. # # This is free software; you can redistribute it and/or modify it under the # terms of the GNU Library General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) any # later version. # # This is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have a copy of the GNU General Public License; if not, write to # the Free Software Foundation, Inc., 675 Massachusetts Ave, Cambridge, MA # 02139, USA. # Changelog: # vers. 2.3: Updated for casacore. It may also require a bit less # configuration now, but it has File::Spec::Link as an additional # recommended dependency. Try 'cpan -i File::Spec::Link' if you do # not have it. (Testing and working around missing modules is # possible, but since 'use' goes in the BEGIN{} section, it's a # pain.) # vers. 2.2: Fixed erroneous 'matches found' msg in emacs bymaking lsrc's exit # value agree with grep's. # vers. 2.1: Much faster, and with much better editor integration. use Term::ANSIColor qw(colored); use Getopt::Long; use File::Spec; use File::Spec::Link; use Memoize; # Using a full fledged database package would probably be faster, but less # portable (without making the user use CPAN). use Storable; use strict; ##### Customizable stuff. The $readme is below. ############# # my $compactifier = "$ENV{'HOME'}/bin/shrub_simp"; # # See $inc_path_docstr below for the doc string on $deflt_inc itself. The 1st # arg for the generator function is a colon separated string of directories # (i.e. standard $PATH format). The optional additional args are subdirectory # names for marking the root and root include directory of a code tree. For # example, if you are working in ~/src/project-0.99/src/gui/, and give it # 'src/includes', it will add ~/src/project-0.99/src/includes to $deflt_inc if # it exists. make_inc_path() also has customizations for CASA and casacore. my $deflt_inc = make_inc_path('/usr/include'); # my $inc_path_docstr = < headers. They will automatically be switched between absolute and relative paths if it shortens the output. ~ expansion is not supported. Default: $deflt_inc EOF # # The filename where lsrc caches its list of headers for each file. # It actually gets stored in $cachefn.gz since it compresses so well. # If more than one lsrc could use it simultaneously, you should look at using # LockFile::Simple. my $cachefn = "$ENV{'HOME'}/.lsrcdb"; # # The output text attribute for header files that are directly included in # $srcfile. my $FIRST_HDR = "green"; # # The text attribute for the '<'. my $FROM_ATTR = "bold yellow"; # # The output text attribute for the header file(s) that include(s) $pat. my $LAST_HDR = "bold"; # # The text attribute for the line separating filenames from matching contents. my $LINE = "cyan"; # # The pattern used to find included headers. # You will probably have to change more than this if you change languages, # but it may not be too hard. # Possibilities: # Language $hdr_pat Extension # Emacs Lisp ^\(require|autoload .el # Perl ^use .pl is for Prolog. Use file. # Python ^(execfile|from|import) .py, but use file as backup. # Shell ^(\.|source)\s+ .*sh, but use file as backup. # # If you go this route, you'll probably want a language detection function, # and functions to manage the paths, and maybe a ~/.lsrcrc. my $hdr_pat = qr/^#include\s+(<|")([^">]+)/; ##### End of customizable stuff ############# my $readme = <) { if (-d $dn) { my $sdn = $dn; $sdn =~ s,\.\./code/include/,,; $sdn =~ s,../casacore/,,; symlink($dn, $sdn) or warn "Error $! symbolically linking $dn to $sdn"; } } chdir($startdir); } push @rootincs, $casamap; } else { # Old CASA. push @rootincs, "$casaroot/code/include"; } } # Not well tested for nonCASA projects! foreach my $rootinc (@rootincs) { my ($rootdir, $restofrootinc) = split(/\//, $rootinc, 2); if ($ENV{'PWD'} =~ $rootdir) { my $ip = $ENV{'PWD'}; $ip =~ s,$rootdir.*,$rootinc,; $incpath = "$ip:$incpath"; } } $incpath =~ s/:$//; return $incpath; } my $compactify = 0; my $editor_mode = 0; my $help = 0; my $case_insensitive = 0; my $verbose = 0; my $max_finds = -1; # = infinity Getopt::Long::Configure("bundling"); GetOptions('compact|c' => \$compactify, 'editor|e' => \$editor_mode, 'insensitive|i' => \$case_insensitive, 'max=i' => \$max_finds, 'verbose|v' => \$verbose, 'help|?|h' => \$help) or provide_help_and_die(1); if ($compactify && !-x $compactifier) { print "$compactifier is not executable, so it will be ignored.\n"; $compactify = 0; } sub provide_help_and_die{ my $exit_status = $_[0]; print $readme; exit($exit_status); } # bin/grep exits with 0 if it finds a match, 2 on error, and 1 otherwise. For # interacting with emacs it is best to be semigreplike. provide_help_and_die(1) if($help); provide_help_and_die(2) if(@ARGV < 2 || @ARGV > 3); my $pat = $ARGV[0]; my $srcfile = $ARGV[1]; my $inc_path = $ARGV[2] || $deflt_inc; # Make the include paths absolute for now so the cache can be used when the # script is run from different directories. my @inc_paths = split(/:/, $inc_path); foreach my $ip (@inc_paths) { if (!File::Spec->file_name_is_absolute($ip)) { $ip = File::Spec->rel2abs($ip); } } # Hash of arrays: the keys are absolute paths, and the arrays are the absolute # paths of files #included by the key. This gets Stored in and retrieved from # a cache, so its contents must be true regardless of the particulars of a # particular run. (i.e. absolute paths & no referrer info) my %hdr_hash = (); my $update_cachefn = 0; # Don't rewrite it unless necessary. # Another hash of arrays: the keys are absolute paths of files that include # $pat, and the arrays are lists of the header files in $srcfile (as listed # there) responsible for #including the key, or $srcfile itself if $pat # is in $srcfile. Since it depends on $srcfile and $pat it is not cached. my %rfr_hash = (); # Hash of hashes. # 1st key: Absolute path of a file that contains $pat. # 2nd key: Line number (starting from 1) of a line containing $pat. # value: The line. # Depends on $pat, so not cached. my %mentions = (); my $num_finds = 0; # Headers as they are referred to in the code, keyed by their absolute paths. # Cached along with %hdr_hash. my %shtsrc = (); # The modification times ([9] of the array returned by stat) of the keys. my %hdr_searched = (); # Keyed by absolute paths of files that have been searched for $pat. my %searched_for_pat = (); # keys: a partial membership list of the tree of headers #included by the # current $rootref. # Not cachable because it would defeat the point to keep track of whether files # between $rootref and the key have changed. my %connected = (); # Ensure that $hdr_hash{$srcfile} is the list of headers #included by $srcfile. sub find_hdrs{ my ($srcfile, $shtsrc, $rootref) = @_; my $srcmtime = (stat($srcfile))[9]; # $hdr_hash{$srcfile} needs to be computed.if it doesn't exist # or $srcfile has been modified in the meantime. if (!exists $hdr_hash{$srcfile} || ($srcmtime > $hdr_searched{$srcfile})) { my @hdrs = (); if (!open(SRC, $srcfile)) { print "Error $! opening $srcfile in find_hdrs().\n"; return; } # Even though headers tend to cluster at the top, I don't think there's any # safe way to make it stop looking for headers early. This is why caching # %hdr_hash really helps. foreach my $srcline () { if ($srcline =~ $hdr_pat) { my ($loc, $hdr) = ($1, $2); my $exphdr = ''; if ($loc eq '<') { foreach my $hdr_root (@inc_paths) { if (-f "$hdr_root/$hdr") { $exphdr = "$hdr_root/$hdr"; last; } elsif (-f "$hdr_root/${hdr}.h") { # Catch style. $exphdr = "$hdr_root/${hdr}.h"; last; } } } elsif (-f $hdr) { $exphdr = File::Spec->rel2abs($hdr); # Absolute because of the cache. } if ($exphdr ne ''){ push @hdrs, $exphdr; $shtsrc{$exphdr} = $hdr; } } } close(SRC); $hdr_hash{$srcfile} = [@hdrs]; $hdr_searched{$srcfile} = $srcmtime; $update_cachefn = 1; } foreach my $exphdr (@{$hdr_hash{$srcfile}}) { if (grep(/\Q$shtsrc\E/, @{$rfr_hash{$exphdr}}) < 1) { push @{$rfr_hash{$exphdr}}, $shtsrc; } } } sub find_pat{ my $exphdr = $_[0]; if (!exists $searched_for_pat{$exphdr}) { open(HDR, $exphdr) or return "Error $! opening $exphdr\n"; my $linenum = 0; my $initial_finds = $num_finds; foreach my $hdrline () { ++$linenum; if ($hdrline =~ /$pat/ || ($case_insensitive && $hdrline =~ /$pat/i)) { chomp $hdrline; $mentions{$exphdr}{$linenum} = $hdrline; ++$num_finds; last if(!keep_finding_pat()); } } close(HDR); $searched_for_pat{$exphdr} = 1; } } sub keep_finding_pat{ return ($max_finds < 0 || $num_finds < $max_finds); } sub keep_finding_refs{ my $keepgoing = keep_finding_pat(); if (!$keepgoing) { # Getting all the refs to a mention for my $exphdr (keys %mentions) { # is tougher. if (!exists $connected{$exphdr}) { $keepgoing = 1; last; } } } return $keepgoing; } sub process_file{ my ($srcfile, $shtsrc, $calllevel, $rootref) = @_; find_pat($srcfile) if keep_finding_pat(); if (!$calllevel && $num_finds > 0) { push @{$rfr_hash{$srcfile}}, $shtsrc; $shtsrc{$srcfile} = $shtsrc; } elsif ($calllevel == 1) { %connected = (); # Initialize it for $rootref. } if (keep_finding_refs()) { find_hdrs($srcfile, $shtsrc, $rootref); foreach my $hdr (@{$hdr_hash{$srcfile}}) { if (!exists $connected{$hdr}) { # loops are common + nasty $connected{$hdr} = 1; process_file($hdr, !$calllevel ? $shtsrc{$hdr} : $shtsrc, $calllevel + 1, !$calllevel ? $hdr : $rootref); } last if (!keep_finding_refs() && $calllevel); } } } my %rfr_grouper = (); sub print_mentstr { my $mentstr = $_[0]; print "\n"; print colored('-' x 70, $LINE); print "\n$mentstr\n"; } sub abs_and_rel { my $absfn = $_[0]; my $relfn = $absfn; if (File::Spec->file_name_is_absolute($absfn)) { $relfn = File::Spec->abs2rel($absfn); } else { $absfn = File::Spec->rel2abs($relfn); } return ($absfn, $relfn); } memoize('abs_and_rel'); sub shortest_path { my ($absfn, $relfn) = abs_and_rel($_[0]); my %candidates = ($absfn => '', $relfn => ''); # if ($absfn =~ /\.lsrc_casamap/) { # my $lstart = $absfn; # # # Chop off everything past .lsrc_casamap/x # $lstart =~ s,(\.lsrc_casamap/[^/]+)/.+,$1,; # # my $lcmdir = $lstart; # $lcmdir =~ s,\.lsrc_casamap/.+,.lsrc_casamap,; # # my $startdir = $ENV{'PWD'}; # chdir($lcmdir) or warn "Error $! cding to $lcmdir"; # my $ltarg = readlink $lstart; # my ($abslt, $rellt) = abs_and_rel($ltarg); # chdir($startdir); # # my $alias = $absfn; # $alias =~ s/$lstart/$abslt/; # # # The above process often leaves $alias in a state like # # /gnuactive/.lsrc_casamap/../casacore. Get rid of the .lsrc_casamap/../. # $alias =~ s,\.*[^./]+\.*/\.\./,,g; # Can have dots, but not just dots. # # push @candidates, $alias, File::Spec->abs2rel($alias); # } my ($absdelinkified, $reldelinkified) = abs_and_rel(File::Spec::Link->full_resolve($absfn)); $candidates{$absdelinkified} = ''; $candidates{$reldelinkified} = ''; #print "absdelinkified = $absdelinkified\n"; #print "reldelinkified = $reldelinkified\n"; # # Need to check if the path of $absfn includes a link, not whether $absfn is # # a link. # if (-l $absfn) { # push @candidates, readlink($absfn); # } my @candidates = keys %candidates; # Perl is stranger than just line noise. my $minlength = length($candidates[0]); my $bestpath = $candidates[0]; foreach my $cand (@candidates) { #print "cand: $cand\n"; my $candlength = length($cand); if ($candlength < $minlength) { $minlength = $candlength; $bestpath = $cand; } } return $bestpath; } memoize('shortest_path'); sub print_mentions{ my $exit_status = 1; # 1 = None found. foreach my $exphdr (sort keys %mentions) { my @rfrs = @{$rfr_hash{$exphdr}}; $exit_status = 0; #my $mentstr = join("\n", @mentions) . "\n"; my $mentstr = ""; foreach my $linenum (sort {$a <=> $b} keys %{$mentions{$exphdr}}) { if ($editor_mode) { my $shtrhdr = shortest_path($exphdr); $mentstr .= "$shtrhdr:$linenum:"; } $mentstr .= "$mentions{$exphdr}{$linenum}\n"; } if (@rfrs > 0) { my $rfrstr = join("\n", @rfrs); if ($compactify) { $rfrstr = `echo "$rfrstr" | $compactifier`; chomp $rfrstr; } if ($editor_mode) { if (exists $rfr_grouper{$rfrstr}) { $rfr_grouper{$rfrstr} .= $mentstr; } else { $rfr_grouper{$rfrstr} = $mentstr; } } else { print colored($rfrstr, $FIRST_HDR); print colored(' < ', $FROM_ATTR); } } if (!$editor_mode || @rfrs == 0) { print colored($shtsrc{$exphdr}, $LAST_HDR) if (!$editor_mode); print_mentstr($mentstr); } } if ($editor_mode) { foreach my $rfrstr (sort keys %rfr_grouper) { print $rfrstr; print_mentstr($rfr_grouper{$rfrstr}); } } return $exit_status; } ############v v v v v v v## 'main()' ##v v v v v v v############## if (-f "${cachefn}.gz") { print "Retrieving known header branches from ${cachefn}.gz.\n" if $verbose; system("gunzip ${cachefn}.gz"); my $cache = retrieve($cachefn); %hdr_hash = %{$cache->{'hdr'}}; %shtsrc = %{$cache->{'shtsrc'}}; %hdr_searched = %{$cache->{'mtimes'}}; } print "Searching the header tree.\n" if $verbose; my $abssrc = File::Spec->rel2abs($srcfile); process_file($abssrc, $srcfile, 0, $abssrc); my $exit_status = print_mentions(); if ($update_cachefn) { print "Storing found header branches to ${cachefn}.gz.\n" if $verbose; my %cache = ('hdr' => \%hdr_hash, 'shtsrc' => \%shtsrc, 'mtimes' => \%hdr_searched); store(\%cache, $cachefn); } elsif ($verbose){ print "No previously unexplored code branches were searched.\n"; } system("gzip -f $cachefn"); exit($exit_status);