#!/usr/bin/perl
|
||
|
|
||
|
# This file can find requirements of html and jhtml files (cgi, gif,
# java dependencies).  It is a bit of a hack but it turns out to work
# well.  We track only dependencies between relative URLs; absolute
# URLs are assumed to be external to the RPM system.  We do not
# parse the HTML but look through the set of strings (text surrounded
# by quotes) for something which looks like a reference.  This avoids
# writing a full HTML parser and tends to work really well.  In this
# manner we can track dependencies for: href, src, action and other
# HTML tags which have not been invented yet.
|
||
|
|
||
|
|
||
|
# The reference:
|
||
|
#
|
||
|
# href="http://www.perl.org/images/arrow.gif"
|
||
|
#
|
||
|
# does not create a dependency but the reference
|
||
|
#
|
||
|
# href="images/arrow.gif"
|
||
|
#
|
||
|
# will create a dependency.
|
||
|
|
||
|
# Additionally this program will find the requirements for Sun jhtml
# (html with embedded java).  Since jhtml is deprecated, so is this
# part of the code.
|
||
|
|
||
|
# These references create dependencies:
|
||
|
|
||
|
# <form action="signup.jhtml" method="POST">
|
||
|
#
|
||
|
# <img src="images/spacer.gif" width=1>
|
||
|
#
|
||
|
# <A HREF="signup.jhtml">
|
||
|
#
|
||
|
# adWidget.writeAd(out, "login.html", "expired");
|
||
|
#
|
||
|
# response.sendRedirect("http://"+request.getServerName()+"/mailcom/login.jhtml");
|
||
|
|
||
|
|
||
|
# Notice how we look for strings WITH the proper ending. This is
|
||
|
# because the java sometimes has really strange double quoting
|
||
|
# conventions. Look at how splitting out the strings in this
|
||
|
# fragment would get you the wrong text.
|
||
|
|
||
|
# <img src="`c.getImage("bhunterlogo.gif")`" width=217 >
|
||
|
|
||
|
# Ignore non relative references since these dependencies can not be
|
||
|
# met. (ie, no package you install will ever provide
|
||
|
# 'http://www.yahoo.com').
|
||
|
|
||
|
# I use basename since I have seen too many http references which
# begin with '../' and I can not figure out where the document root
# is for the webserver.  This would just kill the dependency tracking
# mechanism.
|
||
|
|
||
|
|
||
|
|
||
|
use File::Basename;
|
||
|
|
||
|
# This is the pattern of file extensions (including the leading dot)
# which mark a quoted string as a reference to another file, i.e. as a
# requirement.  process_file() interpolates it into its search regex.

$DEPS_PAT = '\.((cgi)|(ps)|(pdf)|(png)|(jpg)|(gif)|(tiff)|(tif)|(xbm)|(html)|(htm)|(shtml)|(jhtml))'; #'

# Test the number of arguments, not the joined string "@ARGV": a single
# argument of '0' (or '') is a perfectly good file name but is false as
# a string and used to be silently ignored.

if (@ARGV) {

  # file names were given on the command line

  foreach my $file (@ARGV) {
    process_file($file);
  }

}
else {

  # notice we are passed a list of filenames NOT as common in unix the
  # contents of the file.  Read the names one at a time instead of
  # slurping the whole list, and use a named variable so that nothing
  # process_file() does to $_ can leak back into this loop.

  while ( my $file = <STDIN> ) {

    # a blank line names no file; skip it rather than dying on ''
    next if $file =~ m/^\s*$/;

    process_file($file);
  }

}

# print every requirement collected in %seen exactly once, in sorted order

foreach my $key (sort keys %seen) {
  print "$key\n";
}
|
||
|
|
||
|
|
||
|
# process_file($file)
#
# Scan one html/jhtml file and record every requirement found in it as a
# key of the global %seen hash:
#
#   http(NAME)   for each double-quoted string which ends in one of the
#                extensions matched by the global $DEPS_PAT and which
#                contains no "scheme://" (NAME is the basename of the
#                string with all whitespace removed)
#
#   java(CLASS)  for each class listed in a jhtml <java type=import> or
#                <java type=extends> tag
#
# $file is the name of the file to scan; a trailing newline is removed.
# Dies if the file can not be opened or closed.  Returns nothing.

sub process_file {

  my ($file) = @_;
  chomp $file;

  # Three argument open with a lexical handle: the name is taken
  # literally, so characters such as '|', '>' or surrounding blanks in
  # a file name can neither run a command nor change the open mode.

  open(my $fh, '<', $file) or
    die("$0: Could not open file: '$file' : $!\n");

  # we have to suck in the whole file at once because too many people
  # split lines around <java></java> tags.

  my @lines = <$fh>;

  close($fh) or
    die("$0: Could not close file: '$file' : $!\n");

  # Work on a private copy instead of the global $_ so the caller's $_
  # (which may be aliased to an element of @ARGV) is left alone.  The
  # interpolation joins the lines with the list separator (a space by
  # default); that extra whitespace is harmless because whitespace is
  # stripped from every name below.

  my $text = "@lines";

  # ignore multi-line comments
  # (use non greedy operators, and /s so that '.' also matches the
  # newlines inside a comment which spans several lines).  These are
  # removed BEFORE the line based comments: otherwise a '//' inside a
  # block comment could eat the closing '*/' or '-->' and the opening
  # delimiter would then swallow text up to the next, unrelated, closer.

  $text =~ s!/\*.*?\*/!!gs;
  $text =~ s/<!--.*?-->//gs;

  # ignore line based comments ( careful although it has two slashes
  # 'http://www.yahoo.com' is not a comment! )

  $text =~ s!^\s*//.*$!!mg;
  $text =~ s!//\s.*$!!mg;
  $text =~ s!\s//.*$!!mg;

  # Notice how we look for strings WITH the proper ending.  This is
  # because the java sometimes has really strange double quoting
  # conventions.  Look at how splitting out the strings in this
  # fragment would get you the wrong text.

  # <img src="`c.getImage("bhunterlogo.gif")`" width=217 >

  while ( $text =~ m{\"([^\"]+$DEPS_PAT)\"}g ) {
    my $string = $1;

    # Ignore non relative references since these dependencies can not
    # be met.  (ie, no package you install will ever provide
    # 'http://www.yahoo.com').  Any "scheme://" counts, so https and
    # ftp URLs are skipped just like http ones.

    next if $string =~ m{[a-z][a-z0-9+.\-]*://}i;

    # I use basename since I have seen too many http references which
    # begin with '../' and I can not figure out where the document
    # root is for the webserver.

    $string = basename($string);
    $string =~ s!\s+!!g;
    $seen{"http(${string})"} = 1;
  }

  # This section is only for use with (Sun) jhtml dependencies, and
  # since jhtml is deprecated so is this code.

  # java imports in jhtml (may have stars for leaf class)
  # these may span several lines

  while ( $text =~ m!<java type=(?:import|extends)>\s*([^<]+)\s*<!g ) {
    my $java_list = $1;

    # class names are separated by semicolons and/or whitespace.
    # split(' ') skips leading separators, so a list which starts with
    # ';' no longer produces an empty "java()" requirement.

    $java_list =~ tr/;/ /;

    foreach my $java_class ( split(' ', $java_list) ) {
      $seen{"java(${java_class})"} = 1;
    }
  }

  return;
}
|