rpm-build/scripts/http.req
2002-03-25 20:16:26 +00:00

166 lines
4.3 KiB
Perl
Executable File

#!/usr/bin/perl
# This file can find requirements of html and jhtml files (cgi, gif,
# java dependencies). It is a bit of a hack but it turns out to work
# well. We track only dependencies between relative URLs; absolute
# URLs are assumed to be external to the RPM system. We do not
# parse the HTML but look through the set of strings (text surrounded
# by quotes) for something which looks like a reference. This avoids
# writing a full HTML parser and tends to work really well. In this
# manner we can track dependencies for: href, src, action and other
# HTML tags which have not been invented yet.
# The reference:
#
# href="http://www.perl.org/images/arrow.gif"
#
# does not create a dependency but the reference
#
# href="images/arrow.gif"
#
# will create a dependency.
# Additionally this program will find the requirements for Sun jhtml
# (html with embedded java). Since jhtml is deprecated, so is this part
# of the code.
# These references create dependencies:
# <form action="signup.jhtml" method="POST">
#
# <img src="images/spacer.gif" width=1>
#
# <A HREF="signup.jhtml">
#
# adWidget.writeAd(out, "login.html", "expired");
#
# response.sendRedirect("http://"+request.getServerName()+"/mailcom/login.jhtml");
# Notice how we look for strings WITH the proper ending. This is
# because the java sometimes has really strange double quoting
# conventions. Look at how splitting out the strings in this
# fragment would get you the wrong text.
# <img src="`c.getImage("bhunterlogo.gif")`" width=217 >
# Ignore non relative references since these dependencies can not be
# met. (ie, no package you install will ever provide
# 'http://www.yahoo.com').
# I use basename since I have seen too many http references which
# begin with '../' and I can not figure out where the document root
# is for the webserver. Using the full path would just kill the
# dependency tracking mechanism.
use File::Basename;
# Regex source for the file extensions that count as requirements.
# process_file() interpolates it into the pattern it uses to pick
# references out of quoted strings.
$DEPS_PAT = '\.((cgi)|(ps)|(pdf)|(png)|(jpg)|(gif)|(tiff)|(tif)|(xbm)|(html)|(htm)|(shtml)|(jhtml))'; #'

# Test the array itself, not its stringification: "@ARGV" is false for
# a single argument of "0" (or ""), which would wrongly send us down
# the stdin branch.
if (@ARGV) {
    foreach my $name (@ARGV) {
        process_file($name);
    }
}
else {
    # notice we are passed a list of filenames NOT as common in unix the
    # contents of the file.  Read one name per line; a lexical is used
    # so that the global $_ is never aliased to our input.
    while ( defined( my $name = <STDIN> ) ) {
        process_file($name);
    }
}

# Emit every dependency collected in %seen once, in sorted order.
foreach my $key ( sort keys %seen ) {
    print "$key\n";
}
# process_file($file)
#
# Scan one html/jhtml file and record what it requires in the package
# hash %seen:
#
#   http(NAME)   for every double-quoted string that ends in one of the
#                extensions in $DEPS_PAT and is not an absolute
#                http(s) URL; NAME is the basename with all whitespace
#                removed.
#   java(CLASS)  for every class listed in a
#                <java type=import> or <java type=extends> element.
#
# $file is a filename; a trailing newline is removed so names read
# straight from a filename list can be passed in.  Returns nothing.
# Dies if the file can not be opened or closed.
sub process_file {
    my ($file) = @_;
    chomp $file;

    # Three-argument open with a lexical handle: the mode can not be
    # altered by odd characters in the filename, and the handle does
    # not leak into the rest of the program.
    open( my $fh, '<', $file )
        || die("$0: Could not open file: '$file' : $!\n");

    # we have to suck in the whole file at once because too many people
    # split lines around <java></java> tags.  The text is kept in a
    # lexical rather than $_ so that a caller looping with $_ does not
    # get its loop variable overwritten.
    my $text = do { local $/; <$fh> };
    $text = '' unless defined $text;

    close($fh)
        || die("$0: Could not close file: '$file' : $!\n");

    # ignore line based comments ( careful although it has two slashes
    # 'http://www.yahoo.com' is not a comment! )
    $text =~ s!^\s*//.*$!!mg;
    $text =~ s!//\s.*$!!mg;
    $text =~ s!\s//.*$!!mg;

    # ignore multi-line comments (use non greedy operators).  The /s
    # modifier is required so that '.' also matches the newlines inside
    # a comment; without it only single-line comments are removed.
    $text =~ s!/\*.*?\*/!!gs;
    $text =~ s/<!--.*?-->//gs;

    # Look for quoted strings WITH the proper ending instead of
    # splitting out every string: java sometimes nests quotes, as in
    #   <img src="`c.getImage("bhunterlogo.gif")`" width=217 >
    while ( $text =~ m{\"([^\"]+$DEPS_PAT)\"}g ) {
        my $string = $1;

        # Absolute references can never be satisfied by a package, so
        # skip them.  https and upper-case schemes are absolute too.
        next if $string =~ m!https?://!i;

        # Only the basename is kept: references often start with '../'
        # and the webserver's document root is unknown here.
        $string = basename($string);
        $string =~ s!\s+!!g;
        $seen{"http(${string})"} = 1;
    }

    # This section is only for use with (Sun) jhtml dependencies, and
    # since jhtml is deprecated so is this code.
    # java imports in jhtml (may have stars for leaf class)
    # these may span several lines
    while ( $text =~ m!<java type=((import)|(extends))>\s*([^<]+)\s*<!g ) {
        my $java_list = $4;
        $java_list =~ s/;/ /g;

        # split ' ' breaks on any run of whitespace (newlines included)
        # and drops a leading empty field, so a list that starts with
        # ';' does not produce an empty "java()" entry.
        foreach my $java_class ( split( ' ', $java_list ) ) {
            $seen{"java(${java_class})"} = 1;
        }
    }

    return;
}