#!/usr/local/bin/perl -w # $Id: ians,v 1.37.1.5 1999/06/01 14:48:49 brianr Exp $ $ourexefile=$0; $version='$Revision: 1.37.1.5 $ '; $version =~ m/\ (.*) /; $version = "$1"; $fullversion = "IANS $1"; $urlctr = 0 ; print "$$ [". time2str() ."] $version \n"; STDOUT->flush(); # Internet Alternate Namespace # Copyright (C) 1998 Brian Ristuccia # With contributions by Bennett Haselton # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # # Where's the redirection form website? $website="http://www.osiris.978.org/~brianr/ians/"; { package RefFixer; require HTML::Filter; @ISA=qw(HTML::Filter); sub output { my $self = shift; my $text = $_[0]; if ($self->{prohibtags}) { # if ( $text =~ m/\<\! IANS-VERSION \>/ ) { # push(@{$self->{fhtml}}, "*" . $main::fullversion); # return; # } #elsif ( $_[0] eq '' ) #push(@{$self->{fhtml}}, ""); 1; } else { # Skip javascript and other cruft. push(@{$self->{fhtml}}, $text); } } sub filtered_html { join("", @{$_[0]->{fhtml}}) } sub start { my $self = shift; my %props = %{$_[1]}; my @order = @{$_[2]}; my $fixurl; my $altbase; $self->{prohibtags}++ if $_[0] eq "script"; $_[0] = "X-$_[0]" if $_[0] eq "meta"; $compltext="<$_[0]"; foreach $prop (@order) { if ( $prop =~ m/^on.+/ ) { # No javascrit on* tags. Comment em out. $props{$prop} = "// " . $props{$prop} } if ( $prop =~ m/(href|src|background|action)/ ) { $fixurl=undef; if ( ($props{$prop} =~ m/^[^:\/]*:/) ) { # Reference like http://www.site.com/file/ $fixurl="$props{$prop}"; } elsif ($props{$prop} =~ m/^\// ) { # root reference $altbase = $main::base; $altbase =~ m/([^:\/]*:\/\/[^\/]*)\//; $fixurl="$1$props{$prop}"; } else { #wicked relative my $url1 = new URI::URL "$props{$prop}", "$main::base"; $fixurl=$url1->abs; } $fixurl = main::EncodeURL($fixurl); if ($fixurl) { $props{$prop}="/$fixurl"; }; } $compltext="$compltext $prop=\"$props{$prop}\""; } $compltext="$compltext\>"; $self->SUPER::start($_[0],$_[1],$_[2],$compltext); } sub end { my $self = shift; $self->SUPER::end(@_); $self->{prohibtags}-- if $_[0] eq "script"; } } use HTTP::Daemon; use HTTP::Status; use HTTP::Date; use LWP::UserAgent; use URI::URL; use Net::hostent; use POSIX ":sys_wait_h"; use MIME::Base64; use IO::Socket; use HTML::Entities (); # Set up our user agent to snag stuff for us. We're doing this before we fork # because the kernel can copy-on-write parts of the UA as neccessary much # faster than perl can set up entire UA's. $ua = new LWP::UserAgent; # If our local squid is up, use it. $ua->env_proxy(); # Set the UA string $ua->agent("$fullversion " . $ua->agent); # Discourage abuse @abuse_strings = ( # PHF Exploit 'phf?(.*?)(passwd|group|htpasswd|xterm|rxvt)', # Chargen provides a continuous stream of characters, and can be used # to flood out network links or deplete resources on the proxy. '^http://(.*?):(chargen|19)' ); sub IsAbuseURL { my($url) = @_; foreach $string (@abuse_strings) { if ($url =~ /$string/i) { return 1; } } return 0; } sub DecodeFragment { print $urldsock "dec @_\n"; $baz=<$urldsock>; chomp $baz; return $baz; } sub EncodeFragment { print $urldsock "enc @_\n"; $baz=<$urldsock>; chomp $baz; return $baz; } sub EncodeURL { return EncodeFragment(@_); #$originalurl; } sub DecodeURL { my($originalurl) = @_; # $encodedurl =~ s/(..)/chr(hex($1))/eg; return DecodeFragment($originalurl); #decode_base64($originalurl); #return $originalurl; } # set up as a web server listening on whatever port $d = new HTTP::Daemon(LocalPort => 8802, Listen => 32, Reuse => 1) or die("Couldn't establish listening socket for HTTP server"); # Sometimes we have socket accepts that fail, but this doesn't mean it's a # permenant failure condition. This keeps the program looping forever. while (1) { # Get a connection while ($c = $d->accept) { # Clean up any left over zombies while (($deadpid= waitpid(-1,&WNOHANG)) > 0) { 1; } # Now we fork off a separate copy of ourselves to read and process the # Reques. This makes us immediately available to process other connections. while ( !defined ($forkvalue = fork())) { print "$$ [" . time2str . "] Couldn't fork -- collecting zombies, pausing, and then trying again.\n"; STDOUT->flush(); while (($deadpid= waitpid(-1,&WNOHANG)) > 0) { 1; } sleep 1; } if ( $forkvalue eq "0" ) { # Get a connection to the URL daemon. while ( !defined ($urldsock = IO::Socket::INET->new('127.0.0.1:9000')) ) { # Couldn't connect? Maybe it got hosed. Start another. system('urld') or die "Couldn't start URL Daemon: $!"; } # Try to resolve the remote machine's hostname if ($remotehostname = gethost($c->peerhost)) { $remotehostname=$remotehostname->name; } else { $remotehostname=$c->peerhost; } # sometimes we don't get a request right away, so set up the status # indicator. $0="$ourexefile $remotehostname (0)\000\000"; # Set a timeout on this socket so we can't get busied out forever. We # don't set the timeout right away because resolving the host on its # initial connection may take some time. $c->timeout(32); # Now we get the request. Note that since I implemented Keep-Alive, we # can get multiple requests on the same connection. while ($r = $c->get_request) { # Set some information about our current status $numberofconnections++; $0="$ourexefile $remotehostname ($numberofconnections)\000\000"; # Display some information for the log file print "$$ [" . time2str . "] Request: " . $r->url . " from " . $remotehostname . ":" . $c->peerport . " : "; # Look at the Host: header to determine the hostname we were # accessed by, and use it in all redirects. We have no idea if # we're being accessed through a port # redirector, DNS alias, etc. If we have't been told, make a # reasonable guess. if (! ($myhost = $r->headers->header(Host))) { $myhost= $c->sockhost() . ":" . $c->sockport(); } $MYURL="http://" . $myhost. "/"; # Get the path that was requested from the server and remove any # leading slash. my $temppath = $r->url->full_path . ""; if ($temppath =~ s!^/via-another-proxy-from/([^/]+)/!!) { # Perhaps this could be abused to make an attack appear to come # from another host, but they could do that via proxy servers or # the Via: header anyway, so it's no real loss. $reallyfrom="$1, "; #print "REALLY FROM $1 -- adjusted to $temppath -- \n"; } $temppath =~ s/^\///; #print "\n* ---". $temppath . "---\n"; if ($temppath eq "") { # No URL? Show them a web site where they can enter one. $r->url($website); } else { # Separate any query information from GET requests and separate it # from the path we're about to decode. if ( $temppath =~ m/(.*)(\?.*)/ ) { $temppath = $1; $querypart =$2; } else { $querypart = ""; } # Decode the URL they gave us. $temppath =DecodeURL($temppath); #~ s/(..)/chr(hex($1))/eg; # Now set the request URL to the decoded path plus any query # information $r->url ( $temppath . $querypart ); } # And print the decoded and nicified URL into the log. print $r->url . "\n"; # if ( $r->url eq "" ) { # $r->url($website); #} els if ( &IsAbuseURL( $r->url ) ) { # Someone thinks they're cool trying old-school phf lameness or other crap $r->url ( $website . "abuse.html" ); } elsif ( ! ( $r->url =~ m/^(http|ftp):\/\// ) ) { # Unsupported protocol (or perhaps something nasty like file://) $r->url ( $website . "otherprot.cgi?url=" . $r->url ); } # If our document has a referer header, then we need to decode it, # because it's currently hexified. if ( $referer = $r->headers->header(Referer) ) { # strip us off the referer for sites that check. if ( $referer =~ s/^$MYURL// ) { $referer = DecodeURL($referer); $r->headers->header(Referer => "$referer"); # print "\n*Referer is $referer\n"; #$referer =~ s/(..)/chr(hex($1))/eg; } } # print "$$ Referer is fixed as " . $r->headers->header(Referer) . "\n"; $r->headers->remove_header(Content_Length); $r->headers->remove_header(Host); $browserhaskeepalive = $r->headers->header(Connection => 'Close'); # Inlcude valuable diagnostic information and discourage use as an anonymous # means to defraud or vandalize remote sites. #$via=$r->headers->header(Via); if ( defined ($via = $r->headers->header(Via)) ) { $via=$via.", "; } else { $via = ""; } $r->headers->header(Via => $via . $r->protocol() . " $myhost ($fullversion request received from $remotehostname)"); ; if ( defined ($via=$r->headers->header(X_Forwarded_For)) ) { $via=$via.", "; } else { $via=""; } $r->headers->header(X_Forwarded_For => $via . $reallyfrom . $c->peerhost ); # show the request as a debug message #print $r->as_string; my $res = $ua->simple_request($r); # print "$$ Got response\n"; if ($res->headers->header(Location)) { # lets check before tacking our URL onto the header... What if we're # already on there (kaboom!) $location=$res->headers->header(Location); if ( !($location =~ m/^$MYURL/) ) { # encode and update the location if it's not already. # print "$$ Encoding location $location\n"; $location = EncodeURL($location); #~ s/(.)/sprintf("%02x", ord($1))/ge; # make this more relative (like the href encoder - should help # SSL proxies, although might violate RFC a little bit. $res->headers->header(Location => "/$location" ); } } # print "$$ Sent response\n"; if ( !($res->code =~ m/(400|500|501)/ ) ) { if ( ( $res->headers->header(Content_Type) || '' ) eq "text/html" ) { # we need to do some fixup here. $base=$res->base; # print "$$ content base is " . $base . "\n"; $res->add_content("
-"); $res->add_content(""); #
Via Internet Alternate Namespace ' . $version . ' Copyright © 1998 '. # 'Brian Ristuccia' . # '
[visit another site]'); $p = RefFixer->new->parse($res->content); $res->content($p->filtered_html); # could trigger keyword blocking. Sorry. #$res->add_content( '[view directly]'. "\n"); } # Set the Content-Length so Keep-Alive doesn't break. $res->headers->header(Content_Length => length($res->content())); # If the browser had Keep-Alive, enable it. $res->headers->header(Connection => $browserhaskeepalive); # This header was returned by the server, and may have no basis # in reality. $res->headers->remove_header('Keep-Alive'); $c->send_response($res); } else { #if ($res->code == 400) { # # Send user to 'Where do you want to go?' page. # $setmeup = $website; # $setmeup =~ s/(.)/sprintf("%02x", ord($1))/ge; # $c->send_redirect("$MYURL" . $setmeup); #}# else { $c->send_error ($res->code, $res->error_as_HTML); #} } $c->flush; } $c = undef; exit; } else { # This is what we do if we're the parent of the fork() $c = undef; # close connection } } }