#!/usr/bin/perl -w
# titlebytes.pl - find the title and size of a web document
# (adapted from the Perl Cookbook)
#
# usage: titlebytes.pl http://www.cnn.com
#
# Output: the resolved URL followed by "=>", then on the next line either
# the HTTP status line (when the fetch did not succeed) or
# "<title> (<N> lines, <M> bytes)" for the fetched document.

use strict;
use warnings;

use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use URI::Heuristic;

# The single command-line argument is the URL (or a shorthand for one).
my $raw_url = shift or die "usage: $0 url\n";

# Let URI::Heuristic turn whatever the user typed into a URL string.
my $url = URI::Heuristic::uf_urlstr($raw_url);

# Unbuffer STDOUT so the URL line is visible while the request is running.
$| = 1;
printf "%s =>\n\t", $url;

my $ua = LWP::UserAgent->new();
$ua->agent("Mozilla/v9.14 Platinum");    # give it time, it'll get there

my $req = HTTP::Request->new(GET => $url);
$req->referer("http://how.are.you");     # perplex the log analysers

my $response = $ua->request($req);

# Test for success rather than for an error status: a response that is
# neither (e.g. a redirect that was not followed) is not a fetched document
# and should be reported by its status line as well.
if (!$response->is_success()) {
    printf " %s\n", $response->status_line;
}
else {
    my $content = $response->content();        # the document body as received
    my $bytes   = length $content;             # size of the body
    my $count   = ($content =~ tr/\n//);       # tr with empty replacement just counts newlines

    # title() is undef when the document carries no title (e.g. non-HTML
    # content); substitute a placeholder instead of printing undef.
    my $title = $response->title();
    $title = '(no title)' unless defined $title;

    printf "%s (%d lines, %d bytes)\n", $title, $count, $bytes;
}