randomfox: (Default)
[personal profile] randomfox
This script parses the postcards sent/received pages from Postcrossing and produces a table showing the number of postcards sent to or received from each country, ranked by postcard count. It can differentiate between the sent page and the received page, so you don't have to tell it which one it is processing.


#!perl -w
use strict;

# Read an HTML page from the files named in @ARGV (or from STDIN when @ARGV
# is empty) and extract the body rows of one of its tables.
#
# Arguments:
#   $table_index - optional. 1-based position of the table to parse, counted
#                  as the number of input lines containing "<table" seen so
#                  far. Defaults to 4.
#
# Returns a reference to an array of rows. Each row is a reference to an
# array holding the extracted text of that row's cells, in input order.
#
# The parser is line-oriented: a row starts on a line containing "<tr", ends
# on a line containing "</tr>", and a cell is only recognised when its whole
# "<td ...>...</td>" sits on a single line. Everything in the chosen table
# before the "<tbody>" line is ignored, and reading stops at the first
# "</table>" line after the chosen table has been entered.
sub readinput {
    my ($table_index) = @_;
    $table_index = 4 unless defined $table_index;

    my $tables_seen = 0;

    # 0 = chosen table not reached yet, 1 = inside it but before <tbody>,
    # 2 = inside its <tbody>.
    my $tablemode = 0;

    my $rec;
    my @recs;

    # Read into a lexical instead of $_: "while (<>)" assigns to the global
    # $_ without localising it, which would clobber the caller's $_.
    while (defined(my $line = <>)) {
        chomp $line;
        if ($tablemode) {
            if ($line =~ /<\/table>/) {
                # End of the chosen table: nothing after it is of interest.
                last;
            }
            # Skip over thead section.
            elsif ($line =~ /<tbody>/) {
                $tablemode = 2;
            }
            elsif ($tablemode == 2) {
                if ($line =~ /<tr/) {
                    $rec = [];
                }
                elsif ($line =~ /<\/tr>/) {
                    push @recs, $rec;
                }
                elsif ($line =~ /<td.*\/td>/) {
                    # Take the cell text from the first pattern that matches:
                    # a <span>, then a link, then the bare <td> content.
                    # A cell matching none of them is stored as "".
                    my $s = "";
                    if ($line =~ /<span.*>(.*)<\/span>/ ||
                        $line =~ /<a href=.*>(.*)<\/a>/ ||
                        $line =~ /<td.*>(.*)<\/td>/) {
                        $s = $1;
                    }
                    push @$rec, $s;
                }
            }
        }
        elsif ($line =~ /<table/) {
            ++$tables_seen;
            $tablemode = 1 if $tables_seen == $table_index;
        }
    }

    return \@recs;
}

# Tally postcards per country from the parsed table rows.
#
# Arguments:
#   $recs - reference to an array of rows, each a reference to an array of
#           cell strings. Cell index 2 is used as the country name and cell
#           index 6 is inspected for the text "Registered".
#
# Returns a two-element list: the number of rows counted, and a reference to
# a hash mapping country name => number of rows counted for that country.
#
# Only rows whose cell 6 contains "Registered" are counted. If no row
# qualifies, the input is taken to be a "postcards received" page and every
# row is counted instead.
sub count {
    my ($rows) = @_;

    my @registered =
        grep { defined $_->[6] and $_->[6] =~ /Registered/ } @$rows;

    # No "Registered" status anywhere: fall back to counting every row.
    my @counted = @registered ? @registered : @$rows;

    my %tally;
    $tally{ $_->[2] }++ for @counted;

    return ( scalar @counted, \%tally );
}

# Print the per-country ranking as an HTML table on the currently selected
# output handle (STDOUT unless changed with select).
#
# Arguments:
#   $total     - total number of postcards; used as the denominator for the
#                percentage column and printed in the footer row.
#   $countries - reference to a hash mapping country name => postcard count.
#
# Rows are ordered by descending count. Countries with equal counts are
# ordered by name, so the same input always produces the same output instead
# of depending on Perl's per-run hash ordering.
#
# Country names are interpolated into the HTML exactly as given.
sub output {
    my ($total, $countries) = @_;

    print <<EOM;
<table cellpadding="2" cellspacing="1" border="0">
<tr color="#000000" bgcolor="#ffc56d" align="left">
<th>Rank</th>
<th>Country</th>
<th>Postcards</th>
<th>Percentage</th>
</tr>
EOM

    my @ranked = sort {
        $countries->{$b} <=> $countries->{$a}
            or $a cmp $b    # tie-breaker keeps the ranking deterministic
    } keys %$countries;

    my $rank = 0;

    for my $country (@ranked) {
        ++$rank;
        my $cards = $countries->{$country};

        # Guard against a zero total so an inconsistent call reports 0.00%
        # rather than dying with "Illegal division by zero".
        my $share   = $total ? $cards / $total * 100 : 0;
        my $percent = sprintf "%.2f", $share;

        # Alternate the row background between odd and even ranks.
        my $bgcolor = $rank % 2 ? "#ffe2b6" : "#fff0db";
        print <<EOM;
<tr color="#000000" bgcolor="$bgcolor" align="left">
<td>$rank</td>
<td>$country</td>
<td>$cards</td>
<td>$percent%</td>
</tr>
EOM
    }

    print <<EOM;
<tr color="#000000" bgcolor="#ffc56d" align="left">
<th>&nbsp;</th>
<th>Total</th>
<th>$total</th>
<th>&nbsp;</th>
</tr>
</table>
EOM
}

# Main program: parse the page given on the command line (or STDIN), tally
# the postcards per country, and print the ranking table.
my ($total, $countries) = count(readinput());
output($total, $countries);

__END__

From:
Anonymous( )Anonymous This account has disabled anonymous posting.
OpenID( )OpenID You can comment on this post while signed in with an account from many other sites, once you have confirmed your email address. Sign in using OpenID.
User
Account name:
Password:
If you don't have an account you can create one now.
Subject:
HTML doesn't work in the subject.

Message:

 
Notice: This account is set to log the IP addresses of everyone who comments.
Links will be displayed as unclickable URLs to help prevent spam.

Profile

randomfox: (Default)
randomfox

November 2012

S M T W T F S
    123
45678910
11121314151617
18192021222324
25262728 2930 

Most Popular Tags

Style Credit

Expand Cut Tags

No cut tags
Page generated Sep. 26th, 2017 05:33 am
Powered by Dreamwidth Studios