#! /usr/bin/perl -w

# srom: Sucks-Rules-O-Meter
# Copyright 1998 Electric Lichen L.L.C.
# Don Marti
#
# 16 June 2001 -- switched to google, made ignore "procedures", etc.
#                 also minor cosmetic changes (mattdm-srom@mattdm.org)
# 20 February 2001 -- switched to Raging Search
# revised 15 January 2000 -- added OpenBSD.
# revised 9 July 2000 -- added logging functionality
#                        (Johan Walles, d92-jwa@nada.kth.se)
# revised 3 June 1999 -- new AltaVista result page format
# revised 19 Mar 1998 -- added $rule_offset

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

require 5.004;

# Uncomment exactly one of the following two lines:
# require "SimpleGet.pl";
use LWP::Simple;

# A search URL is built as
#     $SEARCH_PREFIX . lc("<alias>+<synonym>") . $SEARCH_SUFFIX
# The prefix ends with %22 (a URL-encoded double quote) and the suffix
# starts with one, so the alias/synonym pair is searched as an exact phrase.
# The rest of the suffix is a list of "-term" exclusions.
#my $SEARCH_PREFIX = qq{http://ragingsearch.altavista.com/cgi-bin/query?pg=q&what=web&kl=XX&q=%2B%22};
my $SEARCH_PREFIX = qq{http://google.com/search?q=%22};
my $SEARCH_SUFFIX = '%22+-procedure+-procedures+-%22Daily+Rush%22+-policy+-photo+-photos+-photography';

# The VOTES_THRESHOLD value determines how much the number of votes
# for a certain OS affects its weighted result.  OSes with exactly
# this many votes will have a weighted score that is exactly in
# between its raw score and the average rating for all operating
# systems.  For more on this, see the discussion of the "true Bayesian
# estimate" below.
my $VOTES_THRESHOLD = 25;

# The logging code below uses three-argument open() and lexical file and
# directory handles, which need at least Perl 5.6.
use 5.006;

#my %aliases = ('Black & White' => ['Black %2B%26 White'],
#               'Black and White' => ['Black %2Band White']
#    );

# Display name => list of (already URL-encoded) search phrases.  The hit
# counts of all phrases are summed under the display name.
my %aliases = (
    'Black & White'   => ['Black %2B%26 White', 'Black %2Band White', 'B%26W'],
    'Peter Molyneux'  => ['Peter Molyneux'],
    'Lionhead'        => ['Lionhead', 'Lionhead Studios', 'Lionhead Studios Ltd.'],
    'Electronic Arts' => ['Electronic Arts', 'EA'],
);

# Quality => list of words that each count as a vote for that quality.
my %synonyms = (
    'sucks' => ['sucks'],
    'rules' => ['rules', 'rocks', 'kicks'],
);

###########################################################################
# Collect the hit counts
###########################################################################

my %count;           # $count{$os}{'sucks'|'rules'} = summed hit count
my $greatest = 1;    # largest single tally seen; scales the bars below

foreach my $os (keys(%aliases)) {
    # Start both tallies at zero so later arithmetic never sees undef.
    $count{$os}{'sucks'} = 0;
    $count{$os}{'rules'} = 0;

    foreach my $alias (@{$aliases{$os}}) {
        foreach my $quality ('sucks', 'rules') {
            foreach my $synonym (@{$synonyms{$quality}}) {
                my $result = get($SEARCH_PREFIX
                                 . lc("$alias+$synonym")
                                 . $SEARCH_SUFFIX);

                # get() yields undef when the request fails; skip this
                # phrase instead of pattern-matching an undefined value.
                unless (defined $result) {
                    print STDERR "$alias $synonym ($os $quality): request failed\n";
                    next;
                }

                if ($result =~ /Results.*of about ([\d,]+)<\/b>\./i) {
                    my $raw = $1;
                    $raw =~ s/\D//g;    # drop thousands separators
                    $count{$os}{$quality} += $raw + 0;
                    print STDERR "$alias $synonym ($os $quality): $raw\n";
                }
            }
            $greatest = $count{$os}{$quality}
                if $count{$os}{$quality} > $greatest;
        }
    }
    print "\n";
}

# $greatest still at its initial value means no query produced a count.
die "bad Google, bad, bad " if $greatest == 1;

###########################################################################
# Print the HTML bar chart
###########################################################################

# NOTE(review): the tag markup of this section was lost from the source
# (every <...> span had been stripped).  The control flow, the 25/75
# thresholds, the heading texts and the "Updated ... GMT." footer are
# original; the element names, attributes and colours below are a
# reconstruction -- confirm against a pristine copy if one is available.

print qq{<table border="0" cellpadding="2" cellspacing="0">\n};
print qq{<tr><th>&nbsp;</th><th>sucks</th><th>rules, rocks, kicks</th></tr>\n};

foreach my $os (sort(keys(%aliases))) {
    # Bar widths as a percentage of the largest tally, plus the padding
    # that fills the remainder of each bar's cell.
    my $suckage     = int(100 * $count{$os}{'sucks'} / $greatest);
    my $suck_offset = 100 - $suckage;
    my $ruleage     = int(100 * $count{$os}{'rules'} / $greatest);
    my $rule_offset = 100 - $ruleage;

    # A count is printed inside its bar when the bar is wide enough,
    # otherwise in the padding next to it.
    my $sucks_outside = $suck_offset >= 75 ? $count{$os}{'sucks'} + 0
                      :                      '&nbsp;';
    my $sucks_inside  = $suckage > 25      ? $count{$os}{'sucks'}
                      : $suckage == 0      ? ''
                      :                      '&nbsp;';
    my $rules_inside  = $ruleage >= 25     ? $count{$os}{'rules'}
                      : $ruleage == 0      ? ''
                      :                      '&nbsp;';
    my $rules_outside = $ruleage < 25      ? $count{$os}{'rules'}
                      :                      '&nbsp;';

    # Names such as 'Black & White' contain HTML metacharacters.
    my $name = $os;
    $name =~ s/&/&amp;/g;
    $name =~ s/</&lt;/g;
    $name =~ s/>/&gt;/g;

    print qq{<tr>\n};
    print qq{<td align="right">$name</td>\n};

    # "sucks" bar: padding first, so the bar grows leftwards.
    print qq{<td><table width="100%" border="0" cellpadding="0" cellspacing="0"><tr>};
    print qq{<td width="$suck_offset%" align="right">$sucks_outside</td>};
    print qq{<td width="$suckage%" bgcolor="#cc0000" align="right">$sucks_inside</td>};
    print qq{</tr></table></td>\n};

    # "rules" bar: bar first, so it grows rightwards.
    print qq{<td><table width="100%" border="0" cellpadding="0" cellspacing="0"><tr>};
    print qq{<td width="$ruleage%" bgcolor="#00cc00" align="left">$rules_inside</td>};
    print qq{<td width="$rule_offset%" align="left">$rules_outside</td>};
    print qq{</tr></table></td>\n};

    print qq{</tr>\n};
}
print "</table>\n";

my $date = scalar(gmtime(time()));
print qq{<p>
Updated $date GMT.
</p>
};

###########################################################################
# Log the retrieved data for later use by gnuplot
###########################################################################

# Has a log file directory name been specified on the command line?
if (defined($ARGV[0])) {
    my $logdirectory = $ARGV[0];
    my %logfile;    # display name => path of its log file

    # Make sure that the log file directory exists
    if (! -e $logdirectory) {
        unless (mkdir $logdirectory, 0777) {
            die "Error: Unable to create log file directory ($!)";
        }
        warn "Warning: New log file directory $logdirectory created.\n";
    }

    # Make sure that the log file directory is a directory
    die "Error: $logdirectory is not a directory!\n"
        unless (-d $logdirectory);

    # Find out what OSes already have log files.  The directory is read
    # with readdir rather than by running `ls`, so a directory name with
    # spaces or shell metacharacters cannot break (or inject into) a
    # shell command.  Hidden files are skipped, as a shell "*" would.
    opendir(my $dir_handle, $logdirectory)
        or die "Error: Can't read directory $logdirectory ($!)\n";
    my @existing_logs = map  { "$logdirectory/$_" }
                        sort
                        grep { /\.gnuplot\z/ && !/\A\./ }
                        readdir($dir_handle);
    closedir($dir_handle);

    foreach my $logfile_name (@existing_logs) {
        # Find out what OS the logfile is for by reading the comment
        # on the first line and stripping "# " from it
        open(my $log_in, '<', $logfile_name)
            or die "Error: Can't open $logfile_name for reading ($!)\n";
        my $os_comment = <$log_in>;
        close($log_in)
            or warn "Warning: Couldn't close $logfile_name ($!)";

        # An empty file has no first line; let it fail the format check
        # below instead of warning about an undefined value.
        $os_comment = '' unless defined $os_comment;
        chomp $os_comment;

        my ($os) = $os_comment =~ /^\# (.+)/
            or die "Error: The first line of $logfile_name is not on '# OS-name' format";

        die "Error: $os has more than one log file (at least $logfile_name and $logfile{$os})\n"
            if (defined $logfile{$os});

        $logfile{$os} = $logfile_name;
    }

    # Create log files for operating systems that don't have one already
    foreach my $os (sort(keys(%aliases))) {
        next if defined $logfile{$os};

        # Derive a safe file name.  The /g matters: without it only the
        # first run of non-alphanumerics was replaced, leaving spaces and
        # dots in names like "Lionhead Studios Ltd.".
        my $base_name = $os;
        $base_name =~ s{[^a-zA-Z0-9]+}{_}g;
        $base_name =~ tr/A-Z/a-z/;

        # Append the first free counter if the plain name is taken.
        my $logfile_name = "$logdirectory/$base_name.gnuplot";
        if (-e $logfile_name) {
            my $counter = 1;
            $counter++ while -e "$logdirectory/$base_name$counter.gnuplot";
            $logfile_name = "$logdirectory/$base_name$counter.gnuplot";
        }

        # The first line identifies which OS the file belongs to.
        open(my $log_new, '>', $logfile_name)
            or die "Error: Couldn't create file $logfile_name ($!)";
        print {$log_new} "# $os\n";
        close($log_new)
            or die "Error: Couldn't create file $logfile_name ($!)";

        $logfile{$os} = $logfile_name;
        warn "Warning: New log file created for $os ($logfile_name)\n";
    }

    # Calculate popularity percentages for each operating system.
    # This is done using the "true Bayesian estimate" as described
    # at the bottom of the Internet Movie Database's top 250 list
    # ("http://us.imdb.com/top_250_films").  The idea is that to get
    # very high or very low ratings, you have to have a lot of votes.
    # Operating systems with few votes will be pushed towards the middle
    # of the pack.
    #
    # Here's IMDb's description of the formula used:
    #   weighted rank (WR) = (v / (v+m)) x R + (m / (v+m)) x C
    # where:
    #   R = average for the movie (mean) = (Rating)
    #   v = number of votes for the movie = (votes)
    #   m = minimum votes required to be listed
    #   C = the mean vote across the whole report

    # Calculate the average popularity of all operating systems
    my $total_sucks = 0;
    my $total_rules = 0;
    foreach my $os (sort(keys(%aliases))) {
        $total_sucks += $count{$os}{'sucks'};
        $total_rules += $count{$os}{'rules'};
    }

    die "Error: Weird sucks ($total_sucks) and rules ($total_rules) totals"
        if ($total_sucks <= 0 || $total_rules <= 0);

    my $average_popularity = $total_rules / ($total_sucks + $total_rules);

    # Create a datestring understandable by gnuplot.  Month and day are
    # deliberately not zero-padded (YYYY-M-D): that is the format already
    # present in existing log files, and the duplicate check below
    # relies on it staying the same.
    my ($mday, $mon, $year) = (gmtime(time))[3, 4, 5];
    $mon++;           # Convert mon to 1-12 instead of 0-11
    $year += 1900;    # Convert year to the real year
    my $datestring = $year . "-" . $mon . "-" . $mday;

    # Log the current date + the popularity percentages to the log files
    foreach my $os (sort(keys(%aliases))) {
        # Read the log once to learn whether today is already present
        # and what the most recent line is.
        open(my $log_in, '<', $logfile{$os})
            or die "Error: Can't open $logfile{$os} for reading ($!)\n";
        my $already_logged = 0;
        my $last_line      = '';
        while (my $line = <$log_in>) {
            $already_logged = 1 if index($line, "$datestring ") == 0;
            $last_line = $line;
        }
        close($log_in)
            or warn "Warning: Couldn't close $logfile{$os} ($!)";

        # Verify that the date is not already present in the log file
        if ($already_logged) {
            warn "Warning: $os log file $logfile{$os} already has an entry for today. Not adding one more.\n";
            next;
        }

        # Calculate the popularity for this operating system
        my $sucks = $count{$os}{'sucks'};
        my $rules = $count{$os}{'rules'};
        my $votes = $sucks + $rules;

        if ($votes <= 0 || ($sucks <= 0 && $rules <= 0)) {
            warn "Warning: Data not available or illegal for $os. Sucks=$sucks, rules=$rules, votes=$votes";
            next;
        }

        my $rating = $rules / ($sucks + $rules);

        my $popularity =
              $rating             * ($votes           / ($votes + $VOTES_THRESHOLD))
            + $average_popularity * ($VOTES_THRESHOLD / ($votes + $VOTES_THRESHOLD));
        $popularity *= 100;

        die "Error: $os popularity ($popularity) out of bounds"
            if ($popularity < 0 || $popularity > 100.0);

        # The second whitespace-separated field of the last line is the
        # most recently logged value (or a word of the "# name" header
        # when the file has no data lines yet, which never equals a number).
        my @last_fields = split ' ', $last_line;
        my $previous_popularity =
            defined $last_fields[1] ? $last_fields[1] : '';

        # Don't log the same number time after time
        if ($previous_popularity eq $popularity) {
            # FIXME: Is this what we want?
            warn "Warning: $os popularity still $popularity, not adding to the log file.\n";
            next;
        }

        # Open the operating system's log file for appending
        open(my $log_out, '>>', $logfile{$os})
            or die "Error: Can't open $logfile{$os} for appending ($!)";
        print {$log_out} "$datestring $popularity\n";
        close($log_out)
            or warn "Warning: Couldn't close log file $logfile{$os}";
    }
}
else {
    # No log directory has been specified on the command line
    warn "Warning: You have not provided any log file directory on the command line.  No logs will be produced.\n";
}

exit 0;