Jump to content

User:FairuseBot/libBot.pm

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Carnildo (talk | contribs) at 06:45, 17 November 2008 (New version). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
#!/usr/bin/perl

# libBot: A Perl module of useful routines for running a bot

package libBot;

use strict;
use warnings;

use Pearle;
use Data::Dumper;
use Array::Utils;

require Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(config usernotify wikilog botwarnlog notelog LoadInfoboxPatterns FixupLinks MakeWikiRegex DoIHaveMessages GetPageCategories GetPageTemplates GetPageLinks GetPageText GetPageList GetFullPageList SaveImage UpdateLink RemoveImageFromPage IsNotified IsPageNotified isDated getDate getUploadDates getLastEditDate GetImageUploader loadNotificationList saveNotificationList usesTemplate);
our $VERSION = 1.00;

my $test_only = 0;
my $username = "";
my @infobox_patterns = ();

sub config
{
	my %params = @_;
	
	$test_only = $params{test_only} if(defined($params{test_only}));
	$username = $params{username} if(defined($params{username}));
}

# Log a warning on a user's talkpage, using an existing edit session
sub usernotify
{
	my ($wikipage, $text, $user, $summary);
	$wikipage = $_[1];
	$summary = $_[2];
	$summary = "Logging warning message" if(!defined($summary));
	
	if(!$wikipage->isa("Pearle::WikiPage"))
	{
		Pearle::myLog(0, "usernotify(): Second parameter is not a WikiPage object\n");
		die "usernotify(): Second parameter is not a WikiPage object\n";
	}
	
	# We've been handed an editing session
	Pearle::myLog(4, "Warning with existing edit session\n");

	if($test_only)
	{
		print STDERR $_[0];
		return;
	}
	
	if($wikipage->getWikiText() =~ /^#redirect/i)
	{
		botwarnlog("*User talk page [[User talk:$user]] is a redirect\n");
		return;
	}
	$text = $wikipage->getEditableText();
	$text .= $_[0];
	$wikipage->setEditableText($text);
	Pearle::postPage($wikipage, $summary, 0);
	print STDERR $_[0];
}

# General-purpose on-Wiki logging routine
sub wikilog
{
	my($target, $text, $wikipage, $summary);
	$target = $_[0];
	$summary = $_[2] || "Logging note";

	eval
	{
		$wikipage = Pearle::getPage($target);
	};
	if($@)
	{
		if($@ =~ /^925/)
		{
			Pearle::myLog(1, "Failed to notify: Protected page $target\n");
		}
		else
		{
			die;
		}
	}

	if($test_only)
	{
		print STDERR $_[1];
		return;
	}

	if($wikipage->getWikiText() =~ /^#redirect/i)
	{
		botwarnlog("*Log target [[:$target]] is a redirect\n");
		return;
	}

	$text = $wikipage->getEditableText();
	$text .= $_[1];
	$wikipage->setEditableText($text);

	Pearle::postPage($wikipage, $summary, 0);
	print STDERR $_[1];
}

# Log a warning on the talk page of the bot
sub botwarnlog
{
	my ($page, $text, $summary);
	$text = $_[0];
	$summary = $_[1];
	$summary = "Logging warning message" if(!defined($summary));
	$page = "User talk:${username}/log";
	
	wikilog($page, $text, $summary);
}

# Log a notification message to the console
sub notelog
{
	print STDERR @_;
}

# Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image
sub FixupLinks
{
	my $link = shift;
	$link =~ s/\[\[(Category|Image)/[[:$1/g;
	return $link;
}

# Make a string into a Wikipedia-compatible regex
sub MakeWikiRegex
{
	my $string = shift;
	my @chars = split //, $string;
	my $result = '';

	foreach my $char (@chars)
	{
		# Escape metacharacters, and add percent-encoding for certain characters
		if($char eq '\\') {$result .= '\\\\';}
		elsif($char eq '/') {$result .= '\/';}
		elsif($char eq '.') {$result .= '\.';}
		elsif($char eq '(') {$result .= '(?:\(|%28)';}
		elsif($char eq ')') {$result .= '(?:\)|%29)';}
		elsif($char eq '[') {$result .= '\[';}
		elsif($char eq ']') {$result .= '\]';}
		elsif($char eq '+') {$result .= '\+';}
		elsif($char eq '*') {$result .= '\*';}
		elsif($char eq '?') {$result .= '(?:\?|%3F)';}
		elsif($char eq '^') {$result .= '\^';}
		elsif($char eq '$') {$result .= '\$';}
		elsif($char eq '&') {$result .= '(?:&|%26)';}
		elsif($char eq '!') {$result .= '(?:!|%21)';}
		elsif($char eq '~') {$result .= '(?:~|%7E)';}
		elsif($char eq "'") {$result .= "(?:'|%27)";}
		elsif($char eq '"') {$result .= '(?:"|%22)';}
		elsif($char eq ',') {$result .= '(?:,|%2C)';}
		else {$result .= $char;}
	}
	# Process the string to match both with spaces and with underscores
	$result =~ s/[ _]/[ _]+/g;

	# Process the string to match both upcase and lowercase first characters
	if($result =~ /^[A-Za-z]/)
	{
		$result =~ s/^(.)/"[$1".lc($1)."]"/e;
	}
	return $result;
}

sub HTMLEncode
{
	my $char = shift;
	return sprintf("&X%X;", ord($char));
}

# Make a string into something that can match most image name formats
sub MakeFancyRegex
{
	my $string = shift;
	my @chars = split //, $string;
	my $result;
	
	foreach my $char (@chars)
	{
		if($char eq '\\')
		{
			$result .= "(\\\\|%5C|%5c|&x5C;)";
		}
		elsif($char eq '.')
		{
		}
		elsif($char eq '(')
		{
		}
		elsif($char eq ')')
		{
		}
		else
		{
			$result .= "($char|" . uri_escape_utf8($char) . "|" . lc(uri_escape_utf8($char)) . "|" . HTMLEncode($char) . "|" . lc(HTMLEncode($char)) . ")";
		}
	}

	return $result;
}

# Check for new talk page messages
sub DoIHaveMessages
{
	my $xml = shift;
	my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
	
	if(exists($parsed_xml->{query}->{userinfo}->{messages}) and defined($parsed_xml->{query}->{userinfo}->{messages}))
	{
		return 1;
	}
	else
	{
		return 0;
	}
}


sub GetPageCategories
{
	my $xml = shift;
	my @pages = ();
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
		
		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) and defined($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}))
		{
			if(ref($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) eq 'ARRAY')
			{
				my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{categories}->{cl}};
				
				@pages = map {$_->{title}} @all_pages;
			}
			else
			{
				@pages = ($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}->{title});
			}
		}
	}
	return @pages;
}

sub GetPageLinks
{
	my $xml = shift;
	my @pages = ();
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
		
		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) and defined($parsed_xml->{query}->{pages}->{page}->{links}->{pl}))
		{
			if(ref($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) eq 'ARRAY')
			{
				my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{links}->{pl}};
				
				@pages = map {$_->{title}} @all_pages;
			}
			else
			{
				@pages = ($parsed_xml->{query}->{pages}->{page}->{links}->{pl}->{title});
			}
		}
	}
	return @pages;
}

sub GetPageText
{
	my $xml = shift;
	my $text = undef;
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
		
		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}) and defined($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}))
		{
			if(ref($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}))
			{
				# The API/XML parser interact to produce a HASH ref if the revision is empty
				$text = "";
			}
			else
			{
				$text = $parsed_xml->{query}->{pages}->{page}->{revisions}->{rev};
			}
		}
	}
	return $text;
}

# Input: XML from the API, generated with prop => 'templates' and with only a single title
#        Either as text or as a parsed tree
#
# Returns: A list of templates used by the page
#
# Side effects: None
sub GetPageTemplates
{
	my $xml = shift;
	my @templates;
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['tl']);
		if(exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) and defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}))
		{
			@templates = map {$_->{title}} @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}};
		}
	}
	return @templates;
}

# Input: XML, either a tree produced by parsing, or XML text
# 
# Returns: A list of pages that this image is used on
#
# Side effects: For pages in certain namespaces, posts on the bot's log page
sub GetPageList
{
	my $xml = shift;
	my $image;
	my @pages = ();

	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
		my $image = $parsed_xml->{query}->{pages}->{page}->{title};

		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu}))
		{
			if(ref($parsed_xml->{query}->{imageusage}->{iu}) eq 'ARRAY')
			{
				my @bad_pages = grep {$_->{ns} == 10 or $_->{ns} == 12} @{$parsed_xml->{query}->{imageusage}->{iu}};
				my @good_pages = grep {$_->{ns} != 10 and $_->{ns} != 12} @{$parsed_xml->{query}->{imageusage}->{iu}};
				
				@pages = map {$_->{title}} @good_pages;
				
				if(scalar(@bad_pages) > 0 and defined($image))	# If "image" is undefined, we're probably doing a pure usage check, rather than one in preparation for removal
				{
					my $notice;
					foreach my $page (@bad_pages)
					{
						$notice .= "*Found image [[:$image]] in [[$page->{title}]]\n";
					}
					botwarnlog($notice);
				}
			}
			else
			{
				if($parsed_xml->{query}->{imageusage}->{iu}->{ns} != 10 and $parsed_xml->{query}->{imageusage}->{iu}->{ns} != 12)
				{
					@pages = $parsed_xml->{query}->{imageusage}->{iu}->{title};
				}
				else
				{
					if(defined($image))
					{
						botwarnlog("*Found image [[:$image]] in [[$parsed_xml->{query}->{imageusage}->{iu}->{title}]]\n");
					}
				}
			}
		}
	}
	
	return @pages;
}

# Get all pages.  Don't filter for bad namespaces.
sub GetFullPageList
{
	my $image = shift;
	my @pages = ();

	my $xml = Pearle::APIQuery(list => 'imageusage', iutitle => $image);
	if(defined($xml))
	{
		my $parsed_xml = Pearle::getXMLParser()->XMLin($xml);

		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu}))
		{
			if(ref() eq 'ARRAY')
			{
				@pages = map {$_->{title}} @{$parsed_xml->{query}->{imageusage}->{iu}};
			}
			else
			{
				@pages = $parsed_xml->{query}->{imageusage}->{iu}->{title};
			}
		}
	}
	
	return @pages;
}

sub UpdateLink
{
	my $page = shift;
	my $from = shift;
	my $to = shift;
	my $summary = shift || "Updating link to bypass a redirect or disambiguation page";
	
	die "No page to edit" if(!defined($page));
	die "No link to change" if(!defined($from));
	die "No new link" if(!defined($to));
	
	Pearle::myLog(3, "Updating link from $from to $to for page $page\n");
	
	my $wikipage = Pearle::getPage($page);
	$wikipage->canonicalizeLinks();
	my $text = $wikipage->getEditableText();
	my $link_regex = MakeWikiRegex($from);
		
	my $matches = $text =~ s/\x01($link_regex)\x02/\x01${to}|${1}\x02/gi;
	$matches += $text =~ s/\x01$link_regex([#|])/\x01${to}${1}/gi;
	$matches += $text =~ s/([^=]\s*=\s*)$link_regex(\s*[|\n])/${1}$to${2}/gi;
	
	$wikipage->setEditableText($text);
	print $text;
	if($matches > 0)
	{
		Pearle::postPage( $wikipage, $summary, 0);
	}
	else
	{
		Pearle::myLog(3, "No update\n");
	}
	
	return $matches;
}

sub RemoveImageFromPage
{
	my $image = shift;
	my $page = shift;
	my $image_regex = shift;
	my $removal_prefix = shift;
	my $removal_comment = shift;

	my $wikipage;
	my $text;
	my ($match1, $match2);
	my $old_length;
	my $new_length;
	my $change_len;
	my $match_len;

tryagain:
	# Fetch an article page
	$wikipage = Pearle::getPage($page);
	$wikipage->canonicalizeLinks();
	$text = $wikipage->getEditableText();
			
	if(!defined($text))
	{
		Pearle::myLog(1, "Error: Bad edit page [[$page]]\n");
		botwarnlog(FixupLinks("*Error: Bad edit page [[$page]]\n"));
		sleep(300);
		return 0;
	}
	
	if($text =~ /^\s*$/)
	{
		# Might be protected instead of empty
		Pearle::myLog(1, "Error: Empty or protected page [[$page]]\n");
		botwarnlog(FixupLinks("*Error: Empty or protected page [[$page]]\n"));
		sleep(300);
		return 0;
	}
	
	if($text =~ /^#redirect/i)
	{
		Pearle::myLog(1, "Redirect found for page [[$page]] (image [[:$image]])\n");
		botwarnlog(FixupLinks("*Redirect found for page [[$page]] (image [[:$image]])\n"));
		print $text;
		return 0;
	}

	# Remove the image
	my $regex3 = "(\x01${image_regex}[^\x01]*?(\x01[^\x02]*?\x02[^\x01]*?|)+\x02[ \\t]*)";		# Regex to match images

	#my $regex3 = "(
	#               \x01                             # Open double-bracket for the image
	#               ${image_regex}                   # The image itself
	#               [^\x01]*?                        # Anything up to the first link in the caption, or a closing double bracket (minimal match)
	#                   (\x01                            # Open double-bracket for a link in the caption
	#                   [^\x02]*?                        # Anything but a closing double-bracket
	#                   \x02                             # The closing double-bracket for the link
	#                   [^\x01]*?|)                      # Any non-link text, or nothing
	#                   +                                # Matches one or more times
	#               \x02                             # The closing double-bracket for the image
	#               [ \\t]*)                         # Any trailing whitespace
	#             ";

	my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w";								# Regex to try to spot inline images
	my $regex3g = "(${image_regex}.*)";											# Regex to match gallery images
	my ($raw_image) = $image =~ /Image:(.*)/;	

	my $regex4m = "\x01[ _]*[Mm]edia[ _]*:[ _]*" . MakeWikiRegex($raw_image) . "[ _]*\\|([^]]*)\x02";	# Regex to match inline Media: links

	Pearle::myLog(3, "Regex 3: $regex3\n");
	notelog("Regex 3 extended: $regex3ex\n");
	notelog("Regex 3 gallery: $regex3g\n");
	Pearle::myLog(3, "Raw regex: $raw_image\n");
	notelog("Regex 4 Media: $regex4m\n");
	
	if($text =~ /$regex3ex/)
	{
		Pearle::myLog(1, "Possible inline image in [[$page]]\n");
		botwarnlog(FixupLinks("*Possible inline image [[:$image]] in [[$page]]\n"));
		return 0;	# Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox
	}

# Not worth checking for anymore
#	if($text =~ /-->/) # With the new editing markup, any close-comment means that somebody fucked up their wikimarkup
#	{
#		Pearle::myLog(3, "Fractional comment in page [[$page]]\n");
#		botwarnlog(FixupLinks("*Fractional comment in page [[$page]]\n"));
#	}
	
	$text =~ /$regex3/;
	$match_len = length($1);
	if(defined($removal_prefix))
	{
		$match2 = $text =~ s/$regex3/<!-- $removal_prefix $1 -->/g;
	}
	else
	{
		$match2 = $text =~ s/$regex3//g;
	}

	$new_length = length($text);
	print "Num: $match2 Len: $match_len\n";
	if($match2)
	{
		# If a whole lot of text was removed, log a warning
		if($match_len > (500 + length($image)))
		{
			botwarnlog(FixupLinks("*Long caption of $match_len bytes replaced in [[$page]]\n"));
			if($match_len > (1000 + length($image)))
			{
				Pearle::myLog(2, "Unusually long caption of $match_len found in [[$page]] ($match2 matches).\n");
				print $text, "\n";
#				exit;
				return 0;
			}
		}
		if($match_len < (2 + length($image)))
		{
			Pearle::myLog(0, "Short replacement of $match_len bytes (min " . (length($image) + 2) . ") in [[$page]] ($match2 matches).  Exiting.\n");
			Pearle::myLog(0, "Text:\n$text\n");
			print Dumper($1);
			print Dumper($image);
			exit;
		}
		# If many matches, log a warning
		if($match2 > 2)
		{
			Pearle::myLog(3, "More than one match ($match2) in page [[$page]]\n");
#			botwarnlog(FixupLinks("*More than one match ($match2) in page [[$page]]\n"));
		}
		if($match2 > 100)
		{
			Pearle::myLog(1, "Too many matches ($match2) in page [[$page]].  Skipping.\n");
			botwarnlog("*Too many matches ($match2) in page [[$page]].  Skipping.\n");
			return 0;
		}
	}

	# Put the text back and get it again in order to fold any comments resulting from removing non-gallery images.
	# This is because gallery image matching will also match commented images.
	$wikipage->setEditableText($text);
	$text = $wikipage->getEditableText();

	if($text =~ /<gallery/i)
	{
		Pearle::myLog(3, "*Possible image gallery in page [[$page]]\n");
		if(defined($removal_prefix))
		{
			if($text =~ s/$regex3g/<!-- $removal_prefix $1 -->/g)
			{
				$match2 += 1;
			}
		}
		else
		{
			if($text =~ s/$regex3g//g)
			{
				$match2 += 1;
			}
		}
	}

	if($match2 > 0)
	{
		if($text =~ /\[\[(?: |)<!--/)
		{
			Pearle::myLog(2, "Possible multiline image in page [[$page]]\n");
			botwarnlog(FixupLinks("*Possible multiline image in page [[$page]]\n"));
		}
	}

	# Improved infobox removal
	my $infobox_regex = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
	my $infobox_regex_full = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '[Ii]mage[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
	if($text =~ /$infobox_regex/)
	{
		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n");
		Pearle::myLog(3, "Infobox parameter: $1\n");
		if($& =~ /puic/)
		{
			botwarnlog(FixupLinks("*PUIC in page [[$page]]\n"));
		}
		else
		{
			my $sub = $1;
			my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
			$text =~ s/$match_regex/$sub/g;
			$match2 += 1;
		}
	}
	if($text =~ /$infobox_regex_full/)
	{
		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex_full\n");
		Pearle::myLog(3, "Infobox parameter: $1\n");
		if($& =~ /puic/)
		{
			botwarnlog(FixupLinks("*PUIC in page [[$page]]\n"));
		}
		else
		{
			my $sub = $1;
			my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '[Ii]mage[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
			$text =~ s/$match_regex/$sub/g;
			$match2 += 1;
		}
	}

	if($match2)	# No need to null-edit articles anymore
	{
		if($test_only)
		{
			notelog("Test removal from page succeeded\n");
		}
		else
		{
			# Submit the changes
			$wikipage->setEditableText($text);
			eval
			{
				Pearle::postPage($wikipage, $removal_comment, 0);
			};
			if($@)
			{
				if($@ =~ /^924 Spam filter: (.*)$/)
				{
					botwarnlog("*Spam filter on page [[$page]], url <nowiki>$1\n");
					$match2 = 0;	# We weren't able to remove it
				}
				elsif($@ =~ /^922/)
				{
					# Edit conflict.  Try editing the page again.
					botwarnlog("*Edit conflict on page [[$page]]\n");
					goto tryagain;
				}
				else
				{
					die;
				}
			}
		}
	}
	
	return ($match2)
}

# Returns 1 if the user has been notified, or 0 if they haven't
sub IsNotified
{
	my $uploader = shift;
	my $image_regex = shift;
	my $image_name = shift;
	my $notes_ref = shift;
	my $donts_ref = shift;

	# Check notification list
	if(defined($notes_ref) and $notes_ref->{"$uploader,$image_name"})
	{
		Pearle::myLog(2, "Already notified for this image\n");
		return 1;
	}

	if(defined($donts_ref) and $donts_ref->{$uploader})
	{
		Pearle::myLog(2, "On exception list: $uploader\n");
		return 1;
	}
	
	# Check uploader's talkpage
	my $page_data = Pearle::APIQuery(titles => "User talk:$uploader", prop => ['links', 'templates'], plnamespace => 6, pllimit => 500, tlnamespace => 10, tllimit => 500);
	$image_regex = MakeWikiRegex($image_name) if(!defined($image_regex));
	if($page_data =~ /$image_regex/)
	{
		Pearle::myLog(2, "Has a link from userpage\n");
		return 1;
	}
	if(usesTemplate($page_data, "Template:Nobots"))
	{
		Pearle::myLog(2, "Uses {{nobots}}\n");
		return 1;
	}
	return 0;
}

# Returns 1 if the page has been notified, or 0 if it hasn't
sub IsPageNotified
{
	my $page = shift;
	my $image_regex = shift;
	my $image_name = shift;
	my $notes_ref = shift;
	my $donts_ref = shift;
	
	# Check notification list
	if($notes_ref->{"$page,$image_name"})
	{
		Pearle::myLog(2, "Already notified for this image\n");
		return 1;
	}

	if($donts_ref->{$page})
	{
		Pearle::myLog(2, "On exception list: $page\n");
		return 1;
	}

	# Check page
	my $page_data = Pearle::APIQuery(titles => $page, prop => 'links', plnamespace => 6);
	if($page_data =~ /$image_regex/)
	{
		Pearle::myLog(2, "Has a link from page\n");
		return 1;
	}
	return 0;
}


sub isDated
{
	my $image_text = shift;
	if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/)	# Dated template
	{
		Pearle::myLog(4, "Dated tag $1 $2 $3\n");
		return 1;
	}
	# as of 6 October 2006">
	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category
	{
		Pearle::myLog(4, "Template borked; category $1 $2 $3\n");
		return 1;
	}
	elsif($image_text =~ /{{{day}}} {{{month}}} \d\d\d\d/)	# Generic template
	{
		Pearle::myLog(4, "Generic tag\n");
		return 0;
	}
	else
	{
		Pearle::myLog(4, "No tag match\n");
		return 0;
	}
}

# Return the tag date if there is one, the upload date if not
# Returns in (day, month, year) format
sub getDate
{
	my $image_text = shift;
	if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/)
	{
		Pearle::myLog(4, "Template date $1-$2-$3\n");
		return ($1, $2, $3);
	}
	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category
	{
		Pearle::myLog(4, "Category date $1-$2-$3\n");
		return ($1, $2, $3);
	}
	else
	{
		Pearle::myLog(4, "No date\n");
		return (1, "January", 2007);
	}
}

# Return a list of upload dates
sub getUploadDates
{
	my @dates;
	my $image_text = shift;
	while($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)</g)
	{
		push @dates, [$1, $2, $3];
	}
	return @dates;
}

sub getLastEditDate
{
	my ($day, $month, $year);
	my $image = shift;
	
	my @history = Pearle::parseHistory($image);
	(undef, $day, $month, $year) = @{$history[0]};
	
	return ($day, $month, $year);
}

# Find the most recent non-vandal, non-revert uploader
sub GetImageUploader
{
	my $image_data = shift;
	my ($uploader, $sha1, $comment);
	my @uploaders;
	my $uploader_data;
	my $i = 0;
	my $count = 0;
	
	my $parsed_xml = Pearle::getXMLParser()->XMLin($image_data);

	Pearle::myLog(4, Dumper($parsed_xml));
		
	if(exists($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) and defined($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}))
	{
		if(ref($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) eq 'ARRAY')
		{
			@uploaders = @{$parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}};
		}
		else
		{
			return $parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}->{user};
		}
	}
	else
	{
		return undef;
	}
	
	$uploader = $uploaders[0]->{user};
	$sha1 = $uploaders[0]->{sha1};
	$comment = $uploaders[0]->{comment} || "";
	
	my $done = 0;
	while(!$done)
	{
		if($comment =~ /^Reverted/)
		{
			Pearle::myLog(4, "Revert found\n");
			$i += 1;
			while($uploaders[$i]->{sha1} ne $sha1)
			{
				$i = $i + 1;
			}
		}
		elsif($comment =~ /optimi(z|s)ed|adjust|tweak|scale|crop|change|resize|remove|reduce/i)
		{
			Pearle::myLog(4, "Tweak found\n");
			$i = $i + 1;
		}
		elsif(!defined($uploader))
		{
			Pearle::myLog(4, "Something went wrong with finding the uploader\n");
			$done = 1;
		}
		elsif($count > 500)
		{
			Pearle::myLog(4, "Took too long finding the uploader\n");
			$uploader = undef;
			$done = 1;
		}
		else
		{
			$done = 1;
		}
		$uploader = $uploaders[$i]->{user};
		$sha1 = $uploaders[$i]->{sha1};
		$comment = $uploaders[$i]->{comment} || "";
		$count = $count + 1;
	}
	if(defined($uploader))
	{
		Pearle::myLog(4, "Uploader: $uploader\n");
		return $uploader;
	}
	else
	{
		return undef;
	}
}

sub loadNotificationList
{
	my $file = shift;
	my %notelist;
	my $i = 0;
	notelog("File: $file\n");
	open INFILE, "<:utf8", $file;
	while(<INFILE>)
	{
		$_ =~ s/\s*#.*$//g;
		chomp;
		$notelist{$_} = 1;
		$i++;
	}
	close INFILE;
	notelog("$i notifications loaded\n");
	return %notelist;
}

sub saveNotificationList
{
	return if($test_only);
	
	my $file = shift;
	my %notelist = @_;
	my $key;
	
	open OUTFILE, ">:utf8", $file;
	foreach $key (keys(%notelist))
	{
		print OUTFILE "$key\n";
	}
	close OUTFILE;
}

# Does a page transclude any of a set of templates?  Template names must be in the canonical form, with prefix.
sub usesTemplate
{
	my $xml = shift;
	my @templates = @_;
	
	my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['tl']);
	Pearle::myLog(4, "Templates: " . join(", ", @templates) . "\n");
#	Pearle::myLog(4, Dumper($parsed_xml));
	
	if(!exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) or !defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}))
	{
		Pearle::myLog(4, "No templates or no page\n");
		return 0;
	}
	
	my $result = eval
	{
		my @used_templates = @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}};
		@used_templates = map {$_->{title}} @used_templates;
		
		Pearle::myLog(4, "Used: " . join(", ", @used_templates) . "\n");
		Pearle::myLog(4, "Intersect: " . join(", ", Array::Utils::intersect( @templates, @used_templates )) . "\n");
		
		if(Array::Utils::intersect( @templates, @used_templates ))
		{
			return 1;
		}
		else
		{
			return 0;
		}
	};
	if($@)
	{
		# Probably more than one page in the xml
		print "usesTemplate error: $@";
		return 0;
	}
	return $result;
}

1;
</nowiki>