// User:Wrh2/AWB module
// From Wikivoyage
// implementing the "aggressive" rules is more likely to produce false positives
// (when true, ProcessArticle also runs ConvertTextToListings and FormatListings
// also runs SanitizeListingAddress)
private static readonly bool AGGRESSIVE = true;
// used to find calls to the listing templates; built by Tools.NestedTemplateRegex
// from the listed template names
private static readonly Regex ListingTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("listing,see,do,buy,eat,drink,sleep".Split(',')));
// parameter names of the listing templates, in the order FormatListingParams walks them
private static readonly string[] ListingTemplateParamNames = {"type","name","alt","url","email","address","lat","long","directions","phone","tollfree","fax","image","hours","checkin","checkout","price","lastedit","content"};
// the listing parameters that hold telephone-style numbers
private static readonly string[] ListingPhoneParamNames = {"phone","fax","tollfree"};
// section names in which ConvertTextToListings tries to convert plain list items
// (unanchored, so the name only has to contain one of the alternatives)
private static readonly Regex TextToListingSectionNames = new Regex(@"(get in|get around|see|do|buy|eat|drink|sleep)", RegexOptions.IgnoreCase);
// a number-like run that starts with a digit, "+" or "(", ends with a digit or ")",
// and may have apostrophes on either side
private static readonly Regex PhoneNumberRegex = new Regex(@"'*[\d\+\(][\s\d\(\)\-\.'\+]+[\d\)]'*");
// "mailto:" followed by any number of slashes
private static readonly Regex MailtoRegex = new Regex(@"mailto:[/]*", RegexOptions.IgnoreCase);
// two or more consecutive whitespace characters
private static readonly Regex ExcessWhitespaceRegex = new Regex(@"\s\s+", RegexOptions.Singleline);
// "http://www.example.com"
// (the scheme is optional and host labels may only contain letters)
private static readonly string ValidUrlPattern = @"((http(s)?:)//)?(([a-z]+\.)+)([a-z]+)";
private static readonly Regex ValidUrlRegex = new Regex(ValidUrlPattern, RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "[http://www.example.com]"
// (group 1 is everything between the brackets; note that this regex is case-sensitive)
private static readonly Regex FootnoteUrlRegex = new Regex(@"\[(" + ValidUrlPattern + @"([^\]\s]+))\]", RegexOptions.Singleline);
// "Foo (disambiguation)"
// (group 1 is the text before the parenthesised part)
private static readonly Regex CityNameWithDisambiguationRegex = new Regex(@"([^\(]+) \([^\)]+\)", RegexOptions.Singleline);
// "blah, CA 99999", "blah, CA 99999-1234", "blah CA", etc
// (group 1 is the text before the state, group 2 the state name or abbreviation)
private static readonly Regex AddressWithStateOrZipRegex = new Regex(@"(.+)[\.,\-]+\s*(AL|alabama|AK|alaska|AZ|arizona|AR|arkansas|CA|california|CO|colorado|CT|connecticut|DC|DE|delaware|FL|florida|GA|georgia|HI|hawaii|ID|idaho|IL|illinois|IN|indiana|IA|iowa|KS|kansas|KY|kentucky|LA|louisiana|ME|maine|MD|maryland|MA|massachusetts|MI|michigan|MN|minnesota|MS|mississippi|MO|missouri|MT|montana|NE|nebraska|NV|nevada|NH|new hampshire|NJ|new jersey|NM|new mexico|NY|new york|NC|north carolina|ND|north dakota|OH|ohio|OK|oklahoma|OR|oregon|PA|pennsylvania|RI|rhode island|SC|south carolina|SD|south dakota|TN|tennessee|TX|texas|UT|utah|VT|vermont|VA|virginia|WA|washington|WV|west virginia|WI|wisconsin|WY|wyoming)([\s,\-]*[0-9]{5}(\-[0-9]{4})?)?$", RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "* ", "** ", etc
private static readonly Regex EmptyListItemRegex = new Regex(@"^\*+\s*\n", RegexOptions.Multiline);
// "Some Place [http://www.example.com]": a capitalised phrase (not starting with "The ")
// followed by a footnote-style link. Groups 1 and 5 are the apostrophes around the
// phrase, group 2 is the phrase and group 6 is the URL.
private static readonly Regex FootnoteToFrontLinkRegex = new Regex(@"('*)((?!The )\p{Lu}[\w\-'/]*[\w]( (and|del|de|of|&|the|la|le|for|\p{Lu}[\w\-'/]*[\w]))*)('*)[, ]*\[(http[^ ]+)( )*\]");
// "9.30 a.m.": hour and minutes separated by a dot and followed by an am/pm suffix
// (group 1 hour, group 2 minutes, group 3 spacing, group 4 suffix)
private static readonly Regex TimeValuesShouldUseColonAsSeperator = new Regex(@"\b([1-9]|10|11|12)\.([0-5][0-9])([ ]*)((a|p)\.?\s*m\.|(a|p)\.?\s*m\b)", RegexOptions.IgnoreCase);
// "9 a.m.", "9:30 am": hour (group 1) with optional minutes (group 2) and an "am" suffix
private static readonly Regex TimeValuesSuffixAM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(a\.\s*m\.|a\s*m\b)", RegexOptions.IgnoreCase);
// "9 p.m.", "9:30 pm": hour (group 1) with optional minutes (group 2) and a "pm" suffix
private static readonly Regex TimeValuesSuffixPM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(p\.\s*m\.|p\s*m\b)", RegexOptions.IgnoreCase);
// "9:00AM": a time that falls exactly on the hour
private static readonly Regex TimeValuesRoundToHour = new Regex(@"\b([1-9]|10|11|12):00(AM|PM)\b", RegexOptions.IgnoreCase);
// "12PM"
private static readonly Regex TimeValuesHourToNoon = new Regex(@"\b(12PM)\b", RegexOptions.IgnoreCase);
// "12AM"
private static readonly Regex TimeValuesHourToMidnight = new Regex(@"\b(12AM)\b", RegexOptions.IgnoreCase);
// "9AM - 5PM": two times joined by a hyphen with optional whitespace around it
// (group 1 first time, group 6 the hyphen, group 7 second time)
private static readonly Regex TimeValuesTrimWhitespace = new Regex(@"\b((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\s*(\-)\s*((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\b", RegexOptions.IgnoreCase);
// "[http://en.wikipedia.org/wiki/Foo link text]" and "[http://en.wikipedia.org/wiki/Foo]",
// plus the same two forms for en.wikivoyage.org (http or https).
// Group 1 is the page title; group 2, where present, is the link text.
// The dots of the host name are escaped so that only the literal domain matches.
private static readonly Regex ExternalToInternalLinkWikipedia = new Regex(@"\[http[s]?://en\.wikipedia\.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikipediaNoText = new Regex(@"\[http[s]?://en\.wikipedia\.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyage = new Regex(@"\[http[s]?://en\.wikivoyage\.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyageNoText = new Regex(@"\[http[s]?://en\.wikivoyage\.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);
// "* blah blah blah", "** blah blah blah", etc
// (group 1 is the item text after the bullets; its first two characters must not be "{")
private static readonly Regex CandidateListingItemRegex = new Regex(@"^\*+\s*([^\{]{2}.+)$", RegexOptions.Multiline);
// "'''Some Name'''", optionally preceded by "the"; group 2 is the bolded text,
// which must be at least three characters long
private static readonly Regex ListingNameRegex = new Regex(@"(the\s+)*'''(.{3,}?)'''", RegexOptions.IgnoreCase);
// "[http://www.example.com/ Example Text]"
// (group 1 is the URL, group 2 the link text)
private static readonly Regex ExternalLinkWithTextRegex = new Regex(@"\[(http[^\]\s]+)\s+([^\]]+)\]", RegexOptions.IgnoreCase);
// "123-456-7890"
// Two or more digit groups separated by whitespace or hyphens. Each group may be
// prefixed by "''", "+" or "(" and suffixed by "''" or ")"; an " ext. 123" suffix is optional.
private static readonly string ListingPhoneNumber = @"((''|\+|\()*\d+(''|\)*)[\s\-]+)+(''|\+|\()*\d+(''|\)*)( ext\.? \d+)?";
private static readonly Regex ListingPhoneNumberRegex = new Regex(ListingPhoneNumber, RegexOptions.IgnoreCase);
// "fax: 123-456-7890"
// (group 4 is the number itself; the whole thing may be wrapped in parentheses)
private static readonly string ListingFaxNumber = @"(\(''|''\(|\()?fax(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingFaxNumberRegex = new Regex(ListingFaxNumber, RegexOptions.IgnoreCase);
// "telephone: 123-456-7890" and similar
// (group 2 is the label, group 4 the number itself)
// "call:" and "telephone:" are separate alternatives, and every label lists its
// colon form first so that the colon is consumed as part of the label.
private static readonly string ListingPhoneNumberWithLabel = @"(\(''|''\(|\()?(\u260e|call:|call|telephone:|telephone|tel:|tel\.:|tel\.|tel|phone:|phone|ph:|ph\.:|ph\.|ph)(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingPhoneNumberWithLabelRegex = new Regex(ListingPhoneNumberWithLabel, RegexOptions.IgnoreCase);
// "toll-free: 123-456-7890" and similar
// (group 2 is the label, group 5 the number itself)
private static readonly string ListingTollfreeNumber = @"(\(''|''\(|\()?(toll[ \-]?free)(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingTollfreeNumberRegex = new Regex(ListingTollfreeNumber, RegexOptions.IgnoreCase);
// regex alternations of punctuation marks, combined into the three regexes below:
// marks treated as invalid at both the start and the end of a line
private static readonly string InvalidLeadingOrTrailingPunctuation = @",|\-|\*|\:|\–|;";
// marks treated as invalid only at the start of a line
private static readonly string InvalidLeadingPunctuation = @"\.|!|\?|\)|\]|\}|&mdash;|;";
// marks treated as invalid only at the end of a line
private static readonly string InvalidTrailingPunctuation = @"\(|\[|\{|;";
// marks treated as invalid when they follow other punctuation
private static readonly string InvalidDuplicatePunctuation = @"\.|!|\?|" + InvalidLeadingOrTrailingPunctuation;
// a run of whitespace and invalid punctuation at the start of a line
private static readonly Regex InvalidLeadingPunctuationRegex = new Regex(@"^(\s|" + InvalidLeadingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+", RegexOptions.Multiline);
// a run of whitespace and invalid punctuation at the end of a line
private static readonly Regex InvalidTrailingPunctuationRegex = new Regex(@"(\s|" + InvalidTrailingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+$", RegexOptions.Multiline);
// a run of punctuation/whitespace (group 1 holds its last element) followed by
// one more punctuation mark (group 2)
private static readonly Regex InvalidDuplicatePunctuationRegex = new Regex(@"(" + InvalidDuplicatePunctuation + @"|\s)+(" + InvalidDuplicatePunctuation + @")");
// "email: foo@bar.com" OR "mailto:foo@bar.com" OR "foo@bar.com"
// (group 1 is the optional label, group 2 the address itself)
// The top-level domain may be any length of two letters or more, so that
// addresses such as "info@visit.travel" are recognised as well.
private static readonly string ListingEmail = @"(mailto:|e-mail:|email:)?\s*(\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b)";
private static readonly Regex ListingEmailRegex = new Regex(ListingEmail, RegexOptions.IgnoreCase);
// "1234 First St"
// House number, optional compass prefix, one to three name words and a street type,
// optionally followed by a unit number or compass suffix. Group 1 is the address
// without the trailing dots, commas and spaces that the pattern also consumes.
private static readonly string ListingAddress = @"([\d]+([/\-][\d]+)? ([nesw]\.? )?([\p{L}\d]+[ \.\-]*){1,3} (avenue|ave|av|boulevard|blvd|court|ct|drive|dr|expressway|expwy|freeway|fwy|highway( \d{1,3})?|hwy( \d{1,3})?|lane|ln|loop|parkway|pkwy|place|pl|road|rd|row|street|st|way)\b(\.? (#(\s)*[\d]+|north|ne|nw|n|east|e|south|se|sw|s|west|w)\b)?)[\. , ]*";
private static readonly Regex ListingAddressRegex = new Regex(ListingAddress, RegexOptions.IgnoreCase);
// the above pattern will match things like "25 km by road", so add a pattern to catch those
private static readonly string ListingAddressFalsePositives = @"\b(km|kilometer|kilometers|mi|mile|miles)\b";
private static readonly Regex ListingAddressFalsePositivesRegex = new Regex(ListingAddressFalsePositives, RegexOptions.IgnoreCase);
// "Calle Ricardo Montalban, 452"
// NOTE: group 1 is only the street-type word ("Calle"); the full address is the whole match
private static readonly string ListingAddressInternational = @"(avenida|ave|av|calle|estrada|est|rua)(\.)? ([\p{L}\d]+[ \.\-]*){1,3}, [\d]+([/\-][\d]+)?";
private static readonly Regex ListingAddressInternationalRegex = new Regex(ListingAddressInternational, RegexOptions.IgnoreCase);
// "between ...", "corner ...", "end ...", "next ...", "on ...": a line that opens
// with one of these words followed by whitespace. The alternation has no empty
// branch, so a line that merely starts with whitespace does not match.
private static readonly Regex ListingAddressIsDirectionsRegex = new Regex(@"^(between|corner|end|next|on)\s", RegexOptions.Multiline);
// article status templates, one regex per article type; each is built by
// Tools.NestedTemplateRegex from the outline/usable/guide/star template names
private static readonly Regex CityStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinecity,usablecity,guidecity,starcity".Split(',')));
private static readonly Regex CountryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinecountry,usablecountry,guidecountry,starcountry".Split(',')));
private static readonly Regex DiveguideStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinediveguide,usablediveguide,guidediveguide,stardiveguide".Split(',')));
private static readonly Regex DistrictStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinedistrict,usabledistrict,guidedistrict,stardistrict".Split(',')));
private static readonly Regex ItineraryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlineitinerary,usableitinerary,guideitinerary,staritinerary".Split(',')));
private static readonly Regex ParkStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinepark,usablepark,guidepark,starpark".Split(',')));
private static readonly Regex PhrasebookStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinephrasebook,usablephrasebook,guidephrasebook,starphrasebook".Split(',')));
private static readonly Regex RegionStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlineregion,usableregion,guideregion,starregion".Split(',')));
private static readonly Regex TopicStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinetopic,usabletopic,guidetopic,startopic".Split(',')));
// "|alt=" (with optional whitespace), i.e. an "alt" parameter that is present in a template call
private static readonly Regex ListingAltParamRegex = new Regex(@"\|\s*alt\s*=", RegexOptions.Singleline);
// "()", "[]", "{}"
private static readonly Regex EmptyPunctuationRegex = new Regex(@"(\(\s*\)|\[\s*\]|\{\s*\})");
// text made up solely of apostrophes, hyphens, brackets, asterisks, ".", "?", "!" and whitespace
private static readonly Regex OnlyPunctuationRegex = new Regex(@"^['\-\(\)\[\]\{\}\*\.\?!\s]+$", RegexOptions.Singleline);
// match a single digit
private static readonly Regex DigitRegex = new Regex(@"\d");
// two or more consecutive whitespace characters
private static readonly Regex DoubleSpaceRegex = new Regex(@"\s{2,}");
// heading names that RemoveEmptyObsoleteHeadings removes when their section is empty;
// all four are unanchored, so the heading only has to contain one of the alternatives
private static readonly Regex InvalidEmptySecondLevelCityHeading = new Regex(@"(cope|learn|respect|stay safe|stay healthy|talk|work)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptySecondLevelRegionHeading = new Regex(@"(talk|regions)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptyThirdLevelCityHeading = new Regex(@"(by(\s+\w)+)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptyThirdLevelRegionHeading = new Regex(@"(itineraries)", RegexOptions.IgnoreCase);
// non-standard second-level heading titles (key) and the title each is replaced with (value);
// UpdateHeadings tests the keys against the trimmed, lower-cased heading title
private static readonly Dictionary<Regex, string> InvalidSecondLevelHeadings = new Dictionary<Regex, string> {
	{new Regex(@"^(know|information)$", RegexOptions.IgnoreCase), "Understand"},
	{new Regex(@"^(get in|getting in|getting there)$", RegexOptions.IgnoreCase), "Get in"},
	{new Regex(@"^(get around|getting around)$", RegexOptions.IgnoreCase), "Get around"},
	{new Regex(@"^(sights)$", RegexOptions.IgnoreCase), "See"},
	{new Regex(@"^(activities)$", RegexOptions.IgnoreCase), "Do"},
	{new Regex(@"^(shopping|shops|shop)$", RegexOptions.IgnoreCase), "Buy"},
	{new Regex(@"^(restaurants|dining)$", RegexOptions.IgnoreCase), "Eat"},
	{new Regex(@"^(bars|nightlife)$", RegexOptions.IgnoreCase), "Drink"},
	{new Regex(@"^(accommodation|hotels|stay)$", RegexOptions.IgnoreCase), "Sleep"},
	{new Regex(@"^(stay healthy)$", RegexOptions.IgnoreCase), "Stay healthy"},
	{new Regex(@"^(stay safe|safety)$", RegexOptions.IgnoreCase), "Stay safe"},
	{new Regex(@"^(go next|get out|nearby)$", RegexOptions.IgnoreCase), "Go next"}
};
// headings that start with "midrange" / "mid range" / "mid-range" or "moderate"
private static readonly Regex MidrangeHeadingRegex = new Regex(@"^(mid[ \-]*range|moderate)", RegexOptions.IgnoreCase);
// "By xyz" / "On xyz" headings; group 1 is the preposition, group 2 the remainder
private static readonly Regex ByOnHeadingRegex = new Regex(@"^(by|on) (.+)", RegexOptions.IgnoreCase);
// typographic symbols (key) and the text ReplaceInvalidSymbols substitutes for each (value)
private static readonly Dictionary<Regex, string> InvalidSymbols = new Dictionary<Regex, string> {
	{new Regex(@"^(•)", RegexOptions.Multiline), "*"},
	{new Regex(@"(“|”)"), "\""},
	{new Regex(@"(’|‘)"), "'"},
	{new Regex(@"…"), "..."},
	{new Regex(@"(®|©|™)"), ""}
};
// "www.example.com" preceded by any character other than "/";
// group 1 is that preceding character, group 2 the host
private static readonly Regex NoHttpUrlRegex = new Regex(@"([^/])(www\.[a-z0-9\-]+\.[a-z0-9\-]+)", RegexOptions.IgnoreCase);
// "-123.1234567", "-123.1234567890"
// (group 1 is the number up to its eighth decimal place, group 3 any digits beyond that)
private static readonly Regex LatLongTrimRegex = new Regex(@"((\-)?[0-9]{1,3}\.[0-9]{8})([0-9]+)");

// Module entry point: runs every clean-up pass over the article text, in a fixed
// order, and returns the resulting text.
//   articleText   - the wikitext to process
//   articleTitle  - forwarded unchanged to every pass
//   wikiNamespace - forwarded unchanged to every pass
//   summary       - starts out empty and is built up by the passes
//   skip          - set to true when the resulting text equals the input text
public string ProcessArticle(string articleText, string articleTitle, int wikiNamespace, out string summary, out bool skip) {
	summary = "";
	skip = false;
	string updatedText = articleText;
	updatedText = ReplaceInvalidSymbols(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	updatedText = FixInvalidUrls(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	updatedText = UpdateHeadings(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	updatedText = RemoveEmptyListings(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	updatedText = RemoveEmptyObsoleteHeadings(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	updatedText = ExternalToInternalLink(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	updatedText = FootnoteToFrontlink(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	updatedText = FormatTimeValues(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	// plain-text-to-listing conversion is only attempted in aggressive mode
	if (AGGRESSIVE) {
		updatedText = ConvertTextToListings(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	}
	updatedText = FormatListings(updatedText, articleTitle, wikiNamespace, ref summary, ref skip);
	// no pass changed anything
	if (updatedText.Equals(articleText)) {
		skip = true;
	}
	return updatedText;
}
// Replaces every symbol matched by a key of InvalidSymbols with the wiki text
// equivalent stored as its value, and returns the resulting text.
// No edit summary entry is added; articleTitle, wikiNamespace, summary and
// skip are accepted but not used.
private string ReplaceInvalidSymbols(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	string cleanedText = articleText;
	foreach (KeyValuePair<Regex, string> symbolRule in InvalidSymbols) {
		Regex symbolPattern = symbolRule.Key;
		string wikiEquivalent = symbolRule.Value;
		cleanedText = symbolPattern.Replace(cleanedText, wikiEquivalent);
	}
	return cleanedText;
}
// Replaces URLs of the form "www.example.com" with "http://www.example.com"
// and notes the fix in the edit summary when anything was changed.
// Returns the updated text; articleTitle, wikiNamespace and skip are not used.
private string FixInvalidUrls(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	// $1 is the character in front of the URL, $2 the URL itself
	string fixedText = NoHttpUrlRegex.Replace(articleText, "$1http://$2");
	bool changed = !fixedText.Equals(articleText);
	if (changed) {
		summary = UpdateEditSummary(summary, "fix URL(s) missing 'http'");
	}
	return fixedText;
}
// Ensure that headings match the article templates.
// Every heading found by WikiRegexes.Headings is checked:
//   - second-level headings are renamed according to InvalidSecondLevelHeadings;
//   - headings of any other level are normalised to "Mid-range" or to
//     "By xyz" / "On xyz" (capitalised preposition, lower-case remainder).
// A heading is only rewritten, and only reported in the edit summary, when the
// new title differs from the existing title once surrounding whitespace is
// ignored, so "== Get in ==" is left alone instead of being reported as
// "'Get in' -> 'Get in'".
// Returns the updated text; articleTitle, wikiNamespace and skip are not used.
private string UpdateHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	foreach (Match m in WikiRegexes.Headings.Matches(articleText)) {
		string originalHeading = m.Value;
		string trimmedTitle = m.Groups[1].Value.Trim();
		string newTitle = trimmedTitle;
		// invariant casing keeps the comparison independent of the user's locale
		string lowerTitle = trimmedTitle.ToLowerInvariant();
		// count the leading '=' characters; the bounds check covers headings
		// that consist of nothing but '='
		int headingLevel = 1;
		while (headingLevel < originalHeading.Length && originalHeading[headingLevel] == '=') {
			headingLevel++;
		}
		Match match = null;
		if (headingLevel == 2) {
			foreach (KeyValuePair<Regex, string> invalidHeadingEntry in InvalidSecondLevelHeadings) {
				if (MatchText(lowerTitle, invalidHeadingEntry.Key, ref match)) {
					newTitle = invalidHeadingEntry.Value;
					// the patterns are anchored and mutually exclusive
					break;
				}
			}
		} else {
			if (MatchText(lowerTitle, MidrangeHeadingRegex, ref match)) {
				newTitle = "Mid-range";
			} else if (MatchText(lowerTitle, ByOnHeadingRegex, ref match)) {
				newTitle = Capitalize(match.Groups[1].Value) + " " + match.Groups[2].Value.ToLowerInvariant();
			}
		}
		if (!newTitle.Equals(trimmedTitle)) {
			string headingBars = originalHeading.Substring(0, headingLevel);
			articleText = articleText.Replace(originalHeading, headingBars + newTitle + headingBars);
			summary = UpdateEditSummary(summary, "'" + trimmedTitle + "' &rarr; '" + newTitle + "' per [[WV:AT]]");
		}
	}
	return articleText;
}
// Removes every listing template call whose parameters are all empty (a call
// without any parameters counts as empty too) and notes each removal in the
// edit summary. Afterwards all empty list items ("* ", "** ", ...) are stripped
// from the text, whether or not a listing was removed.
// Returns the updated text; articleTitle, wikiNamespace and skip are not used.
private string RemoveEmptyListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	foreach (Match listingMatch in ListingTemplateNamesRegex.Matches(articleText)) {
		string listing = listingMatch.Value;
		bool hasValue = false;
		foreach (KeyValuePair<string, string> parameter in Tools.GetTemplateParameterValues(listing)) {
			if (parameter.Value != "") {
				hasValue = true;
				break;
			}
		}
		if (hasValue) {
			continue;
		}
		articleText = articleText.Replace(listing, "");
		summary = UpdateEditSummary(summary, "empty listing(s) removed");
	}
	// strip off any list items that are now empty as a result
	return EmptyListItemRegex.Replace(articleText, "");
}
// Remove obsolete headings if they have no content.
// Only region articles and city/district articles are processed; any other
// article is returned unchanged. For every second-level section, and every
// third-level section inside it, the section text is removed from the article
// when the heading name matches the "invalid empty heading" pattern for the
// article type and the section is reported as empty by IsEmptySecondLevelSection /
// IsEmptyThirdLevelSection. Region patterns are only applied to region articles
// and city patterns only to city/district articles, at both heading levels.
// The removed headings are listed in the edit summary, grouped by article type.
// articleTitle, wikiNamespace and skip are not used.
private string RemoveEmptyObsoleteHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	bool regionArticle = IsRegionArticle(articleText);
	bool cityArticle = IsCityArticle(articleText) || IsDistrictArticle(articleText);
	if (!regionArticle && !cityArticle) {
		return articleText;
	}
	string emptyRegionHeadings = "";
	string emptyCityHeadings = "";
	foreach (KeyValuePair<string, string> levelTwoSectionData in SplitToSecondLevelSections(articleText)) {
		string levelTwoSectionName = levelTwoSectionData.Key;
		string levelTwoSectionText = levelTwoSectionData.Value;
		if (regionArticle) {
			Match m = InvalidEmptySecondLevelRegionHeading.Match(levelTwoSectionName);
			if (m.Success && IsEmptySecondLevelSection(levelTwoSectionText)) {
				articleText = articleText.Replace(levelTwoSectionText, "");
				emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelTwoSectionName + "'");
				// the whole section is gone, so its sub-sections need no check
				continue;
			}
		}
		if (cityArticle) {
			Match m = InvalidEmptySecondLevelCityHeading.Match(levelTwoSectionName);
			if (m.Success && IsEmptySecondLevelSection(levelTwoSectionText)) {
				articleText = articleText.Replace(levelTwoSectionText, "");
				emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelTwoSectionName + "'");
				continue;
			}
		}
		foreach (KeyValuePair<string, string> levelThreeSectionData in SplitToThirdLevelSections(levelTwoSectionText)) {
			string levelThreeSectionName = levelThreeSectionData.Key.Trim();
			string levelThreeSectionText = levelThreeSectionData.Value;
			// region-only headings must not be removed from (or reported for) city articles
			if (regionArticle) {
				Match m = InvalidEmptyThirdLevelRegionHeading.Match(levelThreeSectionName);
				if (m.Success && IsEmptyThirdLevelSection(levelThreeSectionText)) {
					articleText = articleText.Replace(levelThreeSectionText, "");
					emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelThreeSectionName + "'");
					continue;
				}
			}
			if (cityArticle) {
				Match m = InvalidEmptyThirdLevelCityHeading.Match(levelThreeSectionName);
				if (m.Success && IsEmptyThirdLevelSection(levelThreeSectionText)) {
					articleText = articleText.Replace(levelThreeSectionText, "");
					emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelThreeSectionName + "'");
				}
			}
		}
	}
	if (!String.IsNullOrEmpty(emptyRegionHeadings)) {
		summary = UpdateEditSummary(summary, "remove empty " + emptyRegionHeadings + " heading(s) per [[WV:Region article template]]");
	}
	if (!String.IsNullOrEmpty(emptyCityHeadings)) {
		summary = UpdateEditSummary(summary, "remove empty " + emptyCityHeadings + " heading(s) per [[WV:Huge city article template]]");
	}
	return articleText;
}
// Converts footnote links ("Some Place [http://example.com]") to front links
// ("[http://example.com Some Place]") and notes the change in the edit summary.
// Returns the updated text; articleTitle, wikiNamespace and skip are not used.
private string FootnoteToFrontlink(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	// $1/$5 are the apostrophes around the phrase, $2 the phrase, $6 the URL
	string converted = FootnoteToFrontLinkRegex.Replace(articleText, "$1[$6 $2]$5");
	if (converted.Equals(articleText)) {
		return converted;
	}
	summary = UpdateEditSummary(summary, "footnote &rarr; frontlink per [[WV:XL]]");
	return converted;
}
// Converts external links that point at English Wikipedia or English Wikivoyage
// into interwiki ("[[w:Page|text]]") and internal ("[[Page|text]]") links, and
// notes the change in the edit summary.
// Returns the updated text; articleTitle, wikiNamespace and skip are not used.
private string ExternalToInternalLink(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	string converted = articleText;
	// links with link text are handled before the text-less form of the same site
	converted = ExternalToInternalLinkWikipedia.Replace(converted, "[[w:$1|$2]]");
	converted = ExternalToInternalLinkWikipediaNoText.Replace(converted, "[[w:$1]]");
	converted = ExternalToInternalLinkWikivoyage.Replace(converted, "[[$1|$2]]");
	converted = ExternalToInternalLinkWikivoyageNoText.Replace(converted, "[[$1]]");
	bool changed = !converted.Equals(articleText);
	if (changed) {
		summary = UpdateEditSummary(summary, "external &rarr; internal link(s)");
	}
	return converted;
}
// Normalises the way times are written and notes the change in the edit summary.
// The steps run in this order because the later ones rely on the "9AM" form
// produced by the earlier ones.
// Returns the updated text; articleTitle, wikiNamespace and skip are not used.
private string FormatTimeValues(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	string formatted = articleText;
	// "9.30 am" -> "9:30 am"
	formatted = TimeValuesShouldUseColonAsSeperator.Replace(formatted, "$1:$2$3$4");
	// "9:30 a.m." -> "9:30AM", "9 pm" -> "9PM"
	formatted = TimeValuesSuffixAM.Replace(formatted, "$1$2AM");
	formatted = TimeValuesSuffixPM.Replace(formatted, "$1$2PM");
	// "9:00AM" -> "9AM"
	formatted = TimeValuesRoundToHour.Replace(formatted, "$1$2");
	// "12PM" -> "noon", "12AM" -> "midnight"
	formatted = TimeValuesHourToNoon.Replace(formatted, "noon");
	formatted = TimeValuesHourToMidnight.Replace(formatted, "midnight");
	// "9AM - 5PM" -> "9AM-5PM"
	formatted = TimeValuesTrimWhitespace.Replace(formatted, "$1$6$7");
	if (formatted.Equals(articleText)) {
		return formatted;
	}
	summary = UpdateEditSummary(summary, "update time(s) per [[WV:TDF]]");
	return formatted;
}
// Converts plain-text list items into listing templates.
// Only city, district and park articles are processed, and within them only
// the second-level sections whose name matches TextToListingSectionNames.
// Each candidate list item is handed to ConvertListingItemtoTemplatedListing;
// when that yields a template call, the item text is replaced by it everywhere
// in the article. The number of converted items is reported in the edit summary.
// Returns the updated text; articleTitle, wikiNamespace and skip are not used.
private string ConvertTextToListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	bool supportedArticle = IsCityArticle(articleText) || IsDistrictArticle(articleText) || IsParkArticle(articleText);
	if (!supportedArticle) {
		// do not try to convert text to listings for non-city or park articles
		return articleText;
	}
	int converted = 0;
	foreach (KeyValuePair<string, string> section in SplitToSecondLevelSections(articleText)) {
		string heading = section.Key;
		if (!TextToListingSectionNames.Match(heading).Success) {
			// only convert text that's in a section that supports non-generic listings
			continue;
		}
		string listingType = GetValidListingTypeForSection(heading);
		string body = section.Value;
		// walk over every list item of the section
		foreach (Match itemMatch in CandidateListingItemRegex.Matches(body)) {
			string itemText = itemMatch.Groups[1].Value;
			string listing = ConvertListingItemtoTemplatedListing(itemText, listingType, ref summary);
			if (listing == "") {
				continue;
			}
			articleText = articleText.Replace(itemText, listing);
			converted++;
		}
	}
	if (converted > 0) {
		string plural = converted > 1 ? "s" : "";
		summary = UpdateEditSummary(summary, "convert " + converted + " plain text listing" + plural + " to [[WV:Listings|templated listing" + plural + "]]");
	}
	return articleText;
}
// Builds a "{{<listingType>|...}}" template call from the text of one list item.
// The name is extracted first; without a name nothing is converted and "" is
// returned. Phone numbers, e-mail, URL and address are then pulled out of the
// text in that order, and whatever text remains becomes the listing content.
// summary is accepted but not used.
private string ConvertListingItemtoTemplatedListing(string listItemText, string listingType, ref string summary) {
	string listing = "{{" + listingType + "}}";
	bool hasName = ProcessListingNameInListItem(ref listItemText, ref listing);
	if (hasName) {
		ProcessListingPhoneInListItem(ref listItemText, ref listing);
		ProcessListingEmailInListItem(ref listItemText, ref listing);
		ProcessListingUrlInListItem(ref listItemText, ref listing);
		ProcessListingAddressInListItem(ref listItemText, ref listing);
		ProcessListingContentInListItem(ref listItemText, ref listing);
		return listing;
	}
	// if we don't have a listing name don't bother trying to convert anything else
	return "";
}
// Extracts the listing name from the start of the list item text.
// Returns false, leaving both arguments untouched, when the text does not
// begin with a bolded name. Otherwise the "name" parameter of templateCall is
// set, the matched name markup is removed from listItemText and true is
// returned. When the name is itself an external link with text, the link
// target goes into "url" and the link text into "name".
private bool ProcessListingNameInListItem(ref string listItemText, ref string templateCall) {
	Match nameMatch = ListingNameRegex.Match(listItemText);
	bool nameAtStart = nameMatch.Success && nameMatch.Index == 0;
	if (!nameAtStart) {
		// if there isn't a name at the beginning of the listing, don't convert
		return false;
	}
	string boldedText = nameMatch.Groups[2].Value.Trim();
	templateCall = Tools.SetTemplateParameterValue(templateCall, "name", boldedText);
	// see if the name is front-linked
	Match linkMatch = ExternalLinkWithTextRegex.Match(boldedText);
	if (linkMatch.Success && linkMatch.Index == 0) {
		// split the name & url fields
		string linkTarget = linkMatch.Groups[1].Value;
		string linkText = linkMatch.Groups[2].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "url", linkTarget);
		templateCall = Tools.SetTemplateParameterValue(templateCall, "name", linkText);
	}
	// remove name from list item text
	listItemText = RemoveValueFromListItemText(listItemText, nameMatch.Value);
	return true;
}
// Moves phone, fax and toll-free numbers from the list item text into the
// "phone", "fax" and "tollfree" parameters of templateCall.
// Labelled numbers ("tel: ...", "fax: ...", "toll-free: ...") are looked for
// first. Only when no labelled phone number was found is an unlabelled number
// accepted as the phone number, and then only if it has more than six digits.
private void ProcessListingPhoneInListItem(ref string listItemText, ref string templateCall) {
	Match labelledPhone = ListingPhoneNumberWithLabelRegex.Match(listItemText);
	if (labelledPhone.Success) {
		// group 4 is the number without its label
		templateCall = Tools.SetTemplateParameterValue(templateCall, "phone", labelledPhone.Groups[4].Value);
		listItemText = RemoveValueFromListItemText(listItemText, labelledPhone.Value);
	}
	Match labelledFax = ListingFaxNumberRegex.Match(listItemText);
	if (labelledFax.Success) {
		// group 4 is the number without its label
		templateCall = Tools.SetTemplateParameterValue(templateCall, "fax", labelledFax.Groups[4].Value);
		listItemText = RemoveValueFromListItemText(listItemText, labelledFax.Value);
	}
	Match labelledTollfree = ListingTollfreeNumberRegex.Match(listItemText);
	if (labelledTollfree.Success) {
		// group 5 is the number without its label
		templateCall = Tools.SetTemplateParameterValue(templateCall, "tollfree", labelledTollfree.Groups[5].Value);
		listItemText = RemoveValueFromListItemText(listItemText, labelledTollfree.Value);
	}
	if (labelledPhone.Success) {
		return;
	}
	// try to find a phone number without a label
	Match barePhone = ListingPhoneNumberRegex.Match(listItemText);
	if (!barePhone.Success) {
		return;
	}
	string candidate = barePhone.Value;
	// only consider a phone number valid if it contains more than six digits
	if (DigitRegex.Matches(candidate).Count <= 6) {
		return;
	}
	templateCall = Tools.SetTemplateParameterValue(templateCall, "phone", candidate);
	listItemText = RemoveValueFromListItemText(listItemText, barePhone.Value);
}
// Moves the first e-mail address found in the list item text into the "email"
// parameter of templateCall. Any "email:" / "e-mail:" / "mailto:" label is
// removed from the text together with the address but is not stored.
private void ProcessListingEmailInListItem(ref string listItemText, ref string templateCall) {
	Match found = ListingEmailRegex.Match(listItemText);
	if (!found.Success) {
		return;
	}
	// group 2 is the bare address, without the optional label
	string address = found.Groups[2].Value;
	templateCall = Tools.SetTemplateParameterValue(templateCall, "email", address);
	listItemText = RemoveValueFromListItemText(listItemText, found.Value);
}
// Moves a footnote-style link ("[http://www.example.com]") from the list item
// text into the "url" parameter of templateCall. Nothing is done when the
// template call already carries a url.
private void ProcessListingUrlInListItem(ref string listItemText, ref string templateCall) {
	bool urlAlreadySet = Tools.GetTemplateParameterValue(templateCall, "url") != "";
	if (urlAlreadySet) {
		// url was already set when processing listing name
		return;
	}
	Match footnote = FootnoteUrlRegex.Match(listItemText);
	if (!footnote.Success) {
		return;
	}
	// group 1 is the link target without the surrounding brackets
	templateCall = Tools.SetTemplateParameterValue(templateCall, "url", footnote.Groups[1].Value);
	listItemText = RemoveValueFromListItemText(listItemText, footnote.Value);
}
// Moves a street address from the list item text into the "address" parameter
// of templateCall.
// The "1234 First St" form is tried first. When it matches but looks like a
// distance ("25 km by road"), nothing is changed. Only when that form does not
// match at all is the "Calle Ricardo Montalban, 452" form tried.
private void ProcessListingAddressInListItem(ref string listItemText, ref string templateCall) {
	Match addressMatch = ListingAddressRegex.Match(listItemText);
	if (addressMatch.Success) {
		// group 1 is the address without the trailing separators the pattern also consumes
		string address = addressMatch.Groups[1].Value;
		if (!ListingAddressFalsePositivesRegex.IsMatch(address)) {
			templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
			listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
		}
		return;
	}
	addressMatch = ListingAddressInternationalRegex.Match(listItemText);
	if (addressMatch.Success) {
		// group 1 of the international pattern is only the street-type word
		// ("Calle"), so the whole match is stored; otherwise the rest of the
		// address would be removed from the text and lost
		string address = addressMatch.Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
		listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
	}
}
// Stores whatever is left of the list item text, after sanitising it with
// SanitizeListingContent, in the "content" parameter of templateCall.
// Nothing is done when no text is left.
private void ProcessListingContentInListItem(ref string listItemText, ref string templateCall) {
	bool nothingLeft = String.IsNullOrEmpty(listItemText);
	if (!nothingLeft) {
		listItemText = SanitizeListingContent(listItemText);
		templateCall = Tools.SetTemplateParameterValue(templateCall, "content", listItemText);
	}
}
// Removes every occurrence of value from the list item text and tidies up what
// is left: runs of punctuation matched by InvalidDuplicatePunctuationRegex are
// collapsed to the regex's first group, leading punctuation is stripped, and
// the result is trimmed. Returns the tidied text.
private string RemoveValueFromListItemText(string listItemText, string value) {
	string remaining = listItemText.Replace(value, "").Trim();
	remaining = InvalidDuplicatePunctuationRegex.Replace(remaining, "$1");
	return StripLeadingPunctuation(remaining).Trim();
}
// Perform various tasks on listings to ensure params and other values
// are formatted correctly.
// Every listing template call in every second-level section is run through the
// formatting and sanitising helpers below, in that order; address sanitising
// is only done in aggressive mode. A call that comes out changed replaces the
// original call in the article text. The helpers add their own edit summary
// entries. wikiNamespace and skip are not used.
private string FormatListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	foreach (KeyValuePair<string, string> section in SplitToSecondLevelSections(articleText)) {
		string heading = section.Key;
		string body = section.Value;
		// get all listing tags within the section
		foreach (Match listingMatch in ListingTemplateNamesRegex.Matches(body)) {
			string before = listingMatch.Value;
			string after = ConvertGenericListingToSpecificType(before, ref summary);
			after = MatchListingTypeToSection(after, heading, ref summary);
			after = FormatListingParams(after, ref summary);
			after = SanitizeListingPhoneNumbers(after, ref summary);
			after = SanitizeListingEmail(after, ref summary);
			after = SanitizeListingLatLong(after, ref summary);
			after = SanitizeListingUrl(after, ref summary);
			after = SanitizeListingContent(after, ref summary);
			if (AGGRESSIVE) {
				after = SanitizeListingAddress(after, articleTitle, ref summary);
			}
			if (after.Equals(before)) {
				continue;
			}
			articleText = articleText.Replace(before, after);
		}
	}
	return articleText;
}
// change "{{listing|type=xyz|...}}" to "{{xyz|...}}"
// Only calls of the generic "listing" template are touched, and only when the
// "type" value is one of see/do/buy/eat/drink/sleep (compared case-insensitively).
// The template is then renamed to the lower-cased type and the "type" parameter
// is removed. Any other call is returned unchanged. A change is noted in the
// edit summary. Returns the (possibly updated) template call.
private string ConvertGenericListingToSpecificType(string templateCall, ref string summary) {
	string listingType = Tools.GetTemplateName(templateCall);
	if (!listingType.Equals("listing")) {
		return templateCall;
	}
	// lower-case with the invariant culture: a culture-sensitive ToLower() turns
	// "DRINK" into a dotless-i form under e.g. a Turkish locale, which would
	// then fail the comparison below
	string templateType = Tools.GetTemplateParameterValue(templateCall, "type").ToLowerInvariant();
	string[] specificTypes = {"see", "do", "buy", "eat", "drink", "sleep"};
	if (Array.IndexOf(specificTypes, templateType) < 0) {
		return templateCall;
	}
	string converted = Tools.RenameTemplate(templateCall, templateType);
	converted = Tools.RemoveTemplateParameter(converted, "type");
	if (!converted.Equals(templateCall)) {
		summary = UpdateEditSummary(summary, "update listing type to match expected section type");
	}
	return converted;
}
// Rebuild a listing template call in the layout described by
// Wikivoyage:Listings.
//
// The known parameters (ListingTemplateParamNames) are emitted first, in that
// fixed order, followed by any remaining non-empty parameters on their own
// lines, and finally the "content" parameter on its own line.
//
// templateCall: the full text of the listing template call.
// summary: edit summary; deliberately NOT modified here, because the rebuilt
//          text would otherwise add a note to every article with listings
//          whether or not anything meaningful changed.
// Returns the rebuilt template call text.
private string FormatListingParams(string templateCall, ref string summary) {
	string listingType = Tools.GetTemplateName(templateCall);
	string formattedValue = "{{" + listingType + "\n";
	// loop through expected template arguments and format appropriately.
	// every param that is emitted is also removed from templateCall so that
	// the second loop below only sees what has not been handled yet.
	foreach(string param in ListingTemplateParamNames) {
		if (param.Equals("content")) {
			// content is always written last, see below
			continue;
		}
		string paramValue = Tools.GetTemplateParameterValue(templateCall, param);
		if (param.Equals("type") && (paramValue == "" || !listingType.Equals("listing"))) {
			// only listing uses the "type" attribute. an empty one is dropped;
			// a non-empty one is left in place for the second loop
			if (paramValue == "") {
				templateCall = Tools.RemoveTemplateParameter(templateCall, param);
			}
			continue;
		}
		if ((param.Equals("image") || param.Equals("lastedit")) && paramValue == "") {
			// empty image & lastedit attributes are unnecessary
			continue;
		}
		if (listingType.Equals("sleep") && param.Equals("hours") && paramValue == "") {
			// sleep listings don't use the "hours" attribute
			templateCall = Tools.RemoveTemplateParameter(templateCall, param);
			continue;
		}
		if (!listingType.Equals("sleep") && (param.Equals("checkin") || param.Equals("checkout"))) {
			// only sleep listings use the "checkin" and "checkout" attributes.
			// a non-empty value is left in place for the second loop
			if (paramValue == "") {
				templateCall = Tools.RemoveTemplateParameter(templateCall, param);
			}
			continue;
		}
		if (param.Equals("alt") && paramValue == "" && !ListingAltParamRegex.IsMatch(templateCall)) {
			// do not add an alt tag if it isn't already present
			continue;
		}
		// replace excess whitespace with single spaces
		paramValue = DoubleSpaceRegex.Replace(paramValue, " ");
		formattedValue += "| " + param + "=" + paramValue;
		// add either a newline or a space after the param value, depending on param and template type
		if (param.Equals("email") || param.Equals("directions") || param.Equals("fax") || param.Equals("price") || param.Equals("image") || param.Equals("lastedit")) {
			formattedValue += "\n";
		} else {
			formattedValue += " ";
		}
		templateCall = Tools.RemoveTemplateParameter(templateCall, param);
	}
	// loop through any unexpected template args and format on their own lines
	foreach(KeyValuePair<string, string> templateParameter in Tools.GetTemplateParameterValues(templateCall)) {
		string param = templateParameter.Key;
		if (param.Equals("content")) {
			continue;
		}
		string paramValue = templateParameter.Value;
		if (paramValue == "") {
			// any empty, unrecognized listing params can be removed
			continue;
		}
		formattedValue += "| " + param + "=" + paramValue + "\n";
	}
	// now add template content on its own line
	formattedValue += "| content=" + Tools.GetTemplateParameterValue(templateCall, "content") + "\n";
	formattedValue += "}}";
	return formattedValue;
}
// Run every phone-type field named in ListingPhoneParamNames through
// SanitizePhoneNumber and write the result back into the template call (the
// formatting target is Wikivoyage:Phone numbers). The edit summary is updated
// only when the call text ends up different from what was passed in.
private string SanitizeListingPhoneNumbers(string templateCall, ref string summary) {
	string updatedCall = templateCall;
	for (int i = 0; i < ListingPhoneParamNames.Length; i++) {
		string fieldName = ListingPhoneParamNames[i];
		string cleaned = SanitizePhoneNumber(Tools.GetTemplateParameterValue(updatedCall, fieldName));
		updatedCall = Tools.UpdateTemplateParameterValue(updatedCall, fieldName, cleaned);
	}
	bool changed = !updatedCall.Equals(templateCall);
	if (changed) {
		summary = UpdateEditSummary(summary, "format listing phone # per [[WV:Phone numbers]]");
	}
	return updatedCall;
}
// Clean the listing's "email" field: every MailtoRegex match (a "mailto:"
// prefix and any slashes after it) is removed and the value is trimmed. The
// edit summary is updated only when the call text changes.
private string SanitizeListingEmail(string templateCall, ref string summary) {
	string rawEmail = Tools.GetTemplateParameterValue(templateCall, "email");
	string cleanedEmail = MailtoRegex.Replace(rawEmail, "").Trim();
	string updatedCall = Tools.UpdateTemplateParameterValue(templateCall, "email", cleanedEmail);
	if (!updatedCall.Equals(templateCall)) {
		summary = UpdateEditSummary(summary, "fix invalid listing email");
	}
	return updatedCall;
}
// Trim the precision of the listing's "lat" and "long" fields by replacing
// each LatLongTrimRegex match with its first capture group. How many digits
// are kept is decided by that regex, which is defined elsewhere in the file.
// The edit summary is updated only when the call text changes.
private string SanitizeListingLatLong(string templateCall, ref string summary) {
	string updatedCall = templateCall;
	// latitude first, then longitude
	foreach(string coordParam in new string[] {"lat", "long"}) {
		string coordinate = Tools.GetTemplateParameterValue(updatedCall, coordParam);
		string trimmed = LatLongTrimRegex.Replace(coordinate, "$1");
		updatedCall = Tools.UpdateTemplateParameterValue(updatedCall, coordParam, trimmed);
	}
	if (!updatedCall.Equals(templateCall)) {
		summary = UpdateEditSummary(summary, "trim lat/long precision");
	}
	return updatedCall;
}
// Clean the listing's "url" field.
//
// 1. A footnote-style value such as "[http://www.example.com]" is unwrapped
//    (FootnoteUrlRegex, first capture group).
// 2. A value that looks like a URL (ValidUrlRegex) but carries no scheme gets
//    "http://" prepended.
//
// ValidUrlRegex is not anchored, so it also matches the host part of values
// like "ftp://example.com". Such values already have a scheme and must not be
// prefixed, otherwise the result is "http://ftp://example.com". For that
// reason any value that starts with "scheme://" or a protocol-relative "//"
// is left alone.
//
// The edit summary is updated only when the call text changes.
private string SanitizeListingUrl(string templateCall, ref string summary) {
	string originalTemplateCall = templateCall;
	string url = Tools.GetTemplateParameterValue(templateCall, "url");
	url = FootnoteUrlRegex.Replace(url, "$1");
	// true for "http://", "https://", "//", and any other "scheme://" prefix
	bool hasScheme = Regex.IsMatch(url, @"^([a-z][a-z0-9+.\-]*:)?//", RegexOptions.IgnoreCase);
	if (!hasScheme && ValidUrlRegex.IsMatch(url)) {
		url = "http://" + url;
	}
	templateCall = Tools.UpdateTemplateParameterValue(templateCall, "url", url);
	if (!templateCall.Equals(originalTemplateCall)) {
		summary = UpdateEditSummary(summary, "fix invalid listing URL");
	}
	return templateCall;
}
// Clean up the listing's "content" field (capitalization, stray punctuation,
// etc.) by delegating to the string-only overload, then write the result back
// into the template call. The edit summary is accepted for signature parity
// with the other sanitizers but is not modified.
private string SanitizeListingContent(string templateCall, ref string summary) {
	string cleanedContent = SanitizeListingContent(Tools.GetTemplateParameterValue(templateCall, "content"));
	return Tools.UpdateTemplateParameterValue(templateCall, "content", cleanedContent);
}
// Clean up raw listing content text. Null or empty input is returned as-is.
// Otherwise, in order: EmptyPunctuationRegex matches are removed, a leading
// "is " fragment (any case) is dropped, the first character is upper-cased,
// and the whole value is blanked if OnlyPunctuationRegex matches what is left.
private string SanitizeListingContent(string content) {
	if (String.IsNullOrEmpty(content)) {
		return content;
	}
	string text = EmptyPunctuationRegex.Replace(content, "");
	// sentence fragments such as "is a nice hotel" are left over from
	// text-to-listing conversions
	const string fragmentPrefix = "is ";
	bool startsWithFragment = text.ToLower().StartsWith(fragmentPrefix);
	if (startsWithFragment) {
		text = text.Substring(fragmentPrefix.Length);
	}
	text = Capitalize(text);
	// nothing meaningful left: return an empty value instead
	return OnlyPunctuationRegex.IsMatch(text) ? "" : text;
}
// Clean the listing's "address" field.
//
// templateCall: the full text of the listing template call.
// articleName:  title of the article; used to derive the city name that is
//               stripped from the end of the address.
// summary:      edit summary, updated when the call text changes.
//
// Behaviour:
// - an empty address leaves the call untouched.
// - if the address matches ListingAddressIsDirectionsRegex and the
//   "directions" field is empty, the value is moved to "directions" and the
//   method returns immediately.
// - otherwise a trailing US state / zip code and a trailing city name are
//   removed, stray punctuation is stripped, and a trailing full street type
//   ("Street", "Avenue", ...) is abbreviated.
private string SanitizeListingAddress(string templateCall, string articleName, ref string summary) {
	string originalTemplateCall = templateCall;
	string address = Tools.GetTemplateParameterValue(templateCall, "address");
	if (String.IsNullOrEmpty(address)) {
		return templateCall;
	}
	Match m = ListingAddressIsDirectionsRegex.Match(address);
	if (m.Success) {
		// the address field belongs in the directions field
		string directions = Tools.GetTemplateParameterValue(templateCall, "directions");
		if (String.IsNullOrEmpty(directions)) {
			templateCall = Tools.UpdateTemplateParameterValue(templateCall, "directions", address);
			templateCall = Tools.UpdateTemplateParameterValue(templateCall, "address", "");
			summary = UpdateEditSummary(summary, "listing address moved to directions");
			return templateCall;
		}
	}
	// strip out state and zip code if present
	address = AddressWithStateOrZipRegex.Replace(address, "$1");
	address = StripStrayPunctuation(address);
	// strip out the city if it is present
	string city = GetCityFromArticleName(articleName);
	if (!String.IsNullOrEmpty(city) && address.EndsWith(city, StringComparison.OrdinalIgnoreCase)) {
		// an ordinal match has exactly city.Length characters, so the cut
		// position can be computed directly
		int pos = address.Length - city.Length;
		// only strip when the city starts on a word boundary, so that an
		// article named "Bath" does not truncate "12 Rue Sabbath"
		if (pos == 0 || !Char.IsLetterOrDigit(address[pos - 1])) {
			address = address.Substring(0, pos).Trim();
		}
	}
	address = StripStrayPunctuation(address);
	// properly abbreviate street name (full name, abbreviation)
	string[,] streetTypes = {
		{"Avenue", "Ave"},
		{"Boulevard", "Blvd"},
		{"Court", "Ct"},
		{"Drive", "Dr"},
		{"Lane", "Ln"},
		{"Place", "Pl"},
		{"Road", "Rd"},
		{"Street", "St"}
	};
	for (int i = 0; i < streetTypes.GetLength(0); i++) {
		address = AbbreviateStreeType(address, streetTypes[i, 0], streetTypes[i, 1]);
	}
	templateCall = Tools.UpdateTemplateParameterValue(templateCall, "address", address);
	if (!templateCall.Equals(originalTemplateCall)) {
		summary = UpdateEditSummary(summary, "format listing address per [[WV:Listings]]");
	}
	return templateCall;
}
// Rename the listing template so that its type agrees with the section it is
// found in (example: listings in the "See" section become "see" listings).
// The combined "Eat and drink" / "See and do" sections are skipped. The edit
// summary is updated only when the call text changes.
private string MatchListingTypeToSection(string templateCall, string sectionName, ref string summary) {
	string loweredSection = sectionName.ToLower();
	if (loweredSection == "eat and drink" || loweredSection == "see and do") {
		// more than one listing type is valid in a combined section
		return templateCall;
	}
	string expectedType = GetValidListingTypeForSection(sectionName);
	if (Tools.GetTemplateName(templateCall).Equals(expectedType)) {
		// already the right type
		return templateCall;
	}
	string renamedCall = Tools.RenameTemplate(templateCall, expectedType, false);
	if (!renamedCall.Equals(templateCall)) {
		summary = UpdateEditSummary(summary, "update listing type to match expected section type");
	}
	return renamedCall;
}
// Split the article into (section name, section content) pairs at every
// heading matched by WikiRegexes.HeadingLevelTwo. Text before the first
// heading is returned with an empty section name.
private static List<KeyValuePair<string, string>> SplitToSecondLevelSections(string articleContent) {
	Regex secondLevelHeading = WikiRegexes.HeadingLevelTwo;
	return SplitToHeadingSections(articleContent, secondLevelHeading);
}
// Split the text into (section name, section content) pairs at every heading
// matched by WikiRegexes.HeadingLevelThree. Text before the first heading is
// returned with an empty section name.
private static List<KeyValuePair<string, string>> SplitToThirdLevelSections(string articleContent) {
	Regex thirdLevelHeading = WikiRegexes.HeadingLevelThree;
	return SplitToHeadingSections(articleContent, thirdLevelHeading);
}
// Split text into sections at every match of headingRegex.
//
// articleContent: the text to split.
// headingRegex:   regex matching a heading line; capture group 1 must hold
//                 the heading name.
//
// Returns the sections in document order as (name, content) pairs. Each
// content value starts with its own heading line. Text before the first
// heading is returned with an empty name; no such entry is added when the
// text starts with a heading. Section names are trimmed for every section,
// including the final one, so that callers can compare them directly against
// values like "See".
private static List<KeyValuePair<string, string>> SplitToHeadingSections(string articleContent, Regex headingRegex) {
	List<KeyValuePair<string, string>> sections = new List<KeyValuePair<string, string>>();
	int lastmatchpos = 0;
	Match lastMatch = null;
	foreach(Match m in headingRegex.Matches(articleContent)) {
		if (m.Index > 0) {
			// Don't add empty first section if page starts with heading
			string sectionContent = articleContent.Substring(lastmatchpos, m.Index-lastmatchpos);
			string sectionName = (lastMatch != null) ? lastMatch.Groups[1].Value.Trim() : "";
			sections.Add(new KeyValuePair<string, string>(sectionName, sectionContent));
		}
		lastmatchpos = m.Index;
		lastMatch = m;
	}
	// Add text of final section
	string sectionContentLast = articleContent.Substring(lastmatchpos);
	string sectionNameLast = (lastMatch != null) ? lastMatch.Groups[1].Value.Trim() : "";
	sections.Add(new KeyValuePair<string, string>(sectionNameLast, sectionContentLast));
	return sections;
}
// Return true when the section text holds nothing except a heading matched by
// WikiRegexes.HeadingLevelTwo (see IsEmptySection).
private static bool IsEmptySecondLevelSection(string sectionContent) {
	Regex secondLevelHeading = WikiRegexes.HeadingLevelTwo;
	return IsEmptySection(sectionContent, secondLevelHeading);
}
// Return true when the section text holds nothing except a heading matched by
// WikiRegexes.HeadingLevelThree (see IsEmptySection).
private static bool IsEmptyThirdLevelSection(string sectionContent) {
	Regex thirdLevelHeading = WikiRegexes.HeadingLevelThree;
	return IsEmptySection(sectionContent, thirdLevelHeading);
}
// Return true when the section consists of its heading and nothing else,
// ignoring surrounding whitespace. Returns false when the text does not start
// with a heading matched by headingRegex.
private static bool IsEmptySection(string sectionContent, Regex headingRegex) {
	Match heading = headingRegex.Match(sectionContent);
	bool startsWithHeading = heading.Success && heading.Index == 0;
	if (!startsWithHeading) {
		// not expected for text produced by the section splitters
		return false;
	}
	// empty when the heading accounts for all non-whitespace text
	int headingLength = heading.Value.Trim().Length;
	int sectionLength = sectionContent.Trim().Length;
	return headingLength == sectionLength;
}
// Map a section name to the listing template type expected inside it ("See"
// returns "see"). The single-word names are matched case-sensitively; "Eat
// and drink" is matched in any case and maps to "eat". Everything else maps
// to the generic "listing".
private static string GetValidListingTypeForSection(string sectionName) {
	switch (sectionName) {
		case "See":
			return "see";
		case "Do":
			return "do";
		case "Buy":
			return "buy";
		case "Eat":
			return "eat";
		case "Drink":
			return "drink";
		case "Sleep":
			return "sleep";
		default:
			if (sectionName.ToLower() == "eat and drink") {
				return "eat";
			}
			return "listing";
	}
}
// Normalize the first phone number found in the text. The number may be
// embedded in other text such as "888-888-8888 (front office)"; only the
// PhoneNumberRegex match is rewritten and the surrounding text is kept. Text
// without a match is returned unchanged.
private static string SanitizePhoneNumber(string phoneNumberText) {
	Match found = PhoneNumberRegex.Match(phoneNumberText);
	if (!found.Success) {
		return phoneNumberText;
	}
	string rawNumber = found.Value;
	// drop quotes, turn parentheses into spaces, and use dashes for periods
	string cleaned = rawNumber
		.Replace("'", "")
		.Replace("(", " ").Trim()
		.Replace(")", " ").Trim()
		.Replace(".", "-");
	cleaned = ExcessWhitespaceRegex.Replace(cleaned, " ");
	// close up any space left next to a dash or after a plus sign
	cleaned = cleaned.Replace(" -", "-").Replace("- ", "-").Replace("+ ", "+");
	// a leading "1" country code is written as "+1"
	bool startsWithBareOne = cleaned.StartsWith("1 ") || cleaned.StartsWith("1-");
	if (startsWithBareOne) {
		cleaned = "+" + cleaned;
	}
	// every occurrence of the matched text is substituted
	return phoneNumberText.Replace(rawNumber, cleaned);
}
// Derive the city name from the article name: the base page name, with any
// trailing parenthesized disambiguation removed, so "Foo (Disambiguation)"
// yields "Foo".
private static string GetCityFromArticleName(string articleName) {
	string pageName = Tools.BasePageName(articleName);
	Match disambiguated = CityNameWithDisambiguationRegex.Match(pageName);
	if (disambiguated.Success) {
		return disambiguated.Groups[1].Value;
	}
	return pageName;
}
// When the address ends with a space followed by the full street type (in any
// case), replace that trailing street type with the abbreviation. Any other
// address is returned unchanged.
private static string AbbreviateStreeType(string address, string invalidStreetType, string validStreetType) {
	string loweredAddress = address.ToLower();
	string loweredType = invalidStreetType.ToLower();
	if (!loweredAddress.EndsWith(" " + loweredType)) {
		return address;
	}
	int cutAt = loweredAddress.LastIndexOf(loweredType);
	return (cutAt > 0) ? address.Substring(0, cutAt) + validStreetType : address;
}
// Remove invalid punctuation from both ends of the text.
private static string StripStrayPunctuation(string text) {
	const bool leading = true;
	const bool trailing = true;
	return StripPunctuation(text, leading, trailing);
}
// Remove invalid punctuation from the start of the text only.
private static string StripLeadingPunctuation(string text) {
	const bool leading = true;
	const bool trailing = false;
	return StripPunctuation(text, leading, trailing);
}
// Remove invalid punctuation from the end of the text only.
private static string StripTrailingPunctuation(string text) {
	const bool leading = false;
	const bool trailing = true;
	return StripPunctuation(text, leading, trailing);
}
// Remove invalid punctuation from the end and/or the start of the text, as
// selected by the two flags. The trailing side (InvalidTrailingPunctuationRegex)
// is handled before the leading side (InvalidLeadingPunctuationRegex).
private static string StripPunctuation(string text, bool stripLeading, bool stripTrailing) {
	string withoutTrailing = stripTrailing
		? InvalidTrailingPunctuationRegex.Replace(text, "")
		: text;
	return stripLeading
		? InvalidLeadingPunctuationRegex.Replace(withoutTrailing, "")
		: withoutTrailing;
}
// Run the regex against the text and store the resulting Match object in the
// "match" param. Returns true if the text matches the pattern, otherwise
// false.
private static bool MatchText(string matchText, Regex regex, ref Match match) {
	Match result = regex.Match(matchText);
	match = result;
	return result.Success;
}
// Return the text with its first character upper-cased. Null and empty input
// are returned as-is.
private static string Capitalize(string text) {
	if (String.IsNullOrEmpty(text)) {
		return text;
	}
	// Substring(1) is empty for one-character text, so no special case is needed
	char firstUpper = Char.ToUpper(text[0]);
	return firstUpper + text.Substring(1);
}
// Add textToAdd to the comma-separated edit summary unless it is already
// present.
//
// summary:   current edit summary; null or empty means no entries yet.
// textToAdd: entry to add.
//
// Returns textToAdd alone when the summary is null or empty, the summary
// unchanged when one of its comma-separated fields (after trimming) equals
// textToAdd, and otherwise the summary with ", " + textToAdd appended.
private static string UpdateEditSummary(string summary, string textToAdd) {
	// treat null like an empty summary instead of failing on Split below
	if (String.IsNullOrEmpty(summary)) {
		return textToAdd;
	}
	foreach(string summaryField in summary.Split(',')) {
		if (summaryField.Trim().Equals(textToAdd)) {
			// text already present in edit summary
			return summary;
		}
	}
	return summary + ", " + textToAdd;
}
// Append a value to a ", "-separated list. A null or empty list yields just
// the value.
private static string AppendCSV(string currentList, string valueToAdd) {
	bool listIsEmpty = String.IsNullOrEmpty(currentList);
	return listIsEmpty ? valueToAdd : currentList + ", " + valueToAdd;
}
// Return true when CityStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains a city status template.
private static bool IsCityArticle(string articleText) {
	Match statusTemplate = CityStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when CountryStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains a country status template.
private static bool IsCountryArticle(string articleText) {
	Match statusTemplate = CountryStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when DiveguideStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains a dive guide status template.
private static bool IsDiveguideArticle(string articleText) {
	Match statusTemplate = DiveguideStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when DistrictStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains a district status template.
private static bool IsDistrictArticle(string articleText) {
	Match statusTemplate = DistrictStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when ItineraryStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains an itinerary status template.
private static bool IsItineraryArticle(string articleText) {
	Match statusTemplate = ItineraryStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when ParkStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains a park status template.
private static bool IsParkArticle(string articleText) {
	Match statusTemplate = ParkStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when PhrasebookStatusTemplateNamesRegex matches somewhere in
// the article text, i.e. the article contains a phrasebook status template.
private static bool IsPhrasebookArticle(string articleText) {
	Match statusTemplate = PhrasebookStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when RegionStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains a region status template.
private static bool IsRegionArticle(string articleText) {
	Match statusTemplate = RegionStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// Return true when TopicStatusTemplateNamesRegex matches somewhere in the
// article text, i.e. the article contains a topic status template.
private static bool IsTopicArticle(string articleText) {
	Match statusTemplate = TopicStatusTemplateNamesRegex.Match(articleText);
	return statusTemplate.Success;
}
// TODO:
// - move tollfree numbers to tollfree in listings
// - don't allow "otheruses" to be moved above the page banner