User:Wrh2/AWB module
Jump to navigation
Jump to search
// Feature switch read by ProcessArticle and FormatListings: when true, the text-to-listing
// conversion pass and the listing address clean-up are run in addition to the regular passes.
// implementing the "aggressive" rules is more likely to produce false positives
private static readonly bool AGGRESSIVE = true;
// Built by Tools.NestedTemplateRegex from the listing template names; each match is treated
// by the methods below as one complete template call.
private static readonly Regex ListingTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("listing,see,do,buy,eat,drink,sleep".Split(',')));
// Known listing parameters. FormatListingParams walks this array in order, so it also
// defines the order in which parameters are written ("content" is always written last).
private static readonly string[] ListingTemplateParamNames = {"type","name","alt","url","email","address","lat","long","directions","phone","tollfree","fax","image","hours","checkin","checkout","price","lastedit","content"};
// Names of the listing parameters that carry phone numbers.
private static readonly string[] ListingPhoneParamNames = {"phone","fax","tollfree"};
// Section names in which ConvertTextToListings converts list items to listings.
// The pattern is unanchored, so it matches anywhere inside a section name.
private static readonly Regex TextToListingSectionNames = new Regex(@"(get in|get around|see|do|buy|eat|drink|sleep)", RegexOptions.IgnoreCase);
// A run that starts with a digit, "+" or "(", ends with a digit or ")" and contains only
// whitespace, digits, parentheses, hyphens, dots, apostrophes and "+" in between.
// Apostrophes directly before or after the run are part of the match.
private static readonly Regex PhoneNumberRegex = new Regex(@"'*[\d\+\(][\s\d\(\)\-\.'\+]+[\d\)]'*");
// "mailto:" followed by any number of slashes
private static readonly Regex MailtoRegex = new Regex(@"mailto:[/]*", RegexOptions.IgnoreCase);
// two or more consecutive whitespace characters
private static readonly Regex ExcessWhitespaceRegex = new Regex(@"\s\s+", RegexOptions.Singleline);
// "http://www.example.com"
// Optional "http://" or "https://" prefix, then one or more dot-terminated labels and a final
// label. Labels are letters only: digits and hyphens are not accepted by this pattern.
private static readonly string ValidUrlPattern = @"((http(s)?:)//)?(([a-z]+\.)+)([a-z]+)";
private static readonly Regex ValidUrlRegex = new Regex(ValidUrlPattern, RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "[http://www.example.com]"
// A bracketed URL without link text; group 1 is the URL without the brackets.
// NOTE(review): unlike ValidUrlRegex this one is built without RegexOptions.IgnoreCase, so
// URLs containing upper-case letters in the scheme or host do not match - confirm intended.
private static readonly Regex FootnoteUrlRegex = new Regex(@"\[(" + ValidUrlPattern + @"([^\]\s]+))\]", RegexOptions.Singleline);
// "Foo (disambiguation)"
// Group 1 is the text before the parenthesized qualifier.
private static readonly Regex CityNameWithDisambiguationRegex = new Regex(@"([^\(]+) \([^\)]+\)", RegexOptions.Singleline);
// "blah, CA 99999", "blah, CA 99999-1234", "blah CA", etc
// Group 1 is the text before the state. At least one of ".", "," or "-" must separate it
// from the US state name or abbreviation (matched case-insensitively). An optional ZIP or
// ZIP+4 code may follow, and the match must end at the end of the input.
private static readonly Regex AddressWithStateOrZipRegex = new Regex(@"(.+)[\.,\-]+\s*(AL|alabama|AK|alaska|AZ|arizona|AR|arkansas|CA|california|CO|colorado|CT|connecticut|DC|DE|delaware|FL|florida|GA|georgia|HI|hawaii|ID|idaho|IL|illinois|IN|indiana|IA|iowa|KS|kansas|KY|kentucky|LA|louisiana|ME|maine|MD|maryland|MA|massachusetts|MI|michigan|MN|minnesota|MS|mississippi|MO|missouri|MT|montana|NE|nebraska|NV|nevada|NH|new hampshire|NJ|new jersey|NM|new mexico|NY|new york|NC|north carolina|ND|north dakota|OH|ohio|OK|oklahoma|OR|oregon|PA|pennsylvania|RI|rhode island|SC|south carolina|SD|south dakota|TN|tennessee|TX|texas|UT|utah|VT|vermont|VA|virginia|WA|washington|WV|west virginia|WI|wisconsin|WY|wyoming)([\s,\-]*[0-9]{5}(\-[0-9]{4})?)?$", RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "* ", "** ", etc
// A line that consists only of list bullets and optional whitespace, including its newline.
private static readonly Regex EmptyListItemRegex = new Regex(@"^\*+\s*\n", RegexOptions.Multiline);
// A capitalized phrase (not starting with "The ") directly followed by a "[http...]" footnote link.
// Groups: 1 and 5 = apostrophes around the phrase, 2 = the phrase, 6 = the URL.
// FootnoteToFrontlink relies on these group numbers in its replacement string.
private static readonly Regex FootnoteToFrontLinkRegex = new Regex(@"('*)((?!The )\p{Lu}[\w\-'/]*[\w]( (and|del|de|of|&|the|la|le|for|\p{Lu}[\w\-'/]*[\w]))*)('*)[, ]*\[(http[^ ]+)( )*\]");
// "9.30 am": groups 1 = hour, 2 = minutes, 3 = spaces before the suffix, 4 = am/pm suffix
private static readonly Regex TimeValuesShouldUseColonAsSeperator = new Regex(@"\b([1-9]|10|11|12)\.([0-5][0-9])([ ]*)((a|p)\.?\s*m\.|(a|p)\.?\s*m\b)", RegexOptions.IgnoreCase);
// "9 a.m.", "9:30 am": groups 1 = hour, 2 = ":mm" (if present)
private static readonly Regex TimeValuesSuffixAM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(a\.\s*m\.|a\s*m\b)", RegexOptions.IgnoreCase);
// "9 p.m.", "9:30 pm": groups 1 = hour, 2 = ":mm" (if present)
private static readonly Regex TimeValuesSuffixPM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(p\.\s*m\.|p\s*m\b)", RegexOptions.IgnoreCase);
// "9:00AM": groups 1 = hour, 2 = AM/PM
private static readonly Regex TimeValuesRoundToHour = new Regex(@"\b([1-9]|10|11|12):00(AM|PM)\b", RegexOptions.IgnoreCase);
// "12PM"
private static readonly Regex TimeValuesHourToNoon = new Regex(@"\b(12PM)\b", RegexOptions.IgnoreCase);
// "12AM"
private static readonly Regex TimeValuesHourToMidnight = new Regex(@"\b(12AM)\b", RegexOptions.IgnoreCase);
// "9AM - 5PM", "noon - midnight": groups 1 = start time, 6 = the hyphen, 7 = end time
private static readonly Regex TimeValuesTrimWhitespace = new Regex(@"\b((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\s*(\-)\s*((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\b", RegexOptions.IgnoreCase);
// External links that point at English Wikipedia or English Wikivoyage pages.
// Group 1 is the page name; for the variants with link text, group 2 is that text.
// The dots of the host name are escaped so that only the literal host names match
// (an unescaped "." would also accept any other character in that position).
private static readonly Regex ExternalToInternalLinkWikipedia = new Regex(@"\[http[s]?://en\.wikipedia\.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikipediaNoText = new Regex(@"\[http[s]?://en\.wikipedia\.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyage = new Regex(@"\[http[s]?://en\.wikivoyage\.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyageNoText = new Regex(@"\[http[s]?://en\.wikivoyage\.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);
// "* blah blah blah", "** blah blah blah", etc
// Group 1 is the item text after the bullets. The first two characters of that text must
// not be "{", so items that already start with a template call are not candidates.
private static readonly Regex CandidateListingItemRegex = new Regex(@"^\*+\s*([^\{]{2}.+)$", RegexOptions.Multiline);
// Bold wiki text ('''...''') of at least three characters, optionally preceded by "the".
// Group 2 is the text between the bold markers.
private static readonly Regex ListingNameRegex = new Regex(@"(the\s+)*'''(.{3,}?)'''", RegexOptions.IgnoreCase);
// "[http://www.example.com/ Example Text]"
// Groups: 1 = URL, 2 = link text
private static readonly Regex ExternalLinkWithTextRegex = new Regex(@"\[(http[^\]\s]+)\s+([^\]]+)\]", RegexOptions.IgnoreCase);
// "123-456-7890"
// Two or more digit groups separated by whitespace or hyphens; each group may be wrapped in
// '', "+" or parentheses, and an " ext 123" style extension may follow.
// This fragment is embedded in the labelled fax/phone/toll-free patterns, so adding or
// removing a capture group here shifts the group numbers those callers read.
private static readonly string ListingPhoneNumber = @"((''|\+|\()*\d+(''|\)*)[\s\-]+)+(''|\+|\()*\d+(''|\)*)( ext\.? \d+)?";
private static readonly Regex ListingPhoneNumberRegex = new Regex(ListingPhoneNumber, RegexOptions.IgnoreCase);
// "fax: 123-456-7890"
// Group 4 is the number itself (read by ProcessListingPhoneInListItem).
private static readonly string ListingFaxNumber = @"(\(''|''\(|\()?fax(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingFaxNumberRegex = new Regex(ListingFaxNumber, RegexOptions.IgnoreCase);
// "telephone: 123-456-7890" and similar
// Groups: 1 = optional opening "(" / '' decoration, 2 = the label, 3 = whitespace,
// 4 = the phone number (read by ProcessListingPhoneInListItem).
// The label list previously contained the single alternative "call:telephone:", which is a
// missing "|" between "call:" and "telephone:"; as a result neither "call: ..." nor
// "telephone: ..." was recognized. Both labels are now separate alternatives. No capture
// group was added or removed, so group numbers are unchanged.
private static readonly string ListingPhoneNumberWithLabel = @"(\(''|''\(|\()?(\u260e|call:|call|telephone:|telephone|tel:|tel\.:|tel\.|tel|phone:|phone|ph:|ph\.:|ph\.|ph|\u260E)(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingPhoneNumberWithLabelRegex = new Regex(ListingPhoneNumberWithLabel, RegexOptions.IgnoreCase);
// "toll-free: 123-456-7890" and similar
// Group 5 is the number itself (read by ProcessListingPhoneInListItem).
private static readonly string ListingTollfreeNumber = @"(\(''|''\(|\()?(toll[ \-]?free)(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingTollfreeNumberRegex = new Regex(ListingTollfreeNumber, RegexOptions.IgnoreCase);
// Alternation fragments listing punctuation that is invalid at either end of a text,
// only at the start, or only at the end. They are combined into the regexes below.
private static readonly string InvalidLeadingOrTrailingPunctuation = @",|\-|\*|\:|\–|;";
private static readonly string InvalidLeadingPunctuation = @"\.|!|\?|\)|\]|\}|—|;";
private static readonly string InvalidTrailingPunctuation = @"\(|\[|\{|;";
// punctuation that should not appear twice in a row
private static readonly string InvalidDuplicatePunctuation = @"\.|!|\?|" + InvalidLeadingOrTrailingPunctuation;
// run of whitespace / invalid punctuation at the start of a line
private static readonly Regex InvalidLeadingPunctuationRegex = new Regex(@"^(\s|" + InvalidLeadingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+", RegexOptions.Multiline);
// run of whitespace / invalid punctuation at the end of a line
private static readonly Regex InvalidTrailingPunctuationRegex = new Regex(@"(\s|" + InvalidTrailingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+$", RegexOptions.Multiline);
// One or more punctuation marks or whitespace characters followed by another punctuation mark.
// Group 1 is a repeated group, so "$1" in a replacement yields only its last repetition.
private static readonly Regex InvalidDuplicatePunctuationRegex = new Regex(@"(" + InvalidDuplicatePunctuation + @"|\s)+(" + InvalidDuplicatePunctuation + @")");
// "email: foo@bar.com" OR "mailto:foo@bar.com" OR "foo@bar.com"
// Groups: 1 = optional label, 2 = the e-mail address. The top-level domain is limited to
// 2-4 letters by this pattern.
private static readonly string ListingEmail = @"(mailto:|e-mail:|email:)?\s*(\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b)";
private static readonly Regex ListingEmailRegex = new Regex(ListingEmail, RegexOptions.IgnoreCase);
// "1234 First St"
// House number, optional compass letter, one to three name words, a street type and an
// optional unit number or compass suffix. Group 1 is the whole address; trailing dots,
// commas and spaces are matched but lie outside group 1.
private static readonly string ListingAddress = @"([\d]+([/\-][\d]+)? ([nesw]\.? )?([\p{L}\d]+[ \.\-]*){1,3} (avenue|ave|av|boulevard|blvd|court|ct|drive|dr|expressway|expwy|freeway|fwy|highway( \d{1,3})?|hwy( \d{1,3})?|lane|ln|loop|parkway|pkwy|place|pl|road|rd|row|street|st|way)\b(\.? (#(\s)*[\d]+|north|ne|nw|n|east|e|south|se|sw|s|west|w)\b)?)[\. , ]*";
private static readonly Regex ListingAddressRegex = new Regex(ListingAddress, RegexOptions.IgnoreCase);
// the above pattern will match things like "25 km by road", so add a pattern to catch those
private static readonly string ListingAddressFalsePositives = @"\b(km|kilometer|kilometers|mi|mile|miles)\b";
private static readonly Regex ListingAddressFalsePositivesRegex = new Regex(ListingAddressFalsePositives, RegexOptions.IgnoreCase);
// "Calle Ricardo Montalban, 452"
// Street type first, then one to three name words, a comma and the house number.
// Unlike ListingAddress there is no group around the whole address: group 1 is only the
// street-type word, so the full address is the match value.
private static readonly string ListingAddressInternational = @"(avenida|ave|av|calle|estrada|est|rua)(\.)? ([\p{L}\d]+[ \.\-]*){1,3}, [\d]+([/\-][\d]+)?";
private static readonly Regex ListingAddressInternationalRegex = new Regex(ListingAddressInternational, RegexOptions.IgnoreCase);
// Matches a line that starts with a word typical of directions ("between", "corner",
// "end", "next", "on") followed by whitespace.
// The alternation used to end with an empty alternative ("...|on|)"), which made the word
// optional and reduced the pattern to "any line that starts with whitespace". The stray
// "|" is removed so that one of the listed words is actually required.
private static readonly Regex ListingAddressIsDirectionsRegex = new Regex(@"^(between|corner|end|next|on)\s", RegexOptions.Multiline);
// One regex per article type, each built by Tools.NestedTemplateRegex from the four status
// template names (outline, usable, guide, star) of that article type.
private static readonly Regex CityStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlinecity", "usablecity", "guidecity", "starcity" });
private static readonly Regex CountryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlinecountry", "usablecountry", "guidecountry", "starcountry" });
private static readonly Regex DiveguideStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlinediveguide", "usablediveguide", "guidediveguide", "stardiveguide" });
private static readonly Regex DistrictStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlinedistrict", "usabledistrict", "guidedistrict", "stardistrict" });
private static readonly Regex ItineraryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlineitinerary", "usableitinerary", "guideitinerary", "staritinerary" });
private static readonly Regex ParkStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlinepark", "usablepark", "guidepark", "starpark" });
private static readonly Regex PhrasebookStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlinephrasebook", "usablephrasebook", "guidephrasebook", "starphrasebook" });
private static readonly Regex RegionStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlineregion", "usableregion", "guideregion", "starregion" });
private static readonly Regex TopicStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string> { "outlinetopic", "usabletopic", "guidetopic", "startopic" });
// "| alt =" - used by FormatListingParams to tell whether a listing already has an "alt" parameter
private static readonly Regex ListingAltParamRegex = new Regex(@"\|\s*alt\s*=", RegexOptions.Singleline);
// "()", "[]", "{}"
// (whitespace between the two brackets is allowed)
private static readonly Regex EmptyPunctuationRegex = new Regex(@"(\(\s*\)|\[\s*\]|\{\s*\})");
// text that consists solely of apostrophes, hyphens, brackets, asterisks, ".", "?", "!" and whitespace
private static readonly Regex OnlyPunctuationRegex = new Regex(@"^['\-\(\)\[\]\{\}\*\.\?!\s]+$", RegexOptions.Singleline);
// match a single digit
private static readonly Regex DigitRegex = new Regex(@"\d");
// two or more consecutive whitespace characters (not only spaces)
private static readonly Regex DoubleSpaceRegex = new Regex(@"\s{2,}");
// Heading names that RemoveEmptyObsoleteHeadings removes when the section is empty, per
// article type and heading level. All four patterns are unanchored, so they match
// anywhere inside a heading name.
private static readonly Regex InvalidEmptySecondLevelCityHeading = new Regex(@"(cope|learn|respect|stay safe|stay healthy|talk|work)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptySecondLevelRegionHeading = new Regex(@"(talk|regions)", RegexOptions.IgnoreCase);
// "by" followed by at least one whitespace-separated word character, e.g. "By car"
private static readonly Regex InvalidEmptyThirdLevelCityHeading = new Regex(@"(by(\s+\w)+)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptyThirdLevelRegionHeading = new Regex(@"(itineraries)", RegexOptions.IgnoreCase);
// Maps non-standard second-level heading titles to the standard title that UpdateHeadings
// substitutes. Every pattern is anchored at both ends and is matched against the lower-cased,
// trimmed title. Entries whose pattern equals the target (apart from case), such as
// "get in", normalize the capitalization of an otherwise correct heading.
private static readonly Dictionary<Regex, string> InvalidSecondLevelHeadings = new Dictionary<Regex, string> {
{new Regex(@"^(know|information)$", RegexOptions.IgnoreCase), "Understand"},
{new Regex(@"^(get in|getting in|getting there)$", RegexOptions.IgnoreCase), "Get in"},
{new Regex(@"^(get around|getting around)$", RegexOptions.IgnoreCase), "Get around"},
{new Regex(@"^(sights)$", RegexOptions.IgnoreCase), "See"},
{new Regex(@"^(activities)$", RegexOptions.IgnoreCase), "Do"},
{new Regex(@"^(shopping|shops|shop)$", RegexOptions.IgnoreCase), "Buy"},
{new Regex(@"^(restaurants|dining)$", RegexOptions.IgnoreCase), "Eat"},
{new Regex(@"^(bars|nightlife)$", RegexOptions.IgnoreCase), "Drink"},
{new Regex(@"^(accommodation|hotels|stay)$", RegexOptions.IgnoreCase), "Sleep"},
{new Regex(@"^(stay healthy)$", RegexOptions.IgnoreCase), "Stay healthy"},
{new Regex(@"^(stay safe|safety)$", RegexOptions.IgnoreCase), "Stay safe"},
{new Regex(@"^(go next|get out|nearby)$", RegexOptions.IgnoreCase), "Go next"}
};
// Headings starting with "midrange", "mid range", "mid-range" or "moderate"; UpdateHeadings
// rewrites them to "Mid-range" for headings that are not second-level.
private static readonly Regex MidrangeHeadingRegex = new Regex(@"^(mid[ \-]*range|moderate)", RegexOptions.IgnoreCase);
// "By car", "On foot": groups 1 = "by"/"on", 2 = the rest of the heading
private static readonly Regex ByOnHeadingRegex = new Regex(@"^(by|on) (.+)", RegexOptions.IgnoreCase);
// Typographic symbols and the plain replacement that ReplaceInvalidSymbols substitutes for
// each of them. The bullet is only replaced at the start of a line; the trademark-style
// symbols are removed altogether.
private static readonly Dictionary<Regex, string> InvalidSymbols = new Dictionary<Regex, string> {
{new Regex(@"^(•)", RegexOptions.Multiline), "*"},
{new Regex(@"(“|”)"), "\""},
{new Regex(@"(’|‘)"), "'"},
{new Regex(@"…"), "..."},
{new Regex(@"(®|©|™)"), ""}
};
// "www.example.com" that is not directly preceded by "/". Group 1 is the preceding
// character, group 2 the host name. Because group 1 must consume one character, a host name
// at the very start of the text is not matched.
private static readonly Regex NoHttpUrlRegex = new Regex(@"([^/])(www\.[a-z0-9\-]+\.[a-z0-9\-]+)", RegexOptions.IgnoreCase);
// "-123.1234567", "-123.1234567890"
// Group 1 is the coordinate up to its eighth decimal place, group 3 the digits beyond that.
// NOTE(review): the pattern needs at least nine decimals to match, so the first example
// above (seven decimals) would not match - confirm which precision is intended.
private static readonly Regex LatLongTrimRegex = new Regex(@"((\-)?[0-9]{1,3}\.[0-9]{8})([0-9]+)");
// Module entry point: runs every clean-up pass over the article text in a fixed order and
// returns the resulting text.
//   summary - receives the edit summary accumulated by the passes (starts out empty)
//   skip    - set to true when the passes left the text unchanged; the passes may also
//             set it themselves through the ref argument
public string ProcessArticle(string articleText, string articleTitle, int wikiNamespace, out string summary, out bool skip) {
    summary = "";
    skip = false;

    string text = articleText;
    text = ReplaceInvalidSymbols(text, articleTitle, wikiNamespace, ref summary, ref skip);
    text = FixInvalidUrls(text, articleTitle, wikiNamespace, ref summary, ref skip);
    text = UpdateHeadings(text, articleTitle, wikiNamespace, ref summary, ref skip);
    text = RemoveEmptyListings(text, articleTitle, wikiNamespace, ref summary, ref skip);
    text = RemoveEmptyObsoleteHeadings(text, articleTitle, wikiNamespace, ref summary, ref skip);
    text = ExternalToInternalLink(text, articleTitle, wikiNamespace, ref summary, ref skip);
    text = FootnoteToFrontlink(text, articleTitle, wikiNamespace, ref summary, ref skip);
    text = FormatTimeValues(text, articleTitle, wikiNamespace, ref summary, ref skip);
    if (AGGRESSIVE) {
        // plain-text list items become listing templates before the listings are formatted
        text = ConvertTextToListings(text, articleTitle, wikiNamespace, ref summary, ref skip);
    }
    text = FormatListings(text, articleTitle, wikiNamespace, ref summary, ref skip);

    // nothing changed at all: tell the caller to skip this article
    if (text.Equals(articleText)) {
        skip = true;
    }
    return text;
}
// Applies every entry of InvalidSymbols to the text, replacing each matched symbol with its
// wiki text equivalent. The edit summary is not touched by this pass.
private string ReplaceInvalidSymbols(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    string result = articleText;
    foreach (KeyValuePair<Regex, string> entry in InvalidSymbols) {
        Regex symbolPattern = entry.Key;
        string replacement = entry.Value;
        result = symbolPattern.Replace(result, replacement);
    }
    return result;
}
// Replace URLs of the form "www.example.com" with "http://www.example.com".
// The character in front of the host name (group 1 of NoHttpUrlRegex) is kept as is.
private string FixInvalidUrls(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    string fixedText = NoHttpUrlRegex.Replace(articleText, "$1http://$2");
    bool changed = !fixedText.Equals(articleText);
    if (changed) {
        summary = UpdateEditSummary(summary, "fix URL(s) missing 'http'");
    }
    return fixedText;
}
// Ensure that headings match the article templates.
// Second-level headings are renamed according to InvalidSecondLevelHeadings; headings of any
// other level get "Mid-range" and "By xyz" / "On xyz" normalized. Every renamed heading is
// recorded in the edit summary.
private string UpdateHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    foreach (Match m in WikiRegexes.Headings.Matches(articleText)) {
        string originalHeading = m.Value;
        string originalTitle = m.Groups[1].Value;
        string newTitle = originalTitle;
        string lowerTitle = originalTitle.ToLower().Trim();
        // Count the leading '=' characters. The first character is assumed to be '=', so
        // counting starts at index 1. The length check keeps a heading that consists only of
        // '=' characters (empty title) from running past the end of the string.
        int headingLevel = 1;
        while (headingLevel < originalHeading.Length && originalHeading[headingLevel] == '=') {
            headingLevel++;
        }
        Match match = null;
        if (headingLevel == 2) {
            foreach (KeyValuePair<Regex, string> invalidHeadingEntry in InvalidSecondLevelHeadings) {
                if (MatchText(lowerTitle, invalidHeadingEntry.Key, ref match)) {
                    newTitle = invalidHeadingEntry.Value;
                }
            }
        } else {
            if (MatchText(lowerTitle, MidrangeHeadingRegex, ref match)) {
                newTitle = "Mid-range";
            } else if (MatchText(lowerTitle, ByOnHeadingRegex, ref match)) {
                // "by CAR" -> "By car": capitalize the preposition, lower-case the rest
                newTitle = Capitalize(match.Groups[1].Value) + " " + match.Groups[2].Value.ToLower();
            }
        }
        if (!newTitle.Equals(originalTitle)) {
            // rebuild the heading with the same number of '=' on both sides
            string headingBars = originalHeading.Substring(0, headingLevel);
            articleText = articleText.Replace(originalHeading, headingBars + newTitle + headingBars);
            summary = UpdateEditSummary(summary, "'" + originalTitle.Trim() + "' → '" + newTitle + "' per [[WV:AT]]");
        }
    }
    return articleText;
}
// Deletes every listing template call whose parameters are all empty (a call without any
// parameters counts as empty too), then deletes list items that consist of bullets only.
private string RemoveEmptyListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    foreach (Match listingMatch in ListingTemplateNamesRegex.Matches(articleText)) {
        string listing = listingMatch.Value;

        // look for the first parameter that carries a value
        bool hasValue = false;
        foreach (KeyValuePair<string, string> parameter in Tools.GetTemplateParameterValues(listing)) {
            if (parameter.Value != "") {
                hasValue = true;
                break;
            }
        }
        if (hasValue) {
            continue;
        }

        articleText = articleText.Replace(listing, "");
        summary = UpdateEditSummary(summary, "empty listing(s) removed");
    }
    // bullets left behind by removed listings (or empty to begin with) are stripped as well
    return EmptyListItemRegex.Replace(articleText, "");
}
// Remove obsolete headings if they have no content.
// Only region articles and city/district articles are processed. For each article type a
// list of obsolete second- and third-level heading names applies; a matching section is
// removed from the article when IsEmptySecondLevelSection / IsEmptyThirdLevelSection reports
// it as empty. Removed headings are listed in the edit summary, per article type.
private string RemoveEmptyObsoleteHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    bool regionArticle = IsRegionArticle(articleText);
    bool cityArticle = IsCityArticle(articleText) || IsDistrictArticle(articleText);
    if (!regionArticle && !cityArticle) {
        return articleText;
    }
    string emptyRegionHeadings = "";
    string emptyCityHeadings = "";
    foreach (KeyValuePair<string, string> levelTwoSectionData in SplitToSecondLevelSections(articleText)) {
        string levelTwoSectionName = levelTwoSectionData.Key;
        string levelTwoSectionText = levelTwoSectionData.Value;
        if (regionArticle
                && InvalidEmptySecondLevelRegionHeading.IsMatch(levelTwoSectionName)
                && IsEmptySecondLevelSection(levelTwoSectionText)) {
            articleText = articleText.Replace(levelTwoSectionText, "");
            emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelTwoSectionName + "'");
            continue;
        }
        if (cityArticle
                && InvalidEmptySecondLevelCityHeading.IsMatch(levelTwoSectionName)
                && IsEmptySecondLevelSection(levelTwoSectionText)) {
            articleText = articleText.Replace(levelTwoSectionText, "");
            emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelTwoSectionName + "'");
            continue;
        }
        // the second-level section stays, so look at its sub-sections
        foreach (KeyValuePair<string, string> levelThreeSectionData in SplitToThirdLevelSections(levelTwoSectionText)) {
            string levelThreeSectionName = levelThreeSectionData.Key.Trim();
            string levelThreeSectionText = levelThreeSectionData.Value;
            // The region rule is applied to region articles only, mirroring the second-level
            // checks above. It used to run for city articles as well, removing the heading
            // there and crediting the region article template in the summary.
            if (regionArticle
                    && InvalidEmptyThirdLevelRegionHeading.IsMatch(levelThreeSectionName)
                    && IsEmptyThirdLevelSection(levelThreeSectionText)) {
                articleText = articleText.Replace(levelThreeSectionText, "");
                emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelThreeSectionName + "'");
                continue;
            }
            if (cityArticle
                    && InvalidEmptyThirdLevelCityHeading.IsMatch(levelThreeSectionName)
                    && IsEmptyThirdLevelSection(levelThreeSectionText)) {
                articleText = articleText.Replace(levelThreeSectionText, "");
                emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelThreeSectionName + "'");
            }
        }
    }
    if (!String.IsNullOrEmpty(emptyRegionHeadings)) {
        summary = UpdateEditSummary(summary, "remove empty " + emptyRegionHeadings + " heading(s) per [[WV:Region article template]]");
    }
    if (!String.IsNullOrEmpty(emptyCityHeadings)) {
        summary = UpdateEditSummary(summary, "remove empty " + emptyCityHeadings + " heading(s) per [[WV:Huge city article template]]");
    }
    return articleText;
}
// Turns "Some Name [http://example.com]" into "[http://example.com Some Name]".
// Apostrophes that surrounded the name (groups 1 and 5) stay around the new link.
private string FootnoteToFrontlink(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    string converted = FootnoteToFrontLinkRegex.Replace(articleText, "$1[$6 $2]$5");
    if (converted.Equals(articleText)) {
        return converted;
    }
    summary = UpdateEditSummary(summary, "footnote → frontlink per [[WV:XL]]");
    return converted;
}
// Rewrites external links that point at English Wikipedia or English Wikivoyage pages as
// interwiki ("[[w:Page|text]]") or internal ("[[Page|text]]") links.
private string ExternalToInternalLink(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    string result = articleText;
    // for each site the variant with link text runs before the variant without
    result = ExternalToInternalLinkWikipedia.Replace(result, "[[w:$1|$2]]");
    result = ExternalToInternalLinkWikipediaNoText.Replace(result, "[[w:$1]]");
    result = ExternalToInternalLinkWikivoyage.Replace(result, "[[$1|$2]]");
    result = ExternalToInternalLinkWikivoyageNoText.Replace(result, "[[$1]]");

    bool changed = !result.Equals(articleText);
    if (changed) {
        summary = UpdateEditSummary(summary, "external → internal link(s)");
    }
    return result;
}
// Normalizes clock times. The replacements build on one another, so their order matters:
//   1. "9.30 am"   -> "9:30 am"   (colon as separator)
//   2. "9:30 a.m." -> "9:30AM", "9 pm" -> "9PM"
//   3. "9:00AM"    -> "9AM"
//   4. "12PM"      -> "noon", "12AM" -> "midnight"
//   5. "9AM - 5PM" -> "9AM-5PM"   (no whitespace around the hyphen of a range)
private string FormatTimeValues(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    string result = articleText;
    result = TimeValuesShouldUseColonAsSeperator.Replace(result, "$1:$2$3$4");
    result = TimeValuesSuffixAM.Replace(result, "$1$2AM");
    result = TimeValuesSuffixPM.Replace(result, "$1$2PM");
    result = TimeValuesRoundToHour.Replace(result, "$1$2");
    result = TimeValuesHourToNoon.Replace(result, "noon");
    result = TimeValuesHourToMidnight.Replace(result, "midnight");
    result = TimeValuesTrimWhitespace.Replace(result, "$1$6$7");

    bool changed = !result.Equals(articleText);
    if (changed) {
        summary = UpdateEditSummary(summary, "update time(s) per [[WV:TDF]]");
    }
    return result;
}
// Converts plain-text list items into listing templates. Only city, district and park
// articles are processed, and only sections whose name matches TextToListingSectionNames.
// The number of converted items is reported in the edit summary.
private string ConvertTextToListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    bool supportedArticle = IsCityArticle(articleText) || IsDistrictArticle(articleText) || IsParkArticle(articleText);
    if (!supportedArticle) {
        // other article types are left alone
        return articleText;
    }

    int converted = 0;
    foreach (KeyValuePair<string, string> section in SplitToSecondLevelSections(articleText)) {
        string sectionName = section.Key;
        if (!TextToListingSectionNames.IsMatch(sectionName)) {
            // this section does not take non-generic listings
            continue;
        }
        string listingType = GetValidListingTypeForSection(sectionName);

        // try every list item in the section
        foreach (Match item in CandidateListingItemRegex.Matches(section.Value)) {
            string itemText = item.Groups[1].Value;
            string listing = ConvertListingItemtoTemplatedListing(itemText, listingType, ref summary);
            if (listing == "") {
                // the item could not be converted
                continue;
            }
            articleText = articleText.Replace(itemText, listing);
            converted++;
        }
    }

    if (converted > 0) {
        string plural = converted > 1 ? "s" : "";
        summary = UpdateEditSummary(summary, "convert " + converted + " plain text listing" + plural + " to [[WV:Listings|templated listing" + plural + "]]");
    }
    return articleText;
}
// Builds a "{{<listingType>|...}}" call from the text of one list item. Each step moves
// the piece it recognizes out of the item text and into a template parameter; whatever is
// left over becomes the "content" parameter. Returns "" when the item has no leading name.
private string ConvertListingItemtoTemplatedListing(string listItemText, string listingType, ref string summary) {
    string listing = "{{" + listingType + "}}";
    bool hasName = ProcessListingNameInListItem(ref listItemText, ref listing);
    if (hasName) {
        ProcessListingPhoneInListItem(ref listItemText, ref listing);
        ProcessListingEmailInListItem(ref listItemText, ref listing);
        ProcessListingUrlInListItem(ref listItemText, ref listing);
        ProcessListingAddressInListItem(ref listItemText, ref listing);
        ProcessListingContentInListItem(ref listItemText, ref listing);
        return listing;
    }
    // without a name nothing else is attempted
    return "";
}
// Extracts the listing name, which must be bold text at the very start of the item.
// If the name is itself an external link with text, the link target goes into "url" and the
// link text into "name". Returns false (leaving both arguments untouched) when the item
// does not start with a name.
private bool ProcessListingNameInListItem(ref string listItemText, ref string templateCall) {
    Match nameMatch = ListingNameRegex.Match(listItemText);
    bool nameAtStart = nameMatch.Success && nameMatch.Index == 0;
    if (!nameAtStart) {
        return false;
    }

    string name = nameMatch.Groups[2].Value.Trim();
    templateCall = Tools.SetTemplateParameterValue(templateCall, "name", name);

    // a front-linked name is split into its URL and its text
    Match linkMatch = ExternalLinkWithTextRegex.Match(name);
    if (linkMatch.Success && linkMatch.Index == 0) {
        templateCall = Tools.SetTemplateParameterValue(templateCall, "url", linkMatch.Groups[1].Value);
        templateCall = Tools.SetTemplateParameterValue(templateCall, "name", linkMatch.Groups[2].Value);
    }

    // the name (including its bold markup) is no longer part of the item text
    listItemText = RemoveValueFromListItemText(listItemText, nameMatch.Value);
    return true;
}
// Moves phone, fax and toll-free numbers from the item text into the matching listing
// parameters. Labelled numbers are handled first; an unlabelled number is only accepted as
// the phone number when no labelled phone number was found.
private void ProcessListingPhoneInListItem(ref string listItemText, ref string templateCall) {
    bool labelledPhoneFound = MoveLabelledNumberToListingParam(ListingPhoneNumberWithLabelRegex, 4, "phone", ref listItemText, ref templateCall);
    MoveLabelledNumberToListingParam(ListingFaxNumberRegex, 4, "fax", ref listItemText, ref templateCall);
    MoveLabelledNumberToListingParam(ListingTollfreeNumberRegex, 5, "tollfree", ref listItemText, ref templateCall);
    if (labelledPhoneFound) {
        return;
    }

    // fall back to a number without a label
    Match bareMatch = ListingPhoneNumberRegex.Match(listItemText);
    if (!bareMatch.Success) {
        return;
    }
    string phone = bareMatch.Value;
    // fewer than seven digits is not considered a phone number
    if (DigitRegex.Matches(phone).Count > 6) {
        templateCall = Tools.SetTemplateParameterValue(templateCall, "phone", phone);
        listItemText = RemoveValueFromListItemText(listItemText, bareMatch.Value);
    }
}
// Looks for the first match of labelledNumberRegex in the item text. On success the capture
// group numberGroup is stored in the listing parameter paramName and the whole match is
// removed from the item text. Returns whether a match was found.
private bool MoveLabelledNumberToListingParam(Regex labelledNumberRegex, int numberGroup, string paramName, ref string listItemText, ref string templateCall) {
    Match numberMatch = labelledNumberRegex.Match(listItemText);
    if (!numberMatch.Success) {
        return false;
    }
    templateCall = Tools.SetTemplateParameterValue(templateCall, paramName, numberMatch.Groups[numberGroup].Value);
    listItemText = RemoveValueFromListItemText(listItemText, numberMatch.Value);
    return true;
}
// Moves the first e-mail address found in the item text into the "email" parameter.
// An "email:" / "mailto:" label in front of the address is removed from the text with it.
private void ProcessListingEmailInListItem(ref string listItemText, ref string templateCall) {
    Match found = ListingEmailRegex.Match(listItemText);
    if (!found.Success) {
        return;
    }
    // group 2 is the address without its label
    templateCall = Tools.SetTemplateParameterValue(templateCall, "email", found.Groups[2].Value);
    listItemText = RemoveValueFromListItemText(listItemText, found.Value);
}
// Moves a footnote-style link ("[http://www.example.com]") from the item text into the
// "url" parameter, unless the listing already has a URL.
private void ProcessListingUrlInListItem(ref string listItemText, ref string templateCall) {
    bool urlAlreadySet = Tools.GetTemplateParameterValue(templateCall, "url") != "";
    if (urlAlreadySet) {
        // the name was front-linked, which already supplied the URL
        return;
    }
    Match footnote = FootnoteUrlRegex.Match(listItemText);
    if (!footnote.Success) {
        return;
    }
    // group 1 is the URL without the surrounding brackets
    templateCall = Tools.SetTemplateParameterValue(templateCall, "url", footnote.Groups[1].Value);
    listItemText = RemoveValueFromListItemText(listItemText, footnote.Value);
}
// Moves a street address from the item text into the "address" parameter.
// The US-style pattern ("1234 First St") is tried first; a match that contains a distance
// unit ("25 km by road") is treated as a false positive and left in the text. The
// international pattern ("Calle Ricardo Montalban, 452") is tried only when the US-style
// pattern did not match at all.
private void ProcessListingAddressInListItem(ref string listItemText, ref string templateCall) {
    Match addressMatch = ListingAddressRegex.Match(listItemText);
    if (addressMatch.Success) {
        // group 1 is the address without trailing punctuation/whitespace
        string address = addressMatch.Groups[1].Value;
        Match falsePositiveMatch = ListingAddressFalsePositivesRegex.Match(address);
        if (!falsePositiveMatch.Success) {
            templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
            listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
        }
    } else {
        addressMatch = ListingAddressInternationalRegex.Match(listItemText);
        if (addressMatch.Success) {
            // The international pattern has no group around the whole address: its group 1
            // is only the street-type word ("Calle"). Using that group stored a truncated
            // address while the full text was removed from the item, so the whole match is
            // used instead.
            string address = addressMatch.Value;
            templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
            listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
        }
    }
}
// Stores whatever is left of the item text, after sanitizing it, in the "content"
// parameter. A null or empty remainder leaves the listing untouched.
private void ProcessListingContentInListItem(ref string listItemText, ref string templateCall) {
    if (!String.IsNullOrEmpty(listItemText)) {
        listItemText = SanitizeListingContent(listItemText);
        templateCall = Tools.SetTemplateParameterValue(templateCall, "content", listItemText);
    }
}
// Removes every occurrence of value from the item text and tidies up what is left:
// punctuation runs matched by InvalidDuplicatePunctuationRegex are collapsed, leading
// punctuation is stripped and surrounding whitespace is trimmed.
private string RemoveValueFromListItemText(string listItemText, string value) {
    string remaining = listItemText.Replace(value, "").Trim();
    remaining = InvalidDuplicatePunctuationRegex.Replace(remaining, "$1");
    return StripLeadingPunctuation(remaining).Trim();
}
// Runs every listing template call of the article through the listing clean-up steps
// (type, parameter layout, phone numbers, e-mail, coordinates, URL, content and - in
// aggressive mode - address) and writes changed calls back into the article text.
// Listings are visited per second-level section because the expected listing type depends
// on the section name.
private string FormatListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
    foreach (KeyValuePair<string, string> section in SplitToSecondLevelSections(articleText)) {
        string sectionName = section.Key;
        foreach (Match listingMatch in ListingTemplateNamesRegex.Matches(section.Value)) {
            string before = listingMatch.Value;
            string after = before;
            after = ConvertGenericListingToSpecificType(after, ref summary);
            after = MatchListingTypeToSection(after, sectionName, ref summary);
            after = FormatListingParams(after, ref summary);
            after = SanitizeListingPhoneNumbers(after, ref summary);
            after = SanitizeListingEmail(after, ref summary);
            after = SanitizeListingLatLong(after, ref summary);
            after = SanitizeListingUrl(after, ref summary);
            after = SanitizeListingContent(after, ref summary);
            if (AGGRESSIVE) {
                after = SanitizeListingAddress(after, articleTitle, ref summary);
            }
            if (after.Equals(before)) {
                continue;
            }
            articleText = articleText.Replace(before, after);
        }
    }
    return articleText;
}
// Change "{{listing|type=xyz|...}}" to "{{xyz|...}}" when xyz is one of the specific
// listing types (see, do, buy, eat, drink, sleep). Other template calls are returned as is.
private string ConvertGenericListingToSpecificType(string templateCall, ref string summary) {
    if (!Tools.GetTemplateName(templateCall).Equals("listing")) {
        return templateCall;
    }
    string requestedType = Tools.GetTemplateParameterValue(templateCall, "type").ToLower();
    switch (requestedType) {
        case "see":
        case "do":
        case "buy":
        case "eat":
        case "drink":
        case "sleep":
            break;
        default:
            // no type or an unknown type: keep the generic listing
            return templateCall;
    }

    string converted = Tools.RenameTemplate(templateCall, requestedType);
    converted = Tools.RemoveTemplateParameter(converted, "type");
    if (!converted.Equals(templateCall)) {
        summary = UpdateEditSummary(summary, "update listing type to match expected section type");
    }
    return converted;
}
// Make sure listings are formatted according to the style guidelines in
// Wikivoyage:Listings.
// The template call is rebuilt from scratch: the known parameters come first, in the order
// of ListingTemplateParamNames; then any other non-empty parameters, one per line; then
// "content" on its own line. Parameters that do not apply to the listing type are dropped
// when empty and otherwise end up among the "other" parameters.
// The summary argument is currently not modified: an edit summary for pure reformatting was
// applied to every article with listings, whether they changed or not, so it is suppressed.
private string FormatListingParams(string templateCall, ref string summary) {
    string listingType = Tools.GetTemplateName(templateCall);
    bool isSleepListing = listingType.Equals("sleep");
    string formattedValue = "{{" + listingType + "\n";
    // loop through expected template arguments and format appropriately
    foreach (string param in ListingTemplateParamNames) {
        if (param.Equals("content")) {
            // content is written last, see below
            continue;
        }
        string paramValue = Tools.GetTemplateParameterValue(templateCall, param);
        bool isEmpty = paramValue == "";
        if (param.Equals("type") && (isEmpty || !listingType.Equals("listing"))) {
            // only listing uses the "type" attribute
            if (isEmpty) {
                templateCall = Tools.RemoveTemplateParameter(templateCall, param);
            }
            continue;
        }
        if ((param.Equals("image") || param.Equals("lastedit")) && isEmpty) {
            // empty image & lastedit attributes are unnecessary
            continue;
        }
        if (isSleepListing && param.Equals("hours") && isEmpty) {
            // sleep listings don't use the "hours" attribute
            templateCall = Tools.RemoveTemplateParameter(templateCall, param);
            continue;
        }
        if (!isSleepListing && (param.Equals("checkin") || param.Equals("checkout"))) {
            // only sleep listings use the "checkin" and "checkout" attributes
            if (isEmpty) {
                templateCall = Tools.RemoveTemplateParameter(templateCall, param);
            }
            continue;
        }
        if (param.Equals("alt") && isEmpty && !ListingAltParamRegex.IsMatch(templateCall)) {
            // do not add an alt tag if it isn't already present
            continue;
        }
        // replace excess whitespace with single spaces
        paramValue = DoubleSpaceRegex.Replace(paramValue, " ");
        formattedValue += "| " + param + "=" + paramValue;
        // add either a newline or a space after the param value, depending on the param
        if (param.Equals("email") || param.Equals("directions") || param.Equals("fax") || param.Equals("price") || param.Equals("image") || param.Equals("lastedit")) {
            formattedValue += "\n";
        } else {
            formattedValue += " ";
        }
        // written out, so take it off the list of parameters that remain to be handled
        templateCall = Tools.RemoveTemplateParameter(templateCall, param);
    }
    // loop through any unexpected template args and format on their own lines
    foreach (KeyValuePair<string, string> templateParameter in Tools.GetTemplateParameterValues(templateCall)) {
        string param = templateParameter.Key;
        if (param.Equals("content")) {
            continue;
        }
        string paramValue = templateParameter.Value;
        if (paramValue == "") {
            // any empty, unrecognized listing params can be removed
            continue;
        }
        formattedValue += "| " + param + "=" + paramValue + "\n";
    }
    // now add template content on its own line
    formattedValue += "| content=" + Tools.GetTemplateParameterValue(templateCall, "content") + "\n";
    formattedValue += "}}";
    return formattedValue;
}
// update the listing phone number fields as much as possible to match
// Wikivoyage:Phone numbers
// Runs SanitizePhoneNumber over every parameter named in ListingPhoneParamNames
// and writes the result back into the template call. The edit summary is
// updated only when the template text actually changed.
private string SanitizeListingPhoneNumbers(string templateCall, ref string summary) {
    string updatedCall = templateCall;
    for (int i = 0; i < ListingPhoneParamNames.Length; i++) {
        string phoneParam = ListingPhoneParamNames[i];
        string cleaned = SanitizePhoneNumber(Tools.GetTemplateParameterValue(updatedCall, phoneParam));
        updatedCall = Tools.UpdateTemplateParameterValue(updatedCall, phoneParam, cleaned);
    }
    if (updatedCall.Equals(templateCall)) {
        return updatedCall;
    }
    summary = UpdateEditSummary(summary, "format listing phone # per [[WV:Phone numbers]]");
    return updatedCall;
}
// update the listing email field to ensure it is valid
// Removes whatever MailtoRegex matches (a "mailto:" prefix and any slashes after
// it) from the "email" parameter, trims the result and stores it back. The edit
// summary is updated only when the template text changed.
private string SanitizeListingEmail(string templateCall, ref string summary) {
    string rawEmail = Tools.GetTemplateParameterValue(templateCall, "email");
    string cleanedEmail = MailtoRegex.Replace(rawEmail, "").Trim();
    string updatedCall = Tools.UpdateTemplateParameterValue(templateCall, "email", cleanedEmail);
    bool changed = !updatedCall.Equals(templateCall);
    if (changed) {
        summary = UpdateEditSummary(summary, "fix invalid listing email");
    }
    return updatedCall;
}
// trim lat/long precision to no more than eight digits
// Rewrites the "lat" and then the "long" parameter by replacing each
// LatLongTrimRegex match with its first capture group. The edit summary is
// updated only when the template text changed.
private string SanitizeListingLatLong(string templateCall, ref string summary) {
    string updatedCall = templateCall;
    string[] coordinateParams = new string[] { "lat", "long" };
    foreach (string coordinateParam in coordinateParams) {
        string currentValue = Tools.GetTemplateParameterValue(updatedCall, coordinateParam);
        string trimmedValue = LatLongTrimRegex.Replace(currentValue, "$1");
        updatedCall = Tools.UpdateTemplateParameterValue(updatedCall, coordinateParam, trimmedValue);
    }
    if (!updatedCall.Equals(templateCall)) {
        summary = UpdateEditSummary(summary, "trim lat/long precision");
    }
    return updatedCall;
}
// update the listing URL field to ensure it is valid
// Normalizes the "url" parameter of a listing:
//   - a footnote-style value ("[http://www.example.com]") is unwrapped;
//   - a value that looks like a URL but carries no scheme gets "http://" prepended.
// A value that already starts with "scheme://" (http, https, ftp, ...) or with
// "//" (protocol-relative) is left alone, so "ftp://example.com" is no longer
// turned into "http://ftp://example.com".
// The edit summary is updated only when the template text changed.
private string SanitizeListingUrl(string templateCall, ref string summary) {
    string originalTemplateCall = templateCall;
    string url = Tools.GetTemplateParameterValue(templateCall, "url");
    url = FootnoteUrlRegex.Replace(url, "$1");
    bool hasSchemeOrIsProtocolRelative =
        url.StartsWith("//", StringComparison.Ordinal)
        || Regex.IsMatch(url, @"^[a-z][a-z0-9+.\-]*://", RegexOptions.IgnoreCase);
    if (!hasSchemeOrIsProtocolRelative && ValidUrlRegex.IsMatch(url)) {
        url = "http://" + url;
    }
    templateCall = Tools.UpdateTemplateParameterValue(templateCall, "url", url);
    if (!templateCall.Equals(originalTemplateCall)) {
        summary = UpdateEditSummary(summary, "fix invalid listing URL");
    }
    return templateCall;
}
// make sure the listing content is capitalized, isn't just punctuation, etc.
// Template-level overload: cleans the "content" parameter with the string
// overload of SanitizeListingContent and stores the result back.
// The summary argument is not modified by this method.
private string SanitizeListingContent(string templateCall, ref string summary) {
    string sanitized = SanitizeListingContent(Tools.GetTemplateParameterValue(templateCall, "content"));
    return Tools.UpdateTemplateParameterValue(templateCall, "content", sanitized);
}
// Cleans free-text listing content. Null or empty input is returned as-is.
// Otherwise, in order: EmptyPunctuationRegex matches are removed, a leading
// "is " (any case) is dropped, the first character is upper-cased, and the
// result becomes "" if OnlyPunctuationRegex matches it.
private string SanitizeListingContent(string content) {
    if (!String.IsNullOrEmpty(content)) {
        string cleaned = EmptyPunctuationRegex.Replace(content, "");
        // sentence fragment left over from text-to-listing conversions
        const string fragmentPrefix = "is ";
        if (cleaned.ToLower().StartsWith(fragmentPrefix)) {
            cleaned = cleaned.Substring(fragmentPrefix.Length);
        }
        cleaned = Capitalize(cleaned);
        return OnlyPunctuationRegex.IsMatch(cleaned) ? "" : cleaned;
    }
    return content;
}
// update the listing address field to ensure it is valid
// Cleans up the "address" parameter of a listing.
//   - An empty address is left alone.
//   - If ListingAddressIsDirectionsRegex matches the address and "directions" is
//     empty, the value is moved to "directions" and the method returns.
//   - Otherwise a trailing state/zip (AddressWithStateOrZipRegex), stray
//     punctuation and a trailing city name (derived from articleName) are
//     removed, and a trailing full street type is abbreviated.
// The city name is removed only when it is a whole trailing word, so an address
// such as "1 Newark" in the article "Ark" is not cut down to "1 New".
private string SanitizeListingAddress(string templateCall, string articleName, ref string summary) {
    string originalTemplateCall = templateCall;
    string address = Tools.GetTemplateParameterValue(templateCall, "address");
    if (String.IsNullOrEmpty(address)) {
        return templateCall;
    }
    if (ListingAddressIsDirectionsRegex.IsMatch(address)) {
        // the address field belongs in the directions field
        string directions = Tools.GetTemplateParameterValue(templateCall, "directions");
        if (String.IsNullOrEmpty(directions)) {
            templateCall = Tools.UpdateTemplateParameterValue(templateCall, "directions", address);
            templateCall = Tools.UpdateTemplateParameterValue(templateCall, "address", "");
            summary = UpdateEditSummary(summary, "listing address moved to directions");
            return templateCall;
        }
        // directions already has a value: keep the address and format it below
    }
    // strip out state and zip code if present
    address = AddressWithStateOrZipRegex.Replace(address, "$1");
    address = StripStrayPunctuation(address);
    // strip out the city if it is present as the last word(s) of the address
    string city = GetCityFromArticleName(articleName);
    if (!String.IsNullOrEmpty(city) && address.EndsWith(city, StringComparison.OrdinalIgnoreCase)) {
        int cityStart = address.Length - city.Length;
        // require a word boundary so a city name is never cut out of a longer word
        if (cityStart == 0 || !Char.IsLetterOrDigit(address[cityStart - 1])) {
            address = address.Substring(0, cityStart).Trim();
        }
    }
    address = StripStrayPunctuation(address);
    // properly abbreviate street name
    address = AbbreviateStreeType(address, "Avenue", "Ave");
    address = AbbreviateStreeType(address, "Boulevard", "Blvd");
    address = AbbreviateStreeType(address, "Court", "Ct");
    address = AbbreviateStreeType(address, "Drive", "Dr");
    address = AbbreviateStreeType(address, "Lane", "Ln");
    address = AbbreviateStreeType(address, "Place", "Pl");
    address = AbbreviateStreeType(address, "Road", "Rd");
    address = AbbreviateStreeType(address, "Street", "St");
    templateCall = Tools.UpdateTemplateParameterValue(templateCall, "address", address);
    if (!templateCall.Equals(originalTemplateCall)) {
        summary = UpdateEditSummary(summary, "format listing address per [[WV:Listings]]");
    }
    return templateCall;
}
// make sure that the listing type matches the section in which the listing
// is found (example: "see" listings in the "See" section)
// Renames the listing template to the type returned by
// GetValidListingTypeForSection for sectionName. The combined sections
// "eat and drink" and "see and do" (compared case-insensitively) are skipped.
// The edit summary is updated only when the template text changed.
private string MatchListingTypeToSection(string templateCall, string sectionName, ref string summary) {
    string loweredSection = sectionName.ToLower();
    if (loweredSection == "eat and drink" || loweredSection == "see and do") {
        // listings of either type are acceptable in a combined section
        return templateCall;
    }
    string expectedListingType = GetValidListingTypeForSection(sectionName);
    if (Tools.GetTemplateName(templateCall).Equals(expectedListingType)) {
        return templateCall;
    }
    string renamedCall = Tools.RenameTemplate(templateCall, expectedListingType, false);
    if (!renamedCall.Equals(templateCall)) {
        summary = UpdateEditSummary(summary, "update listing type to match expected section type");
    }
    return renamedCall;
}
// return a map of section name-section content for all second level headings.
// the opening text of the article is returned without a section name
// Splits the article with SplitToHeadingSections, using
// WikiRegexes.HeadingLevelTwo as the heading pattern.
private static List<KeyValuePair<string, string>> SplitToSecondLevelSections(string articleContent) {
    Regex secondLevelHeading = WikiRegexes.HeadingLevelTwo;
    return SplitToHeadingSections(articleContent, secondLevelHeading);
}
// Splits the text with SplitToHeadingSections, using
// WikiRegexes.HeadingLevelThree as the heading pattern.
private static List<KeyValuePair<string, string>> SplitToThirdLevelSections(string articleContent) {
    Regex thirdLevelHeading = WikiRegexes.HeadingLevelThree;
    return SplitToHeadingSections(articleContent, thirdLevelHeading);
}
// Splits articleContent at every match of headingRegex and returns the pieces in
// document order as (section name, section text) pairs.
//   - The section name is capture group 1 of the heading match, trimmed; the
//     text before the first heading gets the name "".
//   - Each section's text starts at its heading and runs up to the next heading
//     (or the end of the text).
//   - If the text starts with a heading, no empty leading section is added.
// The name of the final section is trimmed like all the others, so a closing
// "== Sleep ==" yields "Sleep" rather than " Sleep ".
private static List<KeyValuePair<string, string>> SplitToHeadingSections(string articleContent, Regex headingRegex) {
    List<KeyValuePair<string, string>> sections = new List<KeyValuePair<string, string>>();
    int sectionStart = 0;
    string sectionName = "";
    foreach (Match heading in headingRegex.Matches(articleContent)) {
        if (heading.Index > 0) {
            // Don't add empty first section if page starts with heading
            string sectionContent = articleContent.Substring(sectionStart, heading.Index - sectionStart);
            sections.Add(new KeyValuePair<string, string>(sectionName, sectionContent));
        }
        sectionStart = heading.Index;
        sectionName = heading.Groups[1].Value.Trim();
    }
    // Add text of final section
    sections.Add(new KeyValuePair<string, string>(sectionName, articleContent.Substring(sectionStart)));
    return sections;
}
// Applies IsEmptySection with WikiRegexes.HeadingLevelTwo as the heading pattern.
private static bool IsEmptySecondLevelSection(string sectionContent) {
    Regex secondLevelHeading = WikiRegexes.HeadingLevelTwo;
    return IsEmptySection(sectionContent, secondLevelHeading);
}
// Applies IsEmptySection with WikiRegexes.HeadingLevelThree as the heading pattern.
private static bool IsEmptyThirdLevelSection(string sectionContent) {
    Regex thirdLevelHeading = WikiRegexes.HeadingLevelThree;
    return IsEmptySection(sectionContent, thirdLevelHeading);
}
// Returns true when sectionContent consists of nothing but a heading: the first
// headingRegex match must start at index 0 and, after trimming, be as long as
// the trimmed section text. Returns false when the text does not start with a
// heading.
private static bool IsEmptySection(string sectionContent, Regex headingRegex) {
    Match heading = headingRegex.Match(sectionContent);
    bool startsWithHeading = heading.Success && heading.Index == 0;
    if (startsWithHeading) {
        int headingLength = heading.Value.Trim().Length;
        int sectionLength = sectionContent.Trim().Length;
        return headingLength == sectionLength;
    }
    // this shouldn't happen, but just in case
    return false;
}
// return the expected listing type for the given section name ("See" returns "see").
// Maps a section name to a listing template name. "See", "Do", "Buy", "Eat",
// "Drink" and "Sleep" are matched case-sensitively; "eat and drink" is matched
// case-insensitively and maps to "eat". Everything else maps to "listing".
private static string GetValidListingTypeForSection(string sectionName) {
    switch (sectionName) {
        case "See":
            return "see";
        case "Do":
            return "do";
        case "Buy":
            return "buy";
        case "Eat":
            return "eat";
        case "Drink":
            return "drink";
        case "Sleep":
            return "sleep";
    }
    return (sectionName.ToLower() == "eat and drink") ? "eat" : "listing";
}
// Reformats the first PhoneNumberRegex match found in phoneNumberText and
// substitutes it back into the surrounding text (for example a trailing
// "(front office)" is preserved). Text with no match is returned unchanged.
private static string SanitizePhoneNumber(string phoneNumberText) {
    Match match = PhoneNumberRegex.Match(phoneNumberText);
    if (!match.Success) {
        return phoneNumberText;
    }
    string rawNumber = match.Value;
    // drop quotes, turn parentheses into spaces, convert periods to dashes
    string cleaned = rawNumber.Replace("'", "")
        .Replace("(", " ").Trim()
        .Replace(")", " ").Trim()
        .Replace(".", "-");
    cleaned = ExcessWhitespaceRegex.Replace(cleaned, " ");
    // a space next to a dash or after a plus sign is dropped
    cleaned = cleaned.Replace(" -", "-").Replace("- ", "-").Replace("+ ", "+");
    // a leading "1" followed by a space or dash is written as "+1"
    if (cleaned.StartsWith("1 ") || cleaned.StartsWith("1-")) {
        cleaned = "+" + cleaned;
    }
    return phoneNumberText.Replace(rawNumber, cleaned);
}
// return the city from the article name. if the article name is "Foo (Disambiguation)"
// then this method returns "Foo".
// Takes Tools.BasePageName(articleName) and, when it matches
// CityNameWithDisambiguationRegex, returns capture group 1 (the part before the
// parenthesized suffix); otherwise returns the base page name itself.
private static string GetCityFromArticleName(string articleName) {
    string basePageName = Tools.BasePageName(articleName);
    Match disambiguated = CityNameWithDisambiguationRegex.Match(basePageName);
    if (disambiguated.Success) {
        return disambiguated.Groups[1].Value;
    }
    return basePageName;
}
// if the address ends in a full street type value, convert to the abbreviated value
// Replaces a trailing invalidStreetType (matched case-insensitively and preceded
// by a space) with validStreetType. Any other address is returned unchanged.
private static string AbbreviateStreeType(string address, string invalidStreetType, string validStreetType) {
    string loweredAddress = address.ToLower();
    string loweredType = invalidStreetType.ToLower();
    if (!loweredAddress.EndsWith(" " + loweredType)) {
        return address;
    }
    int typeStart = loweredAddress.LastIndexOf(loweredType);
    if (typeStart <= 0) {
        return address;
    }
    return address.Substring(0, typeStart) + validStreetType;
}
// remove any leading or trailing punctuation
// Calls StripPunctuation with both the leading and the trailing flag set.
private static string StripStrayPunctuation(string text) {
    const bool stripLeading = true;
    const bool stripTrailing = true;
    return StripPunctuation(text, stripLeading, stripTrailing);
}
// remove any leading punctuation
// Calls StripPunctuation with only the leading flag set.
private static string StripLeadingPunctuation(string text) {
    const bool stripLeading = true;
    const bool stripTrailing = false;
    return StripPunctuation(text, stripLeading, stripTrailing);
}
// remove any trailing punctuation
// Calls StripPunctuation with only the trailing flag set.
private static string StripTrailingPunctuation(string text) {
    const bool stripLeading = false;
    const bool stripTrailing = true;
    return StripPunctuation(text, stripLeading, stripTrailing);
}
// remove leading and/or trailing punctuation, as selected by the two flags
// Removes InvalidTrailingPunctuationRegex matches when stripTrailing is set and
// then InvalidLeadingPunctuationRegex matches when stripLeading is set.
// Trailing punctuation is always handled first.
private static string StripPunctuation(string text, bool stripLeading, bool stripTrailing) {
    string withoutTrailing = stripTrailing ? InvalidTrailingPunctuationRegex.Replace(text, "") : text;
    return stripLeading ? InvalidLeadingPunctuationRegex.Replace(withoutTrailing, "") : withoutTrailing;
}
// return true if the text matches the pattern, otherwise return false. the "match"
// param will be populated with the match object in either case
// Runs regex against matchText, stores the resulting Match in the ref parameter
// (whether or not it succeeded) and returns its Success flag.
private static bool MatchText(string matchText, Regex regex, ref Match match) {
    Match result = regex.Match(matchText);
    match = result;
    return result.Success;
}
// Returns text with its first character upper-cased via Char.ToUpper; null and
// empty input are returned unchanged.
private static string Capitalize(string text) {
    if (String.IsNullOrEmpty(text)) {
        return text;
    }
    // Substring(1) is "" for a one-character string, so no special case is needed
    string firstCharacter = Char.ToUpper(text[0]).ToString();
    return firstCharacter + text.Substring(1);
}
// add the value to the edit summary if it is not already present
// Returns the edit summary with textToAdd appended as a new ", "-separated entry.
//   - A null or empty summary yields textToAdd on its own (null is treated like
//     "", matching AppendCSV).
//   - If one of the comma-separated entries, once trimmed, already equals
//     textToAdd, the summary is returned unchanged.
private static string UpdateEditSummary(string summary, string textToAdd) {
    if (String.IsNullOrEmpty(summary)) {
        return textToAdd;
    }
    foreach (string summaryField in summary.Split(',')) {
        if (summaryField.Trim().Equals(textToAdd)) {
            // text already present in edit summary
            return summary;
        }
    }
    return summary + ", " + textToAdd;
}
// append the value to the existing list as a CSV
// Returns valueToAdd alone when currentList is null or empty, otherwise
// currentList followed by ", " and valueToAdd. No duplicate check is performed.
private static string AppendCSV(string currentList, string valueToAdd) {
    bool hasExistingEntries = !String.IsNullOrEmpty(currentList);
    return hasExistingEntries ? currentList + ", " + valueToAdd : valueToAdd;
}
// return true if the article contains a city status template
// True when CityStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsCityArticle(string articleText) {
    Match statusTemplate = CityStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains a country status template
// True when CountryStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsCountryArticle(string articleText) {
    Match statusTemplate = CountryStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains a dive guide status template
// True when DiveguideStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsDiveguideArticle(string articleText) {
    Match statusTemplate = DiveguideStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains a district status template
// True when DistrictStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsDistrictArticle(string articleText) {
    Match statusTemplate = DistrictStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains an itinerary status template
// True when ItineraryStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsItineraryArticle(string articleText) {
    Match statusTemplate = ItineraryStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains a park status template
// True when ParkStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsParkArticle(string articleText) {
    Match statusTemplate = ParkStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains a phrasebook status template
// True when PhrasebookStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsPhrasebookArticle(string articleText) {
    Match statusTemplate = PhrasebookStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains a region status template
// True when RegionStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsRegionArticle(string articleText) {
    Match statusTemplate = RegionStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// return true if the article contains a topic status template
// True when TopicStatusTemplateNamesRegex matches anywhere in articleText.
private static bool IsTopicArticle(string articleText) {
    Match statusTemplate = TopicStatusTemplateNamesRegex.Match(articleText);
    return statusTemplate.Success;
}
// TODO:
// - move tollfree numbers to tollfree in listings
// - don't allow "otheruses" to be moved above the page banner