Bikarhêner:Balyozxane/updateW.py
Xuyakirin
#<nowiki>
import pywikibot
import re
from pywikibot import pagegenerators
from pywikibot.bot import AutomaticTWSummaryBot, ConfigParserBot, SingleSiteBot
from pywikibot.exceptions import NoPageError
from pywikibot.data import api
import requests
import re
def escapeRegExp(text):
    """Return *text* with regular-expression metacharacters backslash-escaped.

    Each of ``. * + ? ^ $ { } ( ) | [ ] \\`` is prefixed with a backslash so
    the result can be embedded in a pattern and match *text* literally.
    """
    # ``\g<0>`` is Python's spelling of "the whole match"; the JavaScript
    # form ``$&`` (written here previously as ``\\&``) is not understood by
    # ``re.sub`` and inserted a literal ``\&`` instead of the matched char.
    return re.sub(r'[.*+?^${}()|\[\]\\]', r'\\\g<0>', text)
def sortAlphabetically(content):
    """Sort the language entries of every translation section in *content*.

    A section is the text returned by ``extractWergerSections``.  Inside a
    section, every line starting with ``* {{Z|<code>}}:`` opens a language
    entry; the lines that follow it (up to the next such line) travel with
    that entry.  Lines that appear before the first language entry are kept,
    in their original order, at the top of the section.

    The entries are ordered by ``sort_by_kurdish_alphabet``.  If sorting a
    section fails, the error is printed and that section is left untouched.

    Returns the (possibly modified) page text.
    """
    for wergerSection in extractWergerSections(content):
        leadingLines = []  # lines seen before the first language entry
        langSet = []
        currentMainLang = None

        for line in wergerSection.strip().split("\n"):
            langCodeMatches = re.match(r'\* \{\{Z\|([a-zA-Z-]+)\}\}:', line)
            if langCodeMatches:
                # A new language entry closes the previous one.
                if currentMainLang:
                    langSet.append(currentMainLang)
                currentMainLang = {
                    'type': 'mainLang',
                    'line': line,
                    'langCode': langCodeMatches.group(1).lower(),
                    'subsets': [],
                }
            elif currentMainLang:
                currentMainLang['subsets'].append(line)
            else:
                leadingLines.append(line)

        if currentMainLang:
            langSet.append(currentMainLang)

        # Nothing sortable (this also covers an empty section, for which
        # ``content.replace("", ...)`` would corrupt the whole page).
        if not langSet:
            continue

        try:
            # Only real language entries are handed to the sorter; the
            # leading lines carry no 'langCode' and would make it fail.
            langSet = sort_by_kurdish_alphabet(langSet)
        except Exception as e:
            # Best effort: a failure in one section must not abort the page.
            print(f"An error occurred: {str(e)}")
            continue
        pywikibot.output(f"langSet hat rêzkirin")

        sortedLines = list(leadingLines)
        for item in langSet:
            sortedLines.append(item['line'])
            sortedLines.extend(item['subsets'])

        content = content.replace(wergerSection, "\n".join(sortedLines))

    return content
def extractWergerSections(content):
    """Return the stripped text of each ``{{werger-ser}}``…``{{werger-bin}}`` span.

    The opening template may carry arguments (``{{werger-ser|...}}``).  Both
    the opening and the closing template are matched case-insensitively.
    An opening template with no closing template after it is skipped.
    """
    # {{werger-ser}} with optional arguments
    opener = re.compile(r'\{\{werger-ser(?:\|[^\}]+)?}}', re.IGNORECASE)
    # The closer is matched with the same case rules as the opener; it used
    # to be an exact-case ``str.find``, so ``{{Werger-bin}}`` was missed.
    closer = re.compile(r'\{\{werger-bin}}', re.IGNORECASE)

    werger_sections = []
    for match in opener.finditer(content):
        end_match = closer.search(content, match.end())
        if end_match is None:
            continue
        werger_sections.append(content[match.end():end_match.start()].strip())
    return werger_sections
def sort_by_kurdish_alphabet(langSet):
    """Sort *langSet* in place by language name and return it.

    Each item is a dict; its ``'langCode'`` is looked up in the mapping
    returned by ``fetch_language_names`` and the resulting name (or the code
    itself when no name is known) is lower-cased and compared character by
    character using the position of each character in ``kurdish_alphabet``.
    Characters that are not in the alphabet string get position ``-1`` and
    therefore sort first.

    Items without a ``'langCode'`` key get an empty sort key, so they are
    placed before all language entries; the sort is stable, so they keep
    their relative order.
    """
    # Duplicated letters in this string are harmless: ``str.find`` always
    # returns the first position.
    kurdish_alphabet = "ABCCÇDEÊFGHIÎJKLÎMNOPQRSŞTUÛVWXYZabccçdeêfghiîjklîmnopqrsştuûvwxyzǃǀǁǂ"
    pywikibot.output(f"langSet tê rêzkirin")

    lang_codes = [item['langCode'] for item in langSet if 'langCode' in item]
    lang_names = fetch_language_names(lang_codes)

    def kurdish_key(lang_item):
        lang_code = lang_item.get('langCode', '')
        lang_name = lang_names.get(lang_code, lang_code).lower()
        return [kurdish_alphabet.find(char) for char in lang_name]

    langSet.sort(key=kurdish_key)
    return langSet
def fetch_language_names(lang_codes):
    """Return ``{code: name}`` for the codes in *lang_codes* that are known.

    The names come from the JSON page at ``language_data_url``.  Codes that
    are missing from that page are simply absent from the result.  If the
    download fails, the server answers with an error status, or the payload
    is not a JSON object, the problem is printed and ``{}`` is returned.
    """
    pywikibot.output(f"lang_codes: {lang_codes}")
    language_data_url = "https://ku.wiktionary.org/w/index.php?title=MediaWiki:Gadget-translation editor.js/ziman.json&action=raw"

    try:
        # A timeout keeps the bot from hanging forever on a stalled request.
        response = requests.get(language_data_url, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching language names: {e}")
        return {}

    if not isinstance(data, dict):
        print(f"Error fetching language names: unexpected payload type {type(data).__name__}")
        return {}

    language_names = {
        lang_code: data[lang_code]
        for lang_code in lang_codes
        if lang_code in data
    }
    pywikibot.output(f"fetched language_names")
    return language_names
def sanitize_page_title(page_title):
    """Return *page_title* with the characters ``# < > [ ] | { }`` removed."""
    # Characters stripped from the title before it is used as a page name.
    illegal_chars_pattern = r'[#<>[\]|{}]'
    return re.sub(illegal_chars_pattern, '', page_title)
# Language codes whose Wiktionary lives under a different site code.
_WM_LIENS = {
    'cmn': 'zh',
    'fra-nor': 'nrm',
    'ko-Hani': 'ko',
    'lzh': 'zh-classical',
    'nan': 'zh-min-nan',
    'nb': 'no',
    'rup': 'roa-rup',
    'yue': 'zh-yue',
    'zza': 'diq',
}

# Language codes that are used directly as the Wiktionary site code.
_WIKTIOS = frozenset([
    'af', 'am', 'an', 'ang', 'ar', 'ast', 'ay', 'az', 'be', 'bg',
    'bn', 'br', 'bs', 'ca', 'chr', 'co', 'cs', 'csb', 'cy', 'da',
    'de', 'dv', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi',
    'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv',
    'ha', 'he', 'hi', 'hr', 'hsb', 'hu', 'hy', 'ia', 'id', 'ie',
    'io', 'is', 'it', 'iu', 'ja', 'jbo', 'jv', 'ka', 'kk', 'kl',
    'km', 'kn', 'ko', 'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li',
    'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr',
    'ms', 'mt', 'my', 'na', 'nah', 'nds', 'ne', 'nl', 'nn', 'no',
    'oc', 'om', 'or', 'pa', 'pl', 'pnb', 'ps', 'pt', 'qu', 'ro',
    'roa-rup', 'ru', 'rw', 'sa', 'scn', 'sd', 'sg', 'sh', 'si',
    'simple', 'sk', 'sl', 'sm', 'so', 'sq', 'sr', 'ss', 'st', 'su',
    'sv', 'sw', 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'tpi',
    'tr', 'ts', 'tt', 'ug', 'uk', 'ur', 'uz', 'vec', 'vi', 'vo',
    'wa', 'wo', 'yi', 'za', 'zh', 'zh-min-nan', 'zu',
])


def page_exists(lang_code, page_title):
    """Return True if *page_title* exists on the Wiktionary for *lang_code*.

    *lang_code* is mapped to a site code through ``_WM_LIENS`` or, failing
    that, must itself be listed in ``_WIKTIOS``; any other code (including an
    empty one) yields False without contacting any wiki.  A single leading
    ``^`` is dropped from the title, which is then passed through
    ``sanitize_page_title`` and ``remove_diacritics`` before the lookup.

    Redirect pages count as existing.  A missing page, an unknown site, or
    a site reporting the name ``'RemovedSite'`` yields False.  Other errors
    raised while loading the page propagate to the caller.
    """
    if not lang_code:
        return False

    # Resolve the target wiki first so unsupported languages return before
    # the template-expansion request made by remove_diacritics.
    domain = _WM_LIENS.get(lang_code)
    if domain is None:
        if lang_code not in _WIKTIOS:
            return False
        domain = lang_code

    if page_title.startswith("^"):
        page_title = page_title[1:]

    page_title = sanitize_page_title(page_title)
    processed_title = remove_diacritics(lang_code, page_title)

    try:
        site = pywikibot.Site(code=domain, fam="wiktionary")

        # Skip sites that identify themselves as removed.
        if site.sitename() == 'RemovedSite':
            return False

        page = pywikibot.Page(site, processed_title)
        # Debug output
        print(f"Checking page existence: {site}:{page}")

        # Existence is determined by loading the page content.
        try:
            page.get()
        except pywikibot.exceptions.IsRedirectPageError:
            # Redirect pages are considered valid
            return True
        except pywikibot.exceptions.NoPageError:
            return False
        return True
    except pywikibot.exceptions.UnknownSiteError:
        return False
def remove_diacritics(lang_code, text):
    """Return the entry name for *text* in language *lang_code*.

    The default pywikibot site is asked, through the ``expandtemplates`` API
    action, to expand
    ``{{#invoke:ziman/şablon|makeEntryName|<lang_code>|<text>}}`` and the
    expanded wikitext is returned.

    This is best effort: if anything goes wrong the error is printed and
    *text* is returned unchanged.
    """
    # The stray ``@staticmethod`` was removed: this is a module-level function,
    # and staticmethod objects are not directly callable before Python 3.10.
    try:
        site = pywikibot.Site()

        params = {
            'action': 'expandtemplates',
            'format': 'json',
            'text': f'{{{{#invoke:ziman/şablon|makeEntryName|{lang_code}|{text}}}}}',
            'prop': 'wikitext'
        }

        request = api.Request(site=site, parameters=params)
        response = request.submit()

        # The expanded wikitext is nested under 'expandtemplates'.
        return response['expandtemplates']['wikitext']
    except Exception as e:
        print(f"Error expanding template: {e}")
        return text
class AppendTextBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that sorts translation sections and refreshes ``{{W}}`` templates.

    For every page it treats, the translation sections are sorted with
    ``sortAlphabetically`` and each ``{{W|lang|title``, ``{{W+|…``,
    ``{{W-|…`` or ``{{W--|…`` template is rewritten to ``{{W+|…`` when
    ``page_exists`` reports the target page as existing and to ``{{W-|…``
    otherwise.
    """

    summary_key = 'basic-changing'
    use_redirects = False
    update_options = {
        'summary': None,
        'text': '',
        'top': False,
    }

    def treat_page(self) -> None:
        """Sort the current page, update its W templates and save it."""
        page = self.current_page
        pywikibot.output(f"Processing page: {page.title()}")

        page_text = page.text

        # Sort alphabetically
        page_text = sortAlphabetically(page_text)
        pywikibot.output(f"page_text hat rêzkirin")

        # W template: optional sign, language code, page title.  Neither the
        # code nor the title may cross a '|' or '}', so a match cannot run
        # past the end of its own template.
        w_template_pattern = r"\{\{W(\+|-{1,2})?\|([^\|\}]+)\|([^\|\}]+)"
        matches = list(re.finditer(w_template_pattern, page_text))
        pywikibot.output(f"+/- dest pê kir")

        total_matches = len(matches)
        # Existence per (lang_code, page_title), so a template repeated on
        # the page costs only one lookup.
        existence = {}
        # The text is rebuilt from the exact match spans; replacing by
        # string prefix would also rewrite longer titles that merely start
        # with the same characters.
        pieces = []
        cursor = 0

        for current_match, match in enumerate(matches, start=1):
            lang_code = match.group(2)
            page_title = match.group(3)
            pywikibot.output(f"Processing match {current_match}/{total_matches} of {page.title()} +{lang_code}:{page_title}")

            try:
                key = (lang_code, page_title)
                if key not in existence:
                    existence[key] = page_exists(lang_code, page_title)
                new_outcome = '+' if existence[key] else '-'
                new_template = f"{{{{W{new_outcome}|{lang_code}|{page_title}"
            except Exception as e:
                # Best effort: leave this template exactly as it was.
                print(f"Error processing match {current_match}: {e}")
                new_template = match.group(0)

            pieces.append(page_text[cursor:match.start()])
            pieces.append(new_template)
            cursor = match.end()

        pieces.append(page_text[cursor:])
        page_text = "".join(pieces)

        # Save the updated page content
        self.put_current(page_text, summary="Sererastkirin/rêzkirina şablonên {{[[Şablon:W-|W-]]}}, {{[[Şablon:W+|W+]]}} ([[User:Balyozxane/updateW.py|updateW]])")
def main(*args: str) -> None:
    """Parse command-line arguments and run ``AppendTextBot``.

    Global pywikibot arguments and page-generator arguments are consumed
    first.  Of the remaining ``-name[:value]`` arguments, ``summary`` and
    ``text`` take a value (the user is prompted when it is missing); every
    other argument is passed to the bot as a boolean option set to True.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)

    options = {'text': ''}

    for arg in local_args:
        option, _, value = arg.partition(':')
        # Arguments arrive as "-name"; the option key is the bare name.
        option = option.lstrip('-')
        if option in ('summary', 'text'):
            if not value:
                # Keep the answer; it used to be thrown away.
                value = pywikibot.input(f'Please enter a value for {option}')
            options[option] = value
        else:
            options[option] = True

    gen = gen_factory.getCombinedGenerator(preload=True)

    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        bot = AppendTextBot(generator=gen, **options)
        bot.run()
# Run the bot only when this file is executed as a script.
if __name__ == '__main__':
    main()
#</nowiki>