ویکی‌پدیا:درخواست‌های ربات/ربات مترجم بر اساس میان‌ویکی/ویرایش 0

ربات برای ترجمه مقاله به کمک اینترویکی

برای شروع، کدهای زیر را در یک فایل متنی با پسوند ‎.py در همان پوشه‌ای که دیگر ربات‌های استاندارد ویکی‌پدیا (pywikipedia) قرار دارند ذخیره کنید و سپس برای آزمایش، فایل را اجرا نمایید.

موارد مورد نیاز

برای اجرای این ربات موارد زیر مورد نیاز است:

پایتون ۲٫۶ یا ۲٫۷ (این کد بر روی پایتون خانوادهٔ ۳ کار نمی‌کند)
مجموعه رباتهای استاندارد ویکی‌پدیا (pywikipedia)
یک برنامهٔ ویرایشگر متن مانند Notepad (که معمولاً در همهٔ سیستم‌عامل‌ها به‌صورت پیش‌فرض موجود است)

#!/usr/bin/python
# -*- coding: utf-8  -*-
#
# Reza(User:reza1615), 2011
#
# Distributed under the terms of the CC-BY-SA 3.0 .
import catlib ,pprint ,pagegenerators
import wikipedia,add_text
import codecs,time,string,re
 
# Source wiki: the pages to translate are read from the 'en' site.
site = wikipedia.getSite('en')
# Category whose member pages are processed; edit the title to work on
# another category.
cat = catlib.Category(site, u'Category:Roads in Iran')
# Page generator built from that category; the main loop iterates over it.
gen = pagegenerators.CategorizedPageGenerator(cat)
 
 
def englishdictionry( enlink ):
    """Return a wikilink to the Persian counterpart of an English page.

    The wikitext of the 'en' page titled *enlink* is fetched (following one
    redirect) and searched for its first ``[[fa:...]]`` interwiki link.

    Args:
        enlink: Title of the page on the 'en' site.

    Returns:
        ``'[[<fa title>$]]'`` when an interwiki link is found, otherwise
        ``'[[<enlink>$]]'``.  The trailing ``$`` is a marker that flags the
        link as already handled; callers in this script strip it again.
    """
    fallback = '[[' + enlink + '$]]'
    page = wikipedia.Page( wikipedia.getSite( 'en' ),enlink )

    try:
        wikitext = page.get()
    except wikipedia.IsRedirectPage:
        # Resolving the target is kept inside the try block as well: an error
        # raised from within an except-handler would otherwise escape and
        # abort the whole run.
        try:
            wikitext = page.getRedirectTarget().get()
        except Exception:
            return fallback
    except Exception:
        # Best effort: any fetch problem (missing page, server error, ...)
        # leaves the English link in place.  ``Exception`` rather than a bare
        # ``except`` so that Ctrl-C / SystemExit still stop the script.
        return fallback

    prefix = '[[fa:'
    start = wikitext.find( prefix )
    if start == -1:
        return fallback

    start += len( prefix )
    end = wikitext.find( ']]',start )
    # An unclosed link runs to the end of the text, as str.split() would give.
    if end == -1:
        fatitle = wikitext[start:]
    else:
        fatitle = wikitext[start:end]

    # An empty interwiki link carries no usable title.
    if not fatitle.strip():
        return fallback
    return '[[' + fatitle + '$]]'
 
 
 
def farsiwriter( text,filename = 'resultr.txt' ):
    """Append *text* to a UTF-8 encoded result file.

    Args:
        text: Unicode text to append.
        filename: Path of the output file; it is created when missing.
            Defaults to ``'resultr.txt'`` in the current directory.
    """
    # The with-statement closes the file, so no explicit close() is needed.
    with codecs.open( filename,mode = 'a',encoding = 'utf8' ) as out:
        out.write( text )
# Disabled alternative: save the text directly to a page on the 'fa' wiki
# instead of writing it to a file.  It needs a page name (`address`) that is
# not passed to this function.
#    pagename = address
#    wikipedia.output( u"Loading %s..." % pagename )
#    sitefa = wikipedia.getSite( 'fa' )
#    pageinput = wikipedia.Page( sitefa,pagename )
#    pageinput.put( text,comment = 'ربات مترجم',watchArticle = None,minorEdit = True )  # comment is Persian for "translator bot"
 
 
# Innermost wikilink: '[[', then any text containing neither '[[' nor ']]',
# then ']]'.  Text outside links and unclosed '[[' never match.
_WIKILINK_RE = re.compile( r'\[\[((?:(?!\[\[|\]\]).)*)\]\]',re.DOTALL )
# Links whose target contains one of these substrings are media links and are
# left untouched.
_MEDIA_MARKERS = ( 'file:','File:','Image:','image:' )
# Pages that are never processed, in the form produced by str(page).
_SKIPPED_PAGES = ( '[[en:AH1]]','[[en:AH2]]' )


def _unmark( marked_link ):
    """Turn the '[[Title$]]' value returned by englishdictionry into '[[Title]]'."""
    if marked_link.endswith( '$]]' ):
        return marked_link[:-3] + ']]'
    return marked_link


def _translate_links( text ):
    """Return *text* with every translatable wikilink replaced by its fa link.

    The whole link, including any '|label' part, is replaced by the link that
    englishdictionry returns for its target.  Empty targets, targets holding
    a '{' (template syntax) and media links are kept as they are.
    """
    translated = {}  # target -> replacement, so each title is fetched once

    def _swap( match ):
        original = match.group( 0 )
        target = match.group( 1 ).split( '|' )[0].strip()
        if target == '' or '{' in target:
            return original
        if any( marker in target for marker in _MEDIA_MARKERS ):
            return original
        if target not in translated:
            translated[target] = _unmark( englishdictionry( target ) )
        pprint.pprint( original )
        pprint.pprint( translated[target] + '====' )
        return translated[target]

    # A single substitution pass needs no '$' bookkeeping inside the text, so
    # dollar signs that belong to the article itself survive.
    return _WIKILINK_RE.sub( _swap,text )


for pagework in gen:
    # Fetch the wikitext of the page.
    try:
        text = pagework.get( force = False,get_redirect = False,throttle = True,sysop = False,change_edit_time = True ) # text = page.get() <-- is the same
    except wikipedia.NoPage: # missing page: carry on with empty text
        text = ''
    except wikipedia.IsRedirectPage: # redirects are not translated
        wikipedia.output( u'%s is a redirect!' % pagework )
        continue # skip this page only; the other pages are still processed
    except wikipedia.Error: # any other library error: report and skip
        wikipedia.output( u"Some Error, skipping.." )
        continue

    # NOTE(review): the skip list implies str(page) yields '[[en:Title]]';
    # confirm how it renders non-ASCII titles.
    pagelink = str( pagework )
    if pagelink in _SKIPPED_PAGES:
        continue
    pprint.pprint( pagelink )

    text = _translate_links( text )
    pprint.pprint( text )

    # Look up the Persian title for the page itself, then reduce the returned
    # '[[Title$]]' to the bare title.
    title = pagelink.replace( '[[','' ).replace( ']]','' ).strip()
    title = _unmark( englishdictionry( title ) )
    title = title.replace( '[[','' ).replace( ']]','' ).strip()

    textresult = u'xxx\nXXX%sXXX\n%s\nxxx\n' % ( title,text )
    farsiwriter( textresult )