zoukankan      html  css  js  c++  java
  • Python 批量翻译 使用有道api;

    妹子是做翻译相关的,遇到个问题,要求得到句子中的所有单词的 音标;  

    有道翻译只能对单个单词翻译音标,不能对多个单词或者句子段落翻译音标;

     手工一个一个翻的话那就要累死人了.....于是就让我写个翻译音标工具

    一开始没想到该怎么做,之后突然想到可以利用有道翻译的网页 API 来逐个查询每个单词的音标;

    选择了python语言来写;也想过用C#或者c++来做,但是要用到curl库,解析json代码也麻烦;就直接用python写了;

    有道翻译 API 网站(需要先申请 key,然后把代码中的 self.key = 'xxxx' 和 self.keyfrom = 'xxxx' 替换成自己申请到的值即可):

    http://fanyi.youdao.com/openapi?path=data-mode

     后来妹子说,他们有时候需要处理 srt 字幕文件的音标翻译,一句一句输入太慢了,
    想要一个能直接读取 srt 文件并输出 txt 文件的工具;

    下面上代码: 支持单行输入及输出:

    # -*- coding: utf-8 -*-
    import sys
    import urllib2
    import re
    import json
    import string
    class Youdao:
        """Small client for the Youdao open translation HTTP API.

        The API key and ``keyfrom`` identifier are stored on the instance so
        they can be replaced after construction.
        """

        def __init__(self):
            self.url = 'http://fanyi.youdao.com/openapi.do'
            self.key = '1106591478'
            self.keyfrom = 'left69'

        def get_translation(self,words):
            """Query the API for *words* and print each returned translation.

            Args:
                words: the text to translate (a byte string).

            Prints one line per entry of the response's "translation" list.
            Nothing is printed when the response carries no such list.
            """
            # Local import keeps this block self-contained; the file itself
            # only imports urllib2.
            import urllib

            # The query must be percent-encoded, otherwise spaces or
            # non-ASCII text produce an invalid request URL.
            url = (self.url + '?keyfrom=' + self.keyfrom + '&key=' + self.key
                   + '&type=data&doctype=json&version=1.1&q='
                   + urllib.quote(words))
            response = urllib2.urlopen(url)
            try:
                result = response.read()
            finally:
                # Always release the connection, even if reading fails.
                response.close()
            json_result = json.loads(result)
            for i in json_result.get("translation", []):
                print(i)
    			
    # Module-level client instance of the Youdao class.
    youdao = Youdao()
    def get_yinbiao(words):
        """Print *words* without punctuation, then its word-by-word phonetics.

        Every punctuation character except the apostrophe is replaced by a
        space. The cleaned text is printed first; then each space-separated
        word is looked up with ``SendGet`` and the results are printed on one
        line, each preceded by a single space.

        Args:
            words: the sentence to process.
        """
        splitStr = words
        for c in string.punctuation:
            # Keep apostrophes so contractions such as "don't" stay intact.
            if c != "'":
                splitStr = splitStr.replace(c, ' ')
        print(" " + splitStr)

        pieces = []
        for token in splitStr.split(' '):
            word = token.strip()
            # Removed punctuation leaves empty tokens behind; emit an empty
            # slot for them instead of issuing a pointless network request.
            pieces.append(' ' + (SendGet(word) if word else ''))
        print(''.join(pieces))
                
    def SendGet(str):
        """Return the UK phonetic spelling of one word via the Youdao API.

        Args:
            str: the word to look up (name kept for backward compatibility
                even though it shadows the builtin).

        Returns:
            The first non-empty phonetic variant reported under
            ``basic -> uk-phonetic`` with square brackets removed. If the
            response has no ``basic`` entry, the part of its ``query`` field
            before the first ';' is returned. The lower-cased input is
            returned when the input is blank, the service answers
            "no query", reports a non-zero ``errorCode``, or has no UK
            phonetic for the word. "it" and "mr" are answered from
            hard-coded values without contacting the service.
        """
        judge = str.lower()
        if judge == "it":
            return "it"
        if judge == "mr":
            return "'miste(r)"
        if not judge.strip():
            # Nothing to look up; skip the network round trip.
            return judge

        # Local import keeps this block self-contained; the file itself
        # only imports urllib2.
        import urllib

        # NOTE: the API credentials are hard-coded in this URL.
        # The word is percent-encoded so apostrophes, spaces and non-ASCII
        # bytes cannot corrupt the request.
        url = ("http://fanyi.youdao.com/openapi.do?keyfrom=left69&key=1106591478"
               "&type=data&doctype=json&version=1.1&q=" + urllib.quote(str.strip()))
        res_data = urllib2.urlopen(urllib2.Request(url))
        try:
            res = res_data.read()
        finally:
            # Always release the connection, even if reading fails.
            res_data.close()

        if res == "no query":
            return judge
        hjson = json.loads(res)
        if hjson['errorCode'] != 0:
            return judge

        if 'basic' in hjson:
            if 'uk-phonetic' not in hjson['basic']:
                return judge
            danci = hjson['basic']['uk-phonetic'].replace('[', '').replace(']', '')
            # Several variants may be listed, separated by ';' or ',':
            # keep only the first non-empty one.
            for sep in (";", ","):
                if sep in danci:
                    for part in danci.split(sep):
                        if part:
                            return part
            return danci
        if 'query' in hjson:
            # Part before the first ';' (the whole value when there is none).
            return hjson['query'].split(';')[0]
        return judge
    # Interactive prompt: keep reading sentences until the user types 'quit',
    # printing the phonetic line for each one.
    for msg in iter(lambda: raw_input("Enter input:"), 'quit'):
        get_yinbiao(msg)
        # Alternative output: youdao.get_translation(msg)
    

    上代码: 支持 srt格式的字幕

    # -*- coding: utf-8 -*-
    import sys
    import urllib2
    import re
    import json
    import string
    import os
    
    import sys
    reload(sys)
    sys.setdefaultencoding( "utf-8" )
    
    class Youdao:
        """Client that turns English text into phonetics via the Youdao API.

        Each word is looked up separately because the service only reports
        phonetics for single words.
        """

        def __init__(self):
            self.url = 'http://fanyi.youdao.com/openapi.do'
            self.key = '1106591478'
            self.keyfrom = 'left69'

        def get_yinbiao(self,words):
            """Return the phonetic transcription of every word in *words*.

            Every punctuation character except the apostrophe is replaced by
            a space, the text is split on spaces, and each word is looked up
            with ``SendGet``.

            Args:
                words: one line of text; it may end with a line terminator.

            Returns:
                The per-word results concatenated, each preceded by one
                space. Empty tokens (left behind by removed punctuation or a
                trailing newline) contribute only that space.
            """
            splitStr = words
            for c in string.punctuation:
                # Keep apostrophes so contractions such as "don't" stay intact.
                if c != "'":
                    splitStr = splitStr.replace(c, ' ')

            pieces = []
            for token in splitStr.split(' '):
                # Strip the line terminator that lines read from a file carry
                # on their last word; it must never reach the request URL.
                word = token.strip()
                # Empty tokens are not worth a network request.
                pieces.append(' ' + (self.SendGet(word) if word else ''))
            return ''.join(pieces)

        def SendGet(self,str):
            """Return the UK phonetic spelling of one word.

            Args:
                str: the word to look up (name kept for backward
                    compatibility even though it shadows the builtin).

            Returns:
                The first non-empty phonetic variant reported under
                ``basic -> uk-phonetic`` with square brackets removed. If
                the response has no ``basic`` entry, the part of its
                ``query`` field before the first ';' is returned. The
                lower-cased input is returned when the input is blank, the
                service answers "no query", reports a non-zero
                ``errorCode``, or has no UK phonetic for the word. "it" and
                "mr" are answered from hard-coded values without contacting
                the service.
            """
            judge = str.lower()
            if judge == "it":
                return "it"
            if judge == "mr":
                return "'miste(r)"
            if not judge.strip():
                # Nothing to look up; skip the network round trip.
                return judge

            # Local import keeps this block self-contained; the file itself
            # only imports urllib2.
            import urllib

            # NOTE(review): "Trans" is appended to keyfrom here, so the value
            # sent differs from self.keyfrom -- confirm this is intended.
            # The word is percent-encoded so apostrophes, spaces and
            # non-ASCII bytes cannot corrupt the request.
            url = (self.url + "?keyfrom=" + self.keyfrom + "Trans&key=" + self.key
                   + "&type=data&doctype=json&version=1.1&q="
                   + urllib.quote(str.strip()))
            res_data = urllib2.urlopen(urllib2.Request(url))
            try:
                res = res_data.read()
            finally:
                # Always release the connection, even if reading fails.
                res_data.close()

            if res == "no query":
                return judge
            hjson = json.loads(res)
            if hjson['errorCode'] != 0:
                return judge

            if 'basic' in hjson:
                if 'uk-phonetic' not in hjson['basic']:
                    return judge
                danci = hjson['basic']['uk-phonetic'].replace('[', '').replace(']', '')
                # Several variants may be listed, separated by ';' or ',':
                # keep only the first non-empty one.
                for sep in (";", ","):
                    if sep in danci:
                        for part in danci.split(sep):
                            if part:
                                return part
                return danci
            if 'query' in hjson:
                # Part before the first ';' (the whole value when there is none).
                return hjson['query'].split(';')[0]
            return judge
    # Driver: repeatedly ask for a subtitle file and write "<name>_out.txt"
    # containing each text line followed by its phonetic transcription.
    youdao = Youdao()

    # Switch to sys.path[0] so bare file names typed at the prompt are
    # resolved relative to it.
    srt_path = sys.path[0]
    os.chdir(srt_path)
    FileNames = os.listdir(srt_path)  # not used below; kept as a module-level name

    while True:
        d_file = raw_input("Enter file name:")
        if d_file == 'q':
            break

        # Read the whole file once: this gives both the line count used for
        # the progress display and the lines to process. Read-only mode is
        # sufficient, and the handle is closed as soon as reading finishes.
        try:
            with open(d_file, 'r') as src:
                srt_lines = src.readlines()
        except IOError as err:
            # A mistyped name should not end the interactive session.
            print("Cannot read %s: %s" % (d_file, err))
            continue

        count = len(srt_lines)
        print(count)

        # splitext removes only the final extension, so names such as
        # "./a.b.srt" map to "./a.b_out.txt".
        w_file = os.path.splitext(d_file)[0] + "_out.txt"

        pocess = 1
        with open(w_file, 'w') as Wfile:
            for line, data in enumerate(srt_lines, 1):
                # NOTE(review): assumes every subtitle entry occupies exactly
                # five lines, with the text to transcribe on the 3rd line and
                # a second text line on the 4th -- confirm against the input.
                lines = line % 5
                if lines == 3:
                    # count / 5 entries in total, hence the factor 500 to
                    # express the number of processed entries as a percentage.
                    pp = pocess * 500 // count
                    print("Process:" + ('%d' % pp) + "%")
                    pocess = pocess + 1
                    Wfile.write(data)
                    Wfile.write(youdao.get_yinbiao(data) + "\n")
                elif lines == 4:
                    Wfile.write(data + "\n")

        print("翻译 success!")
        print(" ")
    
  • 相关阅读:
    什么是用户画像——从零开始搭建实时用户画像(一)
    一站式Kafka平台解决方案——KafkaCenter
    Druid 0.17入门(4)—— 数据查询方式大全
    流媒体与实时计算,Netflix公司Druid应用实践
    解读银行卡支付背后的原理
    求求你了,不要再自己实现这些逻辑了,开源工具类不香吗?
    编程坑太多,Map 集合怎么也有这么多坑?一不小心又踩了好几个!
    设计数据库 ER 图太麻烦?不妨试试这两款工具,自动生成数据库 ER 图!!!
    一口气带你踩完五个 List 的大坑,真的是处处坑啊!
    轻轻一扫,立刻扣款,付款码背后的原理你不想知道吗?|原创
  • 原文地址:https://www.cnblogs.com/left69/p/6000448.html
Copyright © 2011-2022 走看看