5.3 获得汉字长度和分解
#coding=utf8
"""Demo script: measuring and splitting strings that contain Chinese text.

Runs under Python 2 with arcpy. The demo functions (getlength, printinfo,
strtrim) deliberately feed both byte strings and unicode strings through the
same code to show how the results differ.
"""
import arcpy
import os
import sys
import math
import string
from collections import namedtuple

# Result record returned by str_count(); built once instead of on every call.
_Count = namedtuple('Count', ['total', 'zh', 'en', 'space', 'digit', 'punc'])

# Inclusive code-point ranges treated as Chinese characters: CJK Unified
# Ideographs Extension A, CJK Unified Ideographs, CJK Compatibility Ideographs.
_CJK_RANGES = ((0x3400, 0x4DBF), (0x4E00, 0x9FFF), (0xF900, 0xFAFF))


def _is_cjk_ideograph(char):
    """Return True when the single character *char* lies in _CJK_RANGES."""
    code = ord(char)
    return any(low <= code <= high for low, high in _CJK_RANGES)


def isChineseWord(string):
    """Return True when *string* is one Chinese (CJK ideograph) character.

    Prints which category was detected, then returns the verdict. Letters
    outside the CJK ranges (ASCII or any other alphabet) are reported as
    letters and yield False. Input that is not exactly one alphabetic
    character (empty, longer than one character, digits, punctuation, or a
    single byte of a multi-byte sequence) yields False instead of raising.
    """
    # The length guard keeps ord() from raising on multi-character input.
    if len(string) == 1 and string.isalpha():
        if _is_cjk_ideograph(string):
            print("是汉字")
            return True
        print("是字母")
        return False
    print("不是汉字也不是字母")
    return False


def getChinesenum(s):
    """Return (chinese_count, other_count) for the characters of *s*."""
    cnum = sum(1 for c in s if isChineseWord(c))
    return cnum, len(s) - cnum


def str_count(s):
    """Count the character categories found in *s*.

    Returns a ``Count`` namedtuple with the fields
    ``(total, zh, en, space, digit, punc)``. Every character lands in exactly
    one bucket, so the buckets always add up to ``len(s)``.

    NOTE: the ``zh`` bucket is "alphabetic but not an ASCII letter", so
    letters from other non-ASCII alphabets are counted there as well.
    """
    count_en = count_dg = count_sp = count_zh = count_pu = 0
    for c in s:
        if c in string.ascii_letters:   # English letter
            count_en += 1
        elif c.isdigit():               # digit
            count_dg += 1
        elif c.isspace():               # whitespace
            count_sp += 1
        elif c.isalpha():               # remaining letters: treated as Chinese
            count_zh += 1
        else:                           # everything else: punctuation
            count_pu += 1
    return _Count(len(s), count_zh, count_en, count_sp, count_dg, count_pu)


def getcharlength(mystr):
    """Return a length for *mystr* based on its type.

    For a UTF-8 byte string the result is ``(chars - bytes) // 2 + bytes``,
    which counts a 3-byte character as 2 and a 1-byte character as 1.
    For a unicode string no decoding is needed and the result is simply the
    number of characters.
    """
    # Only byte strings are decoded; calling .decode() on a unicode object
    # would first encode it with the interpreter's default encoding.
    if isinstance(mystr, bytes):
        text = mystr.decode('utf-8')
    else:
        text = mystr
    byte_len = len(mystr)
    char_len = len(text)
    return (char_len - byte_len) // 2 + byte_len


def getlength():
    """Report lengths, category counts and slices for sample strings."""
    # Byte string: len() counts UTF-8 bytes (3 per Chinese character here).
    ss = "我们的gis"
    num = len(ss)
    # Author's note: the u prefix on the template makes no difference.
    arcpy.AddMessage(u"{1} length={0}".format(num, ss))
    arcpy.AddMessage("{1} length={0}".format(num, ss))

    # Unicode string: len() counts characters (1 per Chinese character).
    ss = u"我们的gis"
    num = len(ss)
    arcpy.AddMessage(u"ss={1} length={0}".format(num, ss))
    arcpy.AddMessage("ss={1} length={0}".format(num, ss))

    # getcharlength on unicode: 1 per Chinese character.
    ss = u"我们的gis"
    num = getcharlength(ss)
    arcpy.AddMessage("{1} length={0}".format(num, ss))

    # getcharlength on a byte string: 2 per Chinese character.
    ss = "我们的gis"
    num = getcharlength(ss)
    arcpy.AddMessage("{1} length={0}".format(num, ss))

    # Author's note: the u prefix is required for the per-character counts.
    s = u'我们的gis gisoracle 2019.11.07'
    count = str_count(s)
    arcpy.AddMessage(s)
    arcpy.AddMessage('该字符串共有 {} 个字符,其中有 {} 个汉字,{} 个英文,{} 个空格,{} 个数字,{} 个标点符号。'.format(count.total, count.zh, count.en, count.space, count.digit, count.punc))
    cnum, enum = getChinesenum(s)
    arcpy.AddMessage('该字符串共有 {} 汉字字符,非汉字{}'.format(cnum, enum))

    s = u"我们的gis"
    arcpy.AddMessage('该字符串 {} 取左边4个{}'.format(s, s[0:4]))
    # Author's note: without the u prefix the slice below does not come out
    # right (it cuts bytes, not characters).
    s = "我们的gis"
    arcpy.AddMessage('该字符串 {} 取左边4个{}'.format(s, s[0:4]))


def printinfo():
    """Emit padded and encoded sample messages (cannot be named ``print``)."""
    arcpy.AddMessage("====================我们都是Python程序员=============")
    # Author's note: with or without the u prefix the result is the same.
    arcpy.AddMessage(u"====================我们都是Python程序员=============")
    arcpy.AddMessage("{0:<20s}1".format(u"我们都是程序员"))
    # Author's note: the u prefix is needed on the argument.
    arcpy.AddMessage("{0:<20s}12345678901234567890".format("我们都是程序员"))
    arcpy.AddMessage("%-20s12345678901234567890"%(u"我们都是程序员"))
    # Author's note: the u prefix is needed on the argument.
    arcpy.AddMessage("%-20s12345678901234567890"%("我们都是程序员"))
    # Author's note: encoding to GBK is the workaround for garbled Chinese.
    arcpy.AddMessage(u"没有对应数据".encode('gbk'))


def gettrim(s, n):
    """Split *s* at display width *n*; return ``(first, after)``.

    A Chinese character counts as width 2 and anything else as width 1.
    Characters are appended to ``first`` while the running width stays
    within *n*; once it is exceeded every later character goes to ``after``.
    """
    num = 0
    first = []
    after = []
    for c in s:
        num += 2 if isChineseWord(c) else 1
        if num <= n:
            first.append(c)
        else:
            after.append(c)
    return "".join(first), "".join(after)


def strtrim():
    """Run gettrim on a unicode string and on the same text as bytes."""
    s = u"我们1的gis1"
    first, after = gettrim(s, 6)
    arcpy.AddMessage('{} 取6位,前{} 后{}'.format(s, first, after))
    # Without the u prefix each Chinese character is three separate bytes.
    s = "我们1的gis1"
    first, after = gettrim(s, 6)
    arcpy.AddMessage('{} 取6位,前{} 后{}'.format(s, first, after))


def Main():
    """Run all demos in order."""
    getlength()
    strtrim()
    printinfo()


Main()
5.4 表中读写汉字
#coding=utf8
"""Demo script: read, update and insert Chinese text in a table field.

Script-tool parameters: 0 = table, 1 = name of a text field in that table.
"""
import arcpy
import os
import sys
import math


def readHZ():
    """Print the 1-based row number, the field name and the value of each row."""
    fields = [inField]
    with arcpy.da.SearchCursor(inTable, fields) as cursor:
        for i, row in enumerate(cursor, 1):
            arcpy.AddMessage(u"序号{0}, 字段={1}, 值={2}".format(i,inField,row[0]))


def updateHZ(maxLength=50):
    """Append a Chinese suffix and the row number to the field of every row.

    maxLength: the new value is cut to this many characters before it is
    written back (default 50, the value the script always used).

    NULL values are treated as empty text so that the concatenation does not
    raise TypeError on tables containing nulls.
    """
    fields = [inField]
    with arcpy.da.UpdateCursor(inTable, fields) as cursor:
        for i, row in enumerate(cursor, 1):
            current = row[0] if row[0] is not None else u""
            # Unicode literal: joining it to the field value does not
            # depend on the interpreter's default encoding.
            row[0] = (current + u" 长度" + str(i))[:maxLength]
            cursor.updateRow(row)


def insertHZ():
    """Insert two rows whose field value mixes digits and Chinese text."""
    # The with-block releases the cursor even when insertRow raises.
    with arcpy.da.InsertCursor(inTable, [inField]) as cursor:
        for x in range(2):
            # The row must be a sequence, hence the trailing comma.
            cursor.insertRow((str(x*100)+u"我爱你",))


def main():
    """Run the read, update and insert demos in order."""
    readHZ()
    updateHZ()
    insertHZ()


inTable=arcpy.GetParameterAsText(0)
inField=arcpy.GetParameterAsText(1)
main()
5.5 读写文本文件中汉字
#coding=utf8
"""Demo script: read and write Chinese text in plain text files.

Script-tool parameter: 0 = path of the input text file.
"""
import arcpy
import os
import sys
import math
import codecs


def ReadTXTNEW(txtFile):
    """Return the lines of *txtFile* decoded from GBK as unicode strings."""
    with codecs.open(txtFile,'r','gbk') as f:
        return f.readlines()


def ReadTXT(txtFile):
    """Return the whole content of *txtFile* as one undecoded string."""
    with open(txtFile) as f:
        return f.read()


def ReadTXTList(txtFile):
    """Return the lines of *txtFile* as a list, with ' ' removed from each.

    NOTE(review): the removed text is a single space in this source; the
    original comment only says "delete", so a line break may have been
    intended - confirm against the original script.
    """
    with open(txtFile,"r") as f:
        return [line.replace(' ', '') for line in f]


def WriteTXT(mylist,txtFile):
    """Write every item of *mylist* to *txtFile*, each followed by ' '.

    NOTE(review): the separator is a single space in this source; a line
    break may have been intended - confirm against the original script.
    """
    with open(txtFile,'w') as wfiles:
        for item in mylist:
            wfiles.write(item+' ')


def main(inFile):
    """Echo the lines of *inFile*, then write them plus one GBK line to a copy.

    The copy is placed next to the input with '1' inserted before the file
    extension (``a.txt`` -> ``a1.txt``). Only the final extension is touched,
    so the output can never resolve to the input file itself.
    """
    pList=ReadTXTList(inFile)
    for i, line in enumerate(pList):
        # Author's note: no u prefix on the template here; adding one fails.
        arcpy.AddMessage("{0}={1}".format(i,line))
    root, ext = os.path.splitext(inFile)
    txtFile = root + '1' + ext
    mystr=u"我爱你 gisoracle"
    # Author's note: encode to GBK before writing; appending the unicode
    # string directly produced garbled output.
    pList.append(mystr.encode("GBK"))
    WriteTXT(pList,txtFile)


inFile=arcpy.GetParameterAsText(0)
main(inFile)
5.6 元组和列表中汉字使用
#coding=utf8
"""Demo script: Chinese text stored in tuples and lists (Python 2)."""
import arcpy
import os
import sys
import math


def usetuple():
    """Print each tuple element, then two slices rendered with str()."""
    # Author's note: do not add the u prefix to these literals, otherwise the
    # str() output further down shows unicode escapes instead of the text.
    tup1 = ('我爱你 qq', '我爱你 gisorcle', 2019, 2000)
    for index, element in enumerate(tup1):
        arcpy.AddMessage("{0}={1}".format(index, element))

    # Slice [2:4]: elements 2 and 3 (the end index is excluded).
    numbers_text = str(tup1[2:4])
    arcpy.AddMessage("{0}".format(numbers_text))

    # Slice [0:1]: element 0 only. Author's note: without the
    # 'string-escape' decode the message shows hex escapes, not the text.
    first_text = str(tup1[0:1]).decode('string-escape')
    arcpy.AddMessage("{0}".format(first_text))


def useList():
    """Print each list element, then the first three rendered with str()."""
    # Author's note: do not add the u prefix to these literals, otherwise the
    # str() output further down shows unicode escapes instead of the text.
    List = ['我爱你 qq', '我爱你 gisorcle', "我爱"]
    for index, element in enumerate(List):
        arcpy.AddMessage("{0}={1}".format(index, element))

    # Slice [0:3]: elements 0 to 2 (the end index is excluded). Author's
    # note: without the 'string-escape' decode the message shows hex escapes.
    joined_text = str(List[0:3]).decode('string-escape')
    arcpy.AddMessage("{0}".format(joined_text))


def main():
    """Run the list demo, then the tuple demo."""
    useList()
    usetuple()


main()
5.7 字典使用
#coding=utf8
"""Demo script: map field names to their aliases with a dictionary.

Script-tool parameter: 0 = feature class or table whose fields are listed.
"""
import arcpy
import os
import sys
import math


def main():
    """Print ``name=alias`` for every field, then each (name, alias) pair."""
    alias_by_name = {
        field.name: field.aliasName for field in arcpy.ListFields(inFeature)
    }

    for name, alias in alias_by_name.items():
        arcpy.AddMessage("{0}={1}".format(name, alias))

    for pair in alias_by_name.items():
        # str() of the tuple is decoded with unicode_escape before printing.
        pair_text = str(pair).decode("unicode_escape")
        arcpy.AddMessage("===={0}".format(pair_text))


inFeature=arcpy.GetParameterAsText(0)
main()