zoukankan html css js c++ java

使用python获取pptx文件的文本内容范例

get_text_from_pptx_pptm.py

#!/bin/python
# -*- coding: utf-8 -*-

from pptx import Presentation
import sys
import base64

# Python 2 only: reload(sys) re-exposes sys.setdefaultencoding so that implicit
# unicode <-> bytes conversions use UTF-8 instead of ASCII. Neither name exists
# on Python 3, where text is already Unicode, so the hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')

# Path of the presentation to read, taken from the first command-line argument.
# Exit with a usage message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s <presentation.pptx|presentation.pptm>" % sys.argv[0])
fileName = sys.argv[1]
# print(fileName)

def tripSpace( str ):
    """Return the given text with all spacing and line-break characters removed.

    Removes the full-width (ideographic, U+3000) space, the ASCII space, tab,
    carriage return, line feed and vertical tab. All other characters are
    kept unchanged.

    NOTE: the published listing had lost its escape sequences (string literals
    were split across physical lines and the vertical tab had degraded to a
    plain "v", which would have deleted every letter v). They are restored
    here as "\\r", "\\n" and "\\v".

    The parameter name shadows the ``str`` builtin; it is kept so the
    function's signature stays unchanged for existing callers.
    """
    # First entry is the full-width space (U+3000), written literally as in
    # the original source.
    for unwanted in ("　", " ", "\t", "\r", "\n", "\v"):
        str = str.replace(unwanted, "")
    return str

prs = Presentation(fileName)


def _notes_text(sld):
    """Return the text of the slide's speaker notes, or "" when it has none."""
    # Accessing notes_slide creates a notes page when none exists, so test
    # has_notes_slide first to avoid touching slides that have no notes.
    if not sld.has_notes_slide:
        return ""
    frame = sld.notes_slide.notes_text_frame
    # notes_text_frame is None when the notes page has no notes placeholder.
    return frame.text if frame is not None else ""


# File summary (notes of the first slide)
file_summary = ""
# File annotations (notes of the second and later slides), collected per slide
note_parts = []
# File content (full text of every shape that has a text frame), per shape
content_parts = []
for i, sld in enumerate(prs.slides, start=1):
    content_parts.extend(shp.text for shp in sld.shapes if shp.has_text_frame)
    if i == 1:
        file_summary = _notes_text(sld)
    else:
        note_parts.append(_notes_text(sld))

# Join once at the end instead of growing the strings with += in the loop.
file_note = "".join(note_parts)
file_content = "".join(content_parts)

# The summary is emitted Base64-encoded; the encode/decode pair makes the
# bytes/text conversion explicit so the line prints the same on Python 2 and 3.
print(base64.b64encode(file_summary.encode("utf-8")).decode("ascii"))
print(tripSpace(file_note))
print(tripSpace(file_content))

查看全文

相关阅读:
私活。
sql server 模拟数组【转】
Updlock 与 Holdlock
连上交换机后电脑无法上网
 linux的发展
 MySQL5.7中，用root用户登陆不进去数据库，报以下错误，然后重新修改了密码，好了。
nginx反向代理tomcat配置ssl
tomcat日志的切割脚本
 重启nginx报错：[error] invalid PID number "" in "/application/nginx-1.13.3/logs/nginx.pid"
数据盘的挂载

原文地址：https://www.cnblogs.com/gaoBlog/p/14042502.html