zoukankan      html  css  js  c++  java
  • pyquery的中文编码问题

    # coding=UTF-8
    import urllib.request
    import pyquery
    import requests
    import time
    import json
    import pymysql
    import sys
    import math
    from datetime import datetime
    import time
    import csv
    from time import sleep
    import random
    from bs4 import BeautifulSoup
    import asyncio
    from pyppeteer import launch
    import pyppeteer
    from pyquery import PyQuery as pq
    import chardet
    import codecs
    
    # Parse a locally saved HTML page with pyquery without tripping over the
    # platform default codec (GBK on Chinese Windows): sniff the real encoding
    # from the raw bytes first, then decode the file explicitly with it.
    path = 'D:/code-py-download/02fang/xqxq_demo_utf8.html'

    # Read the raw bytes inside a context manager so the handle is closed
    # immediately instead of being left for the garbage collector.
    with open(path, 'rb') as f:
        txt = f.read()

    # chardet reports None when it cannot make a guess; passing None on to
    # open() would silently select the locale codec again, so fall back to UTF-8.
    encodings = chardet.detect(txt)['encoding'] or 'utf-8'

    with open(path, "r", encoding=encodings) as f:
        content = f.read()

    # The file is no longer needed once its text is in memory, so the parsing
    # happens outside the with-block.
    doc = pq(content)
    name = doc('.tit.clearfix h1>strong')
    title = name.text()
    print(title)  # expected output: 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) #UnicodeDecodeError: 'gbk' codec can't decode byte 
    
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'   #ansi编码都不行
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) # 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # doc = pq(filename=path, encoding='utf-8')  # 直接读也不行
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title) # UnicodeDecodeError: 'gbk' codec can't decode byte
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='utf-8')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='gbk')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
  • 相关阅读:
    as3工程和flex工程的区别
    Timer的repeatCount和currentCount的区别
    mouseChildren为false后,
    flex编译时,会把trace语句也编译进去
    stage和root的区别
    flex编译时,会把trace语句也编译进去
    水瓶座(1.20-2.19)更多星座运程
    如何更改titleWindow组件上的title字体大小?
    转贴关于AsWing和MXML 选项
    Eclipse中的文本编辑器使用技巧
  • 原文地址:https://www.cnblogs.com/yansc/p/15512731.html
Copyright © 2011-2022 走看看