zoukankan      html  css  js  c++  java
  • pyquery的中文编码问题

    # coding=UTF-8
    import urllib.request
    import pyquery
    import requests
    import time
    import json
    import pymysql
    import sys
    import math
    from datetime import datetime
    import time
    import csv
    from time import sleep
    import random
    from bs4 import BeautifulSoup
    import asyncio
    from pyppeteer import launch
    import pyppeteer
    from pyquery import PyQuery as pq
    import chardet
    import codecs
    
    # Parse a locally saved HTML page with PyQuery and print the community
    # name, detecting the file's text encoding first so the read does not
    # depend on the platform's default codec.
    path='D:/code-py-download/02fang/xqxq_demo_utf8.html'

    # Read the raw bytes inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(path, 'rb') as raw_file:
        txt = raw_file.read()

    # chardet reports {'encoding': None} when it cannot make a guess (for
    # example on an empty file). Passing None to open() would silently select
    # the locale codec, so fall back to UTF-8 explicitly.
    encodings = chardet.detect(txt)['encoding'] or 'utf-8'

    # Only the read needs the file to be open; parsing happens afterwards.
    with open(path, "r", encoding=encodings) as f:
        content = f.read()

    doc = pq(content)
    name = doc('.tit.clearfix h1>strong')
    title = name.text()
    print(title)  # expected output: 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) #UnicodeDecodeError: 'gbk' codec can't decode byte 
    
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'   #ansi编码都不行
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) # 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # doc = pq(filename=path, encoding='utf-8')  # reading the file directly does not work either
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title) # UnicodeDecodeError: 'gbk' codec can't decode byte
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='utf-8')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='gbk')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
  • 相关阅读:
    mysql 三星索引设置
    mysql 索引长度解释及不使用索引的一种特殊情况
    null作为方法的参数,并在方法里面赋值后的结果是什么?
    线程、调度线程池、异常
    系统服务化,需要考虑的问题
    05-Python之高级语法
    01-python基本语法元素
    04-Python之文件、异常和模块
    03-Python之类
    02-Python之函数
  • 原文地址:https://www.cnblogs.com/yansc/p/15512731.html
Copyright © 2011-2022 走看看