zoukankan      html  css  js  c++  java
  • pyquery的中文编码问题

    # coding=UTF-8
    import urllib.request
    import pyquery
    import requests
    import time
    import json
    import pymysql
    import sys
    import math
    from datetime import datetime
    import time
    import csv
    from time import sleep
    import random
    from bs4 import BeautifulSoup
    import asyncio
    from pyppeteer import launch
    import pyppeteer
    from pyquery import PyQuery as pq
    import chardet
    import codecs
    
    # Parse a locally saved HTML page with pyquery and print the text of the
    # <strong> inside the h1 under the ".tit.clearfix" element.
    path='D:/code-py-download/02fang/xqxq_demo_utf8.html'

    # Read the raw bytes exactly once, and close the handle deterministically.
    with open(path, 'rb') as f:
        txt = f.read()

    # chardet reports None as the encoding when it cannot make a guess (for
    # example on an empty file). Falling back to UTF-8 keeps the decode below
    # from silently using the platform default codec (GBK on a Chinese-locale
    # Windows), which is what raises UnicodeDecodeError in the first place.
    encodings = chardet.detect(txt)['encoding'] or 'utf-8'

    # Decode the bytes already in memory instead of reopening the file in
    # text mode.
    content = txt.decode(encodings)

    doc = pq(content)
    name = doc('.tit.clearfix h1>strong')
    title = name.text()
    print(title)  # prints: 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) #UnicodeDecodeError: 'gbk' codec can't decode byte 
    
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'   #ansi编码都不行
    # with open(path, "r")as f:
    #     content = f.read()
    #     doc = pq(content)
    #     name=doc('.tit.clearfix h1>strong')
    #     title=name.text()
    #     print(title) # 扬州天下花园
    
    # path='D:/code-py-download/02fang/xqxq_demo_utf8.html'
    # doc = pq(filename=path, encoding='utf-8')  # 直接读也不行
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title) # UnicodeDecodeError: 'gbk' codec can't decode byte
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='utf-8')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
    
    # path='D:/code-py-download/02fang/小区详情demo.txt'
    # doc = pq(filename=path, encoding='gbk')
    # name=doc('.tit.clearfix h1>strong')
    # title=name.text()
    # print(title)  # æ¬å·¤©ä¸è±å
  • 相关阅读:
    ERP渠道管理添加验证和查询(二十二)
    SqlServer导入Excel数据
    WebApi帮助类
    SqlServer 递归查询树
    SqlServer查看表、存储过程、耗时查询、当前进程、开销较大的语句
    Excel上传找到错误数据类型
    索引Hint提示(INDEX Hint)
    SqlServer批量Sql一个表的数据导入到另一个数据
    SqlServer 游标
    JAVA运算符
  • 原文地址:https://www.cnblogs.com/yansc/p/15512731.html
Copyright © 2011-2022 走看看