zoukankan html css js c++ java

python图片下载&本地图片对比

　　管理系统用户头像出现未识别图像，即cdn路径下的图片文件正常，只是较多图片为大小5k的小图片，现需找出这些图片对应的用户。

　　注：本博只是为避免将来重复造轮子，不做额外赘述

　　思路

　　下载 id - img 一一对应的图片到本地

　　通过opencv，将图片文件读取为Mat矩阵，转灰度图，计算dhash，发现未识别图片与参考dhash的汉明距离小于2（即不超过1）

　　故，基于dhash完成图片对应用户id的筛选即可

　　图片爬取

　　页面爬取

#! /usr/bin/env python
# -*- coding:utf-8 -*-
# __author__ = "NYA"

import urllib
import urllib.request
from bs4 import BeautifulSoup
import sys
import chardet


def download(url, path):
    """Download every image listed in the HTML table served at ``url``.

    The page is fetched and parsed with BeautifulSoup. The first
    ``table > tr`` row is skipped (it is treated as the header row). For each
    remaining row, the text of the first ``td`` is used as the local file
    name and the text of the fifth ``td`` as the image URL to retrieve.

    Args:
        url: Address of the HTML page containing the table.
        path: Prefix (directory including its trailing separator) to which
            the first-cell text is appended to form the local file path.

    Returns:
        None. Files are written to disk as a side effect.
    """
    req = urllib.request.Request(url)
    # Close the HTTP response deterministically once the body is read.
    with urllib.request.urlopen(req) as response:
        content = response.read()

    # chardet returns {'encoding': None, ...} when it cannot guess, so a
    # dict default is not enough; fall back explicitly to UTF-8.
    infoencode = chardet.detect(content).get('encoding') or 'utf-8'
    # BeautifulSoup accepts str directly; re-encoding to the filesystem
    # encoding (as before) could fail or drop characters for no benefit.
    html = content.decode(infoencode, 'ignore')

    soup = BeautifulSoup(html, 'html.parser')
    table_tr = soup.select('table > tr')
    print(len(table_tr))

    # Skip the first row, which holds the column headers.
    for tr_td in table_tr[1:]:
        cells = tr_td.findAll('td')
        if len(cells) < 5:
            # Row has no name/URL columns; nothing to download.
            continue
        name = cells[0].getText().strip()
        image_url = cells[4].getText().strip()
        if not name or not image_url:
            continue
        local_path = path + name
        print(image_url)
        urllib.request.urlretrieve(image_url, local_path)

# Address of the page that lists the images. NOTE(review): 'aaa' is not a
# valid URL and looks like a placeholder -- replace with the real page address.
url = 'aaa'
# Target directory; backslashes are escaped so the literal is valid Python
# and keeps its trailing separator (the file name is appended directly).
path = 'G:\\images\\used\\'
download(url, path)

　　csv爬取

#!/usr/bin/python3
# -*- coding:utf8 -*-

import imghdr
import urllib.request

# Download the image referenced by each CSV row and record the rows whose
# downloaded file is not recognised as an image.
#
# Column 0 of each row is used as the file name stem and column 2 as the
# image URL; the first line of the CSV is skipped as a header.
#
# NOTE(review): imghdr is deprecated since Python 3.11 and removed in 3.13;
# this script needs an older interpreter or a replacement check.

# Backslashes are escaped so the Windows paths are valid string literals.
path = 'G:\\images\\used\\'
csv_path = 'G:\\images\\robot1.csv'
error_log_path = 'G:\\images\\check_error.txt'

error_images = []
counter = 0
# The error log gets its own handle (text mode) so it is neither shadowed by
# the CSV handle nor left unclosed.
with open(csv_path, 'r', encoding="utf-8") as csv_file, \
        open(error_log_path, 'w', encoding="utf-8") as error_log:
    csv_file.readline()  # skip the header row
    for line in csv_file:
        try:
            fields = line.split(",")
            name = fields[0]
            # Strip so a trailing newline/space never ends up in the request.
            url = fields[2].strip()
            local_path = path + name + '.jpg'
            print(url)
            print(local_path)
            urllib.request.urlretrieve(url, local_path)
            if imghdr.what(local_path) is None:
                error_log.write(name + ',' + url + ',' + local_path + '\n')
                error_images.append(url)
        except Exception as exc:
            # Best effort: report the offending row and keep going, but let
            # KeyboardInterrupt/SystemExit propagate.
            print(line)
            print(exc)
        print(counter)
        counter = counter + 1
print(counter)
print(error_images)

　　dhash筛选

public class ImageCheck {

    static {
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
    }

    public static void main(String[] args) throws IOException {
//        Mat mat = Imgcodecs.imread("G:\images\used\4.jpg");
//        Mat mat1 = Imgcodecs.imread("G:\images\used\7.jpg");
//        Integer dhashGRAY28 = DhashDetector.getDhashGRAY28(mat);
//        Integer dhashGRAY281 = DhashDetector.getDhashGRAY28(mat1);
//        System.out.println(dhashGRAY28);
//        System.out.println(dhashGRAY281);

        File file = new File("G:\images\used");
        File[] tempFiles = file.listFiles();
        List<String> params = new ArrayList<>();

        Integer dhash = 268423200;
        for (int i = 0 ; i < tempFiles.length;i++) {
            if (tempFiles[i].isFile()){
                params.add(tempFiles[i].getPath());
            }
        }

        BufferedWriter bwScdPca = new BufferedWriter(new FileWriter("G:\images\error1.txt"));
        List<Integer> res = new ArrayList<>();
        for (String path:params) {
            String[] split = path.split("\\");
            String now = split[split.length - 1].replace(".jpg", "");
            System.out.println(now);
            Mat mat = Imgcodecs.imread(path);
            try {
                Integer dhashGRAY28 = DhashDetector.getDhashGRAY28(mat);
                if (DhashDetector.calcHammingDistance(dhash,dhashGRAY28) < 2) {
                    res.add(Integer.parseInt(now));
                }
            } catch (Exception e) {
                System.out.println(e);
            }

            mat.release();
        }
        res = res.stream().sorted().collect(Collectors.toList());
        System.out.println(res);
        for (Integer re :
                res) {
            bwScdPca.write(re+"");
            bwScdPca.newLine();
            bwScdPca.flush();
        }

        bwScdPca.close();
    }

}

查看全文

相关阅读:
mybatis 二级缓存
 前端学习记
 消息队列高手课笔记11
cache业务
 这个前端课程主要讲mui框架
 spring cloud stream
最近学习freemarker
说点什么
 即将进入Windows 11时代，DevExpress控件将会有哪些改变呢？
UI开发框架Kendo React R3 2021更新亮点——新的 React 组件

原文地址：https://www.cnblogs.com/nyatom/p/10820894.html