zoukankan      html  css  js  c++  java
  • Make a word counter for images with the help of a PaddleHub model

    说明

    接着上篇,实现拍摄统计功能。

    https://www.cnblogs.com/lightsong/p/14592798.html

    功能明细:

    • 实时展示摄像头内容
    • 实时统计
    • 统计结果在展示视频中显示

    技术依赖

    上篇是基于linux环境, 由于需要添加实时展示功能,需要切换到windows。

    OCR模块依赖

    https://paddlehub.readthedocs.io/zh_CN/develop/quick_experience/cmd_quick_run.html

    需要安装 shapely 和 pyclipper 库。

    # 下载待测试的图片
    $ wget https://paddlehub.bj.bcebos.com/model/image/ocr/test_ocr.jpg
    
    # 该Module依赖于第三方库shapely和pyclipper,需提前安装
    $ pip install shapely
    $ pip install pyclipper
    
    # 通过命令行方式实现文字识别任务
    $ hub run chinese_ocr_db_crnn_mobile --input_path test_ocr.jpg --visualization=True --output_dir='ocr_result'

    pyclipper库是 clipper工具的一个封装, 需要在windows上安装clipper。

    http://www.angusj.com/delphi/clipper.php

    pyclipper提供的“From Source”安装方法,可以安装这个依赖。

    https://github.com/fonttools/pyclipper

    From source

    Cython required.

    Clone the repository:

    git clone git@github.com:fonttools/pyclipper.git
    

    Install:

    python setup.py install
    

    After every modification of .pyx files compile with Cython:

    python setup.py build_ext --inplace
    

    cv2.VideoCapture捕捉和显示控制

    注意:cv2.VideoCapture 的参数为 0 时,表示捕捉摄像头画面;传入文件路径时,则读取对应的视频文件。

    https://blog.csdn.net/j18423532754/article/details/106520257

    import time
    import cv2
    # Play a video file, overlaying the measured frame rate on every frame.
    # Raw string: the Windows path contains backslashes, and "\v" in a normal
    # literal is the vertical-tab escape, which would corrupt the path.
    cap = cv2.VideoCapture(r"D:\jc\Myself\video\Hacker_glasses_07_Videvo.mov")  # read from a file
    start_time = time.time()
    counter = 0
    # Width of the video frames
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    # Height of the video frames
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)  # average frame rate reported for the video
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (end of file or read failure); drawing on
            # it would fail, so stop the playback loop.
            break
        # Space pauses until another key is pressed, q quits
        key = cv2.waitKey(1) & 0xff
        if key == ord(" "):
            cv2.waitKey(0)
        if key == ord("q"):
            break
        counter += 1  # frames seen since the last measurement
        elapsed = time.time() - start_time
        if elapsed != 0:  # avoid dividing by zero when the clock has not advanced
            current_fps = counter / elapsed
            cv2.putText(frame, f"FPS {current_fps:.1f}", (500, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                        3)
            # Display at half of the original resolution
            src = cv2.resize(frame, (frame_width // 2, frame_height // 2), interpolation=cv2.INTER_CUBIC)
            cv2.imshow('frame', src)
            print("FPS: ", current_fps)
            # Restart the measurement window
            counter = 0
            start_time = time.time()
        if fps > 0:
            # Pace playback at the source frame rate; skipped when the frame
            # rate is reported as 0 so that 1 / fps cannot raise.
            time.sleep(1 / fps)
    cap.release()
    cv2.destroyAllWindows()

    Code

    https://github.com/fanqingsong/writing_words_counter

    # !pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple #由于PaddleHub升级比较快,建议大家直接升级到最新版本的PaddleHub,无需指定版本升级
    # !pip install shapely -i https://pypi.tuna.tsinghua.edu.cn/simple #该Module依赖于第三方库shapely,使用该Module之前,请先安装shapely
    # !pip install pyclipper -i https://pypi.tuna.tsinghua.edu.cn/simple #该Module依赖于第三方库pyclipper,使用该Module之前,请先安装pyclipper
    #
    import os
    import time
    
    import paddlehub as hub
    import cv2 as cv
    import shutil
    
    
    class WordsCounter:
        """Count the characters that PaddleHub OCR recognises in images.

        Works either on a single image file (``count_words_for_one_image``) or
        on live camera frames (``watch_camera``), where the count is drawn on
        each frame and the annotated frames are saved as snapshots.
        """

        def __init__(self):
            """Load the OCR module and set the workspace/snapshot paths."""
            self._ocr = hub.Module(name="chinese_ocr_db_crnn_server")
            self._workspace_path = "./workspace"
            self._snapshot_path = f"{self._workspace_path}/snapshot"

        def _get_image_data(self, image_path):
            """Read *image_path* with OpenCV.

            Returns the value of ``cv.imread``, or ``None`` when the image
            could not be read (the caller's OCR step treats ``None`` as
            "no results").
            """
            image_data = cv.imread(image_path)

            if image_data is None:
                # Accessing .shape on None would raise AttributeError.
                print(f"failed to read image: {image_path}")
                return None

            print(type(image_data))
            print(f"image_data.shape={image_data.shape}")

            return image_data

        def _get_ocr_results_from_image_data(self, image_data):
            """Run OCR on one image and return the raw result list.

            Returns an empty list when *image_data* is ``None``.
            """
            if image_data is None:
                print("image_data is none")
                return []

            ocr_results = self._ocr.recognize_text(images=[image_data])
            print(ocr_results)

            return ocr_results

        def _get_text_from_ocr_results(self, ocr_results):
            """Join every recognised text fragment into one newline-separated string.

            Each element of *ocr_results* must be a mapping with a ``"data"``
            list whose items carry a ``"text"`` entry. An empty line is
            appended after the fragments of each result.
            """
            all_text = []

            for one_result in ocr_results:
                all_text.extend(one_info["text"] for one_info in one_result["data"])

                # add empty line before storing next image text
                all_text.append("")

            all_text = "\n".join(all_text)

            print("----- all text --------")
            print(all_text)

            return all_text

        def _count_words_in_text(self, text: str):
            """Return the number of characters in *text*, ignoring newlines."""
            pure_text = text.replace("\n", "")

            return len(pure_text)

        def count_words_for_one_image(self, image_path):
            """Run OCR on the image at *image_path* and return its character count.

            Returns 0 when the image cannot be read.
            """
            image_data = self._get_image_data(image_path)

            ocr_results = self._get_ocr_results_from_image_data(image_data)

            text = self._get_text_from_ocr_results(ocr_results)

            num = self._count_words_in_text(text)

            print(f"num = {num}")

            return num

        def _prepare_for_watch(self):
            """Make sure the snapshot directory exists; existing snapshots are kept."""
            # makedirs also creates the parent workspace directory, which a
            # plain mkdir would require to exist already.
            os.makedirs(self._snapshot_path, exist_ok=True)

        def watch_camera(self):
            """Show the camera feed with a live character count until 'q' is pressed.

            Every captured frame is run through OCR, annotated with the count,
            displayed, and written to the snapshot directory as ``<index>.jpg``.
            Pressing space pauses until another key is pressed.
            """
            self._prepare_for_watch()

            cap = cv.VideoCapture(0)
            index = 0

            try:
                fps = cap.get(cv.CAP_PROP_FPS)  # average frame rate reported by the device
                print(f"fps = {fps}")

                # Guard against a non-positive reported frame rate so that
                # 1 / fps cannot raise ZeroDivisionError.
                frame_delay = 1 / fps if fps > 0 else 0

                while True:
                    ret, frame = cap.read()

                    if not ret:
                        print(f"capture failed with ret={ret} frame={frame}")
                        break

                    ocr_results = self._get_ocr_results_from_image_data(frame)

                    text = self._get_text_from_ocr_results(ocr_results)

                    num = self._count_words_in_text(text)

                    cv.putText(frame, f"Words total = {num}", (50, 50),
                               cv.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                               2)

                    cv.imshow('Video', frame)

                    image_path = f'{self._snapshot_path}/{index}.jpg'
                    cv.imwrite(image_path, frame)

                    index += 1

                    # Space pauses until another key is pressed, q quits
                    key = cv.waitKey(1) & 0xff
                    if key == ord(" "):
                        cv.waitKey(0)
                    if key == ord("q"):
                        break

                    if frame_delay:
                        time.sleep(frame_delay)  # pace the loop at the reported frame rate
            finally:
                # Release the camera and close windows even if OCR or I/O raised.
                cap.release()
                cv.destroyAllWindows()

            print('capture finish, get %d frame' % index)
    
    
    
    
    if __name__ == "__main__":
        # Sample picture for the single-image mode. To test one picture, call
        # words_counter.count_words_for_one_image(one_writing_path) instead of
        # watch_camera().
        one_writing_path = './workspace/one_student_writing.jpeg'

        # Real-time counting on the camera feed
        words_counter = WordsCounter()
        words_counter.watch_camera()

    Effect

    除了实时显示外, 在workspace/snapshot目录下,可以查看帧图片,图片上有字数统计。

    output -- log

    [{'save_path': '', 'data': [{'text': '测', 'confidence': 0.988754391670227, 'text_box_position': [[411, 60], [487, 68], [481, 130], [405, 122]]}, {'text': '式数据', 'confidence': 0.7919352054595947, 'text_box_position': [[380, 188], [457, 202], [414, 446], [337, 432]]}]}]
    ----- all text --------
    测
    式数据
    
    

     

    output -- image

    alt text

    出处:http://www.cnblogs.com/lightsong/ 本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
  • 相关阅读:
    【转】VS2013编译libjpeg库
    玩转百度地图(二)之画圆,高德地图、搜搜地图、搜狗地图等稍微修改即可
    JAVA自动生成正则表达式工具类
    S2SH商用后台权限系统第三讲
    自定义表单验证指令
    关于input/textarea提交内容空格回车转换问题,以及ng-model去除空格问题
    angular ui-router 缓存问题
    ionic 发送请求返回一直都是404
    ionic中获取坐标方法
    ionic的scroll的使用出现的问题
  • 原文地址:https://www.cnblogs.com/lightsong/p/14596755.html
Copyright © 2011-2022 走看看