zoukankan      html  css  js  c++  java
  • 查找大文件中字符串的位置及其前后50个字符

    import os
    import asyncio
    import time


    async def fun(i, f1, find_str, count, end_offset):
        """Scan section ``i`` of an open binary file for the first ``find_str``.

        The section starts at byte ``i * end_offset`` (one byte earlier when
        ``i > 0``, never below 0) and is read in up to ``count`` windows whose
        starts are 512 KiB apart. Each window overlaps the next by at least
        1 KiB (more for long needles) so a match straddling two windows is
        still seen by the earlier one.

        The coroutine contains no ``await``, so it runs to completion without
        yielding; the seeks on the shared ``f1`` therefore cannot interleave
        with those of sibling coroutines.

        Args:
            i: Zero-based index of the section to scan.
            f1: Seekable file object opened in binary mode.
            find_str: Needle as ``bytes``; a ``str`` is encoded as UTF-8.
            count: Maximum number of windows to read in this section.
            end_offset: Length of one section in bytes.

        Returns:
            ``[i, offset, before, after]`` for the first match, where
            ``i * end_offset + offset`` is the absolute byte position of the
            match and ``before`` / ``after`` hold up to 50 decoded characters
            surrounding it; ``None`` when the section contains no match.
        """
        if isinstance(find_str, str):
            # The file is read as bytes, so a text needle must be encoded first.
            find_str = find_str.encode('utf-8')

        stride = 1024 * 512
        # Overlap must cover the needle, otherwise a match crossing a window
        # seam would be invisible to both neighbouring windows.
        window = stride + max(1024, len(find_str) - 1)
        context_chars = 50
        # A UTF-8 character is at most 4 bytes; 3 spare bytes absorb a
        # partial character at the cut.
        context_bytes = context_chars * 4 + 3

        section_start = 0 if i == 0 else max(0, i * end_offset - 1)
        for j in range(count):
            window_start = section_start + j * stride
            f1.seek(window_start, 0)
            r = f1.read(window)
            if not r:
                # Past end of file: every later window is empty as well.
                break
            index = r.find(find_str)
            if index < 0:
                continue

            position = window_start + index
            # Read the context straight from the file so it is not truncated
            # at window edges, and slice characters only after decoding.
            before_start = max(0, position - context_bytes)
            f1.seek(before_start, 0)
            before_raw = f1.read(position - before_start)
            f1.seek(position + len(find_str), 0)
            after_raw = f1.read(context_bytes)
            q_str = before_raw.decode('utf-8', errors='ignore')[-context_chars:]
            h_str = after_raw.decode('utf-8', errors='ignore')[:context_chars]
            return [i, position - i * end_offset, q_str, h_str]
        return None


    def find_path(file_path, find_str, child_file_number):
        """Locate ``find_str`` in a large file by scanning it section by section.

        The file is divided into ``child_file_number`` equally sized sections
        and one ``fun`` coroutine is run per section. The hit from the
        lowest-numbered section that produced one is reported.

        Args:
            file_path: Path of the file to search.
            find_str: Needle, passed through unchanged to ``fun``.
            child_file_number: Number of sections to split the file into.

        Returns:
            ``(position, before, after, elapsed_seconds)`` when a match is
            found, where ``position`` is ``section_index * section_size`` plus
            the offset reported by ``fun``; otherwise
            ``('no find', elapsed_seconds)``.
        """
        s_time = time.time()
        all_size = os.stat(file_path).st_size  # file size in bytes
        # Integer division keeps the arithmetic exact for very large files.
        end_offset = all_size // child_file_number
        count = end_offset // (1024 * 512) + 1

        async def _scan_sections(f1):
            # gather returns the results in section order.
            return await asyncio.gather(
                *(fun(i, f1, find_str, count, end_offset)
                  for i in range(child_file_number))
            )

        # The context manager closes the file on every exit path, including
        # errors raised while scanning.
        with open(file_path, 'rb') as f1:
            results = asyncio.run(_scan_sections(f1))

        for result in results:
            if result is not None:
                section, offset, q_str, h_str = result
                return section * end_offset + offset, q_str, h_str, time.time() - s_time
        return 'no find', time.time() - s_time


    if __name__ == '__main__':
        # Raw string: in a normal literal "\1" would become the control
        # character \x01 and corrupt the Windows path. The needle is bytes
        # because the file is read in binary mode.
        print(find_path(r'D:\wyz\subscript_query\1.txt', b'aaa', 40))
  • 相关阅读:
    数字配对(bzoj 4514)
    任务查询系统(bzoj 3932)
    楼房重建(bzoj 2957)
    Hotel(poj 3667)
    Can you answer these queries(spoj 1043)
    亚瑟王(bzoj 4008)
    潘多拉的盒子(bzoj 1194)
    Circling Round Treasures(codeforces 375c)
    莫队算法---基础知识介绍(转载)
    HDU 1141---Brackets Sequence(区间DP)
  • 原文地址:https://www.cnblogs.com/yz-w/p/15066560.html
Copyright © 2011-2022 走看看