zoukankan      html  css  js  c++  java
  • gj6 深入python的set和dict

    6.1 collections中的abc

    from collections.abc import Mapping, MutableMapping
    # dict is a mapping type

    a = {}
    # Check a plain dict against the MutableMapping abstract base class.
    print (isinstance(a, MutableMapping))
    # True
    

    6.2 dict的常见用法

    a = {"lewen1": {"company": "imooc"},
         "lewen2": {"company": "imooc2"}
         }
    # clear: remove all items from the dict.
    # a.clear()
    # copy: returns a shallow copy.
    new_dict = a.copy()
    new_dict["lewen1"]["company"] = "imooc3"  # shallow copy only copies the references, so this also changes the nested value seen through a
    print(new_dict)
    print(a)
    ​
    ​---
    {'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}
    {'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}
    
    
    ---
    import copy
    a = {"lewen1": {"company": "imooc"},
         "lewen2": {"company": "imooc2"}
         }
    new_dict = copy.deepcopy(a)               # deep copy: the nested values are copied into independent objects
    new_dict["lewen1"]["company"] = "imooc3"  # this change does not affect the values in dict a
    print(new_dict)
    print(a)
    
    
    ---
    {'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}
    {'lewen1': {'company': 'imooc'}, 'lewen2': {'company': 'imooc2'}}
    
    ---
     
    # fromkeys: build a dict from an iterable of keys and one default value.
    # NOTE: every key refers to the *same* default object, so mutating the
    # nested dict through one key is visible through all the others.
    new_list = ["lewen1", "lewen2"]
    new_dict = dict.fromkeys(new_list, {"company": "imooc"})
    print(new_dict)

    # new_dict["kevin"]  # would raise KeyError because the key does not exist
    # get: returns the supplied default (here the string "None") for a missing key
    ret = new_dict.get("kevin", "None")
    print(ret)

    # items: iterate over (key, value) pairs
    for key, value in new_dict.items():
        print(key, value)

    # setdefault: when the key is missing, store the default and return it
    ret_set = new_dict.setdefault("kevin", "new mem")
    print(ret_set)
    print(new_dict)

    # update: accepts an iterable of (key, value) pairs
    new_dict.update(
        (("lewen", "imooc"),)
    )
    ---
    {'lewen1': {'company': 'imooc'}, 'lewen2': {'company': 'imooc'}}
    None
    lewen1 {'company': 'imooc'}
    lewen2 {'company': 'imooc'}
    new mem
    {'lewen1': {'company': 'imooc'}, 'lewen2': {'company': 'imooc'}, 'kevin': 'new mem'}
    ​
    

    6.3 dict的子类

    #不建议继承list和dict
    class Mydict(dict):
        """dict subclass whose item assignment stores ``value * 2``."""

        def __setitem__(self, key, value):
            doubled = value * 2
            super().__setitem__(key, doubled)


    # Keyword arguments passed to the constructor are stored without calling
    # the overridden __setitem__, so the doubling does not take effect here.
    my_dict = Mydict(one=1)
    # my_dict["one"] = 1      # explicit item assignment does use the override
    print(my_dict)
    {'one': 1}
    
    ---
    
    
    from collections import UserDict
    class Mydict(UserDict):
        """UserDict subclass whose item assignment stores ``value * 2``."""

        def __setitem__(self, key, value):
            doubled = value * 2
            super().__setitem__(key, doubled)


    # With UserDict the constructor goes through the overridden __setitem__,
    # so the value passed here is doubled as well.
    my_dict = Mydict(one=1)
    # my_dict["one"] = 1
    print(my_dict)
    {'one': 2}
    
    
    ---
    # defaultdict 
    from collections import defaultdict



    # dict is the factory used to create the value for a missing key.
    my_dict = defaultdict(dict)
    my_value = my_dict["bobby"]  # the key is missing, so an empty dict is returned
    print(my_value)
    {}

    6.4 set和frozenset

    # set vs. frozenset (immutable set): unordered, no duplicate elements
    s = set('abcdee')
    print(s)

    s2 = set(['a','b','c','d','e'])
    print(s2)

    s3 = {'a','b', 'c'}
    print(type(s3))

    s = frozenset("abcde") # frozenset is immutable, so it can be used as a dict key
                           # elements cannot be added to it
    print(s)
    # ---
    {'a', 'e', 'c', 'd', 'b'}
    {'a', 'e', 'c', 'd', 'b'}
    <class 'set'>
    frozenset({'a', 'e', 'c', 'd', 'b'})
    
    
    # ---
    # Add data to a set
    s = set('abcdee')
    another_set = set("cef")
    s.update(another_set)
    print(s)

    re_set = s.difference(another_set)  # {'b', 'd', 'a'}
    re_set = s - another_set            # {'b', 'd', 'a'}
    re_set = s & another_set            # {'c', 'f', 'e'}
    re_set = s | another_set              # {'a', 'f', 'c', 'e', 'd', 'b'}

    # sets perform very well
    # | & -  are the set operators
    print(re_set)

    print (s.issubset(re_set))
    if "c" in re_set:
        print ("i am in set")
    # ---
    {'a', 'f', 'e', 'c', 'd', 'b'}
    {'a', 'f', 'c', 'e', 'd', 'b'}
    True
    i am in set

    6.5 dict和set实现原理

    from random import randint
    
    
    def load_list_data(total_nums, target_nums,
                       file_name="D:电子书Python面试宝典Version8.1.pdf"):
        """
        Read lines from a text file and return them as a list, together with
        a random selection of those lines to be used as lookup targets.

        :param total_nums: maximum number of lines to read from the file
        :param target_nums: number of random draws; duplicates are skipped, so
            fewer than ``target_nums`` targets may be returned
        :param file_name: path of the UTF-8 text file to read (defaults to the
            path that used to be hard-coded)
        :return: tuple ``(all_data, target_data)`` of two lists of lines
        :raises OSError: if the file cannot be opened
        :raises UnicodeDecodeError: if the file is not valid UTF-8 text
        """
        all_data = []
        with open(file_name, encoding="utf8", mode="r") as f_open:
            for count, line in enumerate(f_open):
                if count >= total_nums:
                    break
                all_data.append(line)

        target_data = []
        if not all_data:
            # Nothing was read, so there is nothing to sample from.
            return all_data, target_data

        # randint() is inclusive at both ends, and the file may hold fewer
        # lines than total_nums, so bound the index by what was really read.
        last_index = len(all_data) - 1
        seen = set()  # O(1) duplicate check; target_data keeps the draw order
        for _ in range(target_nums):
            candidate = all_data[randint(0, last_index)]
            if candidate not in seen:
                seen.add(candidate)
                target_data.append(candidate)

        return all_data, target_data
    
    def load_dict_data(total_nums, target_nums,
                       file_name="D:电子书Python面试宝典Version8.1.pdf"):
        """
        Read lines from a text file and return them as the keys of a dict,
        together with a random selection of those keys to be used as lookup
        targets.

        :param total_nums: maximum number of lines to read from the file
        :param target_nums: number of random draws; duplicates are skipped, so
            fewer than ``target_nums`` targets may be returned
        :param file_name: path of the UTF-8 text file to read (defaults to the
            path that used to be hard-coded)
        :return: tuple ``(all_data, target_data)``: a dict mapping each line
            to ``0`` and a list of selected lines
        :raises OSError: if the file cannot be opened
        :raises UnicodeDecodeError: if the file is not valid UTF-8 text
        """
        all_data = {}
        with open(file_name, encoding="utf8", mode="r") as f_open:
            for count, line in enumerate(f_open):
                if count >= total_nums:
                    break
                all_data[line] = 0

        target_data = []
        if not all_data:
            # Nothing was read, so there is nothing to sample from.
            return all_data, target_data

        # Repeated lines collapse into one key and the file may be shorter
        # than total_nums, so bound the index by the real number of keys.
        all_data_list = list(all_data)
        last_index = len(all_data_list) - 1
        seen = set()  # O(1) duplicate check; target_data keeps the draw order
        for _ in range(target_nums):
            candidate = all_data_list[randint(0, last_index)]
            if candidate not in seen:
                seen.add(candidate)
                target_data.append(candidate)

        return all_data, target_data
    
    
    def find_test(all_data, target_data, test_times=100):
        """
        Measure how long it takes to test every item of ``target_data`` for
        membership in ``all_data``.

        :param all_data: container searched with the ``in`` operator
        :param target_data: iterable of items to look up; it is iterated once
            per round, so it must be re-iterable (e.g. a list)
        :param test_times: number of timing rounds to average over
        :return: average duration of one round, in seconds
        :raises ValueError: if ``test_times`` is not positive
        """
        import time

        if test_times <= 0:
            raise ValueError("test_times must be a positive integer")

        total_time = 0.0
        for _ in range(test_times):
            hits = 0
            # perf_counter() is a monotonic, high-resolution clock; time.time()
            # can be too coarse to register a fast round of lookups.
            start_time = time.perf_counter()
            for data in target_data:
                if data in all_data:
                    hits += 1  # counted so the lookup result is actually used
            total_time += time.perf_counter() - start_time
        return total_time / test_times
    
    
    if __name__ == "__main__":
        # Alternative benchmark configurations: enable one line at a time to
        # compare list and dict lookups at different data sizes.
        # all_data, target_data = load_list_data(10000, 1000)
        # all_data, target_data = load_list_data(100000, 1000)
        # all_data, target_data = load_list_data(1000000, 1000)

        # all_data, target_data = load_dict_data(10000, 1000)
        # all_data, target_data = load_dict_data(100000, 1000)
        # all_data, target_data = load_dict_data(1000000, 1000)
        all_data, target_data = load_dict_data(2000000, 1000)
        last_time = find_test(all_data, target_data)

        # Author's observations:
        # - dict lookups are far faster than list lookups
        # - in a list, lookup time grows as the list grows
        # - in a dict, lookup time does not grow as the dict grows
        print(last_time)
    
    #1.dict的key或者set的值 都必须是可以hash的
    #不可变对象 都是可hash的, 如 str, frozenset, tuple; 自己实现的类需要实现 __hash__
    #2. dict的内存花销大(有大量空余的表元),但是查询速度快, 自定义的对象 或者python内部的对象都是用dict包装的
    # 3. dict的存储顺序和元素添加顺序有关
    # 4. 添加数据有可能改变已有数据的顺序
    

    image

    哈希冲突后重新计算位置

    在剩余空间小于三分之一时,申请更大的空间,然后数据搬迁,有可能会改变顺序

    image

  • 相关阅读:
    Hibernate事务代码规范写法
    关于hibernate插入数据时的乱码问题
    搭建hibernate环境(重点)
    接口测试概念以及用postman进行接口测试
    Atom编辑器之加快React开发的插件汇总
    如何搭建git服务器
    phpstorm 配置 xdebug调试工具
    linux 获取指定行范围文本内容
    odoo 创建一个qweb
    linux nohup 使用
  • 原文地址:https://www.cnblogs.com/wenyule/p/10363223.html
Copyright © 2011-2022 走看看