zoukankan      html  css  js  c++  java
  • [Erlang 0128] Term sharing in Erlang/OTP 下篇

      继续昨天的话题,昨天提到io:format对数据共享的间接影响,如果是下面两种情况恐怕更容易成为"坑", 呃,恰好我都遇到过;

      如果测试代码是下面这样,得到的结果会是怎样?猜!

    %% s2/0 - prints {{size(L),flat_size(L)},{size(L2),flat_size(L2)}} via
    %% erlang:display/1.
    %% L and L2 are built purely from literals, so (as the to_core dump shown
    %% in this article demonstrates) the compiler inlines them as separate
    %% constants at every call site; no sharing between L and L2 survives,
    %% which is why size and flat_size come out equal.
    s2()->
      L=[1,2,3,4,5,6],
      L2=[L,L,L,L],
    erlang:display( {{erts_debug:size(L),erts_debug:flat_size(L)},{erts_debug:size(L2),erts_debug:flat_size(L2)}}
    ).
    

    结果是

    5> d:s2().
    
    {{12,12},{56,56}}
    

      

     

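      这个结果出来之后,我足足花了5分钟怀疑人生:如果L在L2中是共享的,期望的结果应该是{{12,12},{20,56}}(L2自身4个cons单元占8个word,加上只计一次的L占12个word,共20个word),为什么实际结果和期望的不一样呢?是因为我现在用的是最新版本(17.2)吗?是实现已经修改掉但是没有更新文档吗?出于好奇,我还是按照之前探索问题的套路,用to_core选项生成了一下Core Erlang文件,真相大白: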

    's2'/0 =
        %% Line 11
        fun () ->
            let <_cor5> =
                %% Line 14
                call 'erts_debug':'size'
                    ([1|[2|[3|[4|[5|[6]]]]]])
            in  let <_cor4> =
                    %% Line 14
                    call 'erts_debug':'flat_size'
                        ([1|[2|[3|[4|[5|[6]]]]]])
                in  let <_cor3> =
                        %% Line 14
                        call 'erts_debug':'size'
                            ([[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]]]]])
                    in  let <_cor2> =
                            %% Line 14
                            call 'erts_debug':'flat_size'
                                ([[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]|[[1|[2|[3|[4|[5|[6]]]]]]]]]])
                        in  %% Line 14
                            call 'erlang':'display'
                                ({{_cor5,_cor4},{_cor3,_cor2}})
    

    修改一下代码: 

    %% s3/1 - same measurement as s2/0, but L is supplied by the caller at
    %% run time instead of being a literal in the function body.
    %% Returns {{size(L),flat_size(L)},{size(L2),flat_size(L2)}}.
    s3(L)->
        %% L2 is a four-element list whose elements all refer to the same L.
        L2=[L,L,L,L],
        {{erts_debug:size(L),erts_debug:flat_size(L)},{erts_debug:size(L2),erts_debug:flat_size(L2)}}
    .
    

      

    对应的s3的代码是

    's3'/1 =
        %% Line 18
        fun (_cor0) ->
            let <L2> =
                %% Line 19
                [_cor0|[_cor0|[_cor0|[_cor0|[]]]]]
            in  let <_cor5> =
                    %% Line 20
                    call 'erts_debug':'size'
                        (_cor0)
                in  let <_cor4> =
                        %% Line 20
                        call 'erts_debug':'flat_size'
                            (_cor0)
                    in  let <_cor3> =
                            %% Line 20
                            call 'erts_debug':'size'
                                (L2)
                        in  let <_cor2> =
                                %% Line 20
                                call 'erts_debug':'flat_size'
                                    (L2)
                            in  %% Line 20
                                {{_cor5,_cor4},{_cor3,_cor2}}
    

      

      换句话说,在编译阶段s2函数里面的常量数据就已经展开了,所以L2无论是size还是flat_size,结果都是一样的.之所以要先把这个测试做了,就是为了避免后面的测试误入陷阱.

     这个怎么破呢?除了上面传入参数的方法之外,还有一个路子:换成函数调用即可,如下:

    %% s4/0 - variant of s2/0 that obtains L from a function call
    %% (lists:seq/2) rather than from a literal, then prints
    %% {{size(L),flat_size(L)},{size(L2),flat_size(L2)}} via erlang:display/1.
    %% Because L is only known at run time, the Core Erlang listing in this
    %% article shows L2 built from the variable L instead of an expanded
    %% constant.
    s4()->
      L=lists:seq(1,6),
      L2=[L,L,L,L],
    erlang:display( {{erts_debug:size(L),erts_debug:flat_size(L)},{erts_debug:size(L2),erts_debug:flat_size(L2)}}
    ).
    

      

     

    对应的代码为:

    's4'/0 =
        %% Line 24
        fun () ->
            let <L> =
                %% Line 25
                call 'lists':'seq'
                    (1, 6)
            in  let <L2> =
                    %% Line 26
                    [L|[L|[L|[L|[]]]]]
                in  let <_cor5> =
                        %% Line 27
                        call 'erts_debug':'size'
                            (L)
                    in  let <_cor4> =
                            %% Line 27
                            call 'erts_debug':'flat_size'
                                (L)
                        in  let <_cor3> =
                                %% Line 27
                                call 'erts_debug':'size'
                                    (L2)
                            in  let <_cor2> =
                                    %% Line 27
                                    call 'erts_debug':'flat_size'
                                        (L2)
                                in  %% Line 27
                                    call 'erlang':'display'
                                        ({{_cor5,_cor4},{_cor3,_cor2}})
    

      

    不要小看这个问题,这样一个常量优化在极端情况下会有"大惊喜",论文里面给了这样一个例子:

    %% show_compiler_crashes/0 - example quoted from the paper referenced in
    %% this article. Each Ln is a list of ten references to the previous
    %% level, so the source is tiny, but the fully expanded term L contains
    %% 10^10 copies of [0]. Everything is a literal, which is what exposes it
    %% to the compile-time constant expansion described above; the article
    %% reports that erlc runs out of memory on it.
    show_compiler_crashes() ->
    
    L0 = [0],
    
    L1 = [L0, L0, L0, L0, L0, L0, L0, L0, L0, L0],
    
    L2 = [L1, L1, L1, L1, L1, L1, L1, L1, L1, L1],
    
    L3 = [L2, L2, L2, L2, L2, L2, L2, L2, L2, L2],
    
    L4 = [L3, L3, L3, L3, L3, L3, L3, L3, L3, L3],
    
    L5 = [L4, L4, L4, L4, L4, L4, L4, L4, L4, L4],
    
    L6 = [L5, L5, L5, L5, L5, L5, L5, L5, L5, L5],
    
    L7 = [L6, L6, L6, L6, L6, L6, L6, L6, L6, L6],
    
    L8 = [L7, L7, L7, L7, L7, L7, L7, L7, L7, L7],
    
    L9 = [L8, L8, L8, L8, L8, L8, L8, L8, L8, L8],
    
    L = [L9, L9, L9, L9, L9, L9, L9, L9, L9, L9],
    
    L.
    
     
    

      

    影响有多大呢?看结果:
     
    After a bit more of 45 minutes of struggling, the compiler tries to allocate 3.7 GB of memory and gives up:

    $ erlc demo.erl
    Crash dump was written to: erl_crash.dump
    eheap_alloc: Cannot allocate 3716993744 bytes of
    memory (of type "heap_frag").
    Abort

      好吧,勇于自黑,由于上面遇到这样让人恼火的问题,我决定在Shell中完成后续的测试,然后,我一脚踏进"新坑":

     

    陷阱2 Shell ! Shell !

    Eshell V6.0  (abort with ^G)
    1> L=[1,2,3,4,5,6,7,8,9,10].
    [1,2,3,4,5,6,7,8,9,10]
    2>  L2=[L,L,L,L,L,L].
    [[1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10]]
    3> erts_debug:size(L2).
    32
    4> erts_debug:flat_size(L2).
    132
    5> io:format("~p",[L2]).
    [[1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10]]ok
    6> erts_debug:size(L2).
    32
    7> erts_debug:flat_size(L2).
    132
    

      

        上面的测试一切正常,io:format并没有影响L2的数据共享.再看下面这个例子:一开始启动shell的时候,Shell的Pid是<0.33.0>.然后我们在中间故意执行一个不存在的函数fake:fake().这时再查看一下,Shell已经重启,Pid变成了<0.40.0>.注意此时再执行erts_debug:size(L2),结果已经从32变成了132,换句话说,这里L2的数据已经展开了.

    Eshell V6.0  (abort with ^G)
    1> self().
    <0.33.0>
    2>  L=[1,2,3,4,5,6,7,8,9,10].
    [1,2,3,4,5,6,7,8,9,10]
    3>  L2=[L,L,L,L,L,L].
    [[1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10],
    [1,2,3,4,5,6,7,8,9,10]]
    4>  erts_debug:size(L2).
    32
    5> erts_debug:flat_size(L2).
    132
    6> fake:fake().
    ** exception error: undefined function fake:fake/0
    7> self().
    <0.40.0>
    8>  erts_debug:size(L2).
    132
    9> erts_debug:flat_size(L2).
    132
    10>
    

      

       那为什么会触发数据展开(expand, flattening)呢?看下面的代码:Shell在(重新)启动求值进程(evaluator)的时候,会把之前已经绑定的变量(Bs)通过闭包传给spawn_link,以启动新的求值进程.

    erl6.2\lib\stdlib-2.2\src\shell.erl
    
    %% start_eval/3 - spawns a linked evaluator process and returns its pid.
    %% The fun passed to spawn_link/1 captures Self, Bs, RT and Ds.
    %% NOTE(review): per the surrounding article, Bs holds the shell's
    %% current variable bindings and the captured terms are copied to the new
    %% process heap, which is where sharing is lost - confirm against shell.erl.
    start_eval(Bs, RT, Ds) ->
        Self = self(),
        Eval = spawn_link(fun() -> evaluator(Self, Bs, RT, Ds) end),
        %% Remember the evaluator pid in this process's dictionary.
        put(evaluator, Eval),
        Eval.
    

      

      换句话说,Erlang中使用spawn创建进程时,传入的参数(包括函数闭包捕获的变量)需要拷贝到新进程的heap,拷贝之后原有的共享结构会被展开;因此创建进程的时候需要考虑参数的大小.

      OK,这个问题差不多了,休息.

  • 相关阅读:
    awk 字符串函数
    C标准函数库中获取时间与日期、对时间与日期数据操作及格式化
    Redis 命令总结
    个人网站和博客赚钱之路(转)
    二叉树的前序、中序、后序遍历与创建
    16个值得个人站长做的广告联盟[转自cnzz]
    mysql 添加[取消]timestamp的自动更新
    转:函数式编程初探
    可爱的 Python : Python中的函数式编程,第三部分
    可爱的 Python : Python中函数式编程,第二部分
  • 原文地址:https://www.cnblogs.com/me-sa/p/term_sharing_in_erlang_otp_two.html
Copyright © 2011-2022 走看看