zoukankan      html  css  js  c++  java
  • 关联规则挖掘之apriori算法

    前言:

    众所周知,关联规则挖掘是数据挖掘中重要的一部分,如著名的啤酒和尿布的问题。今天要学习的是经典的关联规则挖掘算法——Apriori算法



    一、算法的基本原理

    由k项频繁集去导出k+1项频繁集。


    二、算法流程


    1.扫描事务数据库,找出1项集,并根据最小支持度计数,剪枝得出频繁1项集。k=1.


    2.由频繁k项集进行连接步操作,形成候选的k+1项集,并扫描数据库,得出每一项的支持度计数,并根据最小支持度计数,剪枝得到频繁k+1项集。

    迭代的进行第2步直到频繁k项集是空的。


    3.由频繁项集构造关联规则。先列出所有可能的关联规则,然后计算相应的置信度,最终筛选出满足最小置信度的强关联规则。



    三、算法实现


    测试数据集:data.txt

    1, 7, 15, 44, 49
    2, 1, 19
    3, 1, 19
    4, 3, 4, 15, 18, 35, 44
    5, 2, 4, 7, 9, 23
    6, 14, 21, 44
    7, 4, 12, 31, 36, 44, 48
    8, 15, 27, 28
    9, 2, 28
    10, 3, 18, 35
    11, 23, 24, 40, 41, 43
    12, 20, 43, 48
    13, 49
    14, 1, 19, 26
    15, 5, 22, 39
    16, 16, 32, 45
    17, 4, 6, 9, 10, 16, 22
    18, 1, 19, 23
    19, 7, 11, 37, 45
    20, 3, 18, 32, 35
    21, 1, 8, 19, 47
    22, 34, 39, 44
    23, 13, 19
    24, 4, 9, 38
    25, 7, 22, 48
    26, 7, 11, 14
    27, 23, 24, 40, 41, 43
    28, 9, 14
    29, 0, 2, 42
    30, 13, 35
    31, 23
    32, 8, 21, 25, 38
    33, 4, 46
    34, 23, 24, 40, 41, 43
    35, 4, 17, 29, 47
    36, 12, 31, 36
    37, 14, 22, 26, 37, 44
    38, 0, 16, 30, 32, 45, 47
    39, 1, 11, 19, 25, 27, 29, 46
    40, 15, 16, 18, 21, 26
    41, 4, 10, 14
    42, 3, 36
    43, 23, 27, 28
    44, 15, 21, 40
    45, 10, 19, 25, 32
    46, 11, 22, 44
    47, 8
    48, 0, 2, 46
    49, 33, 42
    50, 28, 39
    51, 7, 17, 28
    52, 1, 19
    53, 32, 34
    54, 0, 2, 46
    55, 15, 30, 45
    56, 39, 49
    57, 46
    58, 4, 9, 19
    59, 0, 2, 16, 19, 46
    60, 17, 21, 40
    61, 2, 4, 6, 9, 39
    62, 23, 24, 40, 41, 43
    63, 13, 27, 28
    64, 40
    65, 12, 14, 44
    66, 27, 28
    67, 1, 36
    68, 12, 31, 36, 48
    69, 3, 18, 35
    70, 5, 12, 16
    71, 30, 49
    72, 7, 29, 30
    73, 17, 19, 30
    74, 1, 13, 36
    75, 19, 33, 49
    76, 1, 5, 14, 38
    77, 31
    78, 1, 5, 8, 14, 22, 26
    79, 7, 27, 30, 36, 37, 43
    80, 13, 35, 42
    81, 5, 22, 32
    82, 7, 11, 20, 28, 37, 45
    83, 23, 24, 40, 41
    84, 3, 5, 14, 22, 36
    85, 12, 31, 36, 48
    86, 45
    87, 12, 31, 36, 48
    88, 14, 44
    89, 5, 19, 22, 34, 43
    90, 0, 2, 45, 46
    91, 16, 32, 45, 47, 48
    92, 1, 48
    93, 1, 3, 18, 35
    94, 7, 11, 37, 45
    95, 16, 26, 32, 41, 45
    96, 1, 29, 30
    97, 1, 19
    98, 1
    99, 14, 22, 37
    100, 22, 34, 40
    101, 31
    102, 16, 17, 43
    103, 8, 19, 20, 48
    104, 1, 12, 15, 27, 31, 34, 36, 48
    105, 3, 40, 45
    106, 13, 18, 21, 41, 47, 48
    107, 3, 18, 22, 27
    108, 6, 16, 17, 31, 33, 42
    109, 5, 22, 27, 35
    110, 7, 15, 49
    111, 6, 12, 26, 29, 31, 32, 36, 48
    112, 11, 13, 14, 44
    113, 23, 33, 35
    114, 5, 8, 18, 29, 46
    115, 41
    116, 1, 3, 8, 27, 28
    117, 1, 11, 19, 33
    118, 23, 24, 40, 41
    119, 16, 18, 23, 27, 29, 30, 40, 49
    120, 14, 18, 44
    121, 0, 2, 46
    122, 3, 18, 35
    123, 12, 31, 36, 48
    124, 7, 11, 20, 29, 37, 45
    125, 2, 24, 27
    126, 5, 13, 22
    127, 22, 42, 46, 47
    128, 39, 41
    129, 13, 23
    130, 1, 24, 40, 42
    131, 14, 20, 26, 44
    132, 12, 31, 36, 48
    133, 7, 21, 27, 28, 33, 36, 47
    134, 17, 29, 47
    135, 12, 31, 36
    136, 4, 9, 11
    137, 3, 11, 14, 18, 29, 35
    138, 20, 33, 42
    139, 22, 44, 46
    140, 5, 6, 16, 22, 33, 38, 49
    141, 0, 2, 12, 24, 26, 31, 46
    142, 0, 6, 25
    143, 5, 19, 22
    144, 11, 13, 14, 35, 48
    145, 17, 29, 35, 44, 47
    146, 7, 15
    147, 5, 22
    148, 0, 1, 2, 46
    149, 4, 9, 18, 41
    150, 1, 12, 18, 19, 39
    151, 0, 2, 19, 22, 46
    152, 33, 37, 42
    153, 0, 2, 37, 46
    154, 11, 18, 35, 45
    155, 4, 9, 15
    156, 19, 24, 28, 35, 49
    157, 5, 22, 37
    158, 0, 2, 5, 19, 33, 35, 39, 46
    159, 3, 4, 5, 48
    160, 4, 6, 18, 28, 35
    161, 3, 18, 31, 35
    162, 6, 17, 25, 49
    163, 17
    164, 3, 8, 9, 20, 22, 23, 42
    165, 4, 15, 17, 21, 26, 36, 48
    166, 14, 41, 44
    167, 19, 28, 42
    168, 4, 9
    169, 13, 31, 33, 41, 42
    170, 5, 22, 28
    171, 0, 16, 32
    172, 5, 28, 43
    173, 24, 36, 37, 42
    174, 31
    175, 40, 42
    176, 11, 34, 48
    177, 14, 28, 40, 43
    178, 0, 13, 26
    179, 16, 32, 45
    180, 10, 14, 44, 46
    181, 4, 7, 9, 19, 36
    182, 23, 24, 40, 41, 43
    183, 4, 9, 37
    184, 5, 22, 31
    185, 21, 24, 29
    186, 0, 6, 22, 46
    187, 3, 18, 21, 35, 39, 46
    188, 12, 31, 36, 38, 48
    189, 28
    190, 23, 24, 40, 41, 43
    191, 12, 24
    192, 6, 27, 28
    193, 23, 24, 40, 41, 43
    194, 27, 28
    195, 5, 14, 16, 22
    196, 5, 22
    197, 27, 28, 44, 47
    198, 5, 22
    199, 0, 2, 46
    200, 0, 10, 30
    201, 15, 33, 42
    202, 4, 9, 40
    203, 1, 36
    204, 23, 24, 40, 41
    205, 25, 27, 28, 32
    206, 7, 11, 37, 45
    207, 20, 48
    208, 7, 11, 37, 45
    209, 9, 27, 28
    210, 7, 11, 37
    211, 1, 2, 9, 22, 48
    212, 7, 27, 39, 45
    213, 1, 6, 8, 15, 18, 19, 21, 42
    214, 23, 24, 40, 41
    215, 4, 14, 20, 32, 36, 44
    216, 16, 32, 45
    217, 18, 26, 42, 45
    218, 2, 11, 23
    219, 11, 15, 35
    220, 31, 39, 43, 45, 46
    221, 0, 2, 28, 40, 46
    222, 13, 24, 26, 35, 39, 47
    223, 7, 21, 23, 30, 36
    224, 23, 40, 47
    225, 5, 22
    226, 1, 5, 19, 30
    227, 7, 15, 49
    228, 7, 11, 37, 45
    229, 23, 31, 34, 45
    230, 7, 29, 48
    231, 27, 28
    232, 24, 44
    233, 4, 5, 9, 39
    234, 5, 22, 25, 28, 34, 46
    235, 23, 32, 48
    236, 16, 42
    237, 5, 22
    238, 4, 9, 15, 25
    239, 1, 12, 16, 17, 19, 26, 48
    240, 1, 3, 16, 19, 35
    241, 11, 12
    242, 4, 9, 10, 12, 45
    243, 12, 31, 36, 46, 48
    244, 17, 28, 47
    245, 3, 18, 35
    246, 4, 9
    247, 7, 11, 37, 45
    248, 5, 12, 22
    249, 6, 14, 38, 48
    250, 9, 10, 34, 36, 42
    251, 3, 18, 35
    252, 3
    253, 14, 27
    254, 0, 1, 19
    255, 3, 18, 24, 35
    256, 12, 31, 36, 48
    257, 19, 31, 41
    258, 1, 11, 14, 32, 34, 35
    259, 5, 14, 28, 39, 49
    260, 9, 14, 21, 46
    261, 6
    262, 5, 22, 31
    263, 11, 40
    264, 0, 2, 46
    265, 23, 24, 40, 41, 43
    266, 3, 14, 23, 44
    267, 18, 22, 42, 49
    268, 3, 24
    269, 12, 31, 36, 48
    270, 20, 25, 27
    271, 15, 30, 39
    272, 7, 15, 49
    273, 12, 31, 36, 48
    274, 16, 18, 32, 42
    275, 27, 31, 33, 42
    276, 5, 9, 22, 43
    277, 2, 4, 36, 40, 41, 48
    278, 19, 28, 30, 32
    279, 2, 31, 39, 44
    280, 1, 2, 19, 21, 23, 28, 45
    281, 7, 10, 14, 16, 23, 32, 45
    282, 4, 9, 14, 36, 44
    283, 21, 29
    284, 9, 13, 17, 46
    285, 38, 39
    286, 1, 8, 19
    287, 0, 33, 42
    288, 2, 33, 36, 47
    289, 12, 31, 36
    290, 28
    291, 16, 27, 28
    292, 7, 15, 49
    293, 8, 21, 24, 33, 34, 40, 42, 44
    294, 1, 14, 26, 32, 41
    295, 23, 33, 42
    296, 14, 21, 23, 24
    297, 0, 2, 46
    298, 11, 29
    299, 1, 23, 33, 42, 47
    300, 16, 32, 45
    301, 3, 4, 9, 33, 37, 43
    302, 19, 25, 30, 43, 46
    303, 0, 2, 46
    304, 16, 17, 34
    305, 5, 21, 22, 25, 27, 31, 42
    306, 5, 12, 16, 31, 36
    307, 8, 22, 42
    308, 3, 27, 36
    309, 16, 32, 45
    310, 12, 31, 36, 48
    311, 5, 22
    312, 7, 11, 37, 45
    313, 9, 13, 20, 21, 24, 39, 41, 45
    314, 1, 9, 15, 24, 37
    315, 4, 9
    316, 18, 35, 45
    317, 0, 2, 46
    318, 5, 10, 14, 47
    319, 1, 4, 10, 14, 25, 36, 49
    320, 12, 34
    321, 7, 15, 49
    322, 23, 28
    323, 1, 19
    324, 0, 2, 46
    325, 1, 2, 19
    326, 7, 11, 37, 45
    327, 29, 33, 38, 45, 48
    328, 4, 9, 24
    329, 1, 19, 23, 30, 44
    330, 5, 13
    331, 12, 31, 36, 48
    332, 0, 2, 46
    333, 5, 14, 20, 24, 28, 31, 39, 46
    334, 4, 9, 13, 33, 43, 46
    335, 5, 14, 22, 29
    336, 2, 8, 10, 19, 35, 45
    337, 14, 24, 34, 44
    338, 5, 22, 28, 30, 47
    339, 0, 28, 47
    340, 7, 15, 49
    341, 5, 9, 28, 41
    342, 1, 7, 11, 37
    343, 26
    344, 44
    345, 0, 2, 46
    346, 5, 10, 26, 30
    347, 8, 12, 14, 33, 44, 47
    348, 3, 4, 27, 42
    349, 9, 11, 40, 45
    350, 3, 4, 9, 11, 12, 47
    351, 5, 22, 35
    352, 26, 29, 45
    353, 4, 9, 31
    354, 0, 13, 27, 28, 31, 36
    355, 17, 32, 47
    356, 1, 31, 41
    357, 14, 43, 44
    358, 18, 35
    359, 5, 10, 16
    360, 33, 37
    361, 4, 30, 31
    362, 9, 14, 25
    363, 14, 44
    364, 23, 27, 28
    365, 9, 18, 22
    366, 5, 8, 22
    367, 10, 29
    368, 3, 15, 16, 20, 33, 45
    369, 4, 8, 12, 25, 34
    370, 9, 26, 28
    371, 7, 9, 15, 49
    372, 3, 48, 49
    373, 5, 21, 30, 31, 43
    374, 4, 9
    375, 1, 19, 28
    376, 4, 9, 26, 33, 47
    377, 1, 13, 19, 41
    378, 15, 18, 41
    379, 14, 28, 48
    380, 5, 22, 47
    381, 49
    382, 7, 15, 49
    383, 8, 28, 47
    384, 8, 25, 29
    385, 0, 4, 10, 21
    386, 41
    387, 5, 23, 26, 44
    388, 25
    389, 4, 9
    390, 12, 17, 26, 29, 31, 47
    391, 3, 18, 35
    392, 7, 11, 34, 37, 45
    393, 9, 13, 18, 23, 25, 33, 40
    394, 15
    395, 5, 17, 22, 40, 48
    396, 23, 33, 46
    397, 7, 15, 49
    398, 16, 32, 45
    399, 7, 15, 49
    400, 1, 48
    401, 2
    402, 36, 39, 49
    403, 3, 4, 10
    404, 34, 36
    405, 7, 17, 34, 35, 46
    406, 5, 22
    407, 32, 34, 36, 42
    408, 14, 46
    409, 3, 18, 29, 35, 37, 48
    410, 27, 33, 42
    411, 27, 28
    412, 27, 28
    413, 4, 9, 13, 21
    414, 1, 5, 7, 10, 21, 22, 30, 31
    415, 7, 15, 49
    416, 4, 5, 14, 23, 42
    417, 17, 29, 47
    418, 30
    419, 5, 33, 42
    420, 27, 28, 31
    421, 7, 15, 49
    422, 16, 32, 45
    423, 1, 2, 8, 25, 32
    424, 12, 13
    425, 16, 44
    426, 0, 17, 24
    427, 26
    428, 4, 10, 27, 28, 43
    429, 14
    430, 4, 19, 47
    431, 33, 42
    432, 47
    433, 8, 17, 23, 29, 43, 47
    434, 0, 5, 33, 46
    435, 9, 18, 35, 38, 40, 47
    436, 2, 4, 11, 39
    437, 4, 5, 9, 23, 24
    438, 4, 24, 32
    439, 8, 47
    440, 2, 19
    441, 2, 4, 9, 40
    442, 0, 2, 46
    443, 11, 19, 33, 42
    444, 16, 32, 45
    445, 5, 7, 22, 32, 42
    446, 4, 33, 47
    447, 19, 27, 36
    448, 1, 28, 40
    449, 23
    450, 4, 9, 31, 33
    451, 4, 28, 47
    452, 25, 27, 34, 49
    453, 3, 18, 35
    454, 9, 27, 28
    455, 14, 15, 35, 36
    456, 14, 27, 28
    457, 12, 16, 34
    458, 0, 2, 13, 18, 24, 36, 46, 47
    459, 14, 32, 44
    460, 16, 32, 45
    461, 2, 8, 16
    462, 7, 15, 49
    463, 14, 26, 30, 39, 44
    464, 1, 23, 32
    465, 0, 12, 20, 22, 49
    466, 16, 20, 32, 49
    467, 23, 24, 40, 41
    468, 1, 3, 29, 41, 42, 43, 46
    469, 5, 16, 25, 48
    470, 17, 29, 47
    471, 11, 16, 32, 45
    472, 4, 9, 13
    473, 17, 47
    474, 11, 32
    475, 12, 33, 42, 46
    476, 23, 24, 40, 41
    477, 9, 13, 14, 33, 38, 49
    478, 14, 15, 26, 47
    479, 3, 18, 35
    480, 3, 21, 44
    481, 3, 46
    482, 9
    483, 16, 24, 30, 31, 48
    484, 19
    485, 33, 42
    486, 4, 34
    487, 23, 24, 40, 41, 43
    488, 5, 22
    489, 10, 31
    490, 4, 17, 40, 47
    491, 39, 47
    492, 14, 15, 31
    493, 5, 26, 33, 42, 44
    494, 13, 30, 38
    495, 2, 3, 18, 35, 47
    496, 12, 31, 36, 48
    497, 17, 27, 28
    498, 17, 29, 47
    499, 14, 44
    500, 0, 2, 46
    501, 0, 21, 33, 39
    502, 14, 27, 44
    503, 12, 31, 36
    504, 5, 18
    505, 7, 11, 37, 45
    506, 0, 20
    507, 23, 30
    508, 0, 14, 16, 32
    509, 8
    510, 13, 26, 27, 28, 46
    511, 4, 9, 12, 17, 37
    512, 10, 20, 37
    513, 3, 7, 11, 21, 23
    514, 21, 31, 32
    515, 0, 1, 5, 23, 32, 42, 44
    516, 0, 2, 46
    517, 34, 37, 47
    518, 16, 28, 32, 45
    519, 3, 18, 35
    520, 0, 5, 23, 33, 35, 46, 48
    521, 14, 28, 29, 44
    522, 13, 15, 24
    523, 8, 22, 23
    524, 11, 14, 26, 28, 41, 43, 45
    525, 22, 39
    526, 7, 11, 37, 45
    527, 0, 2, 15, 16, 22, 35, 46
    528, 41, 42
    529, 6, 9, 30
    530, 4, 13, 37, 45, 49
    531, 0, 16
    532, 14, 27, 28, 37, 48
    533, 13, 28
    534, 13, 18
    535, 14, 29, 31, 35, 41, 49
    536, 15, 34, 46, 48
    537, 15, 28
    538, 3, 18, 35
    539, 12, 31, 36, 48
    540, 33, 35, 42
    541, 16, 32, 45
    542, 7, 9, 15, 43
    543, 7, 11, 37, 45
    544, 12, 31, 36, 48
    545, 14
    546, 5, 8, 14, 21, 33, 38
    547, 0, 3, 10, 38, 40
    548, 10, 14, 18, 47
    549, 11, 37, 38
    550, 1, 3, 18, 19, 35
    551, 3, 5, 22
    552, 9, 24, 27, 34
    553, 4, 7, 9
    554, 0, 12, 35
    555, 28, 29, 37
    556, 9, 11, 28, 33, 41
    557, 14, 44
    558, 4, 11, 41, 43
    559, 17, 47
    560, 16, 20, 27, 32, 45
    561, 2, 14, 42
    562, 4, 9, 21, 43
    563, 17, 29, 47
    564, 23, 28, 33, 42
    565, 6, 46
    566, 3, 18, 35
    567, 5, 9, 14, 18, 40
    568, 4, 9, 34
    569, 13, 14, 43, 44
    570, 27, 28
    571, 16, 32
    572, 6, 14, 22
    573, 0, 10, 41
    574, 0, 18, 27, 31, 34, 37, 39, 47
    575, 5, 7, 22
    576, 4, 7
    577, 7, 15, 49
    578, 4, 15
    579, 1, 16, 18, 20, 25, 29, 36, 46
    580, 13, 30, 42
    581, 24, 25, 31
    582, 4, 7, 15, 18
    583, 5, 22
    584, 28
    585, 23, 33, 42
    586, 44
    587, 15, 19, 27, 28
    588, 0, 11, 18, 23, 47
    589, 5, 22
    590, 7, 11, 37
    591, 7, 11, 37, 45
    592, 6, 16, 33, 39, 42
    593, 23, 24, 40, 41
    594, 0, 9, 30, 31
    595, 1, 32, 48
    596, 2, 27, 28, 43
    597, 1, 19
    598, 35
    599, 3, 10, 18, 35
    600, 4, 9, 12, 48
    601, 1, 6, 19
    602, 7, 15, 49
    603, 20, 22, 27
    604, 13
    605, 15, 16, 26
    606, 3, 18, 35
    607, 19
    608, 5, 22
    609, 1, 19
    610, 12, 36, 37, 39
    611, 2, 3, 10, 34
    612, 30, 44
    613, 13, 20, 22, 47
    614, 4, 12, 26, 45
    615, 13, 27, 35, 36
    616, 0, 2, 46
    617, 7, 10
    618, 1, 2, 34, 39
    619, 7, 11, 37, 45
    620, 39, 43, 44
    621, 3, 14, 30
    622, 2, 12, 30, 32, 49
    623, 1, 19, 32
    624, 10, 21, 28, 31
    625, 9, 17, 27, 28, 45
    626, 10, 11, 16, 32
    627, 12, 31, 36, 48
    628, 3, 18, 29, 35, 40
    629, 23, 24, 40, 41, 43
    630, 14, 17, 29, 44, 47
    631, 38
    632, 0, 2, 46
    633, 3, 18, 20, 35
    634, 17, 29, 43, 47
    635, 0, 6, 19, 26, 43
    636, 3, 18, 35
    637, 5, 21, 26, 27, 28, 39, 42, 49
    638, 12, 31, 36, 48
    639, 0, 2, 30, 46, 47
    640, 5, 22, 33
    641, 14, 33, 42
    642, 0, 30, 43, 46
    643, 11, 17, 35, 47
    644, 0, 2, 46
    645, 16, 32, 45
    646, 20, 37, 39, 42
    647, 14, 30, 44
    648, 36, 39
    649, 1, 17, 25, 37, 41, 49
    650, 30, 36, 44
    651, 12, 31, 36, 48
    652, 24, 27, 28, 34, 39, 45, 48
    653, 0, 37, 46
    654, 18, 25, 44
    655, 4, 9
    656, 5, 22
    657, 33, 42
    658, 0, 26, 27, 28
    659, 5
    660, 2, 22, 33, 42
    661, 7, 15, 49
    662, 4, 9, 13, 17, 37
    663, 5, 22, 35, 38, 47
    664, 0, 2, 46
    665, 27, 28, 39, 42
    666, 12, 31, 36, 48
    667, 27, 28
    668, 16, 32, 45, 49
    669, 21, 40
    670, 5, 22, 48
    671, 23, 33, 39, 42, 45
    672, 1, 7, 25
    673, 33, 42
    674, 16, 19, 32, 38, 45
    675, 7, 23, 30, 46
    676, 42
    677, 2, 10, 23, 42
    678, 26, 30, 48
    679, 7, 15, 49
    680, 12, 25, 39, 48
    681, 16, 17, 33, 47, 49
    682, 49
    683, 27, 45
    684, 1, 11, 21, 42, 43
    685, 8, 9, 22
    686, 27, 28
    687, 0, 2, 26, 43, 46
    688, 16, 32, 45
    689, 10, 22, 39
    690, 7, 11, 14, 18, 23
    691, 5, 22, 43
    692, 17, 23, 29, 47
    693, 1, 2, 15, 19
    694, 7, 11, 37, 45
    695, 5, 12, 22, 44
    696, 8, 11, 22, 37
    697, 20, 29
    698, 7, 9, 11, 13, 37, 39, 45
    699, 7, 11, 37, 45
    700, 3, 36, 42, 49
    701, 16, 32, 45
    702, 21, 35
    703, 0, 2, 46
    704, 10, 18, 44
    705, 6, 15, 21, 27, 28, 34, 41, 46
    706, 7, 11, 37, 45
    707, 16, 40
    708, 37, 43
    709, 7, 15, 29, 49
    710, 12, 31, 36, 48
    711, 5, 22, 24
    712, 12, 13, 14, 32
    713, 4, 5, 9, 32
    714, 10, 40, 44
    715, 3, 18, 35
    716, 1, 13, 20, 31, 45
    717, 37, 47, 48
    718, 4, 9
    719, 1, 19
    720, 4, 12, 31
    721, 10, 33
    722, 14, 44
    723, 5, 15, 34
    724, 23, 36
    725, 5, 29, 32, 35, 42
    726, 3, 11, 29
    727, 4, 6, 8, 10, 18, 22, 35, 37
    728, 21, 24, 27
    729, 33, 39
    730, 27, 28
    731, 6, 10, 44
    732, 22
    733, 0, 46
    734, 16, 32, 45
    735, 3, 9, 37, 42, 44
    736, 19, 21, 23, 27, 30
    737, 5, 19, 30
    738, 5, 15, 19, 22, 23
    739, 5, 22
    740, 3, 18, 35
    741, 17, 29, 47
    742, 12, 26, 31, 36, 41, 48
    743, 3, 18, 35
    744, 10
    745, 7, 15
    746, 28, 43, 49
    747, 12, 27, 28
    748, 1, 19
    749, 17, 21
    750, 0, 18, 24, 43, 48
    751, 16, 32, 45
    752, 0, 11, 33
    753, 3
    754, 9, 15, 40
    755, 12, 30, 49
    756, 45
    757, 23, 24, 40, 41, 43
    758, 14, 40, 44
    759, 15, 41, 47
    760, 27, 28, 29
    761, 0, 2, 23, 46
    762, 7, 8, 11, 20
    763, 6, 15, 22, 32
    764, 5, 13, 26
    765, 2, 20
    766, 1, 7, 15, 48
    767, 34, 45
    768, 12, 24, 31, 46
    769, 26, 27, 28
    770, 23, 24, 40, 41
    771, 20, 27, 29
    772, 26
    773, 27, 28
    774, 1, 13, 29
    775, 11, 23, 25, 36, 38, 45
    776, 17, 29, 47
    777, 22, 30, 39
    778, 33, 42, 45, 47
    779, 0, 2, 12, 39, 41, 46, 49
    780, 12, 31, 36, 48
    781, 12, 17, 27, 43, 45
    782, 17, 47
    783, 12, 31, 36, 48
    784, 8, 20, 29, 32, 46
    785, 10, 22, 23, 26, 36
    786, 7, 20, 26, 44, 47
    787, 2, 18, 27, 28, 33
    788, 14, 21, 23, 24, 30, 42, 46
    789, 18, 35
    790, 1
    791, 0, 14, 20, 44, 46
    792, 5, 7, 11, 24, 27
    793, 0, 18, 25, 39
    794, 1, 19, 27
    795, 23, 24, 40, 41, 43
    796, 16
    797, 0, 6, 28, 33, 35, 46, 48
    798, 1, 19, 44
    799, 17, 29, 47
    800, 6, 31
    801, 23, 24, 40, 41, 43
    802, 33, 42
    803, 1, 3, 19, 29, 31, 44
    804, 17, 29, 47
    805, 27, 31, 32, 36, 46
    806, 16, 32, 45
    807, 1, 19, 34, 44
    808, 0, 2, 7, 18, 48
    809, 14, 19
    810, 7, 15, 49
    811, 5, 13, 22, 30
    812, 17, 29, 47
    813, 13, 27, 28, 42, 48
    814, 1, 5, 22
    815, 4, 9
    816, 7, 11, 37, 45
    817, 0, 3, 7, 22, 37, 39, 40
    818, 1, 19
    819, 22, 26
    820, 33, 37, 42
    821, 4, 8, 13, 27, 28, 46
    822, 27, 28, 40
    823, 27, 28
    824, 33, 42
    825, 5, 22
    826, 14, 22, 44
    827, 16, 32, 45
    828, 3, 16, 28, 48
    829, 22, 23, 24, 39
    830, 15, 26, 28, 33, 36
    831, 7, 15, 49
    832, 15, 22, 27, 31, 33, 40
    833, 18, 35, 41, 43, 49
    834, 4, 9
    835, 12, 31, 36
    836, 1, 19, 28, 31, 38, 44, 48
    837, 4, 9, 18
    838, 6, 32, 34
    839, 15, 42
    840, 13, 27, 28, 36
    841, 7, 15, 22, 37, 43, 49
    842, 7, 30, 43
    843, 15, 25, 49
    844, 22, 33, 41
    845, 34
    846, 4, 14, 21, 25, 41
    847, 20, 23, 33, 42
    848, 9, 13, 22, 42, 45
    849, 0, 14, 22, 29, 46
    850, 1, 19
    851, 3, 16, 32, 45
    852, 4, 9
    853, 4, 9, 21, 39
    854, 3, 18, 39
    855, 4, 13, 32
    856, 17, 29, 47
    857, 12, 31, 36, 48
    858, 14, 15
    859, 23, 46
    860, 5, 22
    861, 3, 18, 35
    862, 12, 22, 47, 48
    863, 3, 5, 18, 35
    864, 23, 24, 40, 41
    865, 22
    866, 8, 12, 16, 18, 29, 34, 49
    867, 33, 34, 42, 44
    868, 13, 23, 24, 41
    869, 23, 24, 40, 41, 43
    870, 13, 25
    871, 5, 36, 41, 42
    872, 5, 22, 31, 37, 47
    873, 5, 7, 20
    874, 4, 14, 16, 37, 44
    875, 3, 18, 35
    876, 8, 21, 29, 33, 42
    877, 5, 14, 18
    878, 27
    879, 14, 20, 22, 24
    880, 1, 3, 14, 31, 44, 45, 47
    881, 29, 36, 41, 46
    882, 16, 32, 45
    883, 23, 24, 40, 41, 43
    884, 7, 11, 19, 38, 42, 43, 47
    885, 0, 27, 28, 32
    886, 0, 2, 28, 41, 46
    887, 23, 24, 40, 41, 43
    888, 3, 18, 35
    889, 14, 31
    890, 8, 14, 44
    891, 3, 14, 18, 22, 35
    892, 1, 3, 30
    893, 6, 12, 41, 44
    894, 11, 30, 43
    895, 7, 15, 49
    896, 0, 2, 46
    897, 1, 17, 38, 43
    898, 25, 36
    899, 7, 9, 27
    900, 5, 13, 22
    901, 12, 31, 36, 48
    902, 26, 39, 43
    903, 12, 26, 38
    904, 1, 16, 20, 40
    905, 23, 24, 40, 41, 43
    906, 6, 28, 37, 38, 49
    907, 14, 16, 17, 18, 26, 31
    908, 37, 44
    909, 11, 30
    910, 22, 28, 41, 48
    911, 12, 31, 36, 48
    912, 4, 5, 7, 21
    913, 3, 9, 18, 35, 41
    914, 5, 27, 28, 42
    915, 5, 22, 31
    916, 33, 42
    917, 4, 9, 15
    918, 7, 11, 37, 45
    919, 23, 24, 40, 41, 43
    920, 40
    921, 1, 8, 14, 19
    922, 14, 31, 44
    923, 6, 28, 33, 39
    924, 0, 6, 16, 30, 39, 47
    925, 12, 31, 36
    926, 1, 36, 46
    927, 27, 42, 44
    928, 4, 9
    929, 20
    930, 5, 22
    931, 10, 16, 26
    932, 13, 36, 42
    933, 12, 33, 42, 47
    934, 4, 27, 28
    935, 1, 19, 39
    936, 1, 16, 32
    937, 12, 31, 36
    938, 16, 32, 41, 45
    939, 16, 33, 42
    940, 12, 13, 31, 35, 36, 41, 44
    941, 6, 9, 19, 24, 44
    942, 23, 24, 40, 41, 43
    943, 5, 12, 31, 36, 48
    944, 0, 2, 20, 44, 46
    945, 3, 18, 35
    946, 4, 12, 14, 19, 32, 48
    947, 41
    948, 0, 14, 44
    949, 1, 10, 13, 19, 25, 26, 33, 39
    950, 7, 15, 16
    951, 4, 29, 46
    952, 14, 20, 40, 43
    953, 7, 38
    954, 3, 18, 45, 47
    955, 19, 29, 43
    956, 9, 16
    957, 0, 20, 42
    958, 5, 10, 32, 33, 39, 44
    959, 27, 28
    960, 5, 22, 25, 29, 37, 43
    961, 19
    962, 4, 16, 34
    963, 9, 17, 44, 45
    964, 7, 15, 29
    965, 7, 11, 27, 37, 39, 45, 49
    966, 12, 31, 36, 48
    967, 34, 49
    968, 31
    969, 25, 33, 42, 45
    970, 16, 27, 32, 45
    971, 25, 41
    972, 6, 9, 15, 24, 25, 40, 45
    973, 7, 8, 9, 11, 29, 37, 45
    974, 21, 36
    975, 5, 7, 14, 17, 22
    976, 38, 40
    977, 2, 7, 15, 22, 25, 33, 41
    978, 27, 28
    979, 16, 32, 45
    980, 8, 18, 28
    981, 13
    982, 41
    983, 27, 28, 38, 48
    984, 3, 18, 35
    985, 16, 20, 32, 36
    986, 16, 32, 45
    987, 4, 9
    988, 22, 31, 39
    989, 0, 2, 46
    990, 34
    991, 0, 2, 46
    992, 0
    993, 3, 6, 13
    994, 3, 29, 44
    995, 9, 27, 28, 45
    996, 2, 10, 22, 31, 33, 46
    997, 2, 6, 22, 32
    998, 0, 4, 9, 30, 33
    999, 3, 18, 35
    1000, 15, 34, 47
    


    package second;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.Map;
    import java.util.Set;
    
    
    /*
     * date:2015-06-10
     * by wenbaoli
     * 
     */
    public class Apriori {
    
    	private float min_sup;//minimum support
    	private float min_conf;//minimum confidence
    	
    	private Map<Integer,Set<String>> TransDataBase;//transaction database
    	private Integer DBnum;
    	
    	private Map<Integer,Map<Set<String>,Float>> freqItermSet;//frequent iterm set,from 1 to k...
    	private Map<Set<String>,Set<Map<Set<String>,Float>>> associationRules;//the final associate rules
    
    
    	public Apriori(Map<Integer,Set<String>> DB , float minSup, float minConf){
    		this.TransDataBase = DB;
    		this.min_conf = minConf;
    		this.min_sup = minSup;
    		this.DBnum = DB.size();
    		freqItermSet =new HashMap<Integer,Map<Set<String>,Float>>();
    		associationRules = new HashMap<Set<String>,Set<Map<Set<String>,Float>>>();
    		
    	}
    
    
    	/**
    	 * @param args
    	 */
    	public static void main(String[] args) {
    		// TODO Auto-generated method stub
    
    		//initial 
    		
    		String file = "data/data.txt";
    		String delimeter = ",";
    		
    		
    		//load data
    		Map<Integer,Set<String>> data = new HashMap<Integer,Set<String>>();
    		
    		int num = 0;
    		try {
    			File mFile = new File(file);
    			FileReader fr = new FileReader(mFile);
    			BufferedReader br = new BufferedReader(fr);
    			String line;
    			
    			
    			while((line=br.readLine())!= null) {
    				line = line.trim();
    				String[] str = line.split(delimeter);
    //				int i = Integer.parseInt(str[0]);
    				Set<String> set = new HashSet<String>();
    
    				for (int i = 1; i < str.length; i++) {
    					set.add(str[i].trim());
    				}
    				num++;
    				data.put(num, set);
    				
    			}
    			br.close();
    		} catch(IOException ex) {
                ex.printStackTrace();
            } 
    		Apriori ap = new Apriori(data,0.005f,0.6f);
    		ap.findAllFreqItermSet();
    		ap.findAssociationRules();
    	}
    	public void findAllFreqItermSet(){
    
    		//找出频繁一项集
    		
    		
    		Map<Set<String>,Float> f1 = new HashMap<Set<String>,Float>();
    		Map<Set<String>,Integer> OneItermSet = new HashMap<Set<String>,Integer>();
    		Iterator<Map.Entry<Integer, Set<String>>> it = this.TransDataBase.entrySet().iterator();
    		
    		while(it.hasNext()){
    			Map.Entry<Integer, Set<String>> entry = it.next();
    			Set<String> itermSet = entry.getValue();
    			for(String iterm:itermSet){
    				Set<String> key = new HashSet<String>();
    				key.add(iterm);
    				if(!OneItermSet.containsKey(key)){
    					OneItermSet.put(key, 1);
    				}else{
    					int value = 1 + OneItermSet.get(key);
    					OneItermSet.put(key, value);
    				}
    			}
    		}
    		//找出支持度大于minSup的频繁一项集
    		Iterator<Map.Entry<Set<String>,Integer>> iter = OneItermSet.entrySet().iterator();
    		while(iter.hasNext()){
    			Map.Entry<Set<String>,Integer> entry = iter.next();
    			//计算支持度
    			Float support = new Float(entry.getValue().toString())/new Float(this.DBnum);
    			if(support >= this.min_sup){
    				f1.put(entry.getKey(), support);
    			}
    		}
    
    		System.out.println("频繁1" + "项集:" + f1);//打印频繁1-项集
    		System.out.println("-------------------------------------------");
    		this.freqItermSet.put(1, f1);
    		
    		//由频繁k项集得到频繁k+1项集
    		int k = 2;
    		while(true){
    			
    			Set<Set<String>> candFreItermSets = this.apriori_gen(k,this.freqItermSet.get(k-1).keySet());
    			Map<Set<String>,Float> freqKItermSetMap = this.getFreqKItermSet(k,candFreItermSets);
    			if(!freqKItermSetMap.isEmpty()){
    				this.freqItermSet.put(k, freqKItermSetMap);
    			} else {
    				break;
    			}
    			System.out.println("频繁" + k + "项集:" + freqKItermSetMap);
    			System.out.println("-------------------------------------------");
    			k++;
    					
    		}
    		
    	}
    	public Map<Set<String>, Float> getFreqKItermSet(int k,
    			Set<Set<String>> candFreItermSets) {
    		Map<Set<String>,Integer> candFreqKItermSetMap = new HashMap<Set<String>,Integer>();
    		
    		//扫描事物数据库
    		Iterator<Map.Entry<Integer, Set<String>>> it = this.TransDataBase.entrySet().iterator();
    		//统计支持度计数
    		while (it.hasNext()){
    			Map.Entry<Integer, Set<String>> entry = it.next();
    			Iterator<Set<String>> iter = candFreItermSets.iterator();
    			while(iter.hasNext()){
    				Set<String> set = iter.next();
    				if(entry.getValue().containsAll(set)){
    					if(!candFreqKItermSetMap.containsKey(set)){
    						candFreqKItermSetMap.put(set, 1);
    					}else {
    						int value = 1+ candFreqKItermSetMap.get(set);
    						candFreqKItermSetMap.put(set, value);
    					}
    				}
    			}
    		}
    		
    		Iterator<Map.Entry<Set<String>, Integer>> iter = candFreqKItermSetMap.entrySet().iterator();
    		Map<Set<String>,Float> freqKIntermSet = new HashMap<Set<String>,Float>();
    		while(iter.hasNext()){
    			Map.Entry<Set<String>, Integer> entry = iter.next();
    			float support = new Float(entry.getValue().toString())/this.DBnum;
    			if(support < this.min_sup){
    				iter.remove();
    			} else {
    				freqKIntermSet.put(entry.getKey(), support);
    			}
    		}
    		
    		return freqKIntermSet;
    	}
    	
    	public Set<Set<String>> apriori_gen(int k, Set<Set<String>> fk){
    		Set<Set<String>> ck = new HashSet<Set<String>>();
    		Iterator<Set<String>> it1 = fk.iterator();
    		while (it1.hasNext()) {
    			Set<String> itermSet1 = it1.next();
    			Iterator<Set<String>> it2 = fk.iterator();
    			while (it2.hasNext()) {
    				Set<String> itermSet2 = it2.next();
    				if(!itermSet1.equals(itermSet2)) {
    					//连接步
    					Set<String> commIterms = new HashSet<String>();
    					commIterms.addAll(itermSet1);
    					commIterms.retainAll(itermSet2);
    					if(commIterms.size() == (k - 2)){
    						Set<String> candIterms = new HashSet<String>();
    						candIterms.addAll(itermSet1);
    						candIterms.removeAll(itermSet2);
    						candIterms.addAll(itermSet2);
    						//剪枝步骤
    						if(!this.has_infrequent_subset(candIterms, fk)){
    							ck.add(candIterms);
    						}
    					}
    				}
    			}
    		}
    		System.out.println(ck.size());
    		return ck;
    	}
    	public boolean has_infrequent_subset(Set<String> set,Set<Set<String>> fk){
    		Set<Set<String>> subItermSet = new HashSet<Set<String>>();
    		Iterator<String> itr = set.iterator();
    		while(itr.hasNext()){
    			//深拷贝
    			Set<String> subItem = new HashSet<String>();
    			Iterator<String> it = set.iterator();
    			while(it.hasNext()){
    				subItem.add(it.next());
    			}
    			
    			//去掉一个项后为k-1子集
    			subItem.remove(itr.next());
    			subItermSet.add(subItem);
    		}
    		
    		Iterator<Set<String>> it = subItermSet.iterator();
    		while(it.hasNext()){
    			if(!fk.contains(it.next())){
    				return true;
    			}
    		}
    		return false;
    	}
    	public void findAssociationRulesTemp(){
    		
    		
    	}
    	public void findAssociationRules(){
    		Iterator<Map.Entry<Integer, Map<Set<String>,Float>>> it = this.freqItermSet.entrySet().iterator();
    		while (it.hasNext()) {
    			Map.Entry<Integer, Map<Set<String>,Float>> entry = it.next();
    			for (Set<String> itemSet : entry.getValue().keySet()) {
    				int n = itemSet.size() / 2;
    				for (int i = 1; i <= n; i++) {
    					Set<Set<String>> subset = this.getProperSubset(i,itemSet);
    					
    					for(Set<String> conditionSet:subset){
    						Set<String> conclusionSet = new HashSet<String>();
    						conclusionSet.addAll(itemSet);
    						conclusionSet.removeAll(conditionSet);
    						int s1 = conditionSet.size();
    						int s2 = conclusionSet.size();
    						
    						float supF = this.freqItermSet.get(s1).get(conditionSet);
    						float supS = this.freqItermSet.get(s2).get(conclusionSet);
    						float sup = this.freqItermSet.get(s1+s2).get(itemSet);
    						
    						float conf1 = sup/supF;
    						float conf2 = sup/supS;
    						
    						if(conf1 >= this.min_conf){
    							if(this.associationRules.get(conditionSet) == null){
    								Set<Map<Set<String>,Float>> conclusionSetSet = new HashSet<Map<Set<String>,Float>>();
    								Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>();
    								sets.put(conclusionSet, conf1);
    								conclusionSetSet.add(sets);
    								
    								this.associationRules.put(conditionSet, conclusionSetSet);
    								
    							} else {
    								Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>();
    								sets.put(conclusionSet, conf1);
    								associationRules.get(conditionSet).add(sets);
    							}
    						}
    						if(conf2 >= this.min_conf){
    							if(this.associationRules.get(conditionSet) == null){
    								Set<Map<Set<String>,Float>> conclusionSetSet = new HashSet<Map<Set<String>,Float>>();
    								Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>();
    								sets.put(conclusionSet, conf2);
    								conclusionSetSet.add(sets);
    								
    								this.associationRules.put(conditionSet, conclusionSetSet);
    								
    							} else {
    								Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>();
    								sets.put(conclusionSet, conf2);
    								associationRules.get(conditionSet).add(sets);
    							}
    						}
    					}
    				}
    				
    			}
    			
    		}
    		System.out.println("关联规则(强规则):" + associationRules);
    	}
    
    
    	private Set<Set<String>> getProperSubset(int i, Set<String> itemSet) {
    		
    		Set<Set<String>> subset = new HashSet<Set<String>>();
    		if(itemSet.size() <= 1){
    			return null;
    		}else if(itemSet.size() == 2){
    			for(String s: itemSet){
    				Set<String> set = new HashSet<String>();
    				set.add(s);
    				if(!subset.contains(s)){
    					subset.add(set);
    				}
    				
    			}
    			return subset;
    		}else {
    			
    			Iterator<String> it = itemSet.iterator();
    			String s = it.next();
    			Set<String> temp = new HashSet<String>(itemSet);
    			temp.remove(s);
    			//包含s的子集
    			Set<Set<String>> subset0 = new HashSet<Set<String>>();
    			subset0 = this.getProperSubset(i-1, temp);
    			subset.addAll(subset0);
    			//不包含s的子集
    			Set<Set<String>> subset1 = new HashSet<Set<String>>();
    			
    			subset1 = this.getProperSubset(i, temp);
    			subset.addAll(subset1);
    			return subset;
    			
    			
    		}
    		
    		
    	}
    
    }
    


  • 相关阅读:
    使用Visual Studio给SharePoint列表添加Event Receiver
    使用客户端对象模型回写SharePoint列表
    思考:通过源码进行安装的前提是什么
    思考:学习一门语言真的是仅仅只有为了使用吗?或者说学习一个语言如果在工作中不使用,那么学习它还有什么好处和价值呢?
    思考:(使用下载工具比如yum,maven等)下载(库或者jar)到本地都需要设置源
    思考:代码块的大小是进行代码抽出一个方法或者进行重构或者是否在设计时设计为一个方法的衡量因素之一吗?
    思考:对源的包装是为了更好的隔离和在中间插入一些层?
    Spring Cloud和Spring Boot的认识
    如何对待基础知识:
    为什么总觉得事情应付不过来,总觉得事情需要花费很多时间?
  • 原文地址:https://www.cnblogs.com/wenbaoli/p/5655746.html
Copyright © 2011-2022 走看看