zoukankan      html  css  js  c++  java
  • 机器学习入门(1)------python基础

    环境:Python 3.6.4 |Anaconda, Inc.

    Python常用容器类型

    1.list

     1 l = [1, 'a', 2, 'b']
     2 print(type(l))
     3 print('修改前:', l)
     4 
     5 # 修改list的内容
     6 l[0] = 3
     7 print('修改后:', l)
     8 
     9 # 末尾添加元素
    10 l.append(4)
    11 print('添加后:', l)
    12 
    13 # 遍历list
    14 print('遍历list(for循环):')
    15 for item in l:
    16     print(item)
    17     
    18 # 通过索引遍历list
    19 print('遍历list(while循环):')
    20 i = 0
    21 while i != len(l):
    22     print(l[i])
    23     i += 1
    24     
    25 # 列表合并
    26 print('列表合并(+):', [1, 2] + [3, 4])
    27 
    28 # 列表重复
    29 print('列表重复(*):', [1, 2] * 5)
    30 
    31 # 判断元素是否在列表中
    32 print('判断元素存在(in):', 1 in [1, 2])
    <class 'list'>
    修改前: [1, 'a', 2, 'b']
    修改后: [3, 'a', 2, 'b']
    添加后: [3, 'a', 2, 'b', 4]
    遍历list(for循环):
    3
    a
    2
    b
    4
    遍历list(while循环):
    3
    a
    2
    b
    4
    列表合并(+): [1, 2, 3, 4]
    列表重复(*): [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
    判断元素存在(in): True

    2.tuple

     1 t = (1, 'a', 2, 'b')
     2 print(type(t))
     3 
     4 #元组的内容不能修改,否则会报错
     5 # t[0] = 3 
     6 
     7 # 遍历tuple
     8 print('遍历tuple(for循环):')
     9 for item in t:
    10     print(item)
    11     
    12 # 通过索引遍历tuple
    13 print('遍历tuple(while循环):')
    14 i = 0
    15 while i != len(t):
    16     print(t[i])
    17     i += 1
    18     
    19 # 解包 unpack
    20 a, b, c, _ = t
    21 print('unpack: ', c)
    22 
    23 # 确保unpack接收的变量个数和tuple的长度相同,否则报错
    24 # 经常出现在函数返回值的赋值时
    25 # a, b, c = t
    <class 'tuple'>
    遍历tuple(for循环):
    1
    a
    2
    b
    遍历tuple(while循环):
    1
    a
    2
    b
    unpack:  2

    3.dictionary

     1 d = {'小象学院': 'http://www.chinahadoop.cn/',
     2     '百度': 'https://www.baidu.com/',
     3     '阿里巴巴': 'https://www.alibaba.com/',
     4     '腾讯': 'https://www.tencent.com/'}
     5 
     6 print('通过key获取value: ', d['小象学院'])
     7 
     8 # 遍历key
     9 print('遍历key: ')
    10 for key in d.keys():
    11     print(key)
    12     
    13 # 遍历value
    14 print('遍历value: ')
    15 for value in d.values():
    16     print(value)
    17     
    18 # 遍历item
    19 print('遍历item: ')
    20 for key, value in d.items():
    21     print(key + ': ' + value)
    22 
    23 # format输出格式
    24 print('format输出格式:')
    25 for key, value in d.items():
    26     print('{}的网址是{}'.format(key, value))
    通过key获取value:  http://www.chinahadoop.cn/
    遍历key: 
    小象学院
    百度
    阿里巴巴
    腾讯
    遍历value: 
    http://www.chinahadoop.cn/
    https://www.baidu.com/
    https://www.alibaba.com/
    https://www.tencent.com/
    遍历item: 
    小象学院: http://www.chinahadoop.cn/
    百度: https://www.baidu.com/
    阿里巴巴: https://www.alibaba.com/
    腾讯: https://www.tencent.com/
    format输出格式:
    小象学院的网址是http://www.chinahadoop.cn/
    百度的网址是https://www.baidu.com/
    阿里巴巴的网址是https://www.alibaba.com/
    腾讯的网址是https://www.tencent.com/

    4.set

     1 print('创建set:')
     2 my_set = {1, 2, 3}
     3 print(my_set)
     4 my_set = set([1, 2, 3, 2])
     5 print(my_set)
     6 
     7 print('添加单个元素:')
     8 my_set.add(3)
     9 print('添加3', my_set)
    10 
    11 my_set.add(4)
    12 print('添加4', my_set)
    13 
    14 print('添加多个元素:')
    15 my_set.update([4, 5, 6])
    16 print(my_set)
    创建set:
    {1, 2, 3}
    {1, 2, 3}
    添加单个元素:
    添加3 {1, 2, 3}
    添加4 {1, 2, 3, 4}
    添加多个元素:
    {1, 2, 3, 4, 5, 6}

    5.Counter

    • 初始化
    1 import collections
    2 
    3 c1 = collections.Counter(['a', 'b', 'c', 'a', 'b', 'b'])
    4 c2 = collections.Counter({'a':2, 'b':3, 'c':1})
    5 c3 = collections.Counter(a=2, b=3, c=1)
    6 
    7 print(c1)
    8 print(c2)
    9 print(c3)
    Counter({'b': 3, 'a': 2, 'c': 1})
    Counter({'b': 3, 'a': 2, 'c': 1})
    Counter({'b': 3, 'a': 2, 'c': 1})
    • 更新内容
    1 # 注意这里是做“加法”,不是“替换”
    2 c1.update({'a': 4, 'c': -2, 'd': 4})
    3 print(c1)
    Counter({'a': 6, 'd': 4, 'b': 3, 'c': -1})
    • 访问内容
    1 print('a=', c1['a'])
    2 print('b=', c1['b'])
    3 # 对比和dict的区别
    4 print('e=', c1['e'])
    a= 6
    b= 3
    e= 0
    • elements()方法
    1 for element in c1.elements():
    2     print(element)
    d
    d
    d
    d
    b
    b
    b
    a
    a
    a
    a
    a
    a
    • most_common()方法
    1 c1.most_common(3)
    [('a', 6), ('d', 4), ('b', 3)]

    6.defaultdict

    1 # 统计每个字母出现的次数
    2 s = 'chinadoop'
    3 
    4 # 使用Counter
    5 print(collections.Counter(s))
    Counter({'o': 2, 'd': 1, 'c': 1, 'p': 1, 'a': 1, 'n': 1, 'h': 1, 'i': 1})
    1 # 使用dict
    2 counter = {}
    3 for c in s:
    4     if c not in counter:
    5         counter[c] = 1
    6     else:
    7         counter[c] += 1
    8         
    9 print(counter.items())
    dict_items([('d', 1), ('c', 1), ('p', 1), ('a', 1), ('o', 2), ('n', 1), ('h', 1), ('i', 1)])
    1 # 使用defaultdict
    2 counter2 = collections.defaultdict(int)
    3 for c in s:
    4     counter2[c] += 1
    5 print(counter2.items())
    dict_items([('d', 1), ('c', 1), ('p', 1), ('a', 1), ('o', 2), ('n', 1), ('h', 1), ('i', 1)])
    1 # 记录相同元素的列表
    2 colors = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
    3 d = collections.defaultdict(list)
    4 for k, v in colors:
    5     d[k].append(v)
    6 
    7 print(d.items())
    dict_items([('blue', [2, 4]), ('yellow', [1, 3]), ('red', [1])])

    7.map函数

     1 import math
     2 
     3 print('示例1,获取两个列表对应位置上的最小值:')
     4 l1 = [1, 3, 5, 7, 9]
     5 l2 = [2, 4, 6, 6, 9]
     6 mins = map(min, l1, l2)
     7 print(mins)
     8 
     9 # map()函数操作时,直到访问数据时才会执行
    10 for item in mins:
    11     print(item)
    12 
    13 print('示例2,对列表中的元素进行平方根操作:')
    14 squared = map(math.sqrt, l2)
    15 print(squared)
    16 print(list(squared))
    示例1,获取两个列表对应位置上的最小值:
    <map object at 0x0000019AF8B0CDD8>
    1
    3
    5
    6
    9
    示例2,对列表中的元素进行平方根操作:
    <map object at 0x0000019AF8A79DD8>
    [1.4142135623730951, 2.0, 2.449489742783178, 2.449489742783178, 3.0]

    8.匿名函数lambda

     1 # my_func = lambda a, b, c: a * b
     2 # print(my_func)
     3 # print(my_func(1, 2, 3))
     4 
     5 # 结合map
     6 print('lambda结合map')
     7 l1 = [1, 3, 5, 7, 9]
     8 l2 = [2, 4, 6, 8, 10]
     9 result = map(lambda x, y: x * 2 + y, l1, l2)
    10 print(list(result))
    lambda结合map
    [4, 10, 16, 22, 28]

    9.python操作csv数据文件

    1 import csv
    2 
    3 with open('grades.csv') as csvfile:
    4     grades_data = list(csv.DictReader(csvfile))
    5     
    6 print('记录个数:', len(grades_data))
    7 print('前2条记录:', grades_data[:2])
    8 print('列名:', list(grades_data[0].keys()))
    记录个数: 2315
    前2条记录: [OrderedDict([('student_id', 'B73F2C11-70F0-E37D-8B10-1D20AFED50B1'), ('assignment1_grade', '92.73394640624123'), ('assignment1_submission', '2015-11-02 06:55:34.282000000'), ('assignment2_grade', '83.03055176561709'), ('assignment2_submission', '2015-11-09 02:22:58.938000000'), ('assignment3_grade', '67.16444141249367'), ('assignment3_submission', '2015-11-12 08:58:33.998000000'), ('assignment4_grade', '53.01155312999494'), ('assignment4_submission', '2015-11-16 01:21:24.663000000'), ('assignment5_grade', '47.710397816995446'), ('assignment5_submission', '2015-11-20 13:24:59.692000000'), ('assignment6_grade', '38.16831825359636'), ('assignment6_submission', '2015-11-22 18:31:15.934000000')]), OrderedDict([('student_id', '98A0FAE0-A19A-13D2-4BB5-CFBFD94031D1'), ('assignment1_grade', '86.79082085792986'), ('assignment1_submission', '2015-11-29 14:57:44.429000000'), ('assignment2_grade', '86.29082085792986'), ('assignment2_submission', '2015-12-06 17:41:18.449000000'), ('assignment3_grade', '69.7726566863439'), ('assignment3_submission', '2015-12-10 08:54:55.904000000'), ('assignment4_grade', '55.0981253490751'), ('assignment4_submission', '2015-12-13 17:32:30.941000000'), ('assignment5_grade', '49.5883128141676'), ('assignment5_submission', '2015-12-19 23:26:39.285000000'), ('assignment6_grade', '44.62948153275085'), ('assignment6_submission', '2015-12-21 17:07:24.275000000')])]
    列名: ['student_id', 'assignment1_grade', 'assignment1_submission', 'assignment2_grade', 'assignment2_submission', 'assignment3_grade', 'assignment3_submission', 'assignment4_grade', 'assignment4_submission', 'assignment5_grade', 'assignment5_submission', 'assignment6_grade', 'assignment6_submission']
    1 avg_assign1 = sum([float(row['assignment1_grade']) for row in grades_data]) / len(grades_data) 
    2 print('assignment1平均分数:', avg_assign1)
    assignment1平均分数: 74.5357320747794
    1 assign1_sub_month = set(row['assignment1_submission'][:7] for row in grades_data)
    2 print(assign1_sub_month)
    {'2016-02', '2015-09', '2016-01', '2016-04', '2016-03', '2016-06', '2016-08', '2015-10', '2016-05', '2016-07', '2015-12', '2015-11'}

    科学计算库NumPy

    1 import numpy as np

    1. 创建Array

    1 my_list = [1, 2, 3]
    2 x = np.array(my_list)
    3 
    4 print('列表:', my_list)
    5 print('Array: ', x)
    列表: [1, 2, 3]
    Array:  [1 2 3]
    1 np.array([1, 2, 3]) - np.array([4, 5, 6])
    array([-3, -3, -3])
    1 m = np.array([[1, 2, 3], [4, 5, 6]])
    2 print(m)
    3 print('shape: ', m.shape)
    [[1 2 3]
     [4 5 6]]
    shape:  (2, 3)
    1 n = np.arange(0, 30, 2)
    2 print(n)
    [ 0  2  4  6  8 10 12 14 16 18 20 22 24 26 28]
    1 n = n.reshape(3, 5)
    2 print('reshape后: ')
    3 print(n)
    reshape后: 
    [[ 0  2  4  6  8]
     [10 12 14 16 18]
     [20 22 24 26 28]]
    1 print('ones:\n', np.ones((3, 2)))
    2 print('zeros:\n', np.zeros((3, 2)))
    3 print('eye:\n', np.eye(3))
    4 print('diag:\n', np.diag(my_list))
    ones:
     [[1. 1.]
     [1. 1.]
     [1. 1.]]
    zeros:
     [[0. 0.]
     [0. 0.]
     [0. 0.]]
    eye:
     [[1. 0. 0.]
     [0. 1. 0.]
     [0. 0. 1.]]
    diag:
     [[1 0 0]
     [0 2 0]
     [0 0 3]]
    1 print('*操作:\n', np.array([1, 2, 3] * 3))
    2 print('repeat:\n', np.repeat([1, 2, 3], 3))
    *操作:
     [1 2 3 1 2 3 1 2 3]
    repeat:
     [1 1 1 2 2 2 3 3 3]
    1 p1 = np.ones((3, 3))
    2 p2 = np.arange(9).reshape(3, 3)
    3 print('纵向叠加: \n', np.vstack((p1, p2)))
    4 print('横向叠加: \n', np.hstack((p1, p2)))
    纵向叠加: 
     [[ 1.  1.  1.]
     [ 1.  1.  1.]
     [ 1.  1.  1.]
     [ 0.  1.  2.]
     [ 3.  4.  5.]
     [ 6.  7.  8.]]
    横向叠加: 
     [[ 1.  1.  1.  0.  1.  2.]
     [ 1.  1.  1.  3.  4.  5.]
     [ 1.  1.  1.  6.  7.  8.]]

    2. Array操作

    1 p1 = np.array([[1, 1, 1], [1, 1, 1],[1,1,1]])
    2 p2 = np.arange(9).reshape(3, 3)
    3 print('p1: \n', p1)
    4 print('p2: \n', p2)
    5 
    6 print('p1 + p2 = \n', p1 + p2)
    7 print('p1 * p2 = \n', p1 * p2)
    8 print('p2^2 = \n', p2 ** 2)
    9 print('p1.p2 = \n', p1.dot(p2))
    p1: 
     [[1 1 1]
     [1 1 1]
     [1 1 1]]
    p2: 
     [[0 1 2]
     [3 4 5]
     [6 7 8]]
    p1 + p2 = 
     [[1 2 3]
     [4 5 6]
     [7 8 9]]
    p1 * p2 = 
     [[0 1 2]
     [3 4 5]
     [6 7 8]]
    p2^2 = 
     [[ 0  1  4]
     [ 9 16 25]
     [36 49 64]]
    p1.p2 = 
     [[ 9 12 15]
     [ 9 12 15]
     [ 9 12 15]]
    1 p3 = np.arange(6).reshape(2, 3)
    2 print('p3形状: ', p3.shape)
    3 print(p3)
    4 p4 = p3.T
    5 print('转置后p3形状: ', p4.shape)
    6 print(p4)
    p3形状:  (2, 3)
    [[0 1 2]
     [3 4 5]]
    转置后p3形状:  (3, 2)
    [[0 3]
     [1 4]
     [2 5]]
    1 p3 = np.arange(6).reshape(2, 3)
    2 print('p3数据类型:', p3.dtype)
    3 print(p3)
    4 
    5 p5 = p3.astype('float')
    6 print('p5数据类型:', p5.dtype)
    7 print(p5)
    p3数据类型: int32
    [[0 1 2]
     [3 4 5]]
    p5数据类型: float64
    [[0. 1. 2.]
     [3. 4. 5.]]
    a = np.array([-4, -2, 1, 3, 5])
    print('sum: ', a.sum())
    print('min: ', a.min())
    print('max: ', a.max())
    print('mean: ', a.mean())
    print('std: ', a.std())  # 标准差
    print('argmax: ', a.argmax())  # argmax(f(x))是使得 f(x)取得最大值所对应的变量x
    print('argmin: ', a.argmin())  # argmin(f(x))是使得 f(x)取得最小值所对应的变量x
    sum:  3
    min:  -4
    max:  5
    mean:  0.6
    std:  3.2619012860600183
    argmax:  4
    argmin:  0

    3. 索引与切片

    1 # 一维array
    2 s = np.arange(13) ** 2
    3 print('s: ', s)
    4 print('s[0]: ', s[0])
    5 print('s[4]: ', s[4])
    6 print('s[0:3]: ', s[0:3])
    7 print('s[[0, 2, 4]]: ', s[[0, 2, 4]])
    s:  [  0   1   4   9  16  25  36  49  64  81 100 121 144]
    s[0]:  0
    s[4]:  16
    s[0:3]:  [0 1 4]
    s[[0, 2, 4]]:  [ 0  4 16]
    1 # 二维array
    2 r = np.arange(36).reshape((6, 6))
    3 print('r: \n', r)
    4 print('r[2, 2]: \n', r[2, 2])  # 对应矩阵第三行第三列
    5 print('r[3, 3:6]: \n', r[3, 3:6])  # 对应第四行第四列到第六列的数(只表示该行的数)
    r: 
     [[ 0  1  2  3  4  5]
     [ 6  7  8  9 10 11]
     [12 13 14 15 16 17]
     [18 19 20 21 22 23]
     [24 25 26 27 28 29]
     [30 31 32 33 34 35]]
    r[2, 2]: 
     14
    r[3, 3:6]: 
     [21 22 23]
    1 r = np.arange(36).reshape((6, 6))
    2 r > 30
    array([[False, False, False, False, False, False],
           [False, False, False, False, False, False],
           [False, False, False, False, False, False],
           [False, False, False, False, False, False],
           [False, False, False, False, False, False],
           [False,  True,  True,  True,  True,  True]])
    1 # 过滤
    2 print(r[r > 30])
    3 
    4 # 将大于30的数赋值为30
    5 r[r > 30] = 30
    6 print(r)
    [31 32 33 34 35]
    [[ 0  1  2  3  4  5]
     [ 6  7  8  9 10 11]
     [12 13 14 15 16 17]
     [18 19 20 21 22 23]
     [24 25 26 27 28 29]
     [30 30 30 30 30 30]]
    1 # copy()操作
    2 r2 = r[:3, :3]
    3 print(r2)
    [[ 0  1  2]
     [ 6  7  8]
     [12 13 14]]
    1 # 将r2内容设置为0
    2 r2[:] = 0
    3 
    4 # 查看r的内容
    5 print(r)
    [[ 0  0  0  3  4  5]
     [ 0  0  0  9 10 11]
     [ 0  0  0 15 16 17]
     [18 19 20 21 22 23]
     [24 25 26 27 28 29]
     [30 30 30 30 30 30]]
    1 r3 = r.copy()
    2 r3[:] = 0
    3 print(r)
    [[ 0  0  0  3  4  5]
     [ 0  0  0  9 10 11]
     [ 0  0  0 15 16 17]
     [18 19 20 21 22 23]
     [24 25 26 27 28 29]
     [30 30 30 30 30 30]]

    4. 遍历 Array

    1 import numpy as np
    2 t = np.random.randint(0, 10, (4, 3))
    3 print(t)
    [[3 2 7]
     [4 9 1]
     [1 3 0]
     [0 9 1]]
    1 for row in t:
    2     print(row)
    [3 2 7]
    [4 9 1]
    [1 3 0]
    [0 9 1]
    1 # 使用enumerate()
    2 for i, row in enumerate(t):
    3     print('row {} is {}'.format(i, row))
    row 0 is [3 2 7]
    row 1 is [4 9 1]
    row 2 is [1 3 0]
    row 3 is [0 9 1]
    1 t2 = t ** 2
    2 print(t2)
    [[ 9  4 49]
     [16 81  1]
     [ 1  9  0]
     [ 0 81  1]]
    1 # 使用zip对两个array进行遍历计算
    2 for i, j in zip(t, t2):
    3     print('{} + {} = {}'.format(i, j, i + j))
    [3 2 7] + [ 9  4 49] = [12  6 56]
    [4 9 1] + [16 81  1] = [20 90  2]
    [1 3 0] + [1 9 0] = [ 2 12  0]
    [0 9 1] + [ 0 81  1] = [ 0 90  2]


  • 相关阅读:
    python 使用pyinstaller生成exe,以及编译报错:编译时报错如下:No module named timedeltas not build. If you want import pandas from the source directory, you may need to run 'python setup.py build_ext --inplace --force' to
    Remote desktop manager 如何导入.db配置文件
    C# string怎么转换成泛型T?
    C# 如何在ComboBox输入文字改变时,触发事件?
    C# 检查panel所有的checkbox 是否被选中
    C# bool? 的意思
    WPF: Accessing Databases with Windows Presentation Foundation / WPF链接数据库
    WPF 03
    WPF MVVC 基础
    使用 Topshelf 创建 Windows 服务
  • 原文地址:https://www.cnblogs.com/Hwangzhiyoung/p/8727201.html
Copyright © 2011-2022 走看看