zoukankan      html  css  js  c++  java
  • python pandas tensorflow使用总结

    ################## pd tf 相关使用技巧 ##################
    python 函数只能都放在一个包里。。。
    python 的 with 语句(上下文管理器)会在代码块结束时自动释放资源, 作用类似 golang 的 defer
    python 模块应避免循环导入: 若 test1 import test2 而 test2 又 import test1, 后被导入的一方会拿到尚未初始化完成的模块对象, 调用其中的名字时可能抛出 ImportError 或 AttributeError;


    ## 列表、字典判断 IO 异常处理 ##

    # Emptiness checks for lists / dicts: empty containers are falsy, so test
    # truthiness instead of comparing against [] or {}.
    # `if not l:` means "is empty"; use `if l:` for the "is not empty" case.
    if not l:
        pass
    if not m:
        pass

    # Check whether a key exists in a dict (`in` on the dict already looks at
    # its keys, so `.keys()` is unnecessary):
    if 'key' in test:
        pass

    # Insertion-ordered map (comparable to a "LinkedMap") and dict with defaults
    from collections import OrderedDict, defaultdict


    # IO
    import os
    from datetime import datetime

    # Write: append a space to every item, then write them all in one call.
    with open("./sql.txt", 'w') as fw:
        l = map(lambda x: x + " ", l)
        fw.writelines(list(l))

    # Read: the file is closed automatically when the `with` block exits.
    with open("./sql.txt", 'r') as fr:
        lines = fr.readlines()
        print(lines)

    os.system(ex)  # run a bash command (`ex` holds the command string)

    # Current date formatted as YYYYMMDD
    datetime.today().strftime('%Y%m%d')

    # Build a file path relative to the current working directory:
    path = os.getcwd()
    file_path = os.path.join(path, 'prod.cfg')


    # String slicing: just use []; here everything from '#' onwards is dropped.
    hash_pos = line.find("#")
    if hash_pos != -1:
        line = line[0:hash_pos]


    # Exception handling:
    try:
        pass  # code that may fail goes here
    except Exception:
        # A bare `raise` re-raises the active exception with its traceback intact.
        raise


    ## 匿名函数 ##
    # map sorted filter


    from typing import Any, Tuple, Iterator


    # map: lazily pair every element with the constant 1
    l = [1, 2, 3, 4, 5]
    t1: Iterator[Tuple[Any, int]] = map(lambda item: (item, 1), l)

    # sorted: order the (key, value) pairs by value, ascending
    m = {"a": 1, "b": 0}

    t = sorted(m.items(), key=lambda pair: pair[1])


    # filter: iterating a dict yields its keys; keep the keys whose first
    # character is not 'b'
    f = [key for key in m if key[0].find('b') == -1]


    ## 枚举迭代删除 ##


    # for enumerate

    # enumerate: the loop target may be any assignable expression, so each
    # character is stored straight into some_dict under its index.
    some_string = "wtf"
    some_dict = {}
    for i, some_dict[i] in enumerate(some_string):
        pass

    print(some_dict)


    # zip: walk two sequences in lockstep
    index = [1, 2, 3]
    words = ['a', 'b', 'c']
    for i, w in zip(index, words):
        pass


    # Iterate a list in reverse: reversed() gives a lazy iterator,
    # [::-1] builds a reversed copy.
    for i in reversed(index):
        pass

    for i in index[::-1]:
        pass


    # any(): True if at least one element satisfies the condition;
    # all(): True only if every element satisfies it.

    r = any(i != 1 for i in index)
    print(r)


    ## 在迭代时 删除原表 需要借助副本 ##

    # Variant 1: iterate over a slice copy while removing from the original.
    list_3 = [1, 2, 3, 4]
    for item in list_3[:]:
        list_3.remove(item)
    print(list_3)

    # Variant 2: iterate over an explicit copy; the copy itself stays intact.
    list_3 = [1, 2, 3, 4]
    list_temp = list_3.copy()
    for item in list_temp:
        list_3.remove(item)
    print(list_3)


    ## Pandas操作 ##


    import pandas as pd

    # Column-oriented source data: each key becomes a column, in this order.
    data = dict(a=[1, 2, 3], c=[4, 5, 6], b=[7, 8, 9])

    # Build the DataFrame with the string row labels '1', '2', '3'.
    frame = pd.DataFrame(data, index=list('123'))

    # group by: split df into one sub-DataFrame per distinct "vin" value.
    # (No type annotation here: Union / SeriesGroupBy / DataFrameGroupBy are
    # not imported, and a module-level annotation is evaluated at runtime.)
    d = df.groupby("vin")

    # Iterating a GroupBy yields (key, group_data) pairs, where group_data is
    # the DataFrame holding the rows of that group.
    for key, group_data in d:
        for i in range(len(group_data)):
            # Row i of the group by position, then its "mileage" column.
            group_data.iloc[i]["mileage"]

    # To change values, assigning through this chained iloc lookup does not
    # modify the group; copy the row out and collect the modified copies in a list.


    # Select several columns:
    result = df[["task_name", "task_name_en"]]
    # Select several rows by position:
    result.iloc[[0, 1, 2, 3]]


    # Concatenating DataFrames
    # With pd.DataFrame([c1, c2]) or pd.concat([p1, p2]), first make sure every
    # frame has the same number of columns. If that still does not work, build
    # the frame from a dict of equally long lists instead:
    # NOTE(review): "table_name" is not among the columns selected into
    # `result` above; presumably `result` comes from a different query here.
    t = {
        "task_name": result["task_name"].to_list(),
        "table_name": result["table_name"].to_list(),
        "content_crt": l_crt,
        "content_ist": l_ist,
    }
    f = pd.DataFrame(t)


    # numpy:
    # Random integers in [-1, 1) with shape (5, 5). As an alternative,
    # np.random.uniform(low, high, size) draws floats, whose values
    # practically never repeat.
    np.random.randint(-1, 1, size=(5, 5))
    # Take column 1 of every row of m; without axis=1, take() indexes the
    # flattened array and returns a single element instead.
    np.take(m, 1, axis=1)

    ## Change the values of a pandas column through Series.map
    import re

    gender_map = {'F': 0, 'M': 1}
    users['Gender'] = users['Gender'].map(gender_map)

    # Replace via a dict of {original value: new value}; every distinct age
    # gets an integer code.
    age_map = {val: ii for ii, val in enumerate(set(users['Age']))}
    users['Age'] = users['Age'].map(age_map)

    # Backslash + "(" escapes the literal parenthesis: group(1) is everything
    # before the trailing "(year)", e.g. "Toy Story (1995)" -> "Toy Story ".
    pattern = re.compile(r'^(.*)\((\d+)\)$')
    title_map = {val: pattern.match(val).group(1) for val in set(movies['Title'])}


    #### tensorflow 测试 ####

    
    
    1. 组件
    使用graph -> 表示计算任务
    使用session.context -> 执行任务
    使用tensor  -> 表示数据                  每个Tensor 是多维数组[batch, height, width, channels] .ndarray
    使用Variable -> 维护状态
    使用feed fetch -> 赋值与获取数据
    
    
    2. 组件使用
    -> 一个常量为一个节点 op, 例如创建两个节点
    # Two graph nodes: a constant matrix and its product with itself.
    matrix = tf.constant([[2., 1.], [2., 1.]])     # constants need no initialization, variables do
    product = tf.matmul(matrix, matrix)
    
    
    -> 执行计算任务 使用with自动释放资源, 代替sess.close()
    # Run the graph inside a `with` block so the session is released on exit.
    with tf.Session() as sess:
        #with tf.device("/gpu:1"):
        rs = sess.run(product)   # evaluate the matmul node and fetch its value
        print(rs)
    
    
    


    -> 变量需要初始化, 使用InteractiveSession()交互环境, Tensor.eval() 和 Operation.run() 方法代替 Session.run()
    sess = tf.InteractiveSession()      # interactive mode: op.run() / tensor.eval() can be called directly instead of sess.run(), and graph nodes can be added while the graph is being run
    
    x = tf.Variable([[1.0, 2.0], [2.0, 4.0]])
    x.initializer.run()                 # initialize this one variable; outside interactive mode run tf.global_variables_initializer() (formerly tf.initialize_all_variables()) in a session
    
    # Element-wise x - matrix, evaluated through the interactive session.
    sub = tf.subtract(x, matrix)
    print(sub.eval())
    
    
    -> 改变一个节点op的状态, 即计数器
    # A counter: `state` holds the current value, `update` stores state + 1 back into it.
    state = tf.Variable(0, name="count")
    
    one = tf.constant(1)
    add_op = tf.add(state, one)
    update = tf.assign(state, add_op)       # assign op that updates the node's state when run
    3. Fetch 与 Feed
    
    input_1 = tf.placeholder(tf.dtypes.float32)         # placeholders are fed through run(feed_dict=...)
    input_2 = tf.placeholder(tf.dtypes.float32)
    out_1 = tf.add(input_1, input_2)                    # outputs are fetched by passing them to run()
    out_2 = tf.subtract(input_1, input_2)
    
    
    # Feed both placeholders and fetch both results in a single run() call.
    with tf.Session() as sess:
        o1, o2 = sess.run([out_1, out_2], feed_dict={input_1: [7.], input_2: [5.]})
        print(o1, o2)
    4. 可视化与保存
    
    # # 训练可视化
    # summary_op = tf.summary.merge_all()
    # summary_writer = tf.summary.FileWriter("train_dir",
    #                                        graph=sess.graph)
    #
    # summary_str = sess.run(summary_op, feed_dict=feed_dict)
    # summary_writer.add_summary(summary_str, step)
    #
    # # 保存参数
    # saver = tf.train.Saver()
    # saver.save(sess, FLAGS.train_dir, global_step=step)
    #
    # saver.restore(sess, FLAGS.train_dir)
    
    
    # 启动TensorBoard 
    # python tensorflow/tensorboard/tensorboard.py --logdir=path/to/log-directory
    # tensorboard --logdir=/path/to/log-directory
    
    
    
    
    # ################# 使用
    
    
    # Static size of dimension 1 (`Tensor` stands for any tensor object here).
    Tensor.get_shape()[1]

    # These two must go together: variables have to be initialized before an
    # op that reads them is run.
    # tf.global_variables_initializer() replaces the deprecated
    # tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    d1 = sess.run(h_gen, feed_dict={x: x_data, z: z_d})
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        # Initialize the variables inside the session that will use them.
        sess.run(init_op)

        # Run the counter update three times, printing the state after each step.
        for _ in range(3):
            sess.run(update)
            print(state.eval())
    
    
    
    

    import tensorflow as tf
    import numpy as np

    # Demo: embedding lookup followed by a text-CNN style conv / max-pool stack.
    uid_max = 500        # number of rows in the embedding table
    batch_size = 10
    embed_dim = 32       # width of each embedding vector
    filter_num = 8       # number of convolution filters
    window_size = 2      # height of the convolution window

    feature_num = 20     # number of ids per sample

    # All-zero ids, created as int32 to match the int32 placeholder below.
    data = np.zeros((batch_size, feature_num), dtype=np.int32)

    uid_data = np.reshape(data, [batch_size, feature_num])


    sess = tf.InteractiveSession()

    uid = tf.placeholder(tf.int32, [None, feature_num], name="uid")

    uid_embed_matrix = tf.Variable(tf.random_uniform([uid_max, embed_dim], -1, 1),
                                   name="uid_embed_matrix")
    # Look up the embedding row of every id: (batch_size, feature_num, embed_dim)
    uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid,
                                             name="uid_embed_layer")

    # Sum over axis 1 while keeping the rank (`keepdims` replaces the deprecated
    # `keep_dims`). Kept under its own name so the next line no longer discards it.
    summed_layer = tf.reduce_sum(uid_embed_layer, axis=1, keepdims=True)
    # conv2d needs a channel axis: (batch_size, feature_num, embed_dim, 1)
    new_layer = tf.expand_dims(uid_embed_layer, -1)

    # Convolution part
    filter_weights = tf.Variable(
        tf.truncated_normal([window_size, embed_dim, 1, filter_num], stddev=0.1),
        name="filter_weights")
    filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]), name="filter_bias")

    conv_layer = tf.nn.conv2d(new_layer, filter_weights, [1, 1, 1, 1], padding="VALID", name="conv_layer")
    relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias), name="relu_layer")
    # Pool over the whole VALID conv output height (feature_num - window_size + 1).
    # The original hard-coded 15 although feature_num is 20, which pooled only
    # part of the height.
    maxpool_layer = tf.nn.max_pool(relu_layer, [1, feature_num - window_size + 1, 1, 1],
                                   [1, 1, 1, 1], padding="VALID", name="maxpool_layer")


    # tf.global_variables_initializer() replaces the deprecated
    # tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())

    feed_dict = {uid: uid_data}

    layer = uid_embed_layer.eval(feed_dict)

    print(layer.shape)

  • 相关阅读:
    优化MySchool数据库(存储过程)
    优化MySchool数据库(事务、视图、索引)
    优化MySchool数据库(四)
    优化MySchool数据库(三)
    SQLAchemy
    python操作mysql
    python队列
    零碎知识
    super深究
    Python操作RabbitMQ
  • 原文地址:https://www.cnblogs.com/ruili07/p/11301520.html
Copyright © 2011-2022 走看看