zoukankan      html  css  js  c++  java
  • [代码] kv2sparse

    def kv2sparse(lines, num_cols, sep=',', kv_sep=':', hash_key=False):
        """
        Parse key-value formatted text lines into a sparse matrix.

        Each input line is a list of ``key<kv_sep>value`` pairs joined by
        ``sep``. Line ``i`` becomes row ``i`` of the result; each pair
        contributes one entry whose column is derived from the key and whose
        value is the pair's value parsed as float32.

        Parameters
        ----------
        lines : string, list of strings, or string tensor
            The input data in key-value format. A single (scalar) string is
            treated as one line.
        num_cols : int64
            The number of columns of the output sparse matrix. It is also
            the number of hash buckets when ``hash_key`` is True.
        sep : string, optional
            The separator between key-value pairs. The default is ','.
        kv_sep : string, optional
            The separator between a key and its value. The default is ':'.
        hash_key : bool, optional
            If True, map each key string to a bucket in ``[0, num_cols)``
            with a hash function. If False, parse each key as an int64 column
            index. The default is False.

        Returns
        -------
        X : SparseTensor
            The output sparse matrix with dense shape
            ``[number of lines, num_cols]``.

        Notes
        -----
        Entries are emitted in input order. Repeated keys within a line are
        not merged and indices are not reordered, and with
        ``hash_key=False`` the parsed column ids are not checked against
        ``num_cols``. Every pair must split into exactly a key and a value.

        Examples
        --------

        ::

            lines = ["12:4,1:5,88:6,1:3,2:100", "12:4,1:5,88:6,1:3,2:100"]
            X1 = kv2sparse(lines, 100)
            X2 = kv2sparse(lines, 100, hash_key=True)

        Then X1, X2 evaluate as follows

        ::

            X1 = SparseTensorValue(indices=array([[ 0, 12],
             [ 0,  1],
             [ 0, 88],
             [ 0,  1],
             [ 0,  2],
             [ 1, 12],
             [ 1,  1],
             [ 1, 88],
             [ 1,  1],
             [ 1,  2]]), values=array([  4., 5., 6., 3., 100., 4., 5., 6., 3., 100.],
            dtype=float32), dense_shape=array([  2, 100]))
            X2 = SparseTensorValue(indices=array([[ 0, 88],
             [ 0, 49],
             [ 0, 53],
             [ 0, 49],
             [ 0, 59],
             [ 1, 88],
             [ 1, 49],
             [ 1, 53],
             [ 1, 49],
             [ 1, 59]]), values=array([  4., 5., 6., 3., 100., 4., 5., 6., 3., 100.],
            dtype=float32), dense_shape=array([  2, 100]))
        """

        # tf.string_split requires a rank-1 input, so promote a single scalar
        # string to a one-element vector. Rank-1 input passes through as is.
        lines = tf.convert_to_tensor(lines, dtype=tf.string)
        if lines.shape.ndims == 0:
            lines = tf.expand_dims(lines, 0)

        # First split: one sparse entry per "key:value" token; the first index
        # column of `columns.indices` is the originating line (row) number.
        columns = tf.string_split(lines, sep)
        # Second split: break every token into its key and value parts.
        splits = tf.string_split(columns.values, kv_sep)
        # Reshaping to the dense shape only succeeds when every token produced
        # the same number of parts, which rejects ragged/malformed pairs.
        id_vals = tf.reshape(splits.values, splits.dense_shape)
        col_ids, vals = tf.split(id_vals, num_or_size_splits=2, axis=1)
        if hash_key:
            col_ids = tf.string_to_hash_bucket_fast(col_ids[:, 0], num_cols)
        else:
            col_ids = tf.string_to_number(col_ids[:, 0], out_type=tf.int64)
        X = tf.SparseTensor(
            indices=tf.stack((columns.indices[:, 0], col_ids), axis=-1),
            values=tf.string_to_number(vals[:, 0], out_type=tf.float32),
            dense_shape=tf.stack([columns.dense_shape[0], num_cols])
            )
        return X
    
  • 相关阅读:
    td-agent 收集日志到kafka的配置
    ctrl+z 以后怎么恢复挂起的进程
    LCD显示GPS时钟[嵌入式系统]
    树莓派轮盘游戏机[嵌入式系统]
    集成测试工具
    未上线的界面
    前端网页内复杂编辑
    用jquery编写的分页插件
    用jquery编写的tab插件
    EAA脚本语言0.2
  • 原文地址:https://www.cnblogs.com/bregman/p/13743299.html
Copyright © 2011-2022 走看看