zoukankan      html  css  js  c++  java
  • subset_lat_dir.sh

    #!/bin/bash

       

    # Copyright 2018 Jarvan Wang

    # Copyright 2017 Vimal Manohar

    # Apache 2.0.

       

    # Default option values. utils/parse_options.sh (sourced below) is expected
    # to let --cmd / --nj on the command line replace them -- verify against
    # that helper.
    cmd=run.pl
    nj=40

    # Load the recipe's environment setup when one is present.
    [[ -f ./path.sh ]] && . ./path.sh

    . ./utils/parse_options.sh

       

    # Exactly three positional arguments are required; otherwise print the usage
    # text and quit with a non-zero status.
    # printf is used instead of a here-document so the text does not depend on a
    # terminator line that must sit at column 0 (an indented "EOF" is never
    # matched and would swallow the remainder of the script).
    if [ $# -ne 3 ]; then
      printf '%s\n' \
        'This script creates a lattice directory containing a subset of' \
        'utterances contained in <subset-data-dir> from the' \
        'original lattice directory containing lattices for utterances in' \
        '<full-data-dir>.' \
        '' \
        'The output lattice directory is split into --nj jobs (default 40),' \
        'independently of the number of jobs in the original lattice directory.' \
        '' \
        "Usage: $0 [options] <subset-data-dir> <lat-dir> <subset-lat-dir>" \
        " e.g.: $0 data/train exp/tri3_lat_sp exp/tri3_lat" \
        'Options:' \
        '  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs.' \
        '  --nj <n>                                         # number of jobs in the output lattice directory.'
      exit 1
    fi

       

    # Positional arguments (see the usage text).
    subset_data=$1   # data directory listing the utterances to keep
    lat_dir=$2       # existing lattice directory to read from
    dir=$3           # lattice directory to create

    # Job count of the source directory, i.e. how many lat.JOB.gz archives are
    # read later on. Abort when the file cannot be read. The path is quoted so
    # that directories containing spaces or glob characters still work.
    ori_nj=$(cat "$lat_dir/num_jobs") || exit 1

       

    # Nothing below can work without the output directory, so fail early.
    mkdir -p "$dir" || exit 1

    # Best-effort copy of the model files and option files that accompany the
    # lattices. Some of the patterns may match nothing in a given lattice
    # directory, so a failing cp is deliberately not fatal.
    cp "$lat_dir"/{final.mdl,*.mat,*_opts,tree} "$dir"/ || true

    # The phones directory is optional as well; its absence is silenced.
    cp -r "$lat_dir/phones" "$dir" 2>/dev/null || true

       

    # Unpack every compressed source archive into an uncompressed ark plus an scp
    # index, one job per original archive.
    # The three lines form ONE command: the trailing backslashes are required,
    # otherwise lattice-copy is run outside of $cmd and without its output
    # specifier.
    # $cmd is left unquoted on purpose: it may carry extra options
    # (e.g. "utils/queue.pl <queue opts>") that must be split into words.
    $cmd "JOB=1:$ori_nj" "$dir/log/copy_lattices.JOB.log" \
      lattice-copy "ark:gunzip -c $lat_dir/lat.JOB.gz |" \
      "ark,scp:$dir/lat_tmp.JOB.ark,$dir/lat_tmp.JOB.scp" || exit 1

       

    # Concatenate the per-job scp indices, in job order, into one list.
    for n in $(seq "$ori_nj"); do
      cat "$dir/lat_tmp.$n.scp"
    done > "$dir/lat_tmp.scp"

    # Remove duplicate utterance ids (the last entry read wins) and sort by id.
    # Every record must end in a newline: the scp file holds one
    # "<utt-id> <location>" pair per line, so a format string without "\n"
    # would collapse the whole index onto a single line.
    # Unsorted awk alternative, kept for reference:
    #awk '{hash[$1]=$2}END{for(key in hash){printf("%s %s\n",key,hash[key])}}' < $dir/lat_tmp.scp > $dir/lat_tmp_sorted_uniq.scp
    mv "$dir/lat_tmp.scp" "$dir/lat_tmp.scp.bak" || exit 1
    perl -e 'my %hash;while(<>){chomp;($key,$ark)=split;$hash{$key}=$ark};for $key (sort keys %hash){printf("%s %s\n",$key,$hash{$key})}' \
      "$dir/lat_tmp.scp.bak" > "$dir/lat_tmp.scp" || exit 1

       

    # Split the subset data directory into $nj parts; the filter step below reads
    # the utt2spk file of each part, so a failed split is fatal.
    utils/split_data.sh "$subset_data" "$nj" || exit 1

    # For each part, keep only the scp entries whose utterance id appears in that
    # part's utt2spk, and write the selected lattices to a gzip-compressed
    # archive.
    # The four lines form ONE command: without the trailing backslashes
    # lattice-copy would be started without its arguments.
    # $cmd stays unquoted because it may contain extra options.
    $cmd "JOB=1:$nj" "$dir/log/filter_lattices.JOB.log" \
      lattice-copy \
      "scp:utils/filter_scp.pl $subset_data/split${nj}/JOB/utt2spk $dir/lat_tmp.scp |" \
      "ark:| gzip -c > $dir/lat.JOB.gz" || exit 1

       

    # Record how many lat.JOB.gz archives the new directory holds.
    # The redirect target is quoted: unquoted, a path containing spaces makes
    # bash fail with "ambiguous redirect".
    echo "$nj" > "$dir/num_jobs"

    # Cleanup of the intermediate files is currently disabled:
    #rm $dir/lat_tmp.*.{ark,scp} $dir/lat_tmp.scp

    exit 0

       

  • 相关阅读:
    【SQL Server学习笔记】Service Broker创建异步的、数据驱动的消息应用程序
    记录几句不错的话
    DBA最缺的不是技术
    小数点引起的数据类型转换问题
    hdu 3062 2SAT最基础题
    POJ 1679 判断最小生成树是否唯一
    POJ 1459 构图+最大流(Edmond_karp模版)
    POJ 3522 最大边与最小边差值最小的生成树
    POJ 1659 根据度序列构图
    POJ 1273 求最大流(Edmond_karp模板题)
  • 原文地址:https://www.cnblogs.com/JarvanWang/p/10280781.html
Copyright © 2011-2022 走看看