zoukankan      html  css  js  c++  java
  • subset_lat_dir.sh

    #!/bin/bash

    # Copyright 2018 Jarvan Wang
    # Copyright 2017 Vimal Manohar
    # Apache 2.0.
    #
    # Creates <subset-lat-dir> holding only the lattices of the utterances
    # listed in <subset-data-dir>, taken from the lattices in <lat-dir>.
    #
    # Steps:
    #   1. Copy model files (final.mdl, *.mat, *_opts, tree, phones/) if present.
    #   2. Expand every <lat-dir>/lat.JOB.gz into an ark + scp index.
    #   3. Merge the per-job scp files into one index with unique, sorted keys.
    #   4. Split <subset-data-dir> into $nj parts and, for each part, write the
    #      matching lattices to <subset-lat-dir>/lat.JOB.gz.
    #
    # Variables that can be overridden on the command line
    # (NOTE(review): presumably via --cmd / --nj handled by parse_options.sh).
    cmd=run.pl
    nj=40

    if [ -f ./path.sh ]; then . ./path.sh; fi

    . ./utils/parse_options.sh

    if [ $# -ne 3 ]; then
      # printf is used instead of a here-document so that the usage text does
      # not depend on a terminator line sitting at column 0.
      printf '%s\n' \
        "This script creates a lattice directory containing a subset of" \
        "utterances contained in <subset-data-dir> from the" \
        "original lattice directory <lat-dir>." \
        "" \
        "The number of split jobs in the output lattice directory is" \
        "given by --nj; it does not have to match the number of jobs" \
        "in the original lattice directory." \
        "" \
        "Usage: $0 [options] <subset-data-dir> <lat-dir> <subset-lat-dir>" \
        "e.g.: $0 data/train exp/tri3_lat_sp exp/tri3_lat" \
        "Options:" \
        "--cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." \
        "--nj <n>                                         # number of output jobs (default: 40)."
      exit 1
    fi

    subset_data=$1
    lat_dir=$2
    dir=$3

    # Number of lat.JOB.gz archives present in the source lattice directory.
    ori_nj=$(cat "$lat_dir/num_jobs") || exit 1

    mkdir -p "$dir" || exit 1

    # Best effort: not every lattice directory has all of these files, so a
    # failed copy is deliberately not fatal.
    cp "$lat_dir"/{final.mdl,*.mat,*_opts,tree} "$dir"/ || true
    cp -r "$lat_dir/phones" "$dir" 2>/dev/null || true

    # Expand each compressed archive into an uncompressed ark plus an scp index
    # so that individual utterances can be looked up by key afterwards.
    # $cmd is intentionally unquoted: it may carry extra options (e.g. queue.pl).
    $cmd JOB=1:"$ori_nj" "$dir/log/copy_lattices.JOB.log" \
      lattice-copy "ark:gunzip -c $lat_dir/lat.JOB.gz |" \
      "ark,scp:$dir/lat_tmp.JOB.ark,$dir/lat_tmp.JOB.scp" || exit 1

    # Concatenate the per-job indexes. The raw concatenation is kept as
    # lat_tmp.scp.bak; the de-duplicated version becomes lat_tmp.scp.
    for ((n = 1; n <= ori_nj; n++)); do
      cat "$dir/lat_tmp.$n.scp" || exit 1
    done > "$dir/lat_tmp.scp.bak"

    # Keep one entry per utterance id (the last one seen wins) and emit the
    # entries sorted by key, one "<utt-id> <location>" pair per line.
    perl -e '
      my %hash;
      while (<>) {
        chomp;
        my ($key, $ark) = split;
        next unless defined $ark;   # skip blank or malformed lines
        $hash{$key} = $ark;
      }
      for my $key (sort keys %hash) {
        printf("%s %s\n", $key, $hash{$key});
      }
    ' "$dir/lat_tmp.scp.bak" > "$dir/lat_tmp.scp" || exit 1

    utils/split_data.sh "$subset_data" "$nj" || exit 1

    # For every split of the subset data, select the matching scp entries and
    # write those lattices to a compressed archive.
    $cmd JOB=1:"$nj" "$dir/log/filter_lattices.JOB.log" \
      lattice-copy \
      "scp:utils/filter_scp.pl $subset_data/split${nj}/JOB/utt2spk $dir/lat_tmp.scp |" \
      "ark:| gzip -c > $dir/lat.JOB.gz" || exit 1

    echo "$nj" > "$dir/num_jobs"

    # Temporary files are left in place; uncomment to remove them.
    #rm $dir/lat_tmp.*.{ark,scp} $dir/lat_tmp.scp

    exit 0

       

  • 相关阅读:
    [模板]洛谷T3369 普通平衡树 链表&普通Treap
    C++语法知识点整理
    [模板]洛谷T3373 线段树 模板2
    [模板]洛谷T3372 线段树 模板1
    [模板]洛谷T3368 树状数组 模板2
    JSON
    code first迁移和部署
    序列化 (C#)
    Linq小记
    文件和注册表
  • 原文地址:https://www.cnblogs.com/JarvanWang/p/10280781.html
Copyright © 2011-2022 走看看