zoukankan      html  css  js  c++  java
  • data_summarize.pl data目录文本时长汇总脚本

    #!/usr/bin/env perl

    # Copyright 2018 Jarvan Wang

    use strict;
    use warnings;

    # Summarise a data directory: group the utterances listed in <data>/text by
    # their transcript, add up the durations found in <data>/utt2dur, and print
    # one "text@utterance-count@hours" record per transcript plus a total record.
    #
    # `use utf8` is deliberately NOT enabled: input lines and the literal labels
    # below are handled as raw bytes, so they are written out unchanged. All
    # format strings are single-quoted so that '@' can never be taken for an
    # array interpolation.
    #
    # NOTE(review): every message/record ends in a single space rather than a
    # newline; presumably a "\n" was intended -- confirm before changing, since
    # it alters the output format.

    # Transcripts whose accumulated duration does not exceed this many seconds
    # are left out of the per-text listing (they still count toward the total).
    my $MIN_LISTED_SECONDS = 1000;

    if (@ARGV != 1) {
        print STDERR 'Usage: keyword_summarize.pl <data> ';
        exit(1);
    }

    my $data_dir     = $ARGV[0];
    my $text_file    = "$data_dir/text";
    my $utt2dur_file = "$data_dir/utt2dur";

    unless (-e $text_file && -e $utt2dur_file) {
        print STDERR "$text_file or $utt2dur_file does not exist!";
        exit(1);
    }

    # Read a file of "<id> <rest of line>" records.
    #   $path   - file to read
    # Returns a hash ref mapping each id to the remainder of its line (the empty
    # string when the line holds only an id). Later duplicates of an id replace
    # earlier ones. Blank lines are skipped. Dies if the file cannot be opened.
    sub read_table {
        my ($path) = @_;
        my %value_of;

        open(my $fh, '<', $path) or die "Cannot open $path: $!\n";
        while (my $line = <$fh>) {
            chomp $line;
            # split ' ' tolerates tabs, repeated blanks and leading whitespace;
            # the limit of 2 keeps the transcript's internal spaces intact.
            my ($id, $rest) = split(' ', $line, 2);
            next unless defined $id;
            $value_of{$id} = defined $rest ? $rest : '';
        }
        close($fh);

        return \%value_of;
    }

    my $text_of = read_table($text_file);
    my $dur_of  = read_table($utt2dur_file);

    # Accumulate duration and utterance count per distinct transcript.
    my %seconds_for;
    my %count_for;
    my $missing_dur = 0;

    for my $utt_id (keys %$text_of) {
        my $text    = $text_of->{$utt_id};
        my $seconds = $dur_of->{$utt_id};

        # An utterance without a usable duration still counts as an utterance
        # but contributes zero seconds.
        unless (defined $seconds && length $seconds) {
            $missing_dur++;
            $seconds = 0;
        }

        $seconds_for{$text} += $seconds;
        $count_for{$text}   += 1;
    }

    warn "$missing_dur utterance(s) in $text_file have no duration in $utt2dur_file; counted as 0s\n"
        if $missing_dur;

    # Header record: text @ utterance count @ hours.
    print '文本@语句数@小时 ';

    my $count_sum = 0;
    my $hour_sum  = 0;

    # Shortest total duration first; ties broken by transcript for stable output.
    my @texts_by_duration =
        sort { $seconds_for{$a} <=> $seconds_for{$b} or $a cmp $b } keys %seconds_for;

    for my $text (@texts_by_duration) {
        # Totals are built from the two-decimal rounded value, matching what
        # is printed for each record.
        my $seconds = sprintf('%.2f', $seconds_for{$text});

        $count_sum += $count_for{$text};
        $hour_sum  += $seconds / 3600;

        next unless $seconds > $MIN_LISTED_SECONDS;
        printf('%s@%d@%.2f ', $text, $count_for{$text}, $seconds / 3600);
    }

    # Total record covers every transcript, including those not listed above.
    printf('总和@%d@%.2f ', $count_sum, $hour_sum);

     

  • 相关阅读:
    Linux-文件目录管理
    20. 有效的括号
    242. 有效的字母异位词
    387. 字符串中的第一个唯一字符
    136. 只出现一次的数字
    14. 最长公共前缀
    268. 丢失的数字
    169. 多数元素
    26. 删除有序数组中的重复项
    283. 移动零
  • 原文地址:https://www.cnblogs.com/JarvanWang/p/10280793.html
Copyright © 2011-2022 走看看