zoukankan      html  css  js  c++  java
  • Individual Project Word frequency program by HJB

    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Threading.Tasks;


    using System.Collections;
    namespace ConsoleApplication1
    {
    /// <summary>
    /// Mutable pair holding an occurrence counter and the spelling that
    /// goes with it. Instances are stored as dictionary values by the
    /// word-frequency program in this file.
    /// </summary>
    class v
    {
        /// <summary>
        /// Builds an entry from an initial count and an initial spelling.
        /// </summary>
        /// <param name="nu">Initial value for <see cref="n"/>.</param>
        /// <param name="st">Initial value for <see cref="s"/>.</param>
        public v(int nu, string st)
        {
            n = nu;
            s = st;
        }

        /// <summary>Occurrence counter.</summary>
        public int n { get; set; }

        /// <summary>Spelling associated with the counter.</summary>
        public string s { get; set; }
    }
    /// <summary>
    /// Word / phrase frequency counter.
    /// <para>Usage:</para>
    /// <para><c>program &lt;dir&gt;</c> counts single words and reports all of them.</para>
    /// <para><c>program -e2 &lt;dir&gt;</c> counts two-word phrases and reports the ten most frequent.</para>
    /// <para><c>program -e3 &lt;dir&gt;</c> counts three-word phrases and reports the ten most frequent.</para>
    /// <para>
    /// The directory is scanned recursively for .txt, .h, .cpp and .cs files and
    /// the report is written to a text file inside that directory.
    /// </para>
    /// </summary>
    class Program
    {
        private const string OutputFileName = "黄敬博.txt";
        private const string UsageMessage = "please input the correct file adress";
        private const int TopPhraseCount = 10;

        // Single-word mode: three letters followed by letters/digits, optionally
        // introduced by an underscore (which is stripped before counting).
        private const string SingleWordPattern =
            "(\\b[a-zA-Z]{3}[A-Za-z0-9]+)|(\\b[a-zA-Z]{3})|((_[a-zA-Z]{3}[A-Za-z0-9]+)|(_[a-zA-Z]{3}))";

        // One word of a phrase; phrases are this pattern repeated with exactly
        // one whitespace character between consecutive words.
        private const string PhraseWordPattern = @"\b[A-Za-z]{3,}[A-Za-z0-9]*";

        private static readonly string[] ScannedExtensions = { ".txt", ".h", ".cpp", ".cs" };

        /// <summary>
        /// Entry point: selects the counting mode from the command line.
        /// </summary>
        /// <param name="args">
        /// Either a single directory path, or <c>-e2</c>/<c>-e3</c> followed by a directory path.
        /// </param>
        static void Main(string[] args)
        {
            // Guard first: every branch below indexes into args.
            if (args.Length == 0)
            {
                Console.WriteLine(UsageMessage);
                return;
            }

            if (args.Length == 1)
            {
                // A lone argument is always treated as the directory to scan.
                Run(args[0], 1, int.MaxValue);
                return;
            }

            if (args[0].Equals("-e2"))
            {
                Run(args[1], 2, TopPhraseCount);
            }
            else if (args[0].Equals("-e3"))
            {
                Run(args[1], 3, TopPhraseCount);
            }
            else
            {
                Console.WriteLine(UsageMessage);
            }
        }

        /// <summary>
        /// Scans <paramref name="path"/> and writes the frequency report.
        /// </summary>
        /// <param name="path">Directory to scan recursively.</param>
        /// <param name="wordsPerEntry">1 for single words, 2 or 3 for phrases.</param>
        /// <param name="maxEntries">Maximum number of report lines.</param>
        private static void Run(string path, int wordsPerEntry, int maxEntries)
        {
            if (!Directory.Exists(path))
            {
                // Stop here: the report lives inside this directory, so trying to
                // create it would only fail with DirectoryNotFoundException.
                Console.WriteLine("wrong path");
                return;
            }

            string outputPath = Path.Combine(path, OutputFileName);
            var counts = new Dictionary<string, v>();

            if (Directory.GetFiles(path).Length == 0 && Directory.GetDirectories(path).Length == 0)
            {
                // An (empty) report is still produced below.
                Console.WriteLine("empty directory");
            }
            else
            {
                Regex pattern = BuildPattern(wordsPerEntry);
                foreach (string file in FindInputFiles(path, outputPath))
                {
                    string article = File.ReadAllText(file);
                    if (wordsPerEntry == 1)
                    {
                        CountWords(article, pattern, counts);
                    }
                    else
                    {
                        CountPhrases(article, pattern, counts);
                    }
                }
            }

            WriteReport(counts, outputPath, maxEntries);
        }

        /// <summary>
        /// Builds the regex for the requested mode. For phrases the first word is
        /// wrapped in capture group 1 so the scanner knows where that word ends.
        /// </summary>
        private static Regex BuildPattern(int wordsPerEntry)
        {
            if (wordsPerEntry <= 1)
            {
                return new Regex(SingleWordPattern);
            }

            string pattern = "(" + PhraseWordPattern + ")";
            for (int i = 1; i < wordsPerEntry; i++)
            {
                pattern += @"\s{1}" + PhraseWordPattern;
            }
            return new Regex(pattern);
        }

        /// <summary>
        /// Lists the files to scan: every file under <paramref name="path"/> with a
        /// scanned extension, except the report file itself. Without that exclusion
        /// each re-run would count the words of the previous report.
        /// </summary>
        private static string[] FindInputFiles(string path, string outputPath)
        {
            string fullOutputPath = Path.GetFullPath(outputPath);
            return Directory.GetFiles(path, "*", SearchOption.AllDirectories)
                .Where(f => ScannedExtensions.Any(ext => f.EndsWith(ext, StringComparison.Ordinal)))
                .Where(f => !string.Equals(Path.GetFullPath(f), fullOutputPath, StringComparison.OrdinalIgnoreCase))
                .ToArray();
        }

        /// <summary>Counts every single-word match in <paramref name="article"/>.</summary>
        private static void CountWords(string article, Regex word, Dictionary<string, v> counts)
        {
            foreach (Match m in word.Matches(article))
            {
                // Only a leading underscore can be part of a match; drop it.
                Record(counts, m.Value.Replace("_", ""));
            }
        }

        /// <summary>
        /// Counts overlapping phrases: after each match the scan resumes right
        /// after the first word of that match, so "a b c" yields "a b" and "b c".
        /// </summary>
        private static void CountPhrases(string article, Regex phrase, Dictionary<string, v> counts)
        {
            Match m = phrase.Match(article);
            while (m.Success)
            {
                Record(counts, m.Value);

                // Group 1 is the first word (at least three characters), so this
                // always moves forward. Searching for ' ' instead is wrong because
                // the separator may be a tab or a newline.
                m = phrase.Match(article, m.Index + m.Groups[1].Length);
            }
        }

        /// <summary>
        /// Adds one occurrence of <paramref name="text"/>. Entries are keyed by the
        /// lower-cased text; among spellings that differ only by case, the one that
        /// comes first in ordinal (ASCII) order is kept for display.
        /// </summary>
        private static void Record(Dictionary<string, v> counts, string text)
        {
            // Invariant/ordinal operations keep the result independent of the
            // machine's culture settings.
            string key = text.ToLowerInvariant();
            v entry;
            if (counts.TryGetValue(key, out entry))
            {
                entry.n += 1;
                if (string.CompareOrdinal(text, entry.s) < 0)
                {
                    entry.s = text;
                }
            }
            else
            {
                counts.Add(key, new v(1, text));
            }
        }

        /// <summary>
        /// Writes up to <paramref name="maxEntries"/> lines of "text TAB count",
        /// most frequent first, ties ordered by text with the default string comparer.
        /// </summary>
        private static void WriteReport(Dictionary<string, v> counts, string outputPath, int maxEntries)
        {
            var ranked = counts.Values
                .OrderByDescending(e => e.n)
                .ThenBy(e => e.s)
                .Take(maxEntries);

            // using guarantees the file is flushed and closed even on failure.
            using (var writer = new StreamWriter(outputPath, false))
            {
                foreach (v entry in ranked)
                {
                    writer.WriteLine(entry.s + "\t" + entry.n);
                }
            }
        }
    }
    }

    代码总共由3部分组成,每部分对应相应的mode。

    mode2、3是基于mode1基础上做出的一点点改变。

    这个程序主要利用了Dictionary功能。key设为string,使用时将单词的小写形式作为key。value设为一个小结构,包含一个string类型的单词和一个int类型的次数。

    mode1遍历的过程为按照单个单词遍历的,mode2为两个,mode3为三个。

    但mode2每读取完两个单词后需要往回退一个单词,mode3每读取完三个单词后需要往回退两个单词。

    若读取的单词的小写形式已经包含在dictionary中,则value中的int值+1,然后比较value中的string与当前的单词,选择ASCII码靠前的那个保存。

    最后按照要求输出。

  • 相关阅读:
    套用JQuery EasyUI列表显示数据、分页、查询
    Linux 进程间通信 信号
    Linux socket编程
    Linux字符设备驱动注册流程
    Linux杂项设备与字符设备
    Linux并发控制解决竞态的一种操作>原子操作
    Linux 进程间通信 管道通信
    Linux串口编程
    博客开通啦!
    实现Windows Phone 8多媒体:视频
  • 原文地址:https://www.cnblogs.com/jinbo123/p/3993316.html
Copyright © 2011-2022 走看看