zoukankan      html  css  js  c++  java
  • PDF解析帮助类

    /// <summary>
    /// Static text helpers for slicing sections, entries and numbers out of the
    /// plain text extracted from a PDF. The marker strings used below
    /// ("信贷交易信息明细", "贷款", "贷记卡", "查询记录", "逾期金额") suggest the input is a
    /// Chinese credit report — NOTE(review): confirm against the callers.
    /// The class name keeps its original spelling so existing callers still compile.
    /// </summary>
    public class ComPDFHepler
    {
        /// <summary>Options shared by the "text between markers" expressions.</summary>
        private const RegexOptions SpanOptions = RegexOptions.Multiline | RegexOptions.Singleline;

        /// <summary>Name of the capture group that holds the text matched by the end marker.</summary>
        private const string EndMarkerGroup = "endMarker";

        /// <summary>A line that opens a new entry: an entry number, a dot, then a year 20xx (e.g. "1.2014").</summary>
        private static readonly Regex EntryStartRegex = new Regex(@"(^\d+\.)20[0-9][0-9]");

        /// <summary>
        /// A section heading such as "(二)" or "（二）": a Chinese numeral wrapped in
        /// ASCII or full-width parentheses, optionally padded with whitespace.
        /// </summary>
        private static readonly Regex SectionHeadingRegex = new Regex(@"[(（]\s*[一二三四五六七八九十]+\s*[)）]");

        /// <summary>One or more consecutive digits.</summary>
        private static readonly Regex DigitRunRegex = new Regex(@"\d+");

        /// <summary>One or more consecutive non-digit characters.</summary>
        private static readonly Regex NonDigitRunRegex = new Regex(@"\D+");

        /// <summary>
        /// Returns the shortest stretch of <paramref name="str"/> that lies between the first
        /// occurrence of <paramref name="s"/> and the next occurrence of <paramref name="e"/>.
        /// </summary>
        /// <param name="str">Text to search.</param>
        /// <param name="s">Start marker. It is inserted into the expression as a regex pattern and is NOT escaped; pass <c>Regex.Escape(...)</c> output for literal text containing metacharacters.</param>
        /// <param name="e">End marker; same pattern semantics as <paramref name="s"/>.</param>
        /// <param name="isContansE">When true, the text matched by <paramref name="e"/> is appended to the result.</param>
        /// <returns>The text between the markers (the start marker is never included), or an empty string when nothing matches.</returns>
        public static string GetValue(string str, string s, string e, bool isContansE)
        {
            // The end marker is captured by name rather than by position so that capture
            // groups inside the caller-supplied start pattern cannot shift the index.
            string pattern = @"(?<=(?:" + s + @"))[\s\S]*?(?=(?<" + EndMarkerGroup + ">" + e + "))";
            Match match = new Regex(pattern, SpanOptions).Match(str);

            if (isContansE)
            {
                return match.Value + match.Groups[EndMarkerGroup].Value;
            }

            return match.Value;
        }

        /// <summary>
        /// Same as <see cref="GetValue(string, string, string, bool)"/> with the end marker
        /// always appended to the result.
        /// </summary>
        /// <param name="str">Text to search.</param>
        /// <param name="s">Start marker (regex pattern, not escaped).</param>
        /// <param name="e">End marker (regex pattern, not escaped).</param>
        /// <returns>The text between the markers followed by the matched end marker, or an empty string when nothing matches.</returns>
        public static string GetValue(string str, string s, string e)
        {
            return GetValue(str, s, e, true);
        }

        /// <summary>
        /// Returns everything that follows the first occurrence of <paramref name="startString"/>,
        /// up to the end of <paramref name="str"/>.
        /// </summary>
        /// <param name="str">Text to search.</param>
        /// <param name="startString">Start marker (regex pattern, not escaped). It is not part of the result.</param>
        /// <returns>The remainder of the text after the marker, or an empty string when the marker is absent.</returns>
        public static string GetStartWithValue(string str, string startString)
        {
            Regex tail = new Regex(@"(?<=(?:" + startString + @"))[\s\S]*", SpanOptions);
            return tail.Match(str).Value;
        }

        /// <summary>
        /// Splits text into chunks, starting a new chunk at every line that begins with
        /// an entry number followed by a dot and a year 20xx (for example "1.2014").
        /// </summary>
        /// <param name="tempContent">Multi-line text. Its first line is echoed to the console and excluded from the result.</param>
        /// <returns>
        /// The chunks in input order; each line inside a chunk is terminated with
        /// <see cref="Environment.NewLine"/>. Lines that precede the first numbered line form a
        /// chunk of their own. Lines consisting of exactly one space are dropped.
        /// </returns>
        public static List<String> ConvertToArrayString(string tempContent)
        {
            List<String> chunks = new List<string>();
            StringBuilder current = new StringBuilder();

            using (StringReader reader = new StringReader(tempContent))
            {
                // The first line is consumed here and never added to a chunk;
                // the console echo is kept from the original implementation.
                string line = reader.ReadLine();
                Console.WriteLine(line);

                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Equals(" "))
                    {
                        continue;
                    }

                    // A numbered line closes the chunk collected so far.
                    if (EntryStartRegex.IsMatch(line) && current.Length > 0)
                    {
                        chunks.Add(current.ToString());
                        current = new StringBuilder();
                    }

                    current.AppendLine(line);
                }
            }

            if (current.Length > 0)
            {
                chunks.Add(current.ToString());
            }

            return chunks;
        }

        /// <summary>
        /// Collects the lines that follow a line containing "信贷交易信息明细" until a section
        /// heading (a parenthesised Chinese numeral such as "(二)") that does not mention
        /// "贷款" is reached. This is the "(一)贷款" part of the text.
        /// </summary>
        /// <param name="tagPath">Despite its name, this is the text to scan, not a file path. Its first line is echoed to the console and never inspected.</param>
        /// <returns>The collected lines, each terminated with <see cref="Environment.NewLine"/>; empty when the start marker is never found.</returns>
        public static string GetAnalysisContent(string tagPath)
        {
            StringBuilder collected = new StringBuilder();
            bool canRead = false;

            using (StringReader reader = new StringReader(tagPath))
            {
                // The first line is consumed up front; the console echo is kept from the original.
                string line = reader.ReadLine();
                Console.WriteLine(line);

                while ((line = reader.ReadLine()) != null)
                {
                    if (!canRead)
                    {
                        // The marker line itself is not collected; collection starts on the next line.
                        canRead = line.Contains("信贷交易信息明细");
                        continue;
                    }

                    // Only a parenthesised numeral counts as a heading. A bare numeral inside
                    // ordinary text must not end the section.
                    bool isOtherSectionHeading = SectionHeadingRegex.IsMatch(line) && !line.Contains("贷款");
                    if (isOtherSectionHeading)
                    {
                        canRead = false;
                    }
                    else
                    {
                        collected.AppendLine(line);
                    }
                }
            }

            return collected.ToString();
        }

        /// <summary>
        /// Extracts the "(二)贷记卡" part of the text: whatever follows the first "贷记卡"
        /// inside the span between "信贷交易信息明细" and "查询记录".
        /// </summary>
        /// <param name="tagPath">Despite its name, this is the text to scan, not a file path.</param>
        /// <returns>The extracted text, or an empty string when any of the markers is missing.</returns>
        public static string GetAnyTextCardInfo(string tagPath)
        {
            string detailSection = ComPDFHepler.GetValue(tagPath, "信贷交易信息明细", "查询记录", false);
            return ComPDFHepler.GetStartWithValue(detailSection, "贷记卡");
        }

        /// <summary>
        /// Reads a PDF file and returns its text content.
        /// </summary>
        /// <param name="file">The PDF file to read.</param>
        /// <returns>The text produced by <c>PDFTextStripper.getText</c> for the loaded document.</returns>
        public static string pdf2txt(FileInfo file)
        {
            PDDocument doc = PDDocument.load(file.FullName);
            try
            {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                return pdfStripper.getText(doc);
            }
            finally
            {
                // Release the document even when text extraction throws.
                doc.close();
            }
        }

        /// <summary>
        /// Tells whether the text contains at least one digit.
        /// </summary>
        /// <param name="text">Text to inspect.</param>
        /// <returns>True when a digit occurs anywhere in <paramref name="text"/>; the text does not have to consist of digits only.</returns>
        public static bool IsNumber(string text)
        {
            return DigitRunRegex.IsMatch(text);
        }

        /// <summary>
        /// Removes every non-digit character and converts the remaining digits,
        /// read as one number, to an integer (for example "a1b2" gives 12).
        /// </summary>
        /// <param name="par">Text containing digits.</param>
        /// <returns>The integer formed by the digits of <paramref name="par"/> in order.</returns>
        /// <exception cref="FormatException">The text contains no digit.</exception>
        /// <exception cref="OverflowException">The digits do not fit into an <see cref="int"/>.</exception>
        public static int GetNumber(string par)
        {
            string digitsOnly = NonDigitRunRegex.Replace(par, "");
            return Convert.ToInt32(digitsOnly);
        }

        /// <summary>
        /// Replaces every run of non-digit characters with a single comma, leaving the
        /// numbers of the text separated by commas.
        /// </summary>
        /// <param name="par">Text containing digits.</param>
        /// <returns>The comma-separated digits. Non-digit text at the very start or end also becomes a comma, so the result may begin or end with one.</returns>
        public static string GetNumberSplit(string par)
        {
            return NonDigitRunRegex.Replace(par, ",");
        }

        /// <summary>
        /// Returns the space-separated cells that follow the "逾期金额 " column header
        /// (the overdue record table).
        /// </summary>
        /// <param name="text">Text containing the table.</param>
        /// <returns>The non-empty cells in reading order; empty when the header is absent. Only the space character separates cells, so line breaks stay inside a cell.</returns>
        public static List<string> GetListOverDueRecord(string text)
        {
            string tableText = ComPDFHepler.GetStartWithValue(text, "逾期金额 ");
            string[] cells = tableText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            return new List<string>(cells);
        }

        /// <summary>
        /// Tells whether a date lies within the last <paramref name="month"/> months,
        /// counting a month as 30 days.
        /// </summary>
        /// <param name="date">Date text; dots are treated as dashes, so "2014.05.12" is read as "2014-05-12". Parsed with the current culture.</param>
        /// <param name="month">Number of 30-day months.</param>
        /// <returns>
        /// True when the time elapsed since the date is at most <paramref name="month"/> * 30 days
        /// (dates in the future therefore also give true); false when the text is null, empty
        /// or not a valid date.
        /// </returns>
        public static bool isInMonth(string date, int month)
        {
            if (string.IsNullOrEmpty(date))
            {
                return false;
            }

            DateTime parsed;
            if (!DateTime.TryParse(date.Replace('.', '-'), out parsed))
            {
                return false;
            }

            double elapsedDays = DateTime.Now.Subtract(parsed).TotalDays;
            return elapsedDays / 30 <= month;
        }
    }

  • 相关阅读:
    tinymce原装插件源码分析(二)-link
    tinymce原装插件源码分析(一)-hr
    pyinstall 常见错误
    matlab Time-domain analysis 渐进式或者实时获取仿真值
    初识python和pycharm
    自定义指令详解
    Vue核心知识一览
    多维数组 转化为 一维数组
    js面试之数组的几个不low操作
    js如何操作或是更改sass里的变量
  • 原文地址:https://www.cnblogs.com/wolf12/p/5439837.html
Copyright © 2011-2022 走看看