zoukankan      html  css  js  c++  java
  • PDF解析帮助类

    /// <summary>
    /// Helper methods for extracting the text of a PDF file and slicing that
    /// text with regular expressions.
    /// </summary>
    public class ComPDFHepler
    {
        // Name of the capture group that holds the end delimiter. A named group is
        // used so that capturing groups inside the caller's start pattern cannot
        // shift its position.
        private const string EndGroupName = "endDelimiter";

        // Text of the line that switches collection on in GetAnalysisContent.
        private const string DetailSectionMarker = "信贷交易信息明细";

        // A line that starts with digits, then any single character, then a year 20xx.
        private static readonly Regex EntryStartRegex = new Regex(@"(^\d+.)20[0-9][0-9]");

        // A Chinese numeral wrapped in ASCII or full-width parentheses, e.g. "(二)".
        private static readonly Regex SectionHeadingRegex =
            new Regex(@"[(（]\s*[一二三四五六七八九十]+\s*[)）]");

        /// <summary>
        /// Returns the text found between the first match of <paramref name="s"/> and the
        /// nearest following match of <paramref name="e"/>.
        /// </summary>
        /// <param name="str">Text to search. Null or empty yields an empty string.</param>
        /// <param name="s">Start delimiter. It is inserted into the regular expression
        /// as-is, so regex metacharacters in it are interpreted as pattern syntax.</param>
        /// <param name="e">End delimiter. Inserted into the regular expression as-is,
        /// like <paramref name="s"/>.</param>
        /// <param name="isContansE">When true, the matched end delimiter is appended to
        /// the result; when false only the text between the delimiters is returned.</param>
        /// <returns>The extracted text, or an empty string when nothing matches.</returns>
        public static string GetValue(string str, string s, string e, bool isContansE)
        {
            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            Match match = MatchBetween(str, s, e);
            if (!match.Success)
            {
                return string.Empty;
            }

            return isContansE
                ? match.Value + match.Groups[EndGroupName].Value
                : match.Value;
        }

        /// <summary>
        /// Returns the text found between the first match of <paramref name="s"/> and the
        /// nearest following match of <paramref name="e"/>, with the matched end delimiter
        /// appended.
        /// </summary>
        /// <param name="str">Text to search. Null or empty yields an empty string.</param>
        /// <param name="s">Start delimiter, used as a regex pattern.</param>
        /// <param name="e">End delimiter, used as a regex pattern.</param>
        /// <returns>The extracted text followed by the end delimiter, or an empty string
        /// when nothing matches.</returns>
        public static string GetValue(string str, string s, string e)
        {
            return GetValue(str, s, e, true);
        }

        /// <summary>
        /// Returns everything that follows the first match of
        /// <paramref name="startString"/>, up to the end of the text.
        /// </summary>
        /// <param name="str">Text to search. Null or empty yields an empty string.</param>
        /// <param name="startString">Start delimiter. It is inserted into the regular
        /// expression as-is, so regex metacharacters are interpreted as pattern syntax.</param>
        /// <returns>The remaining text, or an empty string when the delimiter is not found.</returns>
        public static string GetStartWithValue(string str, string startString)
        {
            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            // Verbatim literal: in a regular string "\s" is not a valid C# escape.
            Regex rg = new Regex(
                "(?<=(" + startString + "))" + @"[\s\S]*",
                RegexOptions.Multiline | RegexOptions.Singleline);
            return rg.Match(str).Value;
        }

        /// <summary>
        /// Splits text into entries. A new entry starts at every line that begins with
        /// digits, one separator character and a year of the form 20xx.
        /// </summary>
        /// <param name="tempContent">Text to split. Null yields an empty list.</param>
        /// <returns>One string per entry; each line inside an entry is terminated with
        /// <see cref="Environment.NewLine"/>. The first line of the input is never part
        /// of the result.</returns>
        public static List<String> ConvertToArrayString(string tempContent)
        {
            List<String> list = new List<string>();
            if (tempContent == null)
            {
                return list;
            }

            StringBuilder entry = new StringBuilder();
            using (StringReader reader = new StringReader(tempContent))
            {
                // The first line is consumed and only echoed to the console.
                // NOTE(review): the console echo looks like a debugging aid - confirm
                // whether any caller depends on it before removing.
                string line = reader.ReadLine();
                Console.WriteLine(line);

                while ((line = reader.ReadLine()) != null)
                {
                    // Lines consisting of exactly one space are ignored.
                    if (line.Equals(" "))
                    {
                        continue;
                    }

                    // A new entry begins: flush the one collected so far.
                    if (EntryStartRegex.IsMatch(line) && entry.Length > 0)
                    {
                        list.Add(entry.ToString());
                        entry.Length = 0;
                    }

                    entry.AppendLine(line);
                }
            }

            if (entry.Length > 0)
            {
                list.Add(entry.ToString());
            }

            return list;
        }

        /// <summary>
        /// Collects the lines that follow a line containing "信贷交易信息明细" until a
        /// section heading (a parenthesised Chinese numeral such as "(二)") that does not
        /// contain "贷款" is reached.
        /// </summary>
        /// <param name="tagPath">The text to scan (despite its name this is content, not
        /// a path). Null yields an empty string.</param>
        /// <returns>The collected lines, each terminated with
        /// <see cref="Environment.NewLine"/>.</returns>
        public static string GetAnalysisContent(string tagPath)
        {
            if (tagPath == null)
            {
                return string.Empty;
            }

            StringBuilder collected = new StringBuilder();
            bool canRead = false;

            using (StringReader reader = new StringReader(tagPath))
            {
                // The first line is consumed and only echoed to the console; it is
                // never tested for the section marker.
                // NOTE(review): the console echo looks like a debugging aid - confirm
                // whether any caller depends on it before removing.
                string line = reader.ReadLine();
                Console.WriteLine(line);

                while ((line = reader.ReadLine()) != null)
                {
                    if (!canRead)
                    {
                        // The marker line itself is not collected.
                        canRead = line.Contains(DetailSectionMarker);
                        continue;
                    }

                    // A heading of another section closes the current one. Headings
                    // containing "贷款" belong to the wanted section and are kept.
                    if (SectionHeadingRegex.IsMatch(line) && !line.Contains("贷款"))
                    {
                        canRead = false;
                    }
                    else
                    {
                        collected.AppendLine(line);
                    }
                }
            }

            return collected.ToString();
        }

        /// <summary>
        /// Extracts the text that lies between "信贷交易信息明细" and "查询记录" and
        /// returns the part of it that follows the first "贷记卡".
        /// </summary>
        /// <param name="tagPath">The text to scan (content, not a path).</param>
        /// <returns>The extracted text, or an empty string when a delimiter is missing.</returns>
        public static string GetAnyTextCardInfo(string tagPath)
        {
            string detail = ComPDFHepler.GetValue(tagPath, "信贷交易信息明细", "查询记录", false);
            return ComPDFHepler.GetStartWithValue(detail, "贷记卡");
        }

        /// <summary>
        /// Reads the text of a PDF file.
        /// </summary>
        /// <param name="file">The PDF file to read.</param>
        /// <returns>The text returned by the PDF text stripper.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
        public static string pdf2txt(FileInfo file)
        {
            if (file == null)
            {
                throw new ArgumentNullException("file");
            }

            PDDocument doc = PDDocument.load(file.FullName);
            try
            {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                return pdfStripper.getText(doc);
            }
            finally
            {
                // Close the document even when text extraction throws.
                doc.close();
            }
        }

        /// <summary>
        /// Tells whether the text contains at least one digit.
        /// </summary>
        /// <param name="text">Text to test.</param>
        /// <returns>True when a digit occurs anywhere in <paramref name="text"/>.</returns>
        public static bool IsNumber(string text)
        {
            return Regex.IsMatch(text, @"\d+");
        }

        /// <summary>
        /// Removes every non-digit character and converts the remaining digits to an integer.
        /// </summary>
        /// <param name="par">Text containing digits.</param>
        /// <returns>The integer formed by concatenating all digits of <paramref name="par"/>.</returns>
        /// <exception cref="FormatException">The text contains no digit.</exception>
        /// <exception cref="OverflowException">The digits do not fit into an <see cref="int"/>.</exception>
        public static int GetNumber(string par)
        {
            // Only ASCII digits are kept so that the result is always parseable.
            string digits = System.Text.RegularExpressions.Regex.Replace(par, @"[^0-9]+", "");
            return Convert.ToInt32(digits, System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Replaces every run of non-digit characters with a single comma.
        /// </summary>
        /// <param name="par">Text containing digits.</param>
        /// <returns>The digit groups separated by commas. A comma is also produced for a
        /// non-digit run at the very start or end of the text.</returns>
        public static string GetNumberSplit(string par)
        {
            return System.Text.RegularExpressions.Regex.Replace(par, @"\D+", ",");
        }

        /// <summary>
        /// Returns the space-separated, non-empty tokens that follow the first
        /// occurrence of "逾期金额 " in the text.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <returns>The tokens in order; empty when the marker is not found. Only the
        /// space character separates tokens, so line breaks stay inside a token.</returns>
        public static List<string> GetListOverDueRecord(string text)
        {
            string table = ComPDFHepler.GetStartWithValue(text, "逾期金额 ");
            string[] tokens = table.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            return new List<string>(tokens);
        }

        /// <summary>
        /// Tells whether a date lies no more than <paramref name="month"/> months
        /// (counted as 30 days each) before the current local time.
        /// </summary>
        /// <param name="date">Date text; '.' separators are treated like '-'.</param>
        /// <param name="month">Number of 30-day months.</param>
        /// <returns>True when the date is within the period (dates in the future also
        /// qualify); false when it is older or cannot be parsed.</returns>
        public static bool isInMonth(string date, int month)
        {
            if (string.IsNullOrEmpty(date))
            {
                return false;
            }

            DateTime parsed;
            if (!DateTime.TryParse(date.Replace('.', '-'), out parsed))
            {
                return false;
            }

            double days = DateTime.Now.Subtract(parsed).TotalDays;
            return days / 30 <= month;
        }

        // Runs the "text between s and e" expression and returns the raw match.
        private static Match MatchBetween(string str, string s, string e)
        {
            // Verbatim literal for the middle part: in a regular string "\s" is not a
            // valid C# escape.
            Regex rg = new Regex(
                "(?<=(?:" + s + "))" + @"[\s\S]*?" + "(?=(?<" + EndGroupName + ">" + e + "))",
                RegexOptions.Multiline | RegexOptions.Singleline);
            return rg.Match(str);
        }
    }

  • 相关阅读:
    Win10 UWP Tile Generator
    Win10 BackgroundTask
    UWP Tiles
    UWP Ad
    Win10 build package error collections
    Win10 八步打通 Nuget 发布打包
    Win10 UI入门 pivot multiable DataTemplate
    Win10 UI入门 导航滑动条 求UWP工作
    UWP Control Toolkit Collections 求UWP工作
    Win10 UI入门 SliderRectangle
  • 原文地址:https://www.cnblogs.com/wolf12/p/5439837.html
Copyright © 2011-2022 走看看