zoukankan      html  css  js  c++  java
  • 正则表达式匹配

    由于工作需要,学习了正则表达式匹配。从MongoDB导出的数据文本大致是这样的:

    {
    "_id" : ObjectId("55370b4112db760740809f79"),
    "CartKey" : {
    "CustomerId" : NumberLong(471)
    },
    "LastUpdate" : ISODate("2015-06-04T08:21:24.307Z"),
    "Baskets" : [{
    "Items" : [{
    "SKU" : "5170",
    "CategoryName" : "Cables & Adapters "
    }],
    "CountryCode" : 32
    }]
    }

    {
    "_id" : ObjectId("55769cc512db760da847d639"),
    "CartKey" : {
    "CustomerId" : NumberLong(1002)
    },
    "LastUpdate" : ISODate("2015-06-01T00:00:00Z"),
    "Baskets" : [{
    "Items" : [{
    "SKU" : "2716",
    "CategoryName" : "iPhone iPad iPod"
    }],
    "CountryCode" : 46
    }]
    }

    {
    "_id" : ObjectId("54b5e9d412db761c388d6c48"),
    "CartKey" : {
    "CustomerId" : NumberLong(4398734)
    },
    "LastUpdate" : ISODate("2015-06-05T03:49:11.131Z"),
    "Baskets" : [{
    "Items" : [{
    "SKU" : "33883",
    "CategoryName" : "Plugs & Sockets"
    }, {
    "SKU" : "126095",
    "CategoryName" : "Household Thermometers"
    }],
    "CountryCode" : 46
    }]
    }

    目标是将 "CustomerId" : NumberLong(4398734) 转化为 "CustomerId" : 4398734,并将 "LastUpdate" : ISODate("2015-06-05T03:49:11.131Z") 转换为 "LastUpdate" : "2015-06-05T03:49:11.131Z"。

    我使用的是NotePad++编辑器的正则替换。

    查找目标的正则表达式是:"CustomerId" : NumberLong\((.*)\)

    替换为:"CustomerId" : \1

    日期是:"LastUpdate" : ISODate\((.*)\)

    替换为:"LastUpdate" : \1

    _id 字段不需要,查找:"_id" : ObjectId\((.*)\),  并替换为空(即删除该行内容)。

    为什么替换内容里是 \1 呢?因为正则表达式中的 () 表示捕获分组(group),\1 引用的就是第一个分组匹配到的内容;而要匹配字面意义上的括号,则需要写成 \( 和 \)。

    系列文章:

    http://zhoufoxcn.blog.51cto.com/792419/281956/

    http://www.crifan.com/files/doc/docbook/rec_soft_npp/release/htmls/npp_func_regex_replace.html

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using CartTools.Modela;
    using Newtonsoft.Json;
    using CartTools.Modela.CMS;
    using System.IO;
    using System.Text.RegularExpressions;

    namespace CartTools
    {
    /// <summary>
    /// Form that turns a MongoDB shell-style export of shopping carts into a flat,
    /// Excel-oriented text table. The raw export is read from <c>textBox1</c> and the
    /// result is written to <c>textBox2</c>; both controls and
    /// <c>InitializeComponent</c> are declared outside this part of the class.
    /// </summary>
    public partial class Form1 : Form
    {
        // Same options the patterns were originally compiled with.
        private const RegexOptions ExportRegexOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        // "LastUpdate" : ISODate("...")  ->  group 1 is the quoted timestamp.
        // The literal parentheses must be escaped; unescaped they only form nested groups.
        private static readonly Regex LastUpdateRegex =
            new Regex(@"""LastUpdate"" : ISODate\(([^)]*)\)", ExportRegexOptions);

        // "CustomerId" : NumberLong(123)  ->  group 1 is the bare number.
        private static readonly Regex CustomerIdRegex =
            new Regex(@"""CustomerId"" : NumberLong\(([^)]*)\)", ExportRegexOptions);

        // "_id" : ObjectId("..."),  ->  dropped entirely, including the trailing comma.
        private static readonly Regex ObjectIdRegex =
            new Regex(@"""_id"" : ObjectId\([^)]*\),", ExportRegexOptions);

        // Zero-width position right after a '}' whose next non-whitespace character is '{'.
        // In well-formed JSON two objects are never adjacent without a comma, so this
        // only fires between records that were exported back to back.
        // NOTE(review): a string value containing "}{" would also match - confirm that
        // cannot occur in the exported data.
        private static readonly Regex AdjacentObjectsRegex =
            new Regex(@"(?<=\})(?=\s*\{)");

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: converts the text in <c>textBox1</c> and shows the result in <c>textBox2</c>.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string jsonStr = GetLegalJsonSource(textBox1.Text);

            string content = GetExcelContent(jsonStr);

            textBox2.Text = content;
        }

        /// <summary>
        /// Rewrites the MongoDB-specific constructs in <paramref name="text"/> so the
        /// result can be parsed as a JSON array.
        /// </summary>
        /// <param name="text">Raw export text; <c>null</c> is treated as empty.</param>
        /// <returns>
        /// The input wrapped in <c>[</c> and <c>]</c>, with <c>ISODate(...)</c> and
        /// <c>NumberLong(...)</c> wrappers unwrapped, <c>"_id" : ObjectId(...),</c>
        /// entries removed, and a comma inserted between back-to-back objects.
        /// </returns>
        private string GetLegalJsonSource(string text)
        {
            string json = text ?? string.Empty;

            // The rewrites touch disjoint pieces of text, so their order does not matter;
            // applying them explicitly avoids relying on dictionary enumeration order.
            json = LastUpdateRegex.Replace(json, @"""LastUpdate"" : $1");
            json = CustomerIdRegex.Replace(json, @"""CustomerId"" : $1");
            json = ObjectIdRegex.Replace(json, string.Empty);

            // Records separated only by whitespace need a comma to be valid array elements.
            json = AdjacentObjectsRegex.Replace(json, ",");

            return "[" + json + "]";
        }

        /// <summary>
        /// Deserializes a JSON array of shopping carts and flattens it into one text row per cart.
        /// </summary>
        /// <param name="str">JSON array as produced by <see cref="GetLegalJsonSource"/>.</param>
        /// <returns>
        /// A header followed by one entry per cart: country code of the first basket,
        /// customer id, last update, ';'-joined SKUs and ';'-joined category names.
        /// Every comma in the assembled text is finally replaced by a space.
        /// </returns>
        private string GetExcelContent(string str)
        {
            StringBuilder sb = new StringBuilder();
            IList<ShoppingCartModel> list = JsonConvert.DeserializeObject<IList<ShoppingCartModel>>(str);

            // Header columns: country, customer id, date, SKU, category.
            sb.Append("Country,cicid,日期,SKU,分类 ");

            if (list != null)
            {
                foreach (var item in list)
                {
                    if (item == null)
                    {
                        continue;
                    }

                    var baskets = item.Baskets;

                    // The country column stays empty when the cart has no baskets.
                    if (baskets != null && baskets.Any())
                    {
                        sb.Append(baskets.First().CountryCode);
                    }

                    sb.AppendFormat(",{0}", item.CartKey != null ? (object)item.CartKey.CustomerId : string.Empty);

                    StringBuilder skus = new StringBuilder();
                    StringBuilder categories = new StringBuilder();

                    // Baskets was previously enumerated even when null, which threw;
                    // a cart without baskets now simply yields empty SKU/category columns.
                    if (baskets != null)
                    {
                        foreach (var basket in baskets)
                        {
                            if (basket == null || basket.Items == null)
                            {
                                continue;
                            }

                            foreach (var line in basket.Items)
                            {
                                skus.Append(line.SKU).Append(';');
                                categories.Append(line.CategoryName).Append(';');
                            }
                        }
                    }

                    sb.AppendFormat(
                        ",{0},{1},{2} ",
                        item.LastUpdate,
                        skus.ToString().TrimEnd(';'),
                        categories.ToString().TrimEnd(';'));
                }
            }

            // NOTE(review): rows are terminated by a single space and columns end up
            // separated by a single space, which cannot be told apart from spaces inside
            // category names. These literals look like "\r\n" and "\t" that lost their
            // escapes - confirm against the intended Excel paste format before changing.
            return sb.ToString().Replace(",", " ");//excel
        }
    }
    }

  • 相关阅读:
    第五章 χ2检验
    统计学中的自由度
    第四章 统计推断
    个性化排序算法实践(二)——FFM算法
    个性化排序算法实践(一)——FM算法
    稀疏矩阵在Python中的表示方法
    个性化召回算法实践(五)——item2vec
    个性化召回算法实践(四)——ContentBased算法
    个性化召回算法实践(三)——PersonalRank算法
    个性化召回算法实践(二)——LFM算法
  • 原文地址:https://www.cnblogs.com/cxzdy/p/4595457.html
Copyright © 2011-2022 走看看