zoukankan      html  css  js  c++  java
  • 2013-正则表达式解析文本

          在项目中可能会出现这样的场景:需要从一段文本中解析出数据,

    例如:需要从下文找出注红的数据(即 TAX 行中的各项税费、运价计算行中 Q 后面的数值,以及 ROE 后面的数值)

     FSI/*CX
    
    S KA   909Y22MAR PEK1630 2020HKG0X    333   
    
    S CX   806Y23MAR HKG1150 1315ORD0S    77W   
    
    01 YOW2+YX2            17758 CNY                    INCL TAX
    
    *SYSTEM DEFAULT-CHECK OPERATING CARRIER 
    
    *INTERLINE AGREEMENT PRICING APPLIED
    
    *ATTN PRICED ON 21JAN14*1307
    
     BJS
    
    XHKG YOW2            NVB      NVA22MAR 2PC  
    
     CHI YX2             NVB      NVA22MAR 2PC  
    
    FARE  CNY   16480   
    
    TAX   CNY     90CN CNY     94HK CNY   1094XT
    
    TOTAL CNY   17758   
    
    22MAR14BJS KA X/HKG563.99CX CHI Q4.25 2140.91NUC2709.15END R
    
    OE6.081590  
    
    XT CNY 106US CNY 31XA CNY 43XY CNY 34YC CNY 880YR   
    
    ENDOS 02 *T1
    
    *AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB   
    
    RFSONLN/1E /EFEP_13/FCC=T/ 

    通过下面这个解析类,可以实现我们的功能,主要用到了正则表达式的()捕获功能

    package cn.test;
    
    
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    public class QTaxParser1 {
    	private static final String QTAX_PATTERN = "^[0-9]{1,2}(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC).*";
    	private static final String TAX_PATTERN = "^TAX.*";
    	private static final String NUM_PATTERN = "([0-9]+)([A-Z]+) *";
    	private static final String QNUM_PATTERN = "Q([0-9]+\.{0,1}[0-9]*)";
    	private static final String QROE_PATTERN = "\s+R\s*O\s*E\s*(([0-9]\s*)+(\.\s*){0,1}([0-9]\s*)*)\s+";
    	private static final String RATE_PATTERN = "=([0-9]+\.{0,1}[0-9]*)";
    
    	private Map<String, Double> tax = new HashMap<String, Double>();
    	private List<Double> qTax = new ArrayList<Double>();
    	private Double roe;
    	private static Logger log = LoggerFactory.getLogger(QTaxParser1.class);
    
    	public Map<String, Double> getTax(String txt){
    		// 分解出TAX 行
    		List<String> taxLine = parase(txt, TAX_PATTERN);
    
    		if ((taxLine != null) && (taxLine.size() > 0)) {
    			// 处理TAX 行
    			List<String> taxItem = parase(taxLine.get(0), NUM_PATTERN);
    
    			for (int i = 0; i < taxItem.size(); i += 2) {
    				tax.put(taxItem.get(i + 1), Double.parseDouble(taxItem.get(i)));
    			}
    		}
    		
    		return tax;
    	}
    	
    	public List<Double> getQTax(String txt){
    		// 分解出TAX 行
    		List<String> taxLine = parase(txt, TAX_PATTERN);
    
    		if ((taxLine != null) && (taxLine.size() > 0)) {
    			// 分解出Q行
    			List<String> qTaxLine = parase(txt, QTAX_PATTERN, false);
    
    			if ((qTaxLine != null) && (qTaxLine.size() > 0)) {
    				// 处理QTAX 行
    				List<String> qTaxItem = parase(qTaxLine.get(0), QNUM_PATTERN);
    				// 提取Q值
    				for (int i = 0; i < qTaxItem.size(); i++) {
    					qTax.add(Double.parseDouble(qTaxItem.get(i)));
    				}
    				
    			}
    
    		}
    		return qTax;
    	}
    	
    	public Double getROE(String txt) {
    		// 分解出ROE行
    		List<String> roeItem = parase(txt, QROE_PATTERN);
    		// 提取ROE值
    		if (roeItem.size() > 0) {
    			roe = Double.parseDouble(roeItem.get(0).replaceAll("\s*", ""));
    		}
    		return roe;
    	}
    
    	public boolean isTaxPage(String txt) {
    		Pattern ptn = Pattern.compile(QTAX_PATTERN, Pattern.MULTILINE);
    		Matcher m = ptn.matcher(txt);
    		if (m.find()) {
    			log.debug("TAX Match:" + m.group());
    			return true;
    		}
    		return false;
    	}
    
    	public String getRateValue(String txt) {
    		List<String> rates = parase(txt, RATE_PATTERN);
    		if (rates.size() > 0) {
    			return parase(txt, RATE_PATTERN).get(0);
    		} else {
    			return null;
    		}
    
    	}
    
    	private List<String> parase(String txt, String pattern) {
    		return parase(txt, pattern, true);
    	}
    
    	private static List<String> parase(String txt, String pattern, boolean grouped) {
    
    		Pattern ptn = Pattern.compile(pattern, Pattern.MULTILINE);
    		Matcher m = ptn.matcher(txt);
    
    		List<String> matches = new ArrayList<String>();
    
    		if (!grouped || (m.groupCount() == 0)) {
    			if (m.find()) {
    				matches.add(m.group());
    			}
    		} else {
    
    			while (m.find()) {
    
    				for (int i = 1; i <= m.groupCount(); i++) {
    					matches.add(m.group(i));
    				}
    			}
    		}
    
    		return matches;
    
    	}
    
    }
    
    测试用例

    package itour.cn.fare.gateway;
    
    import cn.test.QTaxParser1;
    import net.sf.json.JSONArray;
    import net.sf.json.JSONObject;
    
    public class QTaxParserTest {
    	public static void main(String[] args) {
    		QTaxParser1 parser = new QTaxParser1();
    		String txt =" FSICH/*CX  "+ 
    "
    "+
    "S KA   909Y22MAR PEK1630 2020HKG0X    333  "+ 
    "
    "+
    "S CX   806Y23MAR HKG1150 1315ORD0S    77W   "+
    "
    "+
    "01 YOW2+YX2  CH        13464 CNY                    INCL TAX"+
    "
    "+
    "*SYSTEM DEFAULT-CHECK OPERATING CARRIER "+
    "
    "+
    "*INTERLINE AGREEMENT PRICING APPLIED"+
    "
    "+
    "*ACCOMPANIED VALIDATION-ALL PAX MUST BE TKTD AT SAME TIME  "+ 
    "
    "+
    "*VERIFY AGE REQUIREMENTS"+
    "
    "+
    "*ATTN PRICED ON 21JAN14*1158"+
    "
    "+
    "BJS"+
    "
    "+
    "XHKG YOW2     CH25   NVB      NVA22MAR 2PC "+ 
    "
    "+
    " CHI YX2      CH25   NVB      NVA22MAR 2PC "+ 
    "
    "+
    "FARE  CNY   12370   "+
    "
    "+
    "TAX    EXEMPT CN   CNY    106US CNY    988XT"+
    "
    "+
    "TOTAL CNY   13464   "+
    "
    "+
    "22MAR14BJS KA X/HKG422.99CX CHI Q4.25 1605.68NUC2032.92END R"+
    "
    "+
    "OE6.081590  "+
    "
    "+
    "XT CNY 31XA CNY 43XY CNY 34YC CNY 880YR "+
    "
    "+
    "ENDOS 02 *T1"+
    "
    "+
    "*AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB "+  
    "
    "+
    "RFSONLN/1E /EFEP_23/FCC=T/";
    
    		System.out.println(JSONObject.fromObject(parser.getTax(txt)).toString());
    		System.out.println(JSONArray.fromObject(parser.getQTax(txt)).toString());
    		System.out.println(JSONArray.fromObject(parser.getROE(txt)).toString());
        }
    }
    


  • 相关阅读:
    选择排序
    冒泡排序
    排序算法
    排序的稳定性
    散列表查找的代码实现
    处理散列冲突的方法
    jQuery 实时监听input
    PhpStorm
    Memcache 学习
    豆瓣第三方登录
  • 原文地址:https://www.cnblogs.com/kuyuyingzi/p/4266314.html
Copyright © 2011-2022 走看看