zoukankan      html  css  js  c++  java
  • 基于Predictive Parsing的ABNF语法分析器(十三)——rulelist、rule、rulename、define-as和elements

    我们来看看rulelist,它是整个ABNF文法的入口,就是说一个ABNF文法就是一个规则列表rulelist。一个rulelist由若干个rule规则组成,每个rule由规则名rulename、定义方式define-as和元素elements构成。

    先来看解析代码:

    /*
        This file is one of the component a Context-free Grammar Parser Generator,
        which accept a piece of text as the input, and generates a parser
        for the inputted context-free grammar.
        Copyright (C) 2013, Junbiao Pan (Email: panjunbiao@gmail.com)
    
        This program is free software: you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
        the Free Software Foundation, either version 3 of the License, or
        any later version.
    
        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU General Public License for more details.
    
        You should have received a copy of the GNU General Public License
        along with this program.  If not, see <http://www.gnu.org/licenses/>.
     */
    
    public class AbnfParser {
    //	   rulelist       =  1*( rule / (*c-wsp c-nl) )
    	protected List<Rule> rulelist() throws IOException, MatchException, CollisionException {
            Map<RuleName, Rule> ruleMap = new HashMap<RuleName, Rule>();
            List<Rule> ruleList = new ArrayList<Rule>();
    //      如果前向字符是字母、空格、分号、回车,则认为是rule、c-wsp或者c-nl
            while (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A) || match(is.peek(), 0x20) || match(is.peek(), ';') || match(is.peek(), 0x0D)) {
    //          如果是字母开头,则认为是rule,否则是c-wsp或者c-nl
                if (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A)) {
    //              解析一条规则
                    Rule rule = rule();
    //              判断该条规则是否已经有有定义
                    if (null == ruleMap.get(rule.getRuleName())) {
    //                  如果没有定义则放入规则列表
                        ruleMap.put(rule.getRuleName(), rule);
                        ruleList.add(rule);
                    } else {
    //                  已有定义,则检查定义方式是否为增量定义
                        Rule defined = ruleMap.get(rule.getRuleName());
                        if ("=".equals(rule.getDefinedAs()) && "=".equals(defined.getDefinedAs())) {
    //                      如果不是增量定义,则抛出重复定义异常
                            throw new CollisionException(rule.getRuleName().toString() + " is redefined.", is.getPos(), is.getLine());
                        }
    //                  如果是增量定义则合并两条规则
                        if ("=".equals(rule.getDefinedAs())) defined.setDefinedAs("=");
                        defined.getElements().getAlternation().getConcatenations().addAll(rule.getElements().getAlternation().getConcatenations());
                    }
                } else {
    //              空格、分号、回车,则是c_wsp
                    while (match(is.peek(), 0x20) || match(is.peek(), ';') || match(is.peek(), 0x0D)) {
                        c_wsp();
                    }
                    c_nl();
                }
            }
            return ruleList;
    	}
    
    
    //		        rulename       =  ALPHA *(ALPHA / DIGIT / "-")
    	protected RuleName rulename() throws IOException, MatchException {
    //		 ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
    //	     DIGIT          =  %x30-39
    //      规则名的第一个字符必须是字母
            if (!(match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A))) {
                throw new MatchException("'A'-'Z'/'a'-'z'", is.peek(), is.getPos(), is.getLine());
            }
            String rulename = "";
            rulename += (char)is.read();
    //      规则名的后续字符可以是字母、数字、破折号
            while (match(is.peek(), 0x41, 0x5A) || match(is.peek(), 0x61, 0x7A) || match(is.peek(), 0x30, 0x39) |match(is.peek(), '-')) {
                rulename += (char)is.read();
            }
            return new RuleName(prefix, rulename);
    	}
    
    //		        defined-as     =  *c-wsp ("=" / "=/") *c-wsp
    	protected String defined_as() throws IOException, MatchException {
            String value = "";
    //      等号前面的空格
            while (match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';') || match(is.peek(), (char)0x0D)) {
                c_wsp();
            }
    //      等号
            assertMatch(is.peek(), '=');
            value = String.valueOf((char)is.read());
    //      是否增量定义
            if (match(is.peek(), '/')) {
                value += (char)is.read();
    		}
    //      等号后面的空格
            while (match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';') || match(is.peek(), (char)0x0D)) {
                c_wsp();
            }
            return value;
    	}
    
    //		        elements       =  alternation *c-wsp
    	protected Elements elements() throws IOException, MatchException {
    //              元素elements其实就是alternation再接着若干空格
    		Alternation alternation = alternation();
            while (match(is.peek(), 0x20) || match(is.peek(), 0x09) || match(is.peek(), ';')) {
    			c_wsp();
    		}
    		return new Elements(alternation);
    	}
    }
    

    单元测试就不罗嗦了,直接看代码。

    /*
        This file is one of the component a Context-free Grammar Parser Generator,
        which accept a piece of text as the input, and generates a parser
        for the inputted context-free grammar.
        Copyright (C) 2013, Junbiao Pan (Email: panjunbiao@gmail.com)
    
        This program is free software: you can redistribute it and/or modify
        it under the terms of the GNU General Public License as published by
        the Free Software Foundation, either version 3 of the License, or
        any later version.
    
        This program is distributed in the hope that it will be useful,
        but WITHOUT ANY WARRANTY; without even the implied warranty of
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        GNU General Public License for more details.
    
        You should have received a copy of the GNU General Public License
        along with this program.  If not, see <http://www.gnu.org/licenses/>.
     */
    
        //		        rulename       =  ALPHA *(ALPHA / DIGIT / "-")
        @Test
        public void testRulename() throws Exception {
            Tester<String> tester = new Tester<String>() {
                public String test(AbnfParser parser) throws MatchException, IOException {
                    return parser.rulename().toString();
                }
            };
    
    //      用各种奇怪的情形来虐
            Assert.assertEquals("A", AbnfParserFactory.newInstance("A").rulename().toString());
            Assert.assertEquals("a", AbnfParserFactory.newInstance("a").rulename().toString());
            Assert.assertEquals("Z", AbnfParserFactory.newInstance("Z").rulename().toString());
            Assert.assertEquals("z", AbnfParserFactory.newInstance("z").rulename().toString());
            Assert.assertEquals("ABCDEFGHIJKLMNOPQRSTUVWXYZ", AbnfParserFactory.newInstance("ABCDEFGHIJKLMNOPQRSTUVWXYZ").rulename().toString());
            Assert.assertEquals("abcdefghijklmnopqrstuvwxyz", AbnfParserFactory.newInstance("abcdefghijklmnopqrstuvwxyz").rulename().toString());
            Assert.assertEquals("AbCdEfGhIjKlMnOpQrStUvWxYz", AbnfParserFactory.newInstance("AbCdEfGhIjKlMnOpQrStUvWxYz").rulename().toString());
            Assert.assertEquals("aBcDeFgHiJkLmNoPqRsTuVwXyZ", AbnfParserFactory.newInstance("aBcDeFgHiJkLmNoPqRsTuVwXyZ").rulename().toString());
            Assert.assertEquals("A1234567890Z", AbnfParserFactory.newInstance("A1234567890Z").rulename().toString());
            Assert.assertEquals("a1234567890z", AbnfParserFactory.newInstance("a1234567890z").rulename().toString());
            Assert.assertEquals("A1B2C3D4e5f6g7h8i9j0", AbnfParserFactory.newInstance("A1B2C3D4e5f6g7h8i9j0").rulename().toString());
            Assert.assertEquals("A1B2C3D4e5f6g7h8i9j0aBcDeFgHiJkLmNoPqRsTuVwXyZAbCdEfGhIjKlMnOpQrStUvWxYz", AbnfParserFactory.newInstance("A1B2C3D4e5f6g7h8i9j0aBcDeFgHiJkLmNoPqRsTuVwXyZAbCdEfGhIjKlMnOpQrStUvWxYz").rulename().toString());
            Assert.assertEquals("A1b2C3-4d-", AbnfParserFactory.newInstance("A1b2C3-4d-").rulename().toString());
            Assert.assertEquals("a-------------", AbnfParserFactory.newInstance("a-------------").rulename().toString());
            Assert.assertEquals("A1b2C3-4d-", AbnfParserFactory.newInstance("A1b2C3-4d-#").rulename().toString());
            Assert.assertEquals("A1b2C3-4d-", AbnfParserFactory.newInstance("A1b2C3-4d-..").rulename().toString());
            Assert.assertEquals("RFC3261-A1b2C3-4d-", AbnfParserFactory.newInstance("RFC3261-", "A1b2C3-4d-*&^").rulename().toString());
            Assertion.assertMatchException("1", tester, 1, 1);
            Assertion.assertMatchException("1234567890", tester, 1, 1);
            Assertion.assertMatchException("-", tester, 1, 1);
            Assertion.assertMatchException("-----------", tester, 1, 1);
            Assertion.assertMatchException("#", tester, 1, 1);
            Assertion.assertMatchException(".", tester, 1, 1);
            Assertion.assertMatchException("~", tester, 1, 1);
            Assertion.assertMatchException(")", tester, 1, 1);
            Assertion.assertMatchException("", tester, 1, 1);
        }
    
        //	   rulelist       =  1*( rule / (*c-wsp c-nl) )
        @Test
        public void testRulelist() throws Exception {
            Tester<List<Rule>> tester = new Tester<List<Rule>>() {
                @Override
                public List<Rule> test(AbnfParser parser) throws MatchException, IOException, CollisionException {
                    return parser.rulelist();
                }
            };
    
            String input, rulesInput = "";
            input = "a=b" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            List<Rule> rules = new ArrayList<Rule>();
            rules.add(AbnfParserFactory.newInstance(input).rule());
            input = "b=*c" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            rules.add(AbnfParserFactory.newInstance(input).rule());
            input = "c=[d]" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            rules.add(AbnfParserFactory.newInstance(input).rule());
            input = "d=a/b/c/e" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            rules.add(AbnfParserFactory.newInstance(input).rule());
            input = "e=f/(g h)" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            rules.add(AbnfParserFactory.newInstance(input).rule());
            input = "g=i [j]" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            rules.add(AbnfParserFactory.newInstance(input).rule());
            input = "j=<abcd#1234>" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            rules.add(AbnfParserFactory.newInstance(input).rule());
            input = "j=/"abcd#1234"" + (char)0x0d + (char)0x0a;
            rulesInput += input;
            rules.add(AbnfParserFactory.newInstance(input).rule());
            Assertion.assertMatch(rulesInput, tester, AbnfParserFactory.newInstance(rulesInput).rulelist(), 1, 9);
    
        }
    
        //		        rule           =  rulename defined-as elements c-nl
        @Test
        public void testRule() throws Exception {
            Tester<Rule> tester = new Tester<Rule>() {
                @Override
                public Rule test(AbnfParser parser) throws MatchException, IOException {
                    return parser.rule();
                }
            };
    
            String input;
            Elements elements;
            elements = AbnfParserFactory.newInstance("b").elements();
            Assertion.assertMatch("a=b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2);
            Assertion.assertMatch("a=/b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2);
    
            elements = AbnfParserFactory.newInstance("b/c").elements();
            Assertion.assertMatch("a=b/c" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2);
            Assertion.assertMatch("a=/b/c" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2);
    
            elements = AbnfParserFactory.newInstance("b c d").elements();
            Assertion.assertMatch("a=b c d" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2);
            Assertion.assertMatch("a=/b c d" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2);
    
            elements = AbnfParserFactory.newInstance("[b]").elements();
            Assertion.assertMatch("a=[b]" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2);
            Assertion.assertMatch("a=/[b]" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2);
    
            elements = AbnfParserFactory.newInstance("*b").elements();
            Assertion.assertMatch("a=*b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=", elements), 1, 2);
            Assertion.assertMatch("a=/*b" + (char)0x0D + (char)0x0A, tester, new Rule(new RuleName("a"), "=/", elements), 1, 2);
        }
    
        //WSP            =  SP / HTAB
        //  VCHAR          =  %x21-7E
        //    comment        =  ";" *(WSP / VCHAR) CRLF
        //		        c-nl           =  comment / CRLF
        //    c-wsp          =  WSP / (c-nl WSP)
        //		        defined-as     =  *c-wsp ("=" / "=/") *c-wsp
        @Test
        public void testDefined_as() throws Exception {
            Tester<String> tester = new Tester() {
                public String test(AbnfParser parser) throws MatchException, IOException {
                    return parser.defined_as();
                }
            };
    
            Assertion.assertMatchException("", tester, 1, 1);
            Assertion.assertMatch("=", tester, "=", 2, 1);
            Assertion.assertMatchException("/", tester, 1, 1);
            Assertion.assertMatch("=A", tester, "=", 2, 1);
            Assertion.assertMatchException("A=", tester, 1, 1);
            Assertion.assertMatch("=/", tester, "=/", 3, 1);
            Assertion.assertMatchException(".=/", tester, 1, 1);
            Assertion.assertMatch("=/#", tester, "=/", 3, 1);
            Assertion.assertMatchException("#=/", tester, 1, 1);
            Assertion.assertMatch("    =    ", tester, "=", 10, 1);
            Assertion.assertMatch("    =    =", tester, "=", 10, 1);
            Assertion.assertMatch("    =/    ", tester, "=/", 11, 1);
            Assertion.assertMatch("    =/    3", tester, "=/", 11, 1);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=", tester, "=", 3, 2);
            Assertion.assertMatchException("" + (char)0x0D + (char)0x0A + "=", tester, 1, 2);
            Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "==", tester, 3, 2);
    
    //        Can not handle following case
    //      TODO
            System.out.println("***************************");
            Assertion.assertMatchException("" + (char) 0x0D + (char) 0x0A + " " + "=;", tester, 4, 2);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=" + (char)0x0D + (char)0x0A +" ", tester, "=", 2, 3);
    
    //        Can not handle following case
    //      TODO
            Assertion.assertMatchException("" + (char) 0x0D + (char) 0x0A + " " + "=" + (char) 0x0D + (char) 0x0A, tester, 1, 3);
    
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=" + (char)0x0D + (char)0x0A +" =", tester, "=", 2, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +" " + "=/", tester, "=/", 4, 2);
            Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "=//", tester, "=/", 4, 2);
            Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "=/" + (char) 0x0D + (char) 0x0A + " ", tester, "=/", 2, 3);
            Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + " " + "=/" + (char) 0x0D + (char) 0x0A + " ==", tester, "=/", 2, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=", tester, "=", 3, 2);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "====", tester, 3, 2);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09, tester, "=", 2, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09 + "=", tester, "=", 2, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/", tester, "=/", 4, 2);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/=/", tester, "=/", 4, 2);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/" + (char)0x0D + (char)0x0A +(char)0x09, tester, "=/", 2, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + "=/" + (char)0x0D + (char)0x0A +(char)0x09 + "/=/=/", tester, "=/", 2, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "=", tester, "=", 3, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "==/=/=/", tester, "=", 3, 3);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09, tester, "=", 2, 5);
            Assertion.assertMatch("" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "=" + (char)0x0D + (char)0x0A +(char)0x09 + (char)0x0D + (char)0x0A +(char)0x09 + "/", tester, "=", 2, 5);
            Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/", tester, "=/", 4, 3);
            Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/=", tester, "=/", 4, 3);
            Assertion.assertMatch("" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=/" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09, tester, "=/", 2, 5);
    
    //        Can not handle following case
    //      TODO
            Assertion.assertMatchException(
                    "" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09
                            + "=/" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A,
                    tester, 1, 5
                    );
    
            Assertion.assertMatch(
                    "" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09
                            + "=/" + (char) 0x0D + (char) 0x0A + (char) 0x09 + (char) 0x0D + (char) 0x0A + (char) 0x09 + "=",
                    tester, "=/", 2, 5);
    
            Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=", tester, "=", 4, 1);
            Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "==/", tester, "=", 4, 1);
            Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=" + (char)0x09 + (char)0x09, tester, "=", 6, 1);
            Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=/", tester, "=/", 5, 1);
            Assertion.assertMatch("" + (char)0x09 + (char)0x09 + "=/=", tester, "=/", 5, 1);
            Assertion.assertMatch("" + (char) 0x09 + (char) 0x09 + "=/" + (char) 0x09 + (char) 0x09, tester, "=/", 7, 1);
            Assertion.assertMatch("" + (char) 0x09 + (char) 0x09 + "=/" + (char) 0x09 + (char) 0x09 + "=", tester, "=/", 7, 1);
            Assertion.assertMatch(
                    ";  ; ; "" + (char) 0x0D + (char) 0x0A + " " + "=" + ";  ; ; "" + (char) 0x0D + (char) 0x0A + " ",
                    tester, "=", 2, 3);
            Assertion.assertMatch(
                    ";  ; ; "" + (char)0x0D + (char)0x0A + " " + "=" + ";  ; ; "" + (char)0x0D + (char)0x0A + " /",
                    tester, "=", 2, 3);
    
            Assertion.assertMatch(
                    ";  ; ; "" + (char) 0x0D + (char) 0x0A + " " + "=/" + ";  ; ; "" + (char) 0x0D + (char) 0x0A + " ",
                    tester, "=/", 2, 3);
    
            Assertion.assertMatch(
                    ";  ; ; "" + (char)0x0D + (char)0x0A + " " + "=/" + ";  ; ; "" + (char)0x0D + (char)0x0A + " =/",
                    tester, "=/", 2, 3);
        }
    
        //		        elements       =  alternation *c-wsp
        @Test
        public void testElements() throws Exception {
            Tester<Elements> tester = new Tester<Elements>() {
                @Override
                public Elements test(AbnfParser parser) throws MatchException, IOException {
                    return parser.elements();
                }
            };
    
            String input;
            input = "A/B/C";
            Assertion.assertMatch(input, tester, AbnfParserFactory.newInstance(input).elements(), 6, 1);
    
    //        TODO
            input = "A/B/C  ";
            Assertion.assertMatchException(input, tester, 8, 1);
    
        }
    

    单元测试里面体现出来的一些算法缺陷,我会写一篇帖子专门总结一下。

    到这里,文法解析器就基本完成了,下一篇我会以SIP(RFC3261)协议为例子,生成SIP的ABNF文法表示。

  • 相关阅读:
    WebGL学习笔记(一)
    svn和ftp的不同应用场合
    Java 开源博客 Solo 1.8.0 发布
    【C解毒】滥用变量
    【C解毒】答非所问
    【C解毒】错题无解
    【C解毒】怎样写main()函数
    程序员约架事件中,薛非到底是因为不会用Github还是不会写Tokenizer而没有应战?
    『一些同学学不好C语言,把罪责归于「因为教材是谭浩强写的」实在是很滑稽』吗?
    挂羊头卖狗肉蓄意欺骗读者——谭浩强《C程序设计(第四版)》中所谓的“按照C99”(二)
  • 原文地址:https://www.cnblogs.com/snake-hand/p/3144826.html
Copyright © 2011-2022 走看看