zoukankan      html  css  js  c++  java
  • vue.js 源代码学习笔记 ----- html-parser.js

    /**
     * Not type-checking this file because it's mostly vendor code.
     */
    
    /*!
     * HTML Parser By John Resig (ejohn.org)
     * Modified by Juriy "kangax" Zaytsev
     * Original code by Erik Arvidsson, Mozilla Public License
     * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
     */
    
    import { makeMap, no } from 'shared/util'
    import { isNonPhrasingTag, canBeLeftOpenTag } from 'web/util/index'
    
    // Regular Expressions for parsing tags and attributes.
    //
    // Patterns assembled with `new RegExp(string)` need their backslashes
    // doubled, because the string literal consumes one level of escaping
    // before the regex engine sees the text.

    // Attribute name: one or more characters that are not whitespace, quotes,
    // angle brackets, a slash or an equals sign.
    const singleAttrIdentifier = /([^\s"'<>\/=]+)/
    // The "=" between an attribute name and its value.
    const singleAttrAssign = /(?:=)/
    const singleAttrValues = [
      // attr value double quotes
      /"([^"]*)"+/.source,
      // attr value, single quotes
      /'([^']*)'+/.source,
      // attr value, no quotes
      /([^\s"'=<>`]+)/.source
    ]
    // One attribute at the start of the input. Capture groups:
    //   1 = name, 2 = "=", 3 = double-quoted value,
    //   4 = single-quoted value, 5 = unquoted value.
    // Everything from "=" onwards is optional, so bare attributes match too.
    const attribute = new RegExp(
      '^\\s*' + singleAttrIdentifier.source +
      '(?:\\s*(' + singleAttrAssign.source + ')' +
      '\\s*(?:' + singleAttrValues.join('|') + '))?'
    )

    // could use https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-QName
    // but for Vue templates we can enforce a simple charset
    const ncname = '[a-zA-Z_][\\w\\-\\.]*'
    // Tag name with an optional "prefix:" part, captured as a whole.
    const qnameCapture = '((?:' + ncname + '\\:)?' + ncname + ')'
    // "<tagName" at the start of the input; group 1 is the tag name.
    const startTagOpen = new RegExp('^<' + qnameCapture)
    // End of a start tag: ">" or "/>"; group 1 is "/" for a self-closing tag.
    const startTagClose = /^\s*(\/?)>/
    // "</tagName ...>" at the start of the input; group 1 is the tag name.
    const endTag = new RegExp('^<\\/' + qnameCapture + '[^>]*>')
    const doctype = /^<!DOCTYPE [^>]+>/i
    const comment = /^<!--/
    // Opening of a conditional comment such as "<![if !IE]>".
    const conditionalComment = /^<!\[/
    
    // Feature probe: the optional group in /x(.)?/ does not participate when
    // matching 'x'. The flag becomes true only if the engine hands the
    // replacer an empty string for that group.
    let IS_REGEX_CAPTURING_BROKEN = false
    'x'.replace(/x(.)?/g, (wholeMatch, optionalGroup) => {
      const gotEmptyString = optionalGroup === ''
      IS_REGEX_CAPTURING_BROKEN = gotEmptyString
    })
    
    // Special Elements (can contain anything)
    // Lookup for the tag names "script" and "style", built by makeMap.
    // NOTE(review): the second argument presumably selects case-insensitive
    // matching; confirm against shared/util.
    const isScriptOrStyle = makeMap('script,style', true)

    // True for a `lang` attribute whose value is anything other than "html".
    const hasLang = attr => {
      if (attr.name !== 'lang') {
        return false
      }
      return attr.value !== 'html'
    }

    /**
     * Decide whether the content of `tag` is consumed as raw text.
     *
     * @param {string} tag - name of the innermost open tag
     * @param {*} isSFC - truthy when the caller passed `options.sfc`
     * @param {Array} stack - open elements; each entry has `tag` and `attrs`
     * @returns {boolean} true when the content must not be parsed as HTML
     */
    const isSpecialTag = (tag, isSFC, stack) => {
      if (isScriptOrStyle(tag)) {
        return true
      }
      // Outside SFC mode, or below the top level, nothing else is special.
      if (!isSFC || stack.length !== 1) {
        return false
      }
      // top-level template that has no pre-processor is parsed normally;
      // every other top-level block is raw text
      const isPlainTemplate = tag === 'template' && !stack[0].attrs.some(hasLang)
      return !isPlainTemplate
    }
    
    // Cache of compiled regular expressions, keyed by lower-cased tag name
    // (filled lazily by parseHTML for raw-text elements).
    const reCache = {}

    // Character references that are decoded inside attribute values.
    const ltRE = /&lt;/g
    const gtRE = /&gt;/g
    const nlRE = /&#10;/g
    const ampRE = /&amp;/g
    const quoteRE = /&quot;/g

    /**
     * Decode the character references `&lt;`, `&gt;`, `&quot;` and `&amp;`
     * (and optionally `&#10;`) in an attribute value.
     *
     * @param {string} value - raw attribute value as captured from the markup
     * @param {*} shouldDecodeNewlines - when truthy, `&#10;` becomes a line feed
     * @returns {string} the decoded value
     */
    function decodeAttr (value, shouldDecodeNewlines) {
      const input = shouldDecodeNewlines
        ? value.replace(nlRE, '\n')
        : value
      // `&amp;` must be decoded last. Decoding it earlier turns an escaped
      // reference such as `&amp;quot;` into `&quot;`, which a later step
      // would then decode a second time.
      return input
        .replace(ltRE, '<')
        .replace(gtRE, '>')
        .replace(quoteRE, '"')
        .replace(ampRE, '&')
    }
    
    /**
     * Scan an HTML string from left to right and report what is found through
     * the callbacks on `options`.
     *
     * Options read by this function:
     *   - start(tagName, attrs, unary, start, end): called for each start tag;
     *     `attrs` is an array of `{ name, value }` with decoded values.
     *   - end(tagName, start, end): called for each element that gets closed.
     *   - chars(text): called for each run of text.
     *   - expectHTML: when truthy, an open <p> is closed before a tag for which
     *     isNonPhrasingTag is true, and a canBeLeftOpenTag element is closed
     *     when the same tag opens again.
     *   - isUnaryTag(tagName): predicate for tags that never have content
     *     (falls back to `no`).
     *   - sfc: passed through to isSpecialTag.
     *   - shouldDecodeNewlines: passed through to decodeAttr.
     *
     * `start` / `end` positions are offsets into the original input.
     *
     * @param {string} html - markup to parse
     * @param {Object} options - callbacks and flags described above
     */
    export function parseHTML (html, options) {
      const stack = []
      const expectHTML = options.expectHTML
      const isUnaryTag = options.isUnaryTag || no
      // Offset of the remaining `html` within the original input.
      let index = 0
      let last, lastTag
      while (html) {
        last = html
        // Make sure we're not in a script or style element
        if (!lastTag || !isSpecialTag(lastTag, options.sfc, stack)) {
          let textEnd = html.indexOf('<')
          if (textEnd === 0) {
            // Comment:
            if (comment.test(html)) {
              const commentEnd = html.indexOf('-->')

              if (commentEnd >= 0) {
                advance(commentEnd + 3)
                continue
              }
            }

            // http://en.wikipedia.org/wiki/Conditional_comment#Downlevel-revealed_conditional_comment
            if (conditionalComment.test(html)) {
              const conditionalEnd = html.indexOf(']>')

              if (conditionalEnd >= 0) {
                advance(conditionalEnd + 2)
                continue
              }
            }

            // Doctype:
            const doctypeMatch = html.match(doctype)
            if (doctypeMatch) {
              advance(doctypeMatch[0].length)
              continue
            }

            // End tag:
            const endTagMatch = html.match(endTag)
            if (endTagMatch) {
              const curIndex = index
              advance(endTagMatch[0].length)
              parseEndTag(endTagMatch[0], endTagMatch[1], curIndex, index)
              continue
            }

            // Start tag:
            const startTagMatch = parseStartTag()
            if (startTagMatch) {
              handleStartTag(startTagMatch)
              continue
            }
          }

          let text, rest, next
          // `>= 0` (not `> 0`): a "<" at position 0 that did not start any
          // recognised construct is plain text too, and must be skipped the
          // same way as a stray "<" later in the text.
          if (textEnd >= 0) {
            rest = html.slice(textEnd)
            while (
              !endTag.test(rest) &&
              !startTagOpen.test(rest) &&
              !comment.test(rest) &&
              !conditionalComment.test(rest)
            ) {
              // < in plain text, be forgiving and treat it as text
              next = rest.indexOf('<', 1)
              if (next < 0) break
              textEnd += next
              rest = html.slice(textEnd)
            }
            text = html.substring(0, textEnd)
            advance(textEnd)
          }

          if (textEnd < 0) {
            // No "<" left: everything that remains is text. Go through
            // advance() so that `index` keeps tracking the input offset.
            text = html
            advance(text.length)
          }

          if (options.chars && text) {
            options.chars(text)
          }
        } else {
          // Raw-text element: take everything up to its closing tag as text.
          const stackedTag = lastTag.toLowerCase()
          const reStackedTag = reCache[stackedTag] || (reCache[stackedTag] = new RegExp('([\\s\\S]*?)(</' + stackedTag + '[^>]*>)', 'i'))
          let endTagLength = 0
          const remaining = html.replace(reStackedTag, function (all, text, closingTag) {
            endTagLength = closingTag.length
            if (stackedTag !== 'script' && stackedTag !== 'style' && stackedTag !== 'noscript') {
              // Unwrap comments and CDATA sections, keeping their content.
              text = text
                .replace(/<!--([\s\S]*?)-->/g, '$1')
                .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1')
            }
            if (options.chars) {
              options.chars(text)
            }
            return ''
          })
          index += html.length - remaining.length
          html = remaining
          parseEndTag('</' + stackedTag + '>', stackedTag, index - endTagLength, index)
        }

        // Nothing was consumed in this pass: hand the remainder over as text
        // and stop. The break must not depend on `options.chars` being set,
        // otherwise the loop never terminates.
        if (html === last) {
          if (options.chars) {
            options.chars(html)
          }
          break
        }
      }

      // Clean up any remaining tags
      parseEndTag()

      // Drop the first `n` characters of the remaining input.
      function advance (n) {
        index += n
        html = html.substring(n)
      }

      // Try to consume a complete start tag. Returns a match object
      // ({ tagName, attrs, start, unarySlash, end }) or undefined. Input that
      // was consumed before the attempt failed is not restored.
      function parseStartTag () {
        const start = html.match(startTagOpen)
        if (start) {
          const match = {
            tagName: start[1],
            attrs: [],
            start: index
          }
          advance(start[0].length)
          let end, attr
          while (!(end = html.match(startTagClose)) && (attr = html.match(attribute))) {
            advance(attr[0].length)
            match.attrs.push(attr)
          }
          if (end) {
            match.unarySlash = end[1]
            advance(end[0].length)
            match.end = index
            return match
          }
        }
      }

      // Turn a parseStartTag() match into a `start` callback, updating the
      // stack of open elements.
      function handleStartTag (match) {
        const tagName = match.tagName
        const unarySlash = match.unarySlash

        if (expectHTML) {
          if (lastTag === 'p' && isNonPhrasingTag(tagName)) {
            parseEndTag('', lastTag)
          }
          if (canBeLeftOpenTag(tagName) && lastTag === tagName) {
            parseEndTag('', tagName)
          }
        }

        const unary = isUnaryTag(tagName) || (tagName === 'html' && lastTag === 'head') || !!unarySlash

        const l = match.attrs.length
        const attrs = new Array(l)
        for (let i = 0; i < l; i++) {
          const args = match.attrs[i]
          // hackish work around FF bug https://bugzilla.mozilla.org/show_bug.cgi?id=369778
          if (IS_REGEX_CAPTURING_BROKEN && args[0].indexOf('""') === -1) {
            if (args[3] === '') { delete args[3] }
            if (args[4] === '') { delete args[4] }
            if (args[5] === '') { delete args[5] }
          }
          // Groups 3, 4 and 5 hold the double-quoted, single-quoted and
          // unquoted value respectively; at most one of them is set.
          const value = args[3] || args[4] || args[5] || ''
          attrs[i] = {
            name: args[1],
            value: decodeAttr(
              value,
              options.shouldDecodeNewlines
            )
          }
        }

        if (!unary) {
          stack.push({ tag: tagName, attrs: attrs })
          lastTag = tagName
        }

        if (options.start) {
          options.start(tagName, attrs, unary, match.start, match.end)
        }
      }

      // Close `tagName` and every element opened after it. Called without
      // arguments, closes everything that is still open.
      function parseEndTag (tag, tagName, start, end) {
        let pos
        if (start == null) start = index
        if (end == null) end = index

        // Find the closest opened tag of the same type
        if (tagName) {
          const needle = tagName.toLowerCase()
          for (pos = stack.length - 1; pos >= 0; pos--) {
            if (stack[pos].tag.toLowerCase() === needle) {
              break
            }
          }
        } else {
          // If no tag name is provided, clean shop
          pos = 0
        }

        if (pos >= 0) {
          // Close all the open elements, up the stack
          for (let i = stack.length - 1; i >= pos; i--) {
            if (options.end) {
              options.end(stack[i].tag, start, end)
            }
          }

          // Remove the open elements from the stack
          stack.length = pos
          lastTag = pos && stack[pos - 1].tag
        } else if (tagName.toLowerCase() === 'br') {
          // A closing </br> with no open <br> is reported as a unary start tag.
          if (options.start) {
            options.start(tagName, [], true, start, end)
          }
        } else if (tagName.toLowerCase() === 'p') {
          // A closing </p> with no open <p> is reported as an empty element.
          if (options.start) {
            options.start(tagName, [], false, start, end)
          }
          if (options.end) {
            options.end(tagName, start, end)
          }
        }
      }
    }
  • 相关阅读:
    centos下安装chrome
    【CRT】设置 ip 显示和标签动作
    【IDEA】修改自动提示框的颜色
    【IDEA】monikai 主题
    【snmp】snmpwalk 指定端口
    【多对多】多对多取数无序的问题
    【百度搜索】屏蔽广告,高效搜索
    mybatis报错:Cause: java.io.FileNotFoundException: http://commons.apache.org/dtds/mbeans-descriptors.dtd
    怎么让request inputstream 可以多次读取
    win10下git bash console中文乱码
  • 原文地址:https://www.cnblogs.com/dhsz/p/7245843.html
Copyright © 2011-2022 走看看