zoukankan      html  css  js  c++  java
  • Delphi 解析HTML

    uses mshtml;

    IHTMLEleMent.ID;

    IHTMLEleMent.tagName;

    IHTMLEleMent.title;
    elmt._className;
    elmt.getAttribute('anchor', 0);

    { Dumps every <table> of the page currently loaded in WebBrowser1 into Memo3.
      For each table row one line is added that contains the innerText of every
      cell, each followed by a TAB character, with CR/LF pairs removed; a dashed
      separator line is added after every row.

      Sender: the control that fired the click event (not used). }
    procedure TForm1.btnphClick(Sender: TObject);
    var
      Document: IHTMLDocument2;
      FTableCollection: IHTMLElementCollection;
      table: IHTMLTABLE;
      TableRow: IHTMLTableRow;
      elmt: IHTMLEleMent;
      I, J, K: integer;
      str: string;
    begin
      // Nothing to parse when no document has been loaded yet; without this
      // guard the calls below would dereference a nil interface.
      if not Assigned(WebBrowser1.Document) then
        Exit;

      Document := WebBrowser1.Document as IHTMLDocument2;

      // Only the <table> elements are of interest.
      FTableCollection := Document.all.tags('table') as IHTMLElementCollection;

      // Batch the memo updates so the control is not refreshed for every line.
      Memo3.Lines.BeginUpdate;
      try
        for I := 0 to FTableCollection.Length - 1 do
        begin
          table := FTableCollection.item(I, 0) as IHTMLTABLE;

          // One row per topic.
          for J := 0 to table.rows.Length - 1 do
          begin
            TableRow := table.rows.item(J, 0) as IHTMLTableRow;

            // Information of a single topic: concatenate the text of all cells.
            str := '';
            for K := 0 to TableRow.cells.Length - 1 do
            begin
              elmt := TableRow.cells.item(K, 0) as IHTMLEleMent;
              str := str + elmt.innerText + #9;
            end;

            // Keep each row on a single memo line.
            str := StringReplace(str, #$D#$A, '', [rfReplaceAll]);
            Memo3.Lines.Add(str);
            Memo3.Lines.Add('------------------------------------------');
          end;
        end;
      finally
        Memo3.Lines.EndUpdate;
      end;
    end;

      URL := 'http://bbs.csdn.net/forums/Delphi';
      WebBrowser1.Navigate(URL);

    下面是一行 tablerow 的数据,可以分解出来,<td> 与 </td> 之间是一个 cell(列)。

    如何解析第一列中 href、class 之间的 3 个数据呢?即"?"、"delphi7 滚动条颜色"、"VCL组件开发及应用"这三项。

              <tr>
        <td class="title">
          <strong class="green"></strong>
          <a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>
          <span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>
        </td>
        <td class="tc">20</td>
        <td class="tc">
          <a href="http://my.csdn.net/u010745617" rel="nofollow" target="_blank">u010745617</a><br />
          <span class="time">08-15 16:25</span></td>
        <td class="tc">1</td>
        <td class="tc">
          <a href="http://my.csdn.net/NongCunGongLu" rel="nofollow" target="_blank">NongCunGongLu</a><br />
          <span class="time">08-17 13:41</span>
        </td>
        <td class="tc">
          <a href="/topics/390861446/close" target="_blank">管理</a>
        </td>
      </tr>
        <td class="title">
          <strong class="green"></strong>
          <a href="/topics/390861446" target="_blank" title="delphi7 滚动条颜色">delphi7 滚动条颜色</a>
          <span class="forum_link">[<span class="parent"><a href="/forums/Delphi">Delphi</a></span> <a href="/forums/DelphiVCL">VCL组件开发及应用</a>]</span>
        </td>

    把td这一部分IHTMLEleMent当作IHTMLElementCollection解析就可以了,tagName,getAttribute('href',0),title,_className都可以获得正确的值,是6个集合元素。

           // Walk the elements nested inside the cell element `elmt` and read the
           // tag name, href, title, class name and text of each one.
           tempCoc := elmt.all as IHTMLElementCollection;
            // Only handle cells with exactly six nested elements, which matches the
            // sample "title" cell (strong, a, span, span, a, a).
            if (tempCoc.Length = 6) then
            begin
              for q := 0 to tempCoc.Length - 1 do
              begin
                emt2 := tempCoc.item(q, 0) as IHTMLEleMent;
                // The comparisons below use upper-case tag names.
                if emt2.tagName = 'STRONG' then
                  s2 := emt2.innerText
                else if emt2.tagName = 'A' then
                begin
                  // NOTE(review): each assignment overwrites s2, so only innerText
                  // survives; the three lines just show which values can be read.
                  s2 := emt2.getAttribute('href',0);
                  s2 := emt2.title;
                  s2 := emt2.innerText;
                end
                else if emt2.tagName = 'SPAN' then
                begin
                  // The values read here are discarded (demonstration only).
                  // NOTE(review): confirm that bare property reads compile as
                  // statements; otherwise assign them to a variable.
                  emt2.tagName;
                  emt2._className;
                  emt2.title;
                  emt2.innerText;
                end;
              end;
            end;

     elmt: IHTMLEleMent;

      elmt := (WebBrowser1.Document as ihtmldocument3).getElementById('idbtn001');

    getElementsByName

    getElementById

    getElementsByTagName

    doc2: IHTMLDocument2;
    doc3: IHTMLDocument3;

      doc2.forms.item('form1',0) as IHTMLFormElement;// get the form named form1

    elmt := (WebBrowser1.Document as IHTMLDocument3).getElementById('divfirstID');// get a node by its ID
    
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child node
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child node
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child node
    elmt := ((elmt.children as IHTMLElementCollection).item(2, 0)) as IHTMLEleMent;// third child node of the current node
    elmt := ((elmt.children as IHTMLElementCollection).item(0, 0)) as IHTMLEleMent;// first child node
  • 相关阅读:
    《C# to IL》第一章 IL入门
    multiple users to one ec2 instance setup
    Route53 health check与 Cloudwatch alarm 没法绑定
    rsync aws ec2 pem
    通过jvm 查看死锁
    wait, notify 使用清晰讲解
    for aws associate exam
    docker 容器不能联网
    本地运行aws lambda credential 配置 (missing credential config error)
    Cannot connect to the Docker daemon. Is 'docker daemon' running on this host?
  • 原文地址:https://www.cnblogs.com/cb168/p/3918237.html
Copyright © 2011-2022 走看看