zoukankan      html  css  js  c++  java
  • iOS开发之html解析

    使用XPath解析html

    可以从 https://github.com/topfunky/hpple 下载工程,将 TFHpple.h、TFHpple.m、TFHppleElement.h、TFHppleElement.m、XPathQuery.h、XPathQuery.m 加到自己的项目中,并在 Frameworks 中导入 libxml2 库(libxml2.dylib,较新的 Xcode 中为 libxml2.tbd)。
    iOS开发之html解析

     
    在项目中找到 Other Linker Flags,加入 -lxml2
    iOS开发之html解析

    在项目中找到Header Search Paths,加入/usr/include/libxml2
    代码如下:

    // Downloads the news index page, parses every <a> element with hpple, and
    // appends the title text and absolute link of each news entry to
    // currentNewsArr / currentHrefArr (both declared outside this snippet).
    NSString *urlString = @"http://www.xiyou.edu.cn/new/lm.jsp?urltype=tree.TreeTempUrl&wbtreeid=724";

    // NOTE: this is a synchronous download and blocks the calling thread.
    NSData *htmlData = [[NSData alloc] initWithContentsOfURL:[NSURL URLWithString:urlString]];

    // Re-encode the page as UTF-8 before handing it to the parser.
    NSData *toHtmlData = [self toUTF8:htmlData];

    TFHpple *xpathParser = nil;

    // Only parse when the download and the re-encoding produced some data.
    if ([toHtmlData length] > 0) {

        xpathParser = [[TFHpple alloc] initWithHTMLData:toHtmlData];

        NSArray *aArray = [xpathParser searchWithXPathQuery:@"//a"];

        // The wanted links are the 15 <a> elements at indexes 87..101.
        // Clamp the upper bound so a shorter page cannot raise NSRangeException.
        NSUInteger firstIndex = 87;
        NSUInteger endIndex = MIN((NSUInteger)102, [aArray count]);

        for (NSUInteger i = firstIndex; i < endIndex; i++) {

            TFHppleElement *aElement = [aArray objectAtIndex:i];

            // The title text sits three levels below the <a> element.
            // firstObject returns nil for an empty array, and messaging nil
            // yields nil, so a missing level simply produces a nil title.
            TFHppleElement *aEle = [[aElement children] firstObject];

            TFHppleElement *aChildEle = [[aEle children] firstObject];

            NSString *aStr = [[[aChildEle children] firstObject] content];

            NSLog(@"aStr:%@",aStr);

            NSDictionary *aAttributeDict = [aElement attributes];

            NSLog(@"aAttributeDict:%@",aAttributeDict);

            // Read the href attribute of the <a> element.
            NSString *href = [aAttributeDict objectForKey:@"href"];

            // Skip entries without a title or a link: -addObject: throws on nil,
            // and skipping both keeps the two arrays index-aligned.
            if (aStr == nil || href == nil) {
                continue;
            }

            NSString *hrefStr = [NSString stringWithFormat:@"http://www.xiyou.edu.cn%@",href];

            NSLog(@"hrefStr:%@",hrefStr);

            [currentNewsArr addObject:aStr];

            [currentHrefArr addObject:hrefStr];

        }

    }

    // Manual reference counting: balance both alloc/init calls on every path
    // (messaging nil is a no-op when the parser was never created).
    [htmlData release];

    [xpathParser release];

    //如果解析的网页不是utf8编码,如gbk编码,可以先将其转换为utf8编码再对其进行解析

    // Converts HTML bytes encoded as GBK/GB18030 into UTF-8 encoded bytes.
    //
    // The bytes are decoded as GB18030, the GBK charset declaration in the
    // page's META tag is rewritten to UTF-8, and the result is re-encoded.
    //
    // sourceData: raw bytes of the page, expected to be GBK/GB18030 encoded.
    // Returns the UTF-8 encoded page, or nil when the bytes cannot be decoded.
    -(NSData *) toUTF8:(NSData *)sourceData {

        // Create rule: this function owns gbkStr and must release it.
        CFStringRef gbkStr = CFStringCreateWithBytes(NULL, [sourceData bytes], [sourceData length], kCFStringEncodingGB_18030_2000, false);

        if (gbkStr == NULL) {

            return nil;

        }

        // __bridge transfers no ownership; the CFRelease below balances the Create.
        NSString *gbkString = (__bridge NSString *)gbkStr;

        // Adjust these literals to match the charset declared in the page source;
        // here the declaration is rewritten from GBK to UTF-8.
        NSString *utf8_String = [gbkString stringByReplacingOccurrencesOfString:@"META http-equiv=\"Content-Type\" content=\"text/html; charset=GBK\""
                                                                     withString:@"META http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\""];

        // Encode before releasing gbkStr so the source string is alive for the
        // whole conversion.
        NSData *utf8Data = [utf8_String dataUsingEncoding:NSUTF8StringEncoding];

        CFRelease(gbkStr);

        return utf8Data;

    }

  • 相关阅读:
    JavaScript 显示数据
    c#运算符重载
    C++栈和队列标准库函数
    unity AB打包 unity2018.2.2
    VR AR SDK汇总
    Unity程序们经常用到的网址(方便自己用,一直更新)
    Unity打包Visual Studio部署HoloLens找不到WindowsMobile SDK的解决方案
    【Unity3D】串口通信
    【Unity3D】锁屏、解锁相关函数回调
    Unity3D Destroy方法的细节
  • 原文地址:https://www.cnblogs.com/fakemessi/p/4900901.html
Copyright © 2011-2022 走看看