zoukankan      html  css  js  c++  java
  • 获取网页上数据(图片、文字、视频)-b

    Demo地址:http://download.csdn.net/detail/u012881779/8831835

    获取网页上所有图片、获取所有html、获取网页title、获取网页内容文字。。。

    .h 文件  代码:

    // Web page search
    //NSString *strPath = [NSString stringWithFormat:@"http://www.baidu.com/s?wd=%@&cl=3",theWord];  
    
    // Video
    //NSString *strPath = [NSString stringWithFormat:@"http://www.itinge.com/music/16241.mp4"];  
    
    // Image search: theWord is substituted into the `word` query parameter.
    NSString *strPath = [NSString stringWithFormat:@"http://image.baidu.com/search/index?tn=baiduimage&istype=2&ie=utf-8&word=%@",theWord];  
    
    // Percent-escape the complete URL string as UTF-8.
    // NOTE(review): this escapes the URL as a whole, so reserved characters
    // inside theWord (such as '&' or '#') are presumably left as-is and could
    // break the query string - confirm and escape theWord on its own if so.
    strPath = [strPath stringByAddingPercentEscapesUsingEncoding:NSUTF8StringEncoding];

    .m 文件  代码:

    // Class extension: the delegate protocols this controller adopts and its
    // private Interface Builder outlets.
    @interface ViewController ()<UISearchBarDelegate , UIWebViewDelegate,UIGestureRecognizerDelegate>  
    // Search bar outlet; the keyword entered here drives the search.
    @property (weak, nonatomic) IBOutlet UISearchBar *searchBar;  
    // Web view outlet; pages are loaded into it and queried with JavaScript.
    @property (weak, nonatomic) IBOutlet UIWebView *webview;  
    
    @end  
    
    @implementation ViewController  
    
    // Makes this controller the delegate of the search bar and of the web view,
    // then installs the tap recognizer on the web view.
    - (void)viewDidLoad {
        [super viewDidLoad];

        [self.searchBar setDelegate:self];
        [self.webview setDelegate:self];
        [self addTapOnWebView];
    }
    
    // Attaches a single-tap recognizer to the web view. The recognizer reports
    // to -handleSingleTap: and does not cancel touches in the view, so the web
    // content still receives them.
    - (void)addTapOnWebView {
        UITapGestureRecognizer *webTap =
            [[UITapGestureRecognizer alloc] initWithTarget:self
                                                    action:@selector(handleSingleTap:)];
        webTap.cancelsTouchesInView = NO;
        webTap.delegate = self;
        [_webview addGestureRecognizer:webTap];
    }
    
    #pragma mark- TapGestureRecognizer
    // UIGestureRecognizerDelegate: always allows simultaneous recognition with
    // any other gesture recognizer.
    - (BOOL)gestureRecognizer:(UIGestureRecognizer *)gestureRecognizer
        shouldRecognizeSimultaneouslyWithGestureRecognizer:(UIGestureRecognizer *)otherGestureRecognizer {
        return YES;
    }
    // Tap handler for the web view. Logs the `src` of the element under the tap
    // and the top-level page URL, and opens the image viewer when that `src`
    // ends in "jpg" or "png" (compared case-insensitively).
    //
    // The tap location is passed to document.elementFromPoint unchanged, so on a
    // page that is not laid out at the view's resolution the element found may
    // differ from the one the user touched.
    - (void)handleSingleTap:(UITapGestureRecognizer *)sender {
        CGPoint pt = [sender locationInView:_webview];
        NSString *srcScript = [NSString stringWithFormat:@"document.elementFromPoint(%f, %f).src", pt.x, pt.y];
        NSString *urlToSave = [_webview stringByEvaluatingJavaScriptFromString:srcScript];
        NSLog(@"image url=%@", urlToSave);

        NSString *pageURL = [_webview stringByEvaluatingJavaScriptFromString:@"top.location.href"];
        NSLog(@"\n\n__url=%@", pageURL);

        // Require more than four characters so a bare extension is never treated as a link.
        if (urlToSave.length > 4) {
            // Lowercase first so ".JPG" / ".PNG" links are recognised as well.
            NSString *lowercasedURL = [urlToSave lowercaseString];
            if ([lowercasedURL hasSuffix:@"jpg"] || [lowercasedURL hasSuffix:@"png"]) {
                [self showImageURL:urlToSave point:pt];
            }
        }
    }
    
    // Presents the image at `url` in a screen-sized, black, aspect-fit image view
    // centered on the root view, and hides the navigation bar. Tapping the viewer
    // invokes -handleSingleViewTap:.
    //
    // `point` is the tap location reported by the caller. It is kept for
    // interface compatibility only: the viewer is always centered on the root
    // view, so the value does not affect the layout.
    - (void)showImageURL:(NSString *)url point:(CGPoint)point
    {
        UIImageView *showView = [[UIImageView alloc] initWithFrame:[[UIScreen mainScreen] bounds]];
        showView.center = self.view.center;

        showView.backgroundColor = [UIColor blackColor];
        showView.alpha = 1;
        // UIImageView ignores touches by default; enable them so the dismiss tap fires.
        showView.userInteractionEnabled = YES;
        [self.view addSubview:showView];
        [showView setContentMode:UIViewContentModeScaleAspectFit];
        [showView showImageFromURL:url placeHolder:nil CompletionBlock:nil];

        UITapGestureRecognizer *singleTap = [[UITapGestureRecognizer alloc] initWithTarget:self action:@selector(handleSingleViewTap:)];
        [showView addGestureRecognizer:singleTap];

        [self.navigationController setNavigationBarHidden:YES animated:YES];
    }
    
    // Dismisses the image viewer that was tapped and brings the navigation bar
    // back.
    //
    // Only the view owning the recognizer is removed, so unrelated image views
    // placed on the root view are left alone.
    - (void)handleSingleViewTap:(UITapGestureRecognizer *)sender
    {
        [sender.view removeFromSuperview];
        // The bar was hidden when the viewer was presented; restore it on dismissal.
        [self.navigationController setNavigationBarHidden:NO animated:YES];
    }
    
    // Memory-warning hook; currently only forwards to the superclass.
    - (void)didReceiveMemoryWarning {  
        [super didReceiveMemoryWarning];  
        // Dispose of any resources that can be recreated.  
    }  
    
    /* 
     *JavaScript获取网页信息总结 
     获取所有html:NSString *lJs = @"document.documentElement.innerHTML"; 
     获取网页title:NSString *lJs2 = @"document.title"; 
     UIWebView *lWebView = [self getCurrentWebView]; 
     NSString *lHtml1 = [lWebView stringByEvaluatingJavaScriptFromString:lJs]; 
     NSString *lHtml2 = [lWebView stringByEvaluatingJavaScriptFromString:lJs2]; 
    
     JavaScript获取网页信息总结 
     JavaScript获取当前页面URL、title等 
    
     thisURL = document.URL; 
     thisHREF = document.location.href; 
     thisSLoc = self.location.href; 
     thisDLoc = document.location; 
     thisTLoc = top.location.href; 
     thisPLoc = parent.document.location; 
     thisTHost = top.location.hostname; 
     thisHost = location.hostname; 
     thisTitle = document.title; 
     thisProtocol = document.location.protocol; 
     thisPort = document.location.port; 
     thisHash = document.location.hash; 
     thisSearch = document.location.search; 
     thisPathname = document.location.pathname; 
     thisHtml = document.documentElement.innerHTML; 
     thisBodyText = document.documentElement.innerText;//获取网页内容文字 
     thisBodyText = document.body.innerText;//获取网页内容文字  怎么和上一个一样?有知道的请解释 
     */  
    // "Fetch" action: reads the HTML of the document currently in the web view
    // and passes it to -searchPictureFromHTML: to collect image links. Shows an
    // alert asking for a keyword search first when the document is empty.
    //
    // The two disabled snippets below are kept as reference for querying video
    // metadata and general page information through JavaScript.
    - (IBAction)receiveAction:(id)sender {
        /* 1. Video */
        /*
         // Query the video on the page
         NSString *getVideoTitle = [_webview getVideoTitle];
         NSLog(@"\n\n 视频名称 : %@",getVideoTitle);
         double getVideoDuration = [_webview getVideoDuration];
         NSLog(@"\n\n 视频总时间 : %f",getVideoDuration);
         double getVideoCurrentTime = [_webview getVideoCurrentTime];
         NSLog(@"\n\n 视频当前时间 : %f",getVideoCurrentTime);
         */

        /* 2. Web page */
        /*
        // Collect the src of every image on the page
        NSString *imageUrls = [_webview stringByEvaluatingJavaScriptFromString:@"var str=new Array();""$('img').each(function(){str.push($(this).attr('src'));});"
                               "str.join(',');"];
        NSLog(@"\n\n 所有图片 : %@",imageUrls);

        // Full HTML of the document
        NSString *lJs = @"document.documentElement.innerHTML";
        NSString *lHtml1 = [_webview stringByEvaluatingJavaScriptFromString:lJs];
        //NSLog(@"1.%@",lHtml1);

        // Page title
        NSString *lJs2 = @"document.title";
        NSString *lHtml2 = [_webview stringByEvaluatingJavaScriptFromString:lJs2];
        NSLog(@"2.%@",lHtml2);

        // thisURL = document.URL
        NSString *lJs3 = @"document.URL";
        NSString *lHtml3 = [_webview stringByEvaluatingJavaScriptFromString:lJs3];
        NSLog(@"3.%@",lHtml3);

        // Visible text of the page
        NSString *lJs4 = @"document.documentElement.innerText";
        NSString *lHtml4 = [_webview stringByEvaluatingJavaScriptFromString:lJs4];
        NSLog(@"4.%@",lHtml4);

        // Visible text of the page
        NSString *lJs5 = @"document.body.innerText";
        NSString *lHtml5 = [_webview stringByEvaluatingJavaScriptFromString:lJs5];
        NSLog(@"5.%@",lHtml5);
        */

        /* 3. Images */
        // Full HTML of the document
        NSString *innerHTMLString = [_webview stringByEvaluatingJavaScriptFromString:@"document.documentElement.innerHTML"];

        // Treat a nil/empty result the same as the empty document skeleton:
        // in both cases there is nothing to scan for images.
        BOOL hasContent = innerHTMLString.length > 0
            && ![innerHTMLString isEqualToString:@"<head></head><body></body>"];
        if (hasContent) {
            [self searchPictureFromHTML:innerHTMLString];
        } else {
            UIAlertView *alert = [[UIAlertView alloc] initWithTitle:nil message:@"请先搜索关键字" delegate:nil cancelButtonTitle:@"确定" otherButtonTitles:nil];
            [alert show];
        }

    }
    
    // Scans `theHTML` for links of the form "http://....jpg", removes duplicates
    // and pushes a PictureViewController showing the result.
    //
    // The HTML is split on "http://"; in every chunk the text up to the first
    // ".jpg" is taken as a link candidate. A candidate is accepted when it
    // passes -judgeStringIsPicture: (no markup/script punctuation) and
    // -judgeStringIsNull: (not blank).
    // Example of a matching link: http://img0.bdstatic.com/img/image/shouye/qwscmeb02.jpg
    - (void)searchPictureFromHTML:(NSString *)theHTML {
        NSMutableArray *picMutableArr = [[NSMutableArray alloc] init];
        NSArray *picHttpArr = [theHTML componentsSeparatedByString:@"http://"];

        // Chunk 0 is whatever precedes the first "http://", so it is never a link.
        for (NSUInteger i = 1; i < picHttpArr.count; i++) {
            NSString *chunk = [picHttpArr objectAtIndex:i];
            NSRange jpgRange = [chunk rangeOfString:@".jpg"];
            // Without a ".jpg" in the chunk there is no image link to rebuild;
            // skipping it avoids fabricating "http://<chunk>.jpg" entries.
            if (jpgRange.location == NSNotFound) {
                continue;
            }

            NSString *firstStr = [chunk substringToIndex:jpgRange.location];
            if ([self judgeStringIsPicture:firstStr] && [self judgeStringIsNull:firstStr]) {
                NSString *picUrl = [NSString stringWithFormat:@"http://%@.jpg", firstStr];
                [picMutableArr addObject:picUrl];
            }
        }
        // Drop duplicate links
        picMutableArr = [self cleanRepeatPicture:picMutableArr];

        // Show the collected pictures
        PictureViewController *picVc = [[PictureViewController alloc] initWithNibName:@"PictureViewController" bundle:nil];
        picVc.valueArr = picMutableArr;
        [self.navigationController pushViewController:picVc animated:YES];

    }
    
    // Returns YES when `string` could be part of an image link, i.e. it contains
    // none of the markup/script punctuation characters < > { } [ ] ( ) | $ ? ;
    // A nil or empty string also yields YES; blank candidates are rejected
    // separately by -judgeStringIsNull:.
    - (BOOL)judgeStringIsPicture:(NSString *)string {
        // Guard first: messaging nil would return a zeroed range whose location
        // (0) would be mistaken for a match.
        if (string.length == 0) {
            return YES;
        }

        NSCharacterSet *forbidden = [NSCharacterSet characterSetWithCharactersInString:@"<>{}[]()|$?;"];
        return [string rangeOfCharacterFromSet:forbidden].location == NSNotFound;
    }
    
    // Returns a new array holding the links of `picarr` without duplicates.
    //
    // The input is walked from the last element to the first and each surviving
    // link is inserted at the front, so the result keeps the input order and,
    // among duplicates, the occurrence closest to the end wins. A link is a
    // duplicate when it equals a kept link, or - if it contains "&fm" - when its
    // text before the first "&fm" equals a kept link's text before "&fm".
    - (NSMutableArray *)cleanRepeatPicture:(NSMutableArray *)picarr {
        NSMutableArray *uniqueLinks = [NSMutableArray array];

        for (NSString *candidate in [picarr reverseObjectEnumerator]) {
            NSArray *candidateParts = [candidate componentsSeparatedByString:@"&fm"];
            BOOL candidateHasFm = candidateParts.count > 1;
            BOOL alreadyKept = NO;

            for (NSString *kept in uniqueLinks) {
                if ([kept isEqualToString:candidate]) {
                    alreadyKept = YES;
                    break;
                }
                if (!candidateHasFm) {
                    continue;
                }
                NSString *keptBase = [[kept componentsSeparatedByString:@"&fm"] firstObject];
                if ([[candidateParts firstObject] isEqualToString:keptBase]) {
                    alreadyKept = YES;
                    break;
                }
            }

            if (!alreadyKept) {
                [uniqueLinks insertObject:candidate atIndex:0];
            }
        }

        return uniqueLinks;
    }
    
    // Loads `urlStr` into the web view with scale-to-fit enabled, makes the web
    // view and its direct subviews transparent, and for every UIScrollView among
    // those subviews: tags it 1321, hides both scroll indicators, disables
    // bouncing, sets the content width to 1 and hides its UIImageView children
    // (the shadows shown when dragging past the top or bottom edge).
    - (void)detailsWithUrl:(NSString *)urlStr {
        NSURLRequest *pageRequest = [NSURLRequest requestWithURL:[NSURL URLWithString:urlStr]];
        [_webview loadRequest:pageRequest];
        _webview.scalesPageToFit = YES;

        // Transparent background and hidden scroll indicators
        _webview.backgroundColor = [UIColor clearColor];
        _webview.opaque = NO;
        for (UIView *child in _webview.subviews) {
            child.backgroundColor = [UIColor clearColor];
            if (![child isKindOfClass:[UIScrollView class]]) {
                continue;
            }

            UIScrollView *scroller = (UIScrollView *)child;
            scroller.tag = 1321;
            scroller.showsHorizontalScrollIndicator = NO;
            scroller.showsVerticalScrollIndicator = NO;
            scroller.bounces = NO;
            scroller.contentSize = CGSizeMake(1, scroller.contentSize.height);

            for (UIView *decoration in scroller.subviews) {
                if ([decoration isKindOfClass:[UIImageView class]]) {
                    // Edge shadow images that appear when dragging beyond the content
                    decoration.hidden = YES;
                }
            }
        }
    }
    
    // Returns YES when `string` contains at least one character other than the
    // space character; returns NO for nil, an empty string, or a string made up
    // of spaces only. (Despite the name, YES means "not blank".)
    - (BOOL)judgeStringIsNull:(NSString *)string {
        NSUInteger total = string.length;
        for (NSUInteger index = 0; index < total; index++) {
            if ([string characterAtIndex:index] != ' ') {
                return YES;
            }
        }
        return NO;
    }
    
    #pragma mark UISearchBarDelegate
    // Search button handler: when the entered text is not blank, asks
    // NetPortShared for the search URL of that keyword and loads it into the web
    // view; otherwise shows an alert asking for a keyword.
    - (void)searchBarSearchButtonClicked:(UISearchBar *)searchBar {
        NSString *keyword = searchBar.text;
        if ([self judgeStringIsNull:keyword]) {
            // Search endpoint
            NSString *urlStr = [NetPortShared baiduSearchDelegate:self andTag:33333 andWord:keyword];
            [self detailsWithUrl:urlStr];
        } else {
            UIAlertView *alert = [[UIAlertView alloc] initWithTitle:nil message:@"请输入关键字" delegate:nil cancelButtonTitle:@"确定" otherButtonTitles:nil];
            [alert show];
        }
    }
    
    @end

    示意图:

    文/作者:枫志应明

    CSDN博客地址:http://blog.csdn.net/wsyx768/article/details/46618125

  • 相关阅读:
    特征词选择算法对文本分类准确率的影响(前言)
    答火星人.NET。如何使用我的本科毕业程序 正文提取DEMO
    有关matlab画图格式的部分代码
    文本分类中的特征词选择算法系列科普(前言AND 一)
    c++杂项备忘
    写一点应用关于 Lucene.Net,snowball的重新组装(一)在Lucene.Net中加入词性标注与词根还原功能
    C++字符串处理:批量去重,以及大写变小写
    Python打印到文件
    中文分词:采用二元词图以及viterbi算法(三)
    博客园和百度空间,我的两个家
  • 原文地址:https://www.cnblogs.com/isItOk/p/5838337.html
Copyright © 2011-2022 走看看