zoukankan      html  css  js  c++  java
  • Jsoup抓取数据实现为一个网站做第三方Android客户端

    一前言

    学Android有很大一部分的乐趣其实就是做出一个网站客户端,比官方广告少,速度快,不臃肿,受到众多网友追捧。

    由于学校图书馆网站没有APP,也没有移动版网页,在手机上的体验相当差,用得实在太烦,于是做了这个小应用,先来看下效果。

    二Jsoup实现抓取书名

    Jsoup是Java的一个HTML解析工具包,百度一搜一大堆资料,这里不做过多介绍,先来看看对http://222.188.3.137:8080/opac/search.php的抓取。

    这里抓取到数据只要往适配器里填充数据就好了。

     1 package com.wyf.newlibrary;
     2 
     3 import org.jsoup.Jsoup;
     4 import org.jsoup.nodes.Document;
     5 import org.jsoup.nodes.Element;
     6 import org.jsoup.select.Elements;
     7 
     8 import java.io.IOException;
     9 
    10 public class BookNameJsoup {
    11     String url;
    12 
    13     String[] bName;
    14     String[] bLink;
    15     String nextPage;
    16 
    17     public BookNameJsoup(String  link) {
    18 
    19         url = link;
    20         bName = new String[20];
    21         bLink = new String[20];
    22 
    23     }
    24 
    25     public void init() {
    26         try {
    27             Document doc = Jsoup.connect(url).get();
    28             int j = 0;
    29             Elements bookName = doc.getElementsByTag("h3").select("a");
    30             for (Element i : bookName) {
    31                 bName[j] = i.text().trim();
    32                 bLink[j++] = i.attr("abs:href");
    33             }
    34 
    35             Elements next=doc.getElementsByAttributeValue("class", "blue");
    36         for(Element i:next)
    37         {
    38             if(i.text().contains("下一页"))
    39             {
    40                 nextPage=i.attr("abs:href");
    41                 break;
    42             }
    43 
    44         }
    45             
    46         } catch (IOException e) {
    47             // TODO Auto-generated catch block
    48             e.printStackTrace();
    49         }
    50     }
    51 
    52     /***************
    53      * 得到LINK
    54      ********************/
    55     public String[] getBookName() {
    56         return bName;
    57     }
    58 
    59 
    60 
    61     /*******************
    62      * 得到LINK
    63      ******************************/
    64 
    65     public String[] getLink() {
    66         return bLink;
    67     }
    68 
    69 
    70     public  String getNextPage()
    71     {
    72         return  nextPage;
    73     }
    74 
    75 
    76     /***
    77      * 判断搜索结果是否为空
    78      */
    79 
    80     
    81     }

    三 MainActivity(书名列表界面)

    在MainActivity中有一个ListView,把用Jsoup抓取到的数据往里面填充即可。ListView只用了极其简单的布局,一看就能明白。

    package com.wyf.newlibrary;
    
    import android.content.Intent;
    import android.os.AsyncTask;
    import android.os.Bundle;
    import android.support.v7.app.AppCompatActivity;
    import android.util.Log;
    import android.view.KeyEvent;
    import android.view.LayoutInflater;
    import android.view.View;
    import android.view.inputmethod.EditorInfo;
    import android.widget.AbsListView;
    import android.widget.AdapterView;
    import android.widget.ArrayAdapter;
    import android.widget.EditText;
    import android.widget.ImageButton;
    import android.widget.ListView;
    import android.widget.ProgressBar;
    import android.widget.TextView;
    import android.widget.Toast;
    
    import com.umeng.analytics.MobclickAgent;
    import com.umeng.update.UmengUpdateAgent;
    
    import java.io.UnsupportedEncodingException;
    import java.net.MalformedURLException;
    import java.net.URL;
    import java.net.URLEncoder;
    import java.util.ArrayList;
    
    
    /**
     * Search screen: the user types a title keyword, the matching books are
     * scraped page by page with {@link BookNameJsoup} and shown in a ListView.
     * Scrolling to the bottom loads the next result page; tapping a row opens
     * {@link BookInfoActivity} for that book.
     */
    public class MainActivity extends AppCompatActivity  {

        private static final String TAG = "TAG";

        /** Search URL up to (and excluding) the URL-encoded keyword. */
        private static final String SEARCH_URL_PREFIX =
                "http://222.188.3.137:8080/opac/openlink.php?strSearchType=title&match_flag=forward&historyCount=1&strText=";
        /** Remainder of the search URL after the keyword. */
        private static final String SEARCH_URL_SUFFIX =
                "&doctype=ALL&with_ebook=on&displaypg=20&showmode=list&sort=CATA_DATE&orderby=desc&dept=ALL&page=1";

        EditText edit_search;
        /** URL of the next result page to load; null when there is none. */
        String url, key;
        ListView list_bookname;
        ArrayList<String> bookName;   // titles shown in the list
        ArrayList<String> bookLink;   // detail-page links, index-aligned with bookName
        ArrayAdapter<String> adapter;
        GetBookName get;
        View foot, complete, fail;
        ProgressBar progressBar;
        boolean firstLoad = false;
        ImageButton ibtn_clear;

        /** True while a page request belonging to the current search is in flight. */
        private boolean loading;
        /** Incremented for every new search; lets superseded tasks be recognised. */
        private int searchGeneration;

        @Override
        protected void onCreate(Bundle savedInstanceState) {
            super.onCreate(savedInstanceState);
            // The action bar is absent with NoActionBar themes.
            if (getSupportActionBar() != null) {
                getSupportActionBar().hide();
            }
            setContentView(R.layout.activity_main);

            init();
            initEvent();
            list_bookname.setAdapter(adapter);
        }

        /** Wires the search box, the scroll-to-load-more logic and row clicks. */
        private void initEvent() {

            edit_search.setOnEditorActionListener(new TextView.OnEditorActionListener() {
                @Override
                public boolean onEditorAction(TextView v, int actionId, KeyEvent event) {
                    if (actionId != EditorInfo.IME_ACTION_SEARCH) {
                        return false;
                    }

                    // Validate the raw keyword: the encoded form is never null, so
                    // a null check after encoding can never detect empty input.
                    String keyword = edit_search.getText().toString().trim();
                    if (keyword.isEmpty()) {
                        Toast.makeText(MainActivity.this, "不能为空", Toast.LENGTH_SHORT).show();
                        return false;
                    }

                    try {
                        startNewSearch(URLEncoder.encode(keyword, "UTF-8"));
                    } catch (UnsupportedEncodingException e) {
                        Log.e(TAG, "onEditorAction: cannot encode keyword", e);
                    }
                    return false;
                }
            });

            list_bookname.setOnScrollListener(new AbsListView.OnScrollListener() {
                @Override
                public void onScrollStateChanged(AbsListView view, int scrollState) {
                    if (scrollState != AbsListView.OnScrollListener.SCROLL_STATE_IDLE) {
                        return;
                    }
                    if (view.getLastVisiblePosition() != view.getCount() - 1) {
                        return;
                    }
                    // Nothing to do before the first search, and never start a
                    // second page request while one is still running.
                    if (searchGeneration == 0 || loading) {
                        return;
                    }

                    if (url != null) {
                        showFooter(foot);
                        new GetBookName().execute();
                    } else if (bookName.isEmpty()) {
                        showFooter(fail);
                    } else {
                        showFooter(complete);
                    }
                }

                @Override
                public void onScroll(AbsListView view, int firstVisibleItem,
                                     int visibleItemCount, int totalItemCount) {
                    // Not needed: the idle callback queries the list directly.
                }
            });

            // Row click: open the detail screen for the selected book.
            list_bookname.setOnItemClickListener(new AdapterView.OnItemClickListener() {
                @Override
                public void onItemClick(AdapterView<?> parent, View view, int position, long id) {
                    // Footer rows report positions past the end of the data.
                    if (position < 0 || position >= bookLink.size()) {
                        return;
                    }
                    String href = bookLink.get(position);
                    if (href == null || href.isEmpty()) {
                        return;
                    }
                    Intent intent = new Intent(MainActivity.this, BookInfoActivity.class);
                    intent.putExtra("href", href);
                    startActivity(intent);
                }
            });
        }

        /** Looks up the views, creates the list adapter and inflates the footers. */
        private void init() {

           // ibtn_clear= (ImageButton) findViewById(R.id.ibtn_clear);
            edit_search = (EditText) findViewById(R.id.edit_search);
            list_bookname = (ListView) findViewById(R.id.list_bookname);
            bookName = new ArrayList<String>();
            bookLink = new ArrayList<String>();
            adapter = new ArrayAdapter<String>(MainActivity.this, android.R.layout.simple_list_item_1, bookName);
            foot = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_foot, null);
            fail = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_fail, null);
            complete = LayoutInflater.from(MainActivity.this).inflate(R.layout.layout_complete, null);
            progressBar = (ProgressBar) findViewById(R.id.progressBar);

            UmengUpdateAgent.update(this);
        }

        /**
         * Discards the current results and requests the first page for a keyword.
         *
         * @param encodedKeyword the keyword, already URL-encoded
         */
        private void startNewSearch(String encodedKeyword) {
            searchGeneration++;
            bookName.clear();
            bookLink.clear();
            list_bookname.removeFooterView(foot);
            list_bookname.removeFooterView(fail);
            list_bookname.removeFooterView(complete);
            // The backing list changed; tell the ListView right away.
            adapter.notifyDataSetChanged();

            firstLoad = true;
            new GetBookName(encodedKeyword).execute();
        }

        /** Shows a footer exactly once, even if it is already attached. */
        private void showFooter(View footer) {
            list_bookname.removeFooterView(footer);
            list_bookname.addFooterView(footer);
        }

        /**
         * Loads one page of search results in the background and appends it to
         * the list.
         */
        class GetBookName extends AsyncTask<Void, Void, BookNameJsoup> {

            /** URL this task fetches, captured on the main thread at creation. */
            private final String requestUrl;
            /** Search generation this task belongs to. */
            private final int generation;

            /**
             * Creates a task that loads the first result page for a keyword.
             *
             * @param keyword the keyword, already URL-encoded
             */
            public GetBookName(String keyword)  {
                super();
                url = SEARCH_URL_PREFIX + keyword + SEARCH_URL_SUFFIX;
                requestUrl = url;
                generation = searchGeneration;
            }

            /** Creates a task that loads the page the activity's {@code url} points to. */
            public GetBookName()
            {
                super();
                requestUrl = url;
                generation = searchGeneration;
            }

            @Override
            protected void onPreExecute() {
                super.onPreExecute();
                loading = true;
                Log.d(TAG, "onPreExecute: " + requestUrl);
                if (firstLoad)
                {
                    progressBar.setVisibility(View.VISIBLE);
                    firstLoad = false;
                }
            }

            @Override
            protected BookNameJsoup doInBackground(Void... params) {
                // Use the captured URL: the activity field may change meanwhile.
                BookNameJsoup page = new BookNameJsoup(requestUrl);
                page.init();
                return page;
            }

            @Override
            protected void onPostExecute(BookNameJsoup page) {
                super.onPostExecute(page);
                if (generation != searchGeneration) {
                    // A newer search was started; these results are stale.
                    return;
                }
                loading = false;
                progressBar.setVisibility(View.GONE);

                String[] book = page.getBookName();
                String[] link = page.getLink();
                if (book != null) {
                    // Entries are filled from the front; the first null ends them.
                    for (int i = 0; i < book.length && book[i] != null; i++) {
                        bookName.add(book[i]);
                        // Always add a link so both lists stay index-aligned.
                        bookLink.add(link != null && i < link.length ? link[i] : null);
                        Log.d(TAG, "onPostExecute: " + book[i]);
                    }
                }

                if (bookName.isEmpty())
                {
                    showFooter(fail);
                } else {
                    list_bookname.removeFooterView(fail);
                }

                String next = page.getNextPage();
                url = (next == null || next.isEmpty()) ? null : next;
                list_bookname.removeFooterView(foot);
                adapter.notifyDataSetChanged();
            }
        }

    }

    四  书的详细信息

    这里展现的是书的信息。如图

    package com.wyf.newlibrary;
    
    import android.os.AsyncTask;
    import android.os.Bundle;
    import android.support.v7.app.AppCompatActivity;
    import android.util.Log;
    import android.view.MenuItem;
    import android.view.View;
    import android.widget.ImageView;
    import android.widget.ScrollView;
    import android.widget.TextView;
    
    import com.android.volley.RequestQueue;
    import com.android.volley.Response;
    import com.android.volley.VolleyError;
    import com.android.volley.toolbox.StringRequest;
    import com.android.volley.toolbox.Volley;
    import com.umeng.analytics.MobclickAgent;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    import java.io.UnsupportedEncodingException;
    
    public class BookInfoActivity extends AppCompatActivity {
    
        TextView text_bookName,text_douban,text_position;
        ImageView image_logo;
        String url,logoUrl;
        ScrollView scrollView;
        @Override
        protected void onCreate(Bundle savedInstanceState) {
            super.onCreate(savedInstanceState);
            setContentView(R.layout.activity_book_info);
            getSupportActionBar().setDisplayHomeAsUpEnabled(true);
            setTitle("图书信息");
            url=getIntent().getStringExtra("href");
    
            init();
    
            RequestQueue queue= Volley.newRequestQueue(BookInfoActivity.this);
            StringRequest stringRequest=new StringRequest(url, new Response.Listener<String>() {
                @Override
                public void onResponse(String s) {
                    try {
                        s=new String(s.getBytes("ISO-8859-1"),"utf-8");
                       // Toast.makeText(BookInfoActivity.this, s, Toast.LENGTH_SHORT).show();
                        new  GetDetail(s).execute();
                    } catch (UnsupportedEncodingException e) {
                        e.printStackTrace();
                    }
                }
            }, new Response.ErrorListener() {
                @Override
                public void onErrorResponse(VolleyError volleyError) {
                    Log.d("TAG", "onErrorResponse: "+"i fail");
                }
            });
            queue.add(stringRequest);
    
        }
    
    
        private void init() {
            text_bookName= (TextView) findViewById(R.id.text_bookname);
            text_douban= (TextView) findViewById(R.id.text_douban);
            text_position= (TextView) findViewById(R.id.text_position);
             scrollView= (ScrollView) findViewById(R.id.scroll_position);
    
        }
    
        @Override
        public boolean onOptionsItemSelected(MenuItem item) {
    
            if(item.getItemId()==android.R.id.home)
            {
                finish();
                return true;
            }
            return super.onOptionsItemSelected(item);
        }
    
    
    /*****************异步请求拿到数据**********************/
        class GetDetail extends AsyncTask{
            String response,name,douBan="110",position="图书位置:
    ";
    
            public GetDetail(String response) {
                super();
                this.response=response;
            }
    
            @Override
            protected Object doInBackground(Object[] params) {
                Document doc= Jsoup.parse(response);
                 name=doc.getElementsByAttributeValue("class","booklist").first().text();
                 name=name.substring(name.indexOf(":")+1);
              //  douBan=doc.getElementsByAttributeValue("id","douban_content").select("p").text();
    
                Elements a=doc.select("dl.booklist");
                String temp;
                for(Element i:a)
                {
    
                    System.out.println(i.lastElementSibling().text());
                    if(i.text().contains("提要文摘附注")&&i.text().length()>8)
                    {
                        temp=i.text();
    
                       douBan=temp;
    
                    }
                }
    
                //douBan=doc.select("intro").text();
                 logoUrl=doc.select("img#book_img").attr("src");
                 Elements posi=doc.getElementsByAttributeValue("align","left");
                posi=posi.select("tr.whitetext");
    
    
                 for(Element i:posi)
                 {
                     if(i.text()!=null)
                     {
                         //temp=i.text();
                        // temp=temp.substring(0,temp.indexOf(' '))+temp.substring(temp.indexOf("-"));
                         //Log.d("TAG", "doInBackground: "+temp);
                         position+=i.text()+"
    ";
    
                     }
    
                 }
                return null;
            }
    
            @Override
            protected void onPostExecute(Object o) {
                super.onPostExecute(o);
                text_bookName.setText(name);
                text_douban.setText(douBan);
                text_position.setText(position);
    
                if(douBan.equals("110"))
                {
                    text_douban.setVisibility(View.GONE);
                }
    
                scrollView.setVisibility(View.VISIBLE);
    
            }
        }
    
      
    
    }

    五  总结

    这个Demo其实并不难,很容易理解,但要对Jsoup和异步请求有所了解。你也可以做出自己第三方客户端。

    附上源码http://pan.baidu.com/s/1c1B1VIo

  • 相关阅读:
    html中的块级元素、行内元素
    ptyhon_opencv 图像的基本操作
    正则表达式总结 2017.1.6
    HashMap 中的 entrySet()使用方法 2016.12.28
    (转)Redis持久化的几种方式
    负数与二进制换转方法
    (转)2019JAVA面试题附答案(长期更新)
    Java后端技术面试汇总(第一套)
    (转)Dubbo服务暴露过程源码分析
    Dubbo消费方服务调用过程源码分析
  • 原文地址:https://www.cnblogs.com/dadafeige/p/5419896.html
Copyright © 2011-2022 走看看