zoukankan      html  css  js  c++  java
  • 网易邮件采集器(3)

    采集邮件

    (一)创建 Constants 类,存储文件路径、文件名、休眠时间,以及收件箱和邮件的 URL

    (二)登录

    public void login(String userName, String password) {
            Map<String, String> params = new HashMap<String, String>();
            params.put("ds", "mail163_letter");
            params.put("from", "web");
            params.put("funcid", "loginone");
            params.put("iframe", "1");
            params.put("language", "-1");
            params.put("passtype", "1");
            params.put("product", "mail163");
            params.put("net", "c");
            params.put("stype", "-1");
            params.put("race", "167_158_146_gz");
            params.put("uid", userName + "@163.com");
            params.put("savelogin", "0");
            params.put("url2", "http://mail.163.com/errorpage/error163.htm");
            params.put("username", userName);
            params.put("password", password);
    
            String s = httpUtil.post(Constants.loginURL, params).trim();
            // System.out.println(s);
            int i = s.indexOf("http://mail.163.com/");
            String mainUrl = s.substring(i, s.indexOf("";</script>"));
            httpUtil.get(mainUrl);
            sid = httpUtil.getCookie("Coremail.sid");
            // System.out.println("==============sid:" + sid);
            httpUtil.printCookies();
    
        }

    (三)抓取邮件

     (1)读取收件箱信息

    // Request the message listing: the query is sent as an XML document in the
    // "var" form field, posted to Constants.mailBox followed by the session id.
    // NOTE(review): presumably fid 1 is the inbox, and order/desc/limit/start ask
    // for the first 20 messages sorted by date descending - confirm against the server API.
    Map<String, String> params = new HashMap<String, String>();
    params.put("var", "<?xml version=\"1.0\"?>" + "<object>" + "<int name=\"fid\">1</int>"
            + "<string name=\"order\">date</string>" + "<boolean name=\"desc\">true</boolean>"
            + "<int name=\"limit\">20</int>" + "<int name=\"start\">0</int>"
            + "<boolean name=\"skipLockedFolders\">false</boolean>" + "<boolean name=\"returnTag\">true</boolean>"
            + "<boolean name=\"returnTotal\">true</boolean>" + "</object>");
    result1 = httpUtil.post(Constants.mailBox + sid, params);

    (2)读取邮件信息

    // Fetch a single message: POST an empty form to Constants.mail followed by the
    // message id and keep the raw response in result2.
    // NOTE(review): `mg` is not declared in this fragment; presumably this runs
    // inside the per-message loop of step (3) - confirm.
    Map<String, String> params2 = new HashMap<String, String>();
                result2 = httpUtil.post(Constants.mail + mg.getId(), params2);
                // System.out.println(result2);

    (3)用jsoup解析信息,并存入list集合中

            // Parse the listing response (result1) and build one MailGet per
            // element whose name attribute is "id".
            Document d = Jsoup.parseBodyFragment(result1);
            List<MailGet> ls = new ArrayList<MailGet>();
            int i = 0;

            Elements links = d.select("[name=\"id\"]");
            for (Element link : links) {
                MailGet mg = new MailGet(link.text());
                ls.add(mg);
                if (i == 0) {
                    // Remember the id of the first listed message.
                    mid = mg.getId();
                }
                // NOTE(review): result2 is parsed once per message here, so it is
                // presumably refreshed by the step (2) request inside this loop - confirm.
                Document cm = Jsoup.parseBodyFragment(result2);
                mg.setNeirong(cm.body().text());
                i++;
            }

            // The remaining fields are matched to messages by position: the n-th
            // "from"/"to"/... element is assigned to the n-th message. Each loop is
            // bounded by both sizes so a surplus element cannot index past the list.
            // NOTE(review): positional pairing shifts fields if a message lacks one - verify.
            links = d.select("[name=\"from\"]");
            for (i = 0; i < links.size() && i < ls.size(); i++) {
                ls.get(i).setFrom(links.get(i).text());
            }

            links = d.select("[name=\"to\"]");
            for (i = 0; i < links.size() && i < ls.size(); i++) {
                ls.get(i).setTo(links.get(i).text());
            }

            links = d.select("[name=\"sentDate\"]");
            for (i = 0; i < links.size() && i < ls.size(); i++) {
                ls.get(i).setSentDate(links.get(i).text());
            }

            links = d.select("[name=\"receivedDate\"]");
            for (i = 0; i < links.size() && i < ls.size(); i++) {
                ls.get(i).setReceivedDate(links.get(i).text());
            }

            links = d.select("[name=\"subject\"]");
            for (i = 0; i < links.size() && i < ls.size(); i++) {
                ls.get(i).setSubject(links.get(i).text());
            }

    (4)遍历集合,得到邮件的发件人,收件人,发送时间,接收时间,内容等信息,并存储邮件

    for (MailGet mg : ls) {
                //
                // Store one message as JSON unless its id equals the content of the
                // marker file, in which case the enclosing loop stops.
                try {
                    File fa = new File(Constants.midPath, Constants.midFileName);
                    if (!fa.exists()) {
                        // FileReader fails on a missing file, so create an empty one first.
                        fa.createNewFile();
                    }

                    // Read the whole marker file; the reader is closed even if reading fails.
                    StringBuilder content = new StringBuilder();
                    FileReader fr = new FileReader(fa);
                    try {
                        char[] a = new char[1024];
                        int j;
                        while ((j = fr.read(a)) > 0) {
                            content.append(a, 0, j);
                        }
                    } finally {
                        fr.close();
                    }
                    String str = content.toString();

                    if (str.equals(mg.getId())) {
                        break;
                    }

                    System.out.println(mg);

                    // File name is the received date with ':' and '-' removed.
                    File fi = new File(Constants.youjianPath,
                            mg.getReceivedDate().replace(":", "").replace("-", "") + ".json");

                    // FileWriter creates the file when it does not exist, so no explicit
                    // createNewFile() is needed; the writer is closed even if the write fails.
                    FileWriter fw = new FileWriter(fi);
                    try {
                        fw.write(JSON.toJSONString(mg));
                    } finally {
                        fw.close();
                    }
                    i++;
                } catch (Exception e) {
                    // Best effort: a failure on one message is reported and the loop continues.
                    e.printStackTrace();
                }
  • 相关阅读:
    使用迭代器模式批量获得数据(C#实现)
    如何从技术上预防抢票软件刷屏
    如何用Tesseract做日文OCR(c#实现)
    我的.net开发百宝箱
    程序员必备基础:Git 命令全方位学习
    Java 异常处理的十个建议
    50道Java集合经典面试题(收藏版)
    记一次接口性能优化实践总结:优化接口性能的八个建议
    100道MySQL数据库经典面试题解析(收藏版)
    800+Java后端经典面试题,希望你找到自己理想的Offer呀~
  • 原文地址:https://www.cnblogs.com/wenwen123/p/5797101.html
Copyright © 2011-2022 走看看