zoukankan      html  css  js  c++  java
  • 【收藏】SearchCrawler By James Holmes

    转自 Crawling the Web with Java By James Holmes

    无需任何扩展包,可直接运行。

    import java.awt.*;
    import java.awt.event.*;
    import java.io.*;
    import java.net.*;
    import java.util.*;
    import java.util.regex.*;
    import javax.swing.*;
    import javax.swing.table.*;
    // The Search Web Crawler
    
    /**
     * A small Swing application that crawls the web starting from one URL and
     * lists every downloaded page whose text contains all of the search terms.
     *
     * <p>Only {@code http://} URLs are followed. Matches are shown in a table and
     * appended to a log file. The crawl itself runs on a background thread; every
     * change to a Swing component is routed to the event dispatch thread.
     */
    public class SearchCrawler extends JFrame
    {
        // Max URLs drop-down values.
        private static final String[] MAX_URLS =
                {"50", "100", "500", "1000"};

        // Value of maxUrls meaning "crawl without an upper bound".
        private static final int NO_LIMIT = -1;

        // Field name that introduces a disallowed path in robots.txt.
        private static final String DISALLOW = "Disallow:";

        // Matches the href value of an anchor tag whose first attribute is href.
        private static final Pattern LINK_PATTERN =
                Pattern.compile("<a\\s+href\\s*=\\s*\"?(.*?)[\"|>]",
                        Pattern.CASE_INSENSITIVE);

        // Separator between individual search terms.
        private static final Pattern WHITESPACE = Pattern.compile("\\s+");

        // Cache of robot disallow lists, keyed by lower-cased host name.
        // Only touched by the (single) crawl thread.
        private final Map<String, ArrayList<String>> disallowListCache =
                new HashMap<>();

        // Search GUI controls.
        private JTextField startTextField;
        private JComboBox<String> maxComboBox;
        private JCheckBox limitCheckBox;
        private JTextField logTextField;
        private JTextField searchTextField;
        private JCheckBox caseCheckBox;
        private JButton searchButton;

        // Search stats GUI controls.
        private JLabel crawlingLabel2;
        private JLabel crawledLabel2;
        private JLabel toCrawlLabel2;
        private JProgressBar progressBar;
        private JLabel matchesLabel2;

        // Table listing search matches.
        private JTable table;

        // True while a search worker exists. Read and written on the event
        // dispatch thread only; decides whether the button means Search or Stop.
        private boolean searchActive;

        // "Keep going" flag polled by crawl(); cleared by the Stop button.
        // Volatile because it is written by the UI thread and read by the worker.
        private volatile boolean crawling;

        // Matches log file print writer (null when no search is running).
        private volatile PrintWriter logFileWriter;

        /** Builds the window: File menu, search form, statistics and matches table. */
        public SearchCrawler() {
            setTitle("Search Crawler");
            setSize(600, 600);

            // Closing the window terminates the application.
            addWindowListener(new WindowAdapter() {
                @Override
                public void windowClosing(WindowEvent e) {
                    actionExit();
                }
            });

            setJMenuBar(buildMenuBar());

            getContentPane().setLayout(new BorderLayout());
            getContentPane().add(buildSearchPanel(), BorderLayout.NORTH);
            getContentPane().add(buildMatchesPanel(), BorderLayout.CENTER);
        }

        // Creates the menu bar holding the File > Exit item.
        private JMenuBar buildMenuBar() {
            JMenuItem fileExitMenuItem = new JMenuItem("Exit", KeyEvent.VK_X);
            fileExitMenuItem.addActionListener(new ActionListener() {
                @Override
                public void actionPerformed(ActionEvent e) {
                    actionExit();
                }
            });

            JMenu fileMenu = new JMenu("File");
            fileMenu.setMnemonic(KeyEvent.VK_F);
            fileMenu.add(fileExitMenuItem);

            JMenuBar menuBar = new JMenuBar();
            menuBar.add(fileMenu);
            return menuBar;
        }

        // Creates the search form and the crawl statistics shown beneath it.
        private JPanel buildSearchPanel() {
            JPanel panel = new JPanel(new GridBagLayout());
            GridBagConstraints c;

            // Row 1: start URL.
            panel.add(new JLabel("Start URL:"), captionConstraints(0));
            startTextField = new JTextField();
            panel.add(startTextField, valueConstraints(0));

            // Row 2: max URLs, host limit check box and a filler ending the row.
            panel.add(new JLabel("Max URLs to Crawl:"), captionConstraints(0));
            maxComboBox = new JComboBox<String>(MAX_URLS);
            maxComboBox.setEditable(true);
            c = new GridBagConstraints();
            c.insets = new Insets(5, 5, 0, 0);
            panel.add(maxComboBox, c);
            limitCheckBox = new JCheckBox("Limit crawling to Start URL site");
            c = new GridBagConstraints();
            c.anchor = GridBagConstraints.WEST;
            c.insets = new Insets(0, 10, 0, 0);
            panel.add(limitCheckBox, c);
            c = new GridBagConstraints();
            c.gridwidth = GridBagConstraints.REMAINDER;
            panel.add(new JLabel(), c);

            // Row 3: matches log file, defaulting to crawler.log in the working dir.
            panel.add(new JLabel("Matches Log File:"), captionConstraints(0));
            String file =
                    System.getProperty("user.dir") +
                            System.getProperty("file.separator") +
                            "crawler.log";
            logTextField = new JTextField(file);
            panel.add(logTextField, valueConstraints(0));

            // Row 4: search string and case sensitivity.
            panel.add(new JLabel("Search String:"), captionConstraints(0));
            searchTextField = new JTextField();
            c = new GridBagConstraints();
            c.fill = GridBagConstraints.HORIZONTAL;
            c.insets = new Insets(5, 5, 0, 0);
            c.gridwidth = 2;
            c.weightx = 1.0d;
            panel.add(searchTextField, c);
            caseCheckBox = new JCheckBox("Case Sensitive");
            c = new GridBagConstraints();
            c.insets = new Insets(5, 5, 0, 5);
            c.gridwidth = GridBagConstraints.REMAINDER;
            panel.add(caseCheckBox, c);

            // Row 5: Search/Stop button.
            searchButton = new JButton("Search");
            searchButton.addActionListener(new ActionListener() {
                @Override
                public void actionPerformed(ActionEvent e) {
                    actionSearch();
                }
            });
            c = new GridBagConstraints();
            c.gridwidth = GridBagConstraints.REMAINDER;
            c.insets = new Insets(5, 5, 5, 5);
            panel.add(searchButton, c);

            // Row 6: separator between the form and the statistics.
            c = new GridBagConstraints();
            c.fill = GridBagConstraints.HORIZONTAL;
            c.gridwidth = GridBagConstraints.REMAINDER;
            c.insets = new Insets(5, 5, 5, 5);
            panel.add(new JSeparator(), c);

            // Statistics rows.
            crawlingLabel2 = newPlainLabel();
            panel.add(new JLabel("Crawling:"), captionConstraints(0));
            panel.add(crawlingLabel2, valueConstraints(0));

            crawledLabel2 = newPlainLabel();
            panel.add(new JLabel("Crawled URLs:"), captionConstraints(0));
            panel.add(crawledLabel2, valueConstraints(0));

            toCrawlLabel2 = newPlainLabel();
            panel.add(new JLabel("URLs to Crawl:"), captionConstraints(0));
            panel.add(toCrawlLabel2, valueConstraints(0));

            progressBar = new JProgressBar();
            progressBar.setMinimum(0);
            progressBar.setStringPainted(true);
            panel.add(new JLabel("Crawling Progress:"), captionConstraints(0));
            panel.add(progressBar, valueConstraints(0));

            // Last row carries extra bottom padding.
            matchesLabel2 = newPlainLabel();
            panel.add(new JLabel("Search Matches:"), captionConstraints(10));
            panel.add(matchesLabel2, valueConstraints(10));

            return panel;
        }

        // Creates the titled panel containing the scrollable matches table.
        private JPanel buildMatchesPanel() {
            table = new JTable(newMatchesModel());

            JPanel matchesPanel = new JPanel();
            matchesPanel.setBorder(
                    BorderFactory.createTitledBorder("Matches"));
            matchesPanel.setLayout(new BorderLayout());
            matchesPanel.add(new JScrollPane(table), BorderLayout.CENTER);
            return matchesPanel;
        }

        // Constraints for a right-aligned caption at the start of a form row.
        private static GridBagConstraints captionConstraints(int bottomInset) {
            GridBagConstraints c = new GridBagConstraints();
            c.anchor = GridBagConstraints.EAST;
            c.insets = new Insets(5, 5, bottomInset, 0);
            return c;
        }

        // Constraints for a component that fills the rest of its form row.
        private static GridBagConstraints valueConstraints(int bottomInset) {
            GridBagConstraints c = new GridBagConstraints();
            c.fill = GridBagConstraints.HORIZONTAL;
            c.gridwidth = GridBagConstraints.REMAINDER;
            c.insets = new Insets(5, 5, bottomInset, 5);
            return c;
        }

        // Creates an empty label using the plain (non-bold) variant of its font.
        private static JLabel newPlainLabel() {
            JLabel label = new JLabel();
            label.setFont(label.getFont().deriveFont(Font.PLAIN));
            return label;
        }

        // Creates an empty, read-only table model with the single "URL" column.
        private static DefaultTableModel newMatchesModel() {
            return new DefaultTableModel(new Object[][]{}, new String[]{"URL"}) {
                @Override
                public boolean isCellEditable(int row, int column) {
                    return false;
                }
            };
        }

        // Runs the task on the event dispatch thread: immediately when already
        // on it, otherwise queued (queued tasks run in submission order).
        private static void runOnEdt(Runnable task) {
            if (SwingUtilities.isEventDispatchThread()) {
                task.run();
            } else {
                SwingUtilities.invokeLater(task);
            }
        }

        // Exit this program, flushing any matches logged so far.
        private void actionExit() {
            PrintWriter writer = logFileWriter;
            if (writer != null) {
                writer.flush();
            }
            System.exit(0);
        }

        // Handle Search/Stop button being clicked.
        private void actionSearch() {
            // While a search is running the button means "Stop": ask the worker
            // to finish. The button keeps that meaning until the worker is done,
            // so repeated clicks can never start a second concurrent crawl.
            if (searchActive) {
                crawling = false;
                return;
            }

            ArrayList<String> errorList = new ArrayList<>();

            // Validate the start URL.
            String startUrl = startTextField.getText().trim();
            if (startUrl.isEmpty()) {
                errorList.add("Missing Start URL.");
            } else if (verifyUrl(startUrl) == null) {
                errorList.add("Invalid Start URL.");
            }

            // Max URLs is either empty (no limit) or a positive number.
            int maxUrls = NO_LIMIT;
            Object selected = maxComboBox.getSelectedItem();
            String max = (selected == null) ? "" : selected.toString().trim();
            if (!max.isEmpty()) {
                try {
                    maxUrls = Integer.parseInt(max);
                } catch (NumberFormatException e) {
                    maxUrls = 0; // Not a number: rejected just below.
                }
                if (maxUrls < 1) {
                    errorList.add("Invalid Max URLs value.");
                }
            }

            // Validate that matches log file has been entered.
            String logFile = logTextField.getText().trim();
            if (logFile.isEmpty()) {
                errorList.add("Missing Matches Log File.");
            }

            // Validate that search string has been entered.
            String searchString = searchTextField.getText().trim();
            if (searchString.isEmpty()) {
                errorList.add("Missing Search String.");
            }

            // Show errors, if any, one per line, and return.
            if (!errorList.isEmpty()) {
                StringBuilder message = new StringBuilder();
                for (String error : errorList) {
                    if (message.length() > 0) {
                        message.append('\n');
                    }
                    message.append(error);
                }
                showError(message.toString());
                return;
            }

            // Remove "www" from start URL if present, then start crawling.
            search(logFile, removeWwwFromUrl(startUrl), maxUrls, searchString);
        }

        // Prepares the UI on the calling (event dispatch) thread and then runs
        // the crawl on a new background thread.
        private void search(final String logFile, final String startUrl,
                            final int maxUrls, final String searchString) {
            // Open the log first so a failure leaves the form fully usable.
            PrintWriter opened;
            try {
                opened = new PrintWriter(new FileWriter(logFile));
            } catch (IOException e) {
                showError("Unable to open matches log file.");
                return;
            }
            final PrintWriter writer = opened;
            logFileWriter = writer;

            // Show hour glass cursor and lock the form while crawling.
            setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
            setSearchControlsEnabled(false);
            searchButton.setText("Stop");

            // Reset matches and stats.
            table.setModel(newMatchesModel());
            updateStats(startUrl, 0, 0, maxUrls);

            // Read the check boxes here so the worker never touches Swing state.
            final boolean limitHost = limitCheckBox.isSelected();
            final boolean caseSensitive = caseCheckBox.isSelected();

            searchActive = true;
            crawling = true;

            Thread worker = new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        crawl(startUrl, maxUrls, limitHost,
                                searchString, caseSensitive);
                    } finally {
                        // PrintWriter never throws; checkError() reports whether
                        // any write (or the close) failed.
                        writer.close();
                        final boolean logFailed = writer.checkError();
                        SwingUtilities.invokeLater(new Runnable() {
                            @Override
                            public void run() {
                                finishSearch(logFailed);
                            }
                        });
                    }
                }
            });
            worker.start();
        }

        // Restores the UI after the worker has ended. Runs on the event dispatch
        // thread, after every table/stat update the worker queued.
        private void finishSearch(boolean logFailed) {
            crawling = false;
            searchActive = false;
            logFileWriter = null;

            crawlingLabel2.setText("Done");
            setSearchControlsEnabled(true);
            searchButton.setText("Search");
            setCursor(Cursor.getDefaultCursor());

            if (logFailed) {
                showError("Unable to write matches log file.");
            }
            // Show message if search string not found.
            if (table.getRowCount() == 0) {
                JOptionPane.showMessageDialog(SearchCrawler.this,
                        "Your Search String was not found. Please try another.",
                        "Search String Not Found",
                        JOptionPane.WARNING_MESSAGE);
            }
        }

        // Enables or disables every input control of the search form.
        private void setSearchControlsEnabled(boolean enabled) {
            startTextField.setEnabled(enabled);
            maxComboBox.setEnabled(enabled);
            limitCheckBox.setEnabled(enabled);
            logTextField.setEnabled(enabled);
            searchTextField.setEnabled(enabled);
            caseCheckBox.setEnabled(enabled);
        }

        // Show dialog box with error message.
        private void showError(String message) {
            JOptionPane.showMessageDialog(this, message, "Error",
                    JOptionPane.ERROR_MESSAGE);
        }

        // Update crawling stats; safe to call from any thread.
        private void updateStats(final String currentUrl, final int crawled,
                                 final int toCrawl, final int maxUrls) {
            runOnEdt(new Runnable() {
                @Override
                public void run() {
                    crawlingLabel2.setText(currentUrl);
                    crawledLabel2.setText(String.valueOf(crawled));
                    toCrawlLabel2.setText(String.valueOf(toCrawl));
                    // Without a limit the total is whatever is known so far.
                    progressBar.setMaximum(
                            maxUrls == NO_LIMIT ? crawled + toCrawl : maxUrls);
                    progressBar.setValue(crawled);
                    matchesLabel2.setText(String.valueOf(table.getRowCount()));
                }
            });
        }

        // Add match to matches table and log file; safe to call from any thread.
        private void addMatch(final String url) {
            runOnEdt(new Runnable() {
                @Override
                public void run() {
                    ((DefaultTableModel) table.getModel())
                            .addRow(new Object[]{url});
                }
            });
            // Write failures are reported once, when the search finishes.
            PrintWriter writer = logFileWriter;
            if (writer != null) {
                writer.println(url);
            }
        }

        /**
         * Verifies the format of a URL.
         *
         * @param url candidate URL text; may be null
         * @return the parsed URL, or null when the text does not start with
         *         {@code http://} (compared case-insensitively) or is malformed
         */
        static URL verifyUrl(String url) {
            // Only allow HTTP URLs.
            if (url == null || !url.regionMatches(true, 0, "http://", 0, 7)) {
                return null;
            }
            try {
                return new URL(url);
            } catch (MalformedURLException e) {
                return null;
            }
        }

        /**
         * Extracts the path of a robots.txt {@code Disallow:} line.
         *
         * @param line one line of a robots.txt file
         * @return the disallowed path prefix with any trailing {@code #} comment
         *         removed, or null when the line is not a Disallow line or its
         *         path is empty (an empty Disallow forbids nothing)
         */
        static String parseDisallowPath(String line) {
            String trimmed = line.trim();
            if (!trimmed.regionMatches(true, 0, DISALLOW, 0, DISALLOW.length())) {
                return null;
            }
            String path = trimmed.substring(DISALLOW.length());
            int commentIndex = path.indexOf('#');
            if (commentIndex != -1) {
                path = path.substring(0, commentIndex);
            }
            path = path.trim();
            return path.isEmpty() ? null : path;
        }

        // Check if robot is allowed to access the given URL. Disallow lines of
        // every User-agent section are honoured, which errs on the polite side.
        private boolean isRobotAllowed(URL urlToCheck) {
            String host = urlToCheck.getHost().toLowerCase(Locale.ROOT);

            // Retrieve host's disallow list from cache, downloading it if needed.
            ArrayList<String> disallowList = disallowListCache.get(host);
            if (disallowList == null) {
                disallowList = new ArrayList<>();
                try (BufferedReader reader =
                             new BufferedReader(new InputStreamReader(
                                     new URL("http://" + host + "/robots.txt")
                                             .openStream()))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        String disallowPath = parseDisallowPath(line);
                        if (disallowPath != null) {
                            disallowList.add(disallowPath);
                        }
                    }
                } catch (FileNotFoundException e) {
                    // The host has no robots file: nothing is disallowed. Cache
                    // that answer so the file is not requested for every URL.
                    disallowList.clear();
                } catch (IOException | RuntimeException e) {
                    // Possibly transient failure: assume the robot is allowed
                    // for now and try again on the next URL of this host.
                    return true;
                }
                disallowListCache.put(host, disallowList);
            }

            // The root page has an empty file part; robots paths start with "/".
            String file = urlToCheck.getFile();
            if (file.isEmpty()) {
                file = "/";
            }
            for (String disallow : disallowList) {
                if (file.startsWith(disallow)) {
                    return false;
                }
            }
            return true;
        }

        // Download page at given URL; returns null when it cannot be read.
        // NOTE(review): bytes are decoded with the platform default charset, as
        // before; the page's declared encoding is not inspected.
        private static String downloadPage(URL pageUrl) {
            try (BufferedReader reader =
                         new BufferedReader(new InputStreamReader(
                                 pageUrl.openStream()))) {
                StringBuilder pageBuffer = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    // Keep the line break so words and tags on adjacent lines
                    // are not glued together.
                    pageBuffer.append(line).append('\n');
                }
                return pageBuffer.toString();
            } catch (IOException | RuntimeException e) {
                // Best effort: one unreadable page must not abort the crawl.
                return null;
            }
        }

        /**
         * Removes a leading "www." from a URL's host if present.
         *
         * @param url an absolute URL
         * @return the URL without the "www." that directly follows the first
         *         "://", or the URL unchanged when there is none
         */
        static String removeWwwFromUrl(String url) {
            int schemeEnd = url.indexOf("://");
            // Look only right after the scheme so a URL embedded in the query
            // string is left alone.
            if (schemeEnd != -1 && url.regionMatches(true, schemeEnd + 3, "www.", 0, 4)) {
                return url.substring(0, schemeEnd + 3) +
                        url.substring(schemeEnd + 7);
            }
            return url;
        }

        /**
         * Parses page contents and retrieves the links worth crawling.
         *
         * @param pageUrl      URL the contents were downloaded from; relative
         *                     links are resolved against it
         * @param pageContents HTML text of the page
         * @param crawledList  URLs already crawled; these are left out
         * @param limitHost    when true, only links on pageUrl's host are kept
         * @return normalized {@code http://} links in document order (may hold
         *         duplicates)
         */
        static ArrayList<String> retrieveLinks(
                URL pageUrl, String pageContents, Set<String> crawledList,
                boolean limitHost) {
            ArrayList<String> linkList = new ArrayList<>();
            Matcher m = LINK_PATTERN.matcher(pageContents);
            while (m.find()) {
                String link = normalizeLink(pageUrl, m.group(1).trim());
                if (link == null) {
                    continue;
                }
                // Verify link and skip if invalid (this drops non-HTTP schemes).
                URL verifiedLink = verifyUrl(link);
                if (verifiedLink == null) {
                    continue;
                }
                // If specified, limit links to the host of the current page.
                if (limitHost &&
                        !pageUrl.getHost().equalsIgnoreCase(
                                verifiedLink.getHost())) {
                    continue;
                }
                // Skip link if it has already been crawled.
                if (crawledList.contains(link)) {
                    continue;
                }
                linkList.add(link);
            }
            return linkList;
        }

        // Turns a raw href value into an absolute URL string without fragment
        // and without leading "www."; returns null for links that are not pages.
        private static String normalizeLink(URL pageUrl, String link) {
            // Skip empty links and links that are just page anchors.
            if (link.isEmpty() || link.charAt(0) == '#') {
                return null;
            }
            // Skip mailto and JavaScript pseudo-links (scheme match only, so a
            // page such as "/javascript-guide.html" is still followed).
            String lower = link.toLowerCase(Locale.ROOT);
            if (lower.startsWith("mailto:") || lower.startsWith("javascript:")) {
                return null;
            }
            // Resolve against the page URL; absolute links pass through as-is.
            String absolute;
            try {
                absolute = new URL(pageUrl, link).toString();
            } catch (MalformedURLException e) {
                return null;
            }
            // Remove anchors from link.
            int index = absolute.indexOf('#');
            if (index != -1) {
                absolute = absolute.substring(0, index);
            }
            return removeWwwFromUrl(absolute);
        }

        /**
         * Determines whether every whitespace-separated term of the search
         * string occurs in the page contents.
         *
         * @param pageContents  text to search
         * @param searchString  one or more terms separated by whitespace
         * @param caseSensitive when false, both sides are lower-cased first
         * @return true when all terms are found
         */
        static boolean searchStringMatches(
                String pageContents, String searchString,
                boolean caseSensitive) {
            // For a case-insensitive search compare lower-cased text.
            String searchContents = caseSensitive
                    ? pageContents
                    : pageContents.toLowerCase(Locale.ROOT);
            for (String term : WHITESPACE.split(searchString)) {
                String needle =
                        caseSensitive ? term : term.toLowerCase(Locale.ROOT);
                if (!searchContents.contains(needle)) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Performs the actual crawling, searching for the search string.
         *
         * <p>URLs are visited in the order they were discovered. The loop runs
         * only while the internal crawling flag is set, which the Search button
         * does before starting the worker and the Stop button clears.
         *
         * @param startUrl      first URL to visit
         * @param maxUrls       maximum number of pages to crawl, or -1 for no limit
         * @param limitHost     when true, only links on the linking page's host
         *                      are followed
         * @param searchString  whitespace-separated terms a page must contain
         * @param caseSensitive whether terms are matched case-sensitively
         */
        public void crawl(
                String startUrl, int maxUrls, boolean limitHost,
                String searchString, boolean caseSensitive) {
            // Set up crawl lists; the LinkedHashSet is a duplicate-free queue.
            Set<String> crawledList = new HashSet<>();
            LinkedHashSet<String> toCrawlList = new LinkedHashSet<>();
            toCrawlList.add(startUrl);

            while (crawling && !toCrawlList.isEmpty()) {
                // Stop once the max URL count has been reached, if specified.
                if (maxUrls != NO_LIMIT && crawledList.size() >= maxUrls) {
                    break;
                }

                // Take the oldest URL off the To Crawl list.
                Iterator<String> pending = toCrawlList.iterator();
                String url = pending.next();
                pending.remove();

                // Skip URLs that are invalid or closed to robots.
                URL verifiedUrl = verifyUrl(url);
                if (verifiedUrl == null || !isRobotAllowed(verifiedUrl)) {
                    continue;
                }

                updateStats(url, crawledList.size(), toCrawlList.size(),
                        maxUrls);
                crawledList.add(url);

                // If the page was downloaded successfully, queue its links and
                // then see if it contains the search string.
                String pageContents = downloadPage(verifiedUrl);
                if (pageContents != null && !pageContents.isEmpty()) {
                    toCrawlList.addAll(
                            retrieveLinks(verifiedUrl, pageContents,
                                    crawledList, limitHost));
                    if (searchStringMatches(pageContents, searchString,
                            caseSensitive)) {
                        addMatch(url);
                    }
                }

                updateStats(url, crawledList.size(), toCrawlList.size(),
                        maxUrls);
            }
        }

        /** Runs the Search Crawler; the window is created on the event dispatch thread. */
        public static void main(String[] args) {
            SwingUtilities.invokeLater(new Runnable() {
                @Override
                public void run() {
                    new SearchCrawler().setVisible(true);
                }
            });
        }
    }
  • 相关阅读:
    krakend 扩展开发概述
    varnish/api-gateway-benchmarks api gateway 性能压测工具
    krakend 加速配置工具
    krakend 支持发布订阅后端
    krakend 消费amqp mq 消息
    krakend cache 后端请求
    krakend 请求流量镜像的处理
    krakend 请求&&相应的检查
    krakend 的静态proxy
    krakend 的串行proxy
  • 原文地址:https://www.cnblogs.com/xkxf/p/6881529.html
Copyright © 2011-2022 走看看