zoukankan      html  css  js  c++  java
  • JavaScript之从浏览器一键获取教务处个人课程信息【插件】

    博主的个人网站(http://www.johnnyzen.cn/)每学期都需要更新所展示的课程静态信息。由于课程数量多,手动抓取又十分繁琐,一直很想把这件事自动化。今天终于有点时间,便把之前写 Node.js 爬虫的思路迁移到了前端 JS 上,同时更新了抓取数据的算法,相比之前的实现更加灵活高效。

    声明:如读者需引用,必须在文章显著处声明或者与博主取得联系,以示尊重劳动成果,非常感谢 0.0

    /**
     * Scrapes a timetable rendered as an HTML table into plain course objects.
     *
     * Every element matched by the selector is read through `innerText`. The
     * text of one cell is split on "\n"; consecutive non-separator lines form a
     * "raw unit". Only raw units of 4 or 6 lines are parsed, reading
     *   [0] course name, [1] schedule text, [2] teacher, [3] location
     * (for 6-line units the two extra lines are ignored).
     *
     * The schedule text is expected to look like "周一第1,2节{第1-16周}".
     *
     * @param {string} seletorForTds CSS selector matching the timetable cells.
     */
    var Course = function(seletorForTds){
        // Cells whose innerText is not longer than this are skipped
        // (presumably headers, time-slot labels and blank slots).
        var MIN_CELL_TEXT_LENGTH = 20;

        // These are regex literals on purpose: inside a string handed to
        // `new RegExp`, "\d" collapses to a plain "d" and never matches digits.
        var WEEKDAY_PATTERN = /周([一二三四五六七日天])/;
        var PERIODS_PATTERN = /第\d*[,\d*]*节/;
        var WEEKS_PATTERN = /第\d*[-]*[\d*]*周/;

        // Weekday character -> 1..7 (Sunday, written 七 / 日 / 天, is 7).
        var WEEKDAY_NUMBERS = {
            "一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6,
            "七": 7, "日": 7, "天": 7
        };

        /**
         * Tells whether a line ends the current raw unit: an empty line, or a
         * line containing a "(换" / "(调" marker. The full-width parenthesis is
         * accepted as well because the page may render either form.
         */
        function isSeparatorLine(line){
            return line === "" || /[(（][换调]/.test(line);
        }

        /**
         * Splits one cell into raw units (arrays of consecutive non-separator
         * lines). Empty runs are never emitted.
         *
         * @param {{innerText: string}} cell
         * @returns {string[][]}
         */
        function splitCellIntoRawUnits(cell){
            var lines = cell.innerText.split("\n");
            var units = [];
            var current = [];

            for(var i = 0; i < lines.length; i++){
                if(isSeparatorLine(lines[i])){
                    if(current.length > 0){
                        units.push(current);
                    }
                    current = [];
                } else {
                    current.push(lines[i]);
                }
            }
            if(current.length > 0){
                units.push(current);
            }
            return units;
        }

        /** Splits `text` on `separator` and parses every part as a base-10 integer. */
        function parseIntList(text, separator){
            return text.split(separator).map(function(part){
                return parseInt(part, 10);
            });
        }

        /**
         * Builds a course object from a raw unit.
         *
         * A schedule line without a recognisable period list or week range is
         * reported through console.log and the corresponding fields are simply
         * left unset, so one odd cell does not abort the whole scrape.
         *
         * @param {string[]} unit raw unit of 4 or 6 lines
         * @returns {{name: string, week_index: (number|undefined),
         *            course_index: (number[]|undefined),
         *            week_start: (number|undefined), week_end: (number|undefined),
         *            teacher: string, location: string}}
         */
        function parseRawUnit(unit){
            var schedule = unit[1];
            var course = {};

            course.name = unit[0];

            var weekday = WEEKDAY_PATTERN.exec(schedule);
            course.week_index = weekday ? WEEKDAY_NUMBERS[weekday[1]] : undefined;

            var periods = PERIODS_PATTERN.exec(schedule);
            if(periods){
                course.course_index = parseIntList(periods[0].replace("第","").replace("节",""), ",");
            } else {
                console.log('[Course] no period list found in:', schedule);
            }

            var weeks = WEEKS_PATTERN.exec(schedule);
            if(weeks){
                var range = parseIntList(weeks[0].replace("第","").replace("周",""), "-");
                course.week_start = range[0];
                course.week_end = range[1]; // undefined when only one week is given
            } else {
                console.log('[Course] no week range found in:', schedule);
            }

            course.teacher = unit[2];
            course.location = unit[3];
            return course;
        }

        /**
         * Queries the document with the selector given to the constructor and
         * parses every sufficiently long cell.
         *
         * @returns {Object[]} parsed courses, in document order
         */
        this.load = function(){
            var cells = document.querySelectorAll(seletorForTds);
            var courses = [];

            for(var i = 0; i < cells.length; i++){
                if(cells[i].innerText.length <= MIN_CELL_TEXT_LENGTH){
                    continue;
                }
                var units = splitCellIntoRawUnits(cells[i]);
                for(var j = 0; j < units.length; j++){
                    // Only 4- and 6-line units are recognised as courses.
                    if(units[j].length === 4 || units[j].length === 6){
                        courses.push(parseRawUnit(units[j]));
                    }
                }
            }
            return courses;
        };
    };
    
    /**
     * Reads a student's profile and timetable from the current document.
     *
     * @param {Object} seletorOption CSS selectors, one per field:
     *   `sno`, `sname`, `college`, `profession`, `clazz` (each selecting an
     *   element whose text is "<label>:<value>") and `courseTable` (handed to
     *   `Course` to select the timetable cells).
     */
    var Student = function(seletorOption){
        var that = this;

        // Label prefixes removed from the element text. Both the ASCII and the
        // full-width colon are accepted because the page may render either.
        var LABELS = {
            sno: /学号[:：]/gi,
            sname: /姓名[:：]/gi,
            college: /学院[:：]/gi,
            profession: /专业[:：]/gi,
            clazz: /行政班[:：]/gi
        };

        /**
         * Returns the text of the first element matching the selector stored
         * under `key`, without its label prefix and surrounding whitespace.
         *
         * @throws {TypeError} when the selector matches no element
         */
        function readField(key){
            var element = document.querySelectorAll(seletorOption[key])[0];
            if(element == undefined){
                throw new TypeError('Student: no element matches selector "' + seletorOption[key] + '" for field "' + key + '"');
            }
            // Trim after stripping the label so whitespace between the label
            // and the value is removed as well.
            return element.innerText.replace(LABELS[key], "").trim();
        }

        /**
         * @returns {{sno: string, sname: string, college: string,
         *            profession: string, clazz: string, courses: Object[]}}
         */
        that.load = function(){
            return {
                sno : readField("sno"),
                sname : readField("sname"),
                college : readField("college"),
                profession : readField("profession"),
                clazz : readField("clazz"),
                courses: (new Course(seletorOption.courseTable)).load()
            };
        };

        /** @returns {string} JSON text of a fresh `load()` result. */
        that.stringify = function(){
            return JSON.stringify(that.load());
        };
    };
    
    
    // Demo: the selectors below are the ones this script uses for the target
    // page; each profile field lives in its own label element and the
    // timetable cells are the <td>s of #Table1.
    var seletorOption = {
        "sno": "#Label5",
        "sname": "#Label6",
        "college": "#Label7",
        "profession": "#Label8",
        "clazz": "#Label9",
        "courseTable": "#Table1 td"
    };
    // Nothing is read from the page until student.load() / student.stringify() is called.
    var student = new Student(seletorOption);

  • 相关阅读:
    电商数据分析模型
    电商系统中的商品模型的分析与设计
    java枚举使用详解
    运用四色建模法进行领域分析
    UML,Powerdesigner 使用方法
    powerdsigner Association Multiplicity
    UML的类图、时序图表示方法以及惯例
    面向对象分析与设计—四色原型模式(彩色建模、领域无关模型)(概念版)
    知识读书笔记
    知识型读书笔记
  • 原文地址:https://www.cnblogs.com/johnnyzen/p/7980969.html
Copyright © 2011-2022 走看看