zoukankan      html  css  js  c++  java
  • 初步分析美国科研项目申报项目

    将XML转换为新格式的XML、excel、和SQlserver数据

    package hander;
    
    /**
     * @author ZTX * @function:读取XML中字段到excel中
     * @Configuration:将文件放在 Xmldata下,或者data改为您的数据文件夹置于 Xml下
     * @time:2013年11月12日19:37:45--2013年11月12日21:48:22--2013年11月13日17:28:45--2013年11月13日21:00:45
     */
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStreamWriter;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;
    
    import org.dom4j.*;
    import org.dom4j.io.OutputFormat;
    import org.dom4j.io.SAXReader;
    import org.dom4j.io.XMLWriter;
    
    import org.apache.poi.*;
    import org.apache.poi.hssf.usermodel.HSSFCell;
    import org.apache.poi.hssf.usermodel.HSSFRow;
    import org.apache.poi.hssf.usermodel.HSSFSheet;
    import org.apache.poi.hssf.usermodel.HSSFWorkbook;
    
    public class Read { 
    	public static String inputXmlDir;//要处理的xml文件的文件夹
    	public static String data="2006";//要处理的xml文件的文件夹名称默认是data
    	public static boolean head=false;//表头是否写了
    	public static String pathPrefix="";//文件路径前缀
    	
    	public static long  recordNum=0;//记录批量
    	public static void main(String args[]){
    		PreparedStatement ppst = null ;
    		ConnectSQLserver mssq=new ConnectSQLserver(true);//连sqlserver
    		//建表
    		String reCreWord=
    				"create table Project"+
    				"     "+
    				"(                        "+
    				"AwardTitle varchar(200) null, "+
    				"Time varchar(1000) null, "+
    				"Organization varchar(200) null, "+
    				"ProgramOfficer varchar(100) null, "+
    				"AbstractNarration varchar(8000) null, "+
    				"AwardID varchar(10) null, "+
    				"PrincipalInvestigator varchar(60) null, "+
    				"EmailAddress varchar(40) null, "+
    				"Name varchar(80) null, "+
    				"CountryName varchar(60) null, "+
    				"StateName varchar(60) null, "+
    				"CityName varchar(60) null, "+
    				"ProgramReference varchar(500) null, "+
    				")                        ";
    		//插入
    		String inSQL="insert into Project(" +
    				"AwardTitle,Time,Organization," +
    				"ProgramOfficer,AbstractNarration," +
    				"AwardID,PrincipalInvestigator,EmailAddress," +
    				"Name,CountryName,StateName," +
    				"CityName,ProgramReference)values(?,?,?,?,?,?,?,?,?,?,?,?,?);";
    
    		try {
    			 mssq.sta.execute(reCreWord);
    			System.out.println("操作新表:"+inSQL);
    			ppst=mssq.conn.prepareStatement(inSQL);
    		} catch (SQLException e1) {
    			e1.printStackTrace();
    		}
    //		try {
    //			//mssq.conn.setAutoCommit(false);
    //		} catch (SQLException e1) {
    //			e1.printStackTrace();
    //		}
    		
    		long start,end=0;
    		//创建空表
    		Document creDocument = DocumentHelper.createDocument();    
    		//添加空字段
    		//项目标题
    		Element root= creDocument.addElement("root");
    		Element element[ ] = new Element[13];
    		element[0] =  root.addElement("AwardTitle"); 
    		element[1] = root.addElement("Time"); 
    		element[2]=  root.addElement("Organization"); 
    		element[3] =  root.addElement("ProgramOfficer"); 
    		element[4]=  root.addElement("AbstractNarration"); 
    		element[5]=  root.addElement("AwardID"); 
    		element[6]=  root.addElement("PrincipalInvestigator"); 
    		element[7]=  root.addElement("EmailAddress"); 
    		element[8] = root.addElement("Name"); 
    		element[9]=  root.addElement("CountryName"); 
    		element[10] = root.addElement("StateName"); 
    		element[11]=  root.addElement("CityName"); 
    		element[12]=  root.addElement("ProgramReference");
    		
            SAXReader saxReader = new SAXReader();   
    		//得到待处理的文件夹
    			//前缀
            File getPrefix = new File("");// 空文件夹 用来得到 文件路径前缀
            pathPrefix=getPrefix.getAbsolutePath();//得到 文件路径前缀
            	//组合
            inputXmlDir=File.separator + data;
            inputXmlDir=pathPrefix+inputXmlDir;//文件夹绝对路径
            System.out.println(inputXmlDir);
            File file = new File(data);
            //遍历文件夹下的每个文件
    	    if(file.isDirectory()){ 
    	        File[] files = file.listFiles();
    	        start=System.currentTimeMillis();
    	        Document document;
    	        for(int i = 0; i < files.length; i++){
    	        	 System.out.println("第"+(i+1)+"篇");
    	            try {
    	            	document = saxReader.read(inputXmlDir+File.separator + files[i].getName());
    					//拿出一个xml去处理
    	        			try {
    							String record[]=Handle( document,creDocument ,element);
    							//System.out.println("record[0]:"+record[0].toString());
     							for( int k=0;k<record.length;k++ ){
     								 ppst.setString(k+1, record[k]);
     								//System.out.println(k);
     								// System.out.println(record[k]);
     							}
    							ppst.execute();
    	        			} catch (SQLException e) {
    							e.printStackTrace();
    						}
    	            } catch (DocumentException e) {
    	            	e.printStackTrace();
    	            }
    	        }
    	        end=System.currentTimeMillis()-start;
    	        System.out.println("所用时间:"+end/1000+"s");
    	     }
    	    //输出xml结果文件
    	    try{
    	        XMLWriter output = new XMLWriter(
    	            new FileWriter( new File(inputXmlDir+File.separator+"result.xml") ));
    	            output.write( creDocument );
    	            output.close();
    	    }
    	    catch(IOException e){
    	    	System.out.println(e.getMessage());
    	    }
    	}
     
    	//处理一个xml 
    	public static String[] Handle(Document document, Document creDocument, Element[] element) throws SQLException{
    		String [] record=new String [13];
    		
    		//解析
    		Element root=document.getRootElement();    // 获取根节点
            Iterator Award = root.elementIterator("Award"); //根节点#Award
            // 遍历Award节点:
            while (Award.hasNext()) {
                Element AwardEle = (Element) Award.next();
                //项目标题
                String AwardTitle = AwardEle.elementTextTrim("AwardTitle"); //  Award#AwardTitle 
                  //添加到xml中
                Element Title=element[0].addElement("Title");//AwardTitle的子标签
                Title.setText(AwardTitle);
                record[0]=AwardTitle;
                //项目时间
                String Time="";
                String AwardEffectiveDate=AwardEle.elementTextTrim("AwardEffectiveDate"); //  Award#AwardEffectiveDate 
                String AwardExpirationDate=AwardEle.elementTextTrim("AwardExpirationDate"); //  Award#AwardExpirationDate 
                Time=AwardEffectiveDate +"-"+AwardExpirationDate;
                
                Element time=element[1].addElement("time"); 
                time.setText(Time);
                record[1]=Time;
                //组织
                String org="";
                Iterator Organization = AwardEle.elementIterator("Organization");//Award#Organization
                	// 遍历Organization节点:
                while (Organization.hasNext()) {
                    Element OrganizationEle = (Element) Organization.next();
                    //组织合成
                    org= getChild(OrganizationEle,"Division","LongName")+">"+getChild(OrganizationEle,"Directorate","LongName");
                }
                
                Element organization=element[2].addElement("organization"); 
                organization.setText(org);
                record[2]=org;
               //项目主任
               String  proOffice="";
               proOffice=getChild(AwardEle,"ProgramOfficer","SignBlockName");
               
               Element officers=element[3].addElement("officers"); 
               officers.setText(proOffice);
               record[3]=proOffice;
               //摘要叙述
               String  abs="";
               abs=getChild(AwardEle,"","AbstractNarration");
               
               Element abstracts=element[4].addElement("abstracts"); 
               abstracts.setText(abs);
               record[4]=abs;
               //项目编号
               String  Id="";
               Id=getChild(AwardEle,"","AwardID");
               
               Element id=element[5].addElement("id"); 
               id.setText(Id);
               record[5]=Id;
               //主要研究者
               String  Inv="";
               Inv=getChild(AwardEle,"Investigator","FirstName")+" "+getChild(AwardEle,"Investigator","LastName");
               if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
                   Element investigator=element[6].addElement("investigator"); 
                   investigator.setText(Inv);
                   record[6]=Inv;
               }
               //主要研究者邮箱
               String  Em="";
               Em=getChild(AwardEle,"Investigator","EmailAddress");
               if(getChild(AwardEle,"Investigator","RoleCode").equals("Principal Investigator")){
            	   Element email=element[7].addElement("email"); 
                   email.setText(Em);
                   record[7]=Em;
               }
               //机构名称
               String  Name="";
               Name=getChild(AwardEle,"Institution","Name");
               
               Element name=element[8].addElement("name"); 
               name.setText(Name);
               record[8]=Name;
               //机构国家
               String  Nation="";
               Nation=getChild(AwardEle,"Institution","CountryName");
              
               Element nation=element[9].addElement("nation"); 
               nation.setText(Nation);
               record[9]=Nation;
               //机构州
               String  State="";
               State=getChild(AwardEle,"Institution","StateName");
              
               Element state=element[10].addElement("state"); 
               state.setText(State);
               record[10]=State;
               //机构城市
               String  City="";
               City=getChild(AwardEle,"Institution","CityName");
              
               Element city=element[11].addElement("city"); 
               city.setText(City);
               record[11]=City;
               //研究主题
               String  Ref="";
              //(2)子节点的子元素
       		   Iterator iters = AwardEle.elementIterator("ProgramReference");
       		  //遍历mom父节点
       		   while (iters.hasNext()){
       		       Element itemEle = (Element) iters.next();
       		    Ref+=itemEle.elementTextTrim("Text")+" ;"; // 拿到head下的子节点script下的字节点username的值
       		   }
       		 Element reference=element[12].addElement("reference"); 
       		 reference.setText(Ref);
       		record[12]=Ref;
            } 
           // ppst.addBatch();
           // ppst.executeBatch();
           // ppst.execute();
    		recordNum++;
            if(!head){//写表头
            	String heads[]=new String [13];
            	heads[0]="项目标题";
            	heads[1]="项目时间";
            	heads[2]="项目主任";
            	heads[3]="组织";
            	heads[4]="摘要叙述";
            	heads[5]="项目编号";
            	heads[6]="主要研究者";
            	heads[7]="主要研究者邮箱";
            	heads[8]="机构名称";
            	heads[9]="机构国家";
            	heads[10]="机构州";
            	heads[11]="机构城市";
            	heads[12]="研究主题";
            	//writeResult2Excel(heads);
            	head=true;
            }
           
            //writeResult2Excel(record);//数据量大就没用
            return record;
    	}
    	//写入excel一行
    	public static void writeResult2Excel(String [] record) {
    		String DB=pathPrefix+File.separator+"data/results.xls";	
    		HSSFWorkbook wb = getDBwb(DB);//据已有的DB 建立新HSSFWorkbook对象  
    		HSSFSheet sheet = wb.getSheet("sheet1");//得到新的sheet对象	
    		int rowNum=sheet.getLastRowNum();//获得总行数
    		int coloumNum=13;//总列数
    		 HSSFRow rowTo =null;
    		 HSSFCell cellTo =null;
    			 rowTo = sheet.createRow(rowNum+1);
    			 for(int j=0;j<coloumNum;j++){//列循环
    				 cellTo=rowTo.createCell(j);
    				 cellTo.setCellValue(record[j]);
    			 } 
    		//要把结果写到文件中去
    		writeBack2Excel(DB,wb);
    	}//writeBack
    	public static HSSFWorkbook getDBwb(String getDB) {
    		HSSFWorkbook workbook=null;
    		try {
    			  FileInputStream fis=new FileInputStream(getDB);
    		       workbook=new HSSFWorkbook(fis); 
    		} catch (IOException e) {
    			e.printStackTrace();
    		}
    		return workbook;
    	}
    	public static void writeBack2Excel(String DBurl,HSSFWorkbook wb) {
    		 FileOutputStream fileOut;
    		try {
    			fileOut = new FileOutputStream(DBurl);		
    			wb.write(fileOut);//把Workbook对象输出到文件中  
    			fileOut.flush();              
    			fileOut.close(); 
    		} catch (FileNotFoundException e) {
    			e.printStackTrace();
    		} catch (IOException e) {
    			e.printStackTrace();
    		}  
    	}
    	private static String getChild(Element AwardEle,String mom,String kid) {
    		String target="";
    		//(1)直接输出的是根节点的子元素
    		if(mom.equals("")){
    		    target= AwardEle.elementTextTrim(kid); 
    		}
    		//(2)子节点的子元素
    		Iterator iters = AwardEle.elementIterator(mom);
    		//遍历mom父节点
    		   while (iters.hasNext()){
    		       Element itemEle = (Element) iters.next();
    		       target= itemEle.elementTextTrim(kid); // 拿到head下的子节点script下的字节点username的值
    		   }
    		return target;
    	}
    }


     

  • 相关阅读:
    添加活动记录的小坑
    用windows的批处理文件批量更改文件后缀
    js日期的初始化的格式
    对象的继承
    关于换行字符的问题
    js获取dom对象style样式的值
    判断邮箱是否合法
    Python控制函数运行时间
    如何用python编写一个计时器的程序
    TF-IDF算法介绍及实现
  • 原文地址:https://www.cnblogs.com/pangblog/p/3424258.html
Copyright © 2011-2022 走看看