Result文件数据说明:
Ip:106.39.41.166,(城市)
Date:10/Nov/2016:00:01:02 +0800,(日期)
Day:10,(天数)
Traffic: 54 ,(流量)
Type: video,(类型:视频video或文章article)
Id: 8701(视频或者文章的id)
测试要求:
1、 数据清洗:按照以下要求进行数据清洗,并将清洗后的数据导入MongoDB数据库中。
两阶段数据清洗:
(1)第一阶段:把需要的信息从原始日志中提取出来
ip: 199.30.25.88
time: 10/Nov/2016:00:01:03 +0800
traffic: 62
文章: article/11325
视频: video/3235
(2)第二阶段:根据提取出来的信息做精细化操作
ip--->城市 city(IP)
date--> time:2016-11-10 00:01:03
day: 10
traffic:62
type:article/video
id:11325
(3)MongoDB数据库表结构:
create table data( ip string, time string , day string, traffic bigint,type string, id string )
清洗数据代码:
package mongotest3;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
/**
 * Second-stage cleaner for the access-log extract.
 *
 * <p>Reads the input file line by line, splits every line into six
 * comma-separated fields (by position: ip, time, day, traffic, type, id),
 * rewrites the time field from {@code dd/Mon/yyyy:HH:mm:ss +zzzz} to
 * {@code yyyy-MM-dd HH:mm:ss}, strips surrounding whitespace from the traffic
 * field, echoes the cleaned row to standard output (tab separated) and writes
 * it to the output file (comma separated, CRLF terminated, UTF-8).
 *
 * <p>The ip field is passed through unchanged; no ip-to-city lookup is
 * performed here.
 */
public class CleanData {

    /** Number of comma-separated fields in every input and output row. */
    private static final int FIELD_COUNT = 6;

    /** File holding the rows to be cleaned. */
    private static final String INPUT_PATH =
            "D:\\java\\eclipse-workplace\\mongotest3\\src\\mongotest3\\result.txt";

    /** File receiving the cleaned rows. */
    private static final String OUTPUT_PATH = "D:\\result2.txt";

    /**
     * Cleans every row of the input file and writes the result to the output file.
     *
     * <p>Blank lines are ignored. Lines that do not have the expected shape are
     * reported on standard error (with their 1-based line number) and skipped,
     * so a single bad line does not abort the whole run.
     *
     * @param args unused
     * @throws FileNotFoundException if the input file cannot be opened or the
     *         output file cannot be created
     * @throws UnsupportedEncodingException if the UTF-8 encoding is unavailable
     */
    public static void main(String[] args) throws FileNotFoundException, UnsupportedEncodingException {
        File outFile = new File(OUTPUT_PATH);
        // try-with-resources guarantees the writer is flushed and both files are
        // closed; without it the buffered tail of the output is silently lost.
        try (BufferedReader br = new BufferedReader(new FileReader(INPUT_PATH));
                BufferedWriter bw = new BufferedWriter(
                        new OutputStreamWriter(new FileOutputStream(outFile), "utf-8"))) {
            String row;
            int hang = 0; // 1-based number of the line currently being processed
            while ((row = br.readLine()) != null) {
                hang++;
                if (row.trim().isEmpty()) {
                    continue;
                }

                String[] data;
                try {
                    data = chage(change(row));
                } catch (IllegalArgumentException e) {
                    System.err.println("Skipping malformed line " + hang + ": " + e.getMessage());
                    continue;
                }

                StringBuilder console = new StringBuilder();
                StringBuilder csv = new StringBuilder();
                for (int i = 0; i < data.length; i++) {
                    console.append(data[i]).append("\t");
                    if (i > 0) {
                        csv.append(",");
                    }
                    csv.append(data[i]);
                }
                System.out.println(console);
                bw.write(csv + "\r\n");
            }
        } catch (FileNotFoundException | UnsupportedEncodingException e) {
            // Failures while opening the files keep propagating to the caller,
            // exactly as the method signature promises.
            throw e;
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Normalizes the time and traffic fields of a split row in place.
     *
     * <p>{@code data[1]} is converted from {@code dd/Mon/yyyy:HH:mm:ss +zzzz}
     * (the zone part is optional and is dropped) to {@code yyyy-MM-dd HH:mm:ss}.
     * An unrecognized month name is kept as it is. {@code data[3]} has its
     * leading and trailing whitespace removed. All other fields are untouched.
     *
     * @param data the six fields of one row; modified in place
     * @return the same array instance, for call chaining
     * @throws IllegalArgumentException if the array is missing fields or the
     *         time field does not split into 6 or 7 parts
     */
    private static String[] chage(String[] data) {
        if (data == null || data.length < FIELD_COUNT || data[1] == null || data[3] == null) {
            throw new IllegalArgumentException("row must provide " + FIELD_COUNT + " fields");
        }

        // "10/Nov/2016:00:01:02 +0800" -> [10, Nov, 2016, 00, 01, 02, +0800]
        String[] time = data[1].split("[/: ]", -1);
        if (time.length < 6 || time.length > 7) {
            throw new IllegalArgumentException("unexpected time format: " + data[1]);
        }
        data[1] = time[2] + "-" + monthNumber(time[1]) + "-" + time[0]
                + " " + time[3] + ":" + time[4] + ":" + time[5];

        // Only whitespace is removed, so a value without padding keeps every digit.
        data[3] = data[3].trim();
        return data;
    }

    /**
     * Maps a three-letter English month abbreviation to its two-digit number.
     *
     * @param month abbreviation such as {@code "Nov"}
     * @return the two-digit month, or {@code month} itself when it is not recognized
     */
    private static String monthNumber(String month) {
        switch (month) {
            case "Jan": return "01";
            case "Feb": return "02";
            case "Mar": return "03";
            case "Apr": return "04";
            case "May": return "05";
            case "Jun": return "06";
            case "Jul": return "07";
            case "Aug": return "08";
            case "Sep": return "09";
            case "Oct": return "10";
            case "Nov": return "11";
            case "Dec": return "12";
            default: return month;
        }
    }

    /**
     * Splits one input line on commas into its six fields.
     *
     * <p>Empty fields are preserved, including trailing ones.
     *
     * @param row one line of the input file
     * @return an array of exactly six field values
     * @throws IllegalArgumentException if the line does not contain exactly six fields
     */
    private static String[] change(String row) {
        String[] data = row.split(",", -1); // -1 keeps trailing empty fields
        if (data.length != FIELD_COUNT) {
            throw new IllegalArgumentException(
                    "expected " + FIELD_COUNT + " fields but found " + data.length + ": " + row);
        }
        return data;
    }
}
运行结果: