Hadoop基础-Apache Avro串行化与反串行化
作者:尹正杰
版权声明:原创作品,谢绝转载!否则将追究法律责任。
一.Apache Avro简介
1>.Apache Avro的来源
Apache Avro 是一个语言中立的数据序列化系统,它是由Hadoop之父Doug Cutting开发的。因为Hadoop的Writable串行化只支持Java语言,即非跨语言,所以Doug Cutting开发了Avro。它的数据格式与具体编程语言无关,也就是说它是跨语言的。
2>.Avro特点
Apache Avro™是一个数据序列化系统。它有以下特点:
第一:丰富的数据结构。
第二:紧凑,快速的二进制数据格式。
第三:一个容器文件,用于存储持久性数据。
第四:远程过程调用(RPC)。
第五:与动态语言的简单集成。读取或写入数据文件不需要代码生成,也不需要使用或实现RPC协议。代码生成是一种可选的优化,只有静态类型语言才值得实现。
想要了解更多Avro,可以参考Apache官网(http://avro.apache.org/docs/1.8.2/),我就懒得搬运了,直接上干货,本篇博客介绍了两种Avro序列化与反序列化的方式。
3>.安装Avro
下载Avro:http://mirror.bit.edu.cn/apache/avro/avro-1.8.2/
二.Avro环境准备
其实部署Avro的流程大致可以用下图来表示,配置起来相对来说还是蛮简单的,具体操作如下:
1>.创建emp.avsc文件,内容如下
{
  "namespace": "tutorialspoint.com",
  "type": "record",
  "name": "Emp",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "id", "type": "int"},
    {"name": "salary", "type": "int"},
    {"name": "age", "type": "int"},
    {"name": "address", "type": "string"}
  ]
}
2>.将下载的avro-1.8.2.jar和avro-tools-1.8.2.jar文件放在emp.avsc同级目录
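3>.编译schema文件(在emp.avsc所在目录执行:java -jar avro-tools-1.8.2.jar compile schema emp.avsc . ,执行后会按照namespace在当前目录生成 tutorialspoint/com/Emp.java)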
4>.查看文件内容
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{"type":"record","name":"Emp","namespace":"tutorialspoint.com","fields":[{"name":"name","type":"string"},{"name":"id","type":"int"},{"name":"salary","type":"int"},{"name":"age","type":"int"},{"name":"address","type":"string"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoder<Emp> ENCODER = 23 new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder<Emp> DECODER = 26 new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder<Emp> getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}. 37 * @param resolver a {@link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public java.lang.CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public java.lang.CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use <code>newBuilder()</code>. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(java.lang.CharSequence name, java.lang.Integer id, java.lang.Integer salary, java.lang.Integer age, java.lang.CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public java.lang.Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 
97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, java.lang.Object value$) { 99 switch (field$) { 100 case 0: name = (java.lang.CharSequence)value$; break; 101 case 1: id = (java.lang.Integer)value$; break; 102 case 2: salary = (java.lang.Integer)value$; break; 103 case 3: age = (java.lang.Integer)value$; break; 104 case 4: address = (java.lang.CharSequence)value$; break; 105 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 106 } 107 } 108 109 /** 110 * Gets the value of the 'name' field. 111 * @return The value of the 'name' field. 112 */ 113 public java.lang.CharSequence getName() { 114 return name; 115 } 116 117 /** 118 * Sets the value of the 'name' field. 119 * @param value the value to set. 120 */ 121 public void setName(java.lang.CharSequence value) { 122 this.name = value; 123 } 124 125 /** 126 * Gets the value of the 'id' field. 127 * @return The value of the 'id' field. 128 */ 129 public java.lang.Integer getId() { 130 return id; 131 } 132 133 /** 134 * Sets the value of the 'id' field. 135 * @param value the value to set. 136 */ 137 public void setId(java.lang.Integer value) { 138 this.id = value; 139 } 140 141 /** 142 * Gets the value of the 'salary' field. 143 * @return The value of the 'salary' field. 144 */ 145 public java.lang.Integer getSalary() { 146 return salary; 147 } 148 149 /** 150 * Sets the value of the 'salary' field. 151 * @param value the value to set. 152 */ 153 public void setSalary(java.lang.Integer value) { 154 this.salary = value; 155 } 156 157 /** 158 * Gets the value of the 'age' field. 159 * @return The value of the 'age' field. 160 */ 161 public java.lang.Integer getAge() { 162 return age; 163 } 164 165 /** 166 * Sets the value of the 'age' field. 167 * @param value the value to set. 168 */ 169 public void setAge(java.lang.Integer value) { 170 this.age = value; 171 } 172 173 /** 174 * Gets the value of the 'address' field. 175 * @return The value of the 'address' field. 
176 */ 177 public java.lang.CharSequence getAddress() { 178 return address; 179 } 180 181 /** 182 * Sets the value of the 'address' field. 183 * @param value the value to set. 184 */ 185 public void setAddress(java.lang.CharSequence value) { 186 this.address = value; 187 } 188 189 /** 190 * Creates a new Emp RecordBuilder. 191 * @return A new Emp RecordBuilder 192 */ 193 public static tutorialspoint.com.Emp.Builder newBuilder() { 194 return new tutorialspoint.com.Emp.Builder(); 195 } 196 197 /** 198 * Creates a new Emp RecordBuilder by copying an existing Builder. 199 * @param other The existing builder to copy. 200 * @return A new Emp RecordBuilder 201 */ 202 public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp.Builder other) { 203 return new tutorialspoint.com.Emp.Builder(other); 204 } 205 206 /** 207 * Creates a new Emp RecordBuilder by copying an existing Emp instance. 208 * @param other The existing instance to copy. 209 * @return A new Emp RecordBuilder 210 */ 211 public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp other) { 212 return new tutorialspoint.com.Emp.Builder(other); 213 } 214 215 /** 216 * RecordBuilder for Emp instances. 217 */ 218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp> 219 implements org.apache.avro.data.RecordBuilder<Emp> { 220 221 private java.lang.CharSequence name; 222 private int id; 223 private int salary; 224 private int age; 225 private java.lang.CharSequence address; 226 227 /** Creates a new Builder */ 228 private Builder() { 229 super(SCHEMA$); 230 } 231 232 /** 233 * Creates a Builder by copying an existing Builder. 234 * @param other The existing Builder to copy. 
235 */ 236 private Builder(tutorialspoint.com.Emp.Builder other) { 237 super(other); 238 if (isValidValue(fields()[0], other.name)) { 239 this.name = data().deepCopy(fields()[0].schema(), other.name); 240 fieldSetFlags()[0] = true; 241 } 242 if (isValidValue(fields()[1], other.id)) { 243 this.id = data().deepCopy(fields()[1].schema(), other.id); 244 fieldSetFlags()[1] = true; 245 } 246 if (isValidValue(fields()[2], other.salary)) { 247 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 248 fieldSetFlags()[2] = true; 249 } 250 if (isValidValue(fields()[3], other.age)) { 251 this.age = data().deepCopy(fields()[3].schema(), other.age); 252 fieldSetFlags()[3] = true; 253 } 254 if (isValidValue(fields()[4], other.address)) { 255 this.address = data().deepCopy(fields()[4].schema(), other.address); 256 fieldSetFlags()[4] = true; 257 } 258 } 259 260 /** 261 * Creates a Builder by copying an existing Emp instance 262 * @param other The existing instance to copy. 263 */ 264 private Builder(tutorialspoint.com.Emp other) { 265 super(SCHEMA$); 266 if (isValidValue(fields()[0], other.name)) { 267 this.name = data().deepCopy(fields()[0].schema(), other.name); 268 fieldSetFlags()[0] = true; 269 } 270 if (isValidValue(fields()[1], other.id)) { 271 this.id = data().deepCopy(fields()[1].schema(), other.id); 272 fieldSetFlags()[1] = true; 273 } 274 if (isValidValue(fields()[2], other.salary)) { 275 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 276 fieldSetFlags()[2] = true; 277 } 278 if (isValidValue(fields()[3], other.age)) { 279 this.age = data().deepCopy(fields()[3].schema(), other.age); 280 fieldSetFlags()[3] = true; 281 } 282 if (isValidValue(fields()[4], other.address)) { 283 this.address = data().deepCopy(fields()[4].schema(), other.address); 284 fieldSetFlags()[4] = true; 285 } 286 } 287 288 /** 289 * Gets the value of the 'name' field. 290 * @return The value. 
291 */ 292 public java.lang.CharSequence getName() { 293 return name; 294 } 295 296 /** 297 * Sets the value of the 'name' field. 298 * @param value The value of 'name'. 299 * @return This builder. 300 */ 301 public tutorialspoint.com.Emp.Builder setName(java.lang.CharSequence value) { 302 validate(fields()[0], value); 303 this.name = value; 304 fieldSetFlags()[0] = true; 305 return this; 306 } 307 308 /** 309 * Checks whether the 'name' field has been set. 310 * @return True if the 'name' field has been set, false otherwise. 311 */ 312 public boolean hasName() { 313 return fieldSetFlags()[0]; 314 } 315 316 317 /** 318 * Clears the value of the 'name' field. 319 * @return This builder. 320 */ 321 public tutorialspoint.com.Emp.Builder clearName() { 322 name = null; 323 fieldSetFlags()[0] = false; 324 return this; 325 } 326 327 /** 328 * Gets the value of the 'id' field. 329 * @return The value. 330 */ 331 public java.lang.Integer getId() { 332 return id; 333 } 334 335 /** 336 * Sets the value of the 'id' field. 337 * @param value The value of 'id'. 338 * @return This builder. 339 */ 340 public tutorialspoint.com.Emp.Builder setId(int value) { 341 validate(fields()[1], value); 342 this.id = value; 343 fieldSetFlags()[1] = true; 344 return this; 345 } 346 347 /** 348 * Checks whether the 'id' field has been set. 349 * @return True if the 'id' field has been set, false otherwise. 350 */ 351 public boolean hasId() { 352 return fieldSetFlags()[1]; 353 } 354 355 356 /** 357 * Clears the value of the 'id' field. 358 * @return This builder. 359 */ 360 public tutorialspoint.com.Emp.Builder clearId() { 361 fieldSetFlags()[1] = false; 362 return this; 363 } 364 365 /** 366 * Gets the value of the 'salary' field. 367 * @return The value. 368 */ 369 public java.lang.Integer getSalary() { 370 return salary; 371 } 372 373 /** 374 * Sets the value of the 'salary' field. 375 * @param value The value of 'salary'. 376 * @return This builder. 
377 */ 378 public tutorialspoint.com.Emp.Builder setSalary(int value) { 379 validate(fields()[2], value); 380 this.salary = value; 381 fieldSetFlags()[2] = true; 382 return this; 383 } 384 385 /** 386 * Checks whether the 'salary' field has been set. 387 * @return True if the 'salary' field has been set, false otherwise. 388 */ 389 public boolean hasSalary() { 390 return fieldSetFlags()[2]; 391 } 392 393 394 /** 395 * Clears the value of the 'salary' field. 396 * @return This builder. 397 */ 398 public tutorialspoint.com.Emp.Builder clearSalary() { 399 fieldSetFlags()[2] = false; 400 return this; 401 } 402 403 /** 404 * Gets the value of the 'age' field. 405 * @return The value. 406 */ 407 public java.lang.Integer getAge() { 408 return age; 409 } 410 411 /** 412 * Sets the value of the 'age' field. 413 * @param value The value of 'age'. 414 * @return This builder. 415 */ 416 public tutorialspoint.com.Emp.Builder setAge(int value) { 417 validate(fields()[3], value); 418 this.age = value; 419 fieldSetFlags()[3] = true; 420 return this; 421 } 422 423 /** 424 * Checks whether the 'age' field has been set. 425 * @return True if the 'age' field has been set, false otherwise. 426 */ 427 public boolean hasAge() { 428 return fieldSetFlags()[3]; 429 } 430 431 432 /** 433 * Clears the value of the 'age' field. 434 * @return This builder. 435 */ 436 public tutorialspoint.com.Emp.Builder clearAge() { 437 fieldSetFlags()[3] = false; 438 return this; 439 } 440 441 /** 442 * Gets the value of the 'address' field. 443 * @return The value. 444 */ 445 public java.lang.CharSequence getAddress() { 446 return address; 447 } 448 449 /** 450 * Sets the value of the 'address' field. 451 * @param value The value of 'address'. 452 * @return This builder. 
453 */ 454 public tutorialspoint.com.Emp.Builder setAddress(java.lang.CharSequence value) { 455 validate(fields()[4], value); 456 this.address = value; 457 fieldSetFlags()[4] = true; 458 return this; 459 } 460 461 /** 462 * Checks whether the 'address' field has been set. 463 * @return True if the 'address' field has been set, false otherwise. 464 */ 465 public boolean hasAddress() { 466 return fieldSetFlags()[4]; 467 } 468 469 470 /** 471 * Clears the value of the 'address' field. 472 * @return This builder. 473 */ 474 public tutorialspoint.com.Emp.Builder clearAddress() { 475 address = null; 476 fieldSetFlags()[4] = false; 477 return this; 478 } 479 480 @Override 481 @SuppressWarnings("unchecked") 482 public Emp build() { 483 try { 484 Emp record = new Emp(); 485 record.name = fieldSetFlags()[0] ? this.name : (java.lang.CharSequence) defaultValue(fields()[0]); 486 record.id = fieldSetFlags()[1] ? this.id : (java.lang.Integer) defaultValue(fields()[1]); 487 record.salary = fieldSetFlags()[2] ? this.salary : (java.lang.Integer) defaultValue(fields()[2]); 488 record.age = fieldSetFlags()[3] ? this.age : (java.lang.Integer) defaultValue(fields()[3]); 489 record.address = fieldSetFlags()[4] ? 
this.address : (java.lang.CharSequence) defaultValue(fields()[4]); 490 return record; 491 } catch (java.lang.Exception e) { 492 throw new org.apache.avro.AvroRuntimeException(e); 493 } 494 } 495 } 496 497 @SuppressWarnings("unchecked") 498 private static final org.apache.avro.io.DatumWriter<Emp> 499 WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$); 500 501 @Override public void writeExternal(java.io.ObjectOutput out) 502 throws java.io.IOException { 503 WRITER$.write(this, SpecificData.getEncoder(out)); 504 } 505 506 @SuppressWarnings("unchecked") 507 private static final org.apache.avro.io.DatumReader<Emp> 508 READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$); 509 510 @Override public void readExternal(java.io.ObjectInput in) 511 throws java.io.IOException { 512 READER$.read(this, SpecificData.getDecoder(in)); 513 } 514 515 }
5>.将此文件加载到idea
第一步:编辑pom.xml配置文件
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Avro demo module: Avro runtime, avro-tools and JUnit 4. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.yinzhengjie</groupId>
    <artifactId>myAvro-pb</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- The sources contain non-ASCII string literals; pin the encoding so the build
             does not depend on the platform default charset. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- The code uses generics and annotations; set an explicit language level
             instead of relying on the compiler plugin's default. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Avro runtime: SpecificRecordBase, DatumWriter/DatumReader, DataFileWriter, ... -->
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.8.2</version>
        </dependency>
        <!-- Same version as the avro-tools jar used to compile emp.avsc. -->
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro-tools</artifactId>
            <version>1.8.2</version>
        </dependency>
        <!-- JUnit 4 provides the @Test annotation used by the test class. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
    </dependencies>
</project>
第二步:新建包,名称tutorialspoint.com,并将Emp.java文件复制到包内,如有错误不建议直接修改。
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{"type":"record","name":"Emp","namespace":"tutorialspoint.com","fields":[{"name":"name","type":"string"},{"name":"id","type":"int"},{"name":"salary","type":"int"},{"name":"age","type":"int"},{"name":"address","type":"string"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoder<Emp> ENCODER = 23 new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder<Emp> DECODER = 26 new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder<Emp> getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}. 37 * @param resolver a {@link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use <code>newBuilder()</code>. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 
97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, Object value$) { 99 switch (field$) { 100 case 0: name = (CharSequence)value$; break; 101 case 1: id = (Integer)value$; break; 102 case 2: salary = (Integer)value$; break; 103 case 3: age = (Integer)value$; break; 104 case 4: address = (CharSequence)value$; break; 105 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 106 } 107 } 108 109 /** 110 * Gets the value of the 'name' field. 111 * @return The value of the 'name' field. 112 */ 113 public CharSequence getName() { 114 return name; 115 } 116 117 /** 118 * Sets the value of the 'name' field. 119 * @param value the value to set. 120 */ 121 public void setName(CharSequence value) { 122 this.name = value; 123 } 124 125 /** 126 * Gets the value of the 'id' field. 127 * @return The value of the 'id' field. 128 */ 129 public Integer getId() { 130 return id; 131 } 132 133 /** 134 * Sets the value of the 'id' field. 135 * @param value the value to set. 136 */ 137 public void setId(Integer value) { 138 this.id = value; 139 } 140 141 /** 142 * Gets the value of the 'salary' field. 143 * @return The value of the 'salary' field. 144 */ 145 public Integer getSalary() { 146 return salary; 147 } 148 149 /** 150 * Sets the value of the 'salary' field. 151 * @param value the value to set. 152 */ 153 public void setSalary(Integer value) { 154 this.salary = value; 155 } 156 157 /** 158 * Gets the value of the 'age' field. 159 * @return The value of the 'age' field. 160 */ 161 public Integer getAge() { 162 return age; 163 } 164 165 /** 166 * Sets the value of the 'age' field. 167 * @param value the value to set. 168 */ 169 public void setAge(Integer value) { 170 this.age = value; 171 } 172 173 /** 174 * Gets the value of the 'address' field. 175 * @return The value of the 'address' field. 176 */ 177 public CharSequence getAddress() { 178 return address; 179 } 180 181 /** 182 * Sets the value of the 'address' field. 
183 * @param value the value to set. 184 */ 185 public void setAddress(CharSequence value) { 186 this.address = value; 187 } 188 189 /** 190 * Creates a new Emp RecordBuilder. 191 * @return A new Emp RecordBuilder 192 */ 193 public static Builder newBuilder() { 194 return new Builder(); 195 } 196 197 /** 198 * Creates a new Emp RecordBuilder by copying an existing Builder. 199 * @param other The existing builder to copy. 200 * @return A new Emp RecordBuilder 201 */ 202 public static Builder newBuilder(Builder other) { 203 return new Builder(other); 204 } 205 206 /** 207 * Creates a new Emp RecordBuilder by copying an existing Emp instance. 208 * @param other The existing instance to copy. 209 * @return A new Emp RecordBuilder 210 */ 211 public static Builder newBuilder(Emp other) { 212 return new Builder(other); 213 } 214 215 /** 216 * RecordBuilder for Emp instances. 217 */ 218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp> 219 implements org.apache.avro.data.RecordBuilder<Emp> { 220 221 private CharSequence name; 222 private int id; 223 private int salary; 224 private int age; 225 private CharSequence address; 226 227 /** Creates a new Builder */ 228 private Builder() { 229 super(SCHEMA$); 230 } 231 232 /** 233 * Creates a Builder by copying an existing Builder. 234 * @param other The existing Builder to copy. 
235 */ 236 private Builder(Builder other) { 237 super(other); 238 if (isValidValue(fields()[0], other.name)) { 239 this.name = data().deepCopy(fields()[0].schema(), other.name); 240 fieldSetFlags()[0] = true; 241 } 242 if (isValidValue(fields()[1], other.id)) { 243 this.id = data().deepCopy(fields()[1].schema(), other.id); 244 fieldSetFlags()[1] = true; 245 } 246 if (isValidValue(fields()[2], other.salary)) { 247 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 248 fieldSetFlags()[2] = true; 249 } 250 if (isValidValue(fields()[3], other.age)) { 251 this.age = data().deepCopy(fields()[3].schema(), other.age); 252 fieldSetFlags()[3] = true; 253 } 254 if (isValidValue(fields()[4], other.address)) { 255 this.address = data().deepCopy(fields()[4].schema(), other.address); 256 fieldSetFlags()[4] = true; 257 } 258 } 259 260 /** 261 * Creates a Builder by copying an existing Emp instance 262 * @param other The existing instance to copy. 263 */ 264 private Builder(Emp other) { 265 super(SCHEMA$); 266 if (isValidValue(fields()[0], other.name)) { 267 this.name = data().deepCopy(fields()[0].schema(), other.name); 268 fieldSetFlags()[0] = true; 269 } 270 if (isValidValue(fields()[1], other.id)) { 271 this.id = data().deepCopy(fields()[1].schema(), other.id); 272 fieldSetFlags()[1] = true; 273 } 274 if (isValidValue(fields()[2], other.salary)) { 275 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 276 fieldSetFlags()[2] = true; 277 } 278 if (isValidValue(fields()[3], other.age)) { 279 this.age = data().deepCopy(fields()[3].schema(), other.age); 280 fieldSetFlags()[3] = true; 281 } 282 if (isValidValue(fields()[4], other.address)) { 283 this.address = data().deepCopy(fields()[4].schema(), other.address); 284 fieldSetFlags()[4] = true; 285 } 286 } 287 288 /** 289 * Gets the value of the 'name' field. 290 * @return The value. 291 */ 292 public CharSequence getName() { 293 return name; 294 } 295 296 /** 297 * Sets the value of the 'name' field. 
298 * @param value The value of 'name'. 299 * @return This builder. 300 */ 301 public Builder setName(CharSequence value) { 302 validate(fields()[0], value); 303 this.name = value; 304 fieldSetFlags()[0] = true; 305 return this; 306 } 307 308 /** 309 * Checks whether the 'name' field has been set. 310 * @return True if the 'name' field has been set, false otherwise. 311 */ 312 public boolean hasName() { 313 return fieldSetFlags()[0]; 314 } 315 316 317 /** 318 * Clears the value of the 'name' field. 319 * @return This builder. 320 */ 321 public Builder clearName() { 322 name = null; 323 fieldSetFlags()[0] = false; 324 return this; 325 } 326 327 /** 328 * Gets the value of the 'id' field. 329 * @return The value. 330 */ 331 public Integer getId() { 332 return id; 333 } 334 335 /** 336 * Sets the value of the 'id' field. 337 * @param value The value of 'id'. 338 * @return This builder. 339 */ 340 public Builder setId(int value) { 341 validate(fields()[1], value); 342 this.id = value; 343 fieldSetFlags()[1] = true; 344 return this; 345 } 346 347 /** 348 * Checks whether the 'id' field has been set. 349 * @return True if the 'id' field has been set, false otherwise. 350 */ 351 public boolean hasId() { 352 return fieldSetFlags()[1]; 353 } 354 355 356 /** 357 * Clears the value of the 'id' field. 358 * @return This builder. 359 */ 360 public Builder clearId() { 361 fieldSetFlags()[1] = false; 362 return this; 363 } 364 365 /** 366 * Gets the value of the 'salary' field. 367 * @return The value. 368 */ 369 public Integer getSalary() { 370 return salary; 371 } 372 373 /** 374 * Sets the value of the 'salary' field. 375 * @param value The value of 'salary'. 376 * @return This builder. 377 */ 378 public Builder setSalary(int value) { 379 validate(fields()[2], value); 380 this.salary = value; 381 fieldSetFlags()[2] = true; 382 return this; 383 } 384 385 /** 386 * Checks whether the 'salary' field has been set. 
387 * @return True if the 'salary' field has been set, false otherwise. 388 */ 389 public boolean hasSalary() { 390 return fieldSetFlags()[2]; 391 } 392 393 394 /** 395 * Clears the value of the 'salary' field. 396 * @return This builder. 397 */ 398 public Builder clearSalary() { 399 fieldSetFlags()[2] = false; 400 return this; 401 } 402 403 /** 404 * Gets the value of the 'age' field. 405 * @return The value. 406 */ 407 public Integer getAge() { 408 return age; 409 } 410 411 /** 412 * Sets the value of the 'age' field. 413 * @param value The value of 'age'. 414 * @return This builder. 415 */ 416 public Builder setAge(int value) { 417 validate(fields()[3], value); 418 this.age = value; 419 fieldSetFlags()[3] = true; 420 return this; 421 } 422 423 /** 424 * Checks whether the 'age' field has been set. 425 * @return True if the 'age' field has been set, false otherwise. 426 */ 427 public boolean hasAge() { 428 return fieldSetFlags()[3]; 429 } 430 431 432 /** 433 * Clears the value of the 'age' field. 434 * @return This builder. 435 */ 436 public Builder clearAge() { 437 fieldSetFlags()[3] = false; 438 return this; 439 } 440 441 /** 442 * Gets the value of the 'address' field. 443 * @return The value. 444 */ 445 public CharSequence getAddress() { 446 return address; 447 } 448 449 /** 450 * Sets the value of the 'address' field. 451 * @param value The value of 'address'. 452 * @return This builder. 453 */ 454 public Builder setAddress(CharSequence value) { 455 validate(fields()[4], value); 456 this.address = value; 457 fieldSetFlags()[4] = true; 458 return this; 459 } 460 461 /** 462 * Checks whether the 'address' field has been set. 463 * @return True if the 'address' field has been set, false otherwise. 464 */ 465 public boolean hasAddress() { 466 return fieldSetFlags()[4]; 467 } 468 469 470 /** 471 * Clears the value of the 'address' field. 472 * @return This builder. 
473 */ 474 public Builder clearAddress() { 475 address = null; 476 fieldSetFlags()[4] = false; 477 return this; 478 } 479 480 481 @SuppressWarnings("unchecked") 482 public Emp build() { 483 try { 484 Emp record = new Emp(); 485 record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]); 486 record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]); 487 record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]); 488 record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]); 489 record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]); 490 return record; 491 } catch (Exception e) { 492 throw new org.apache.avro.AvroRuntimeException(e); 493 } 494 } 495 } 496 497 @SuppressWarnings("unchecked") 498 private static final org.apache.avro.io.DatumWriter<Emp> 499 WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$); 500 501 @Override public void writeExternal(java.io.ObjectOutput out) 502 throws java.io.IOException { 503 WRITER$.write(this, SpecificData.getEncoder(out)); 504 } 505 506 @SuppressWarnings("unchecked") 507 private static final org.apache.avro.io.DatumReader<Emp> 508 READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$); 509 510 @Override public void readExternal(java.io.ObjectInput in) 511 throws java.io.IOException { 512 READER$.read(this, SpecificData.getDecoder(in)); 513 } 514 515 }
三.进行Avro编程
1>.开始编写串行化代码
/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.avro;

import org.apache.avro.file.DataFileWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.junit.Test;
import tutorialspoint.com.Emp;

import java.io.File;
import java.io.IOException;

/**
 * Serializes a single {@link Emp} record into an Avro container file.
 */
public class TestAvro {
    /**
     * Tests Avro serialization: builds one Emp, writes it to an Avro data file
     * and prints a success message.
     *
     * @throws IOException if the data file cannot be created or written
     */
    @Test
    public void testAvroSerial() throws IOException {
        // Create the record and populate its fields.
        Emp yinzhengjie = new Emp();
        yinzhengjie.setId(1);
        yinzhengjie.setName("尹正杰");
        yinzhengjie.setAge(18);
        yinzhengjie.setSalary(80000);
        yinzhengjie.setAddress("北京");

        // Datum writer that encodes Emp instances.
        DatumWriter<Emp> dw = new SpecificDatumWriter<Emp>(Emp.class);

        // Backslashes in a Java string literal must be escaped; the single-backslash
        // form contains illegal escapes (\I, \y, \A, \e) and does not compile.
        File target = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avro");

        // try-with-resources closes the file writer even when create/append throws.
        try (DataFileWriter<Emp> dfw = new DataFileWriter<Emp>(dw)) {
            // Emp.SCHEMA$ is the schema constant of the generated Emp class,
            // parsed from the JSON schema embedded in it.
            dfw.create(Emp.SCHEMA$, target);
            // Append the record to the container file.
            dfw.append(yinzhengjie);
        }
        System.out.println("序列化成功!");
    }
}
2>.测试java、hadoop、avro对10000000个对象串行化速度比对,大小比对
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{"type":"record","name":"Emp","namespace":"tutorialspoint.com","fields":[{"name":"name","type":"string"},{"name":"id","type":"int"},{"name":"salary","type":"int"},{"name":"age","type":"int"},{"name":"address","type":"string"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoder<Emp> ENCODER = 23 new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder<Emp> DECODER = 26 new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder<Emp> getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}. 37 * @param resolver a {@link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use <code>newBuilder()</code>. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 
97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, Object value$) { 99 switch (field$) { 100 case 0: name = (CharSequence)value$; break; 101 case 1: id = (Integer)value$; break; 102 case 2: salary = (Integer)value$; break; 103 case 3: age = (Integer)value$; break; 104 case 4: address = (CharSequence)value$; break; 105 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 106 } 107 } 108 109 /** 110 * Gets the value of the 'name' field. 111 * @return The value of the 'name' field. 112 */ 113 public CharSequence getName() { 114 return name; 115 } 116 117 /** 118 * Sets the value of the 'name' field. 119 * @param value the value to set. 120 */ 121 public void setName(CharSequence value) { 122 this.name = value; 123 } 124 125 /** 126 * Gets the value of the 'id' field. 127 * @return The value of the 'id' field. 128 */ 129 public Integer getId() { 130 return id; 131 } 132 133 /** 134 * Sets the value of the 'id' field. 135 * @param value the value to set. 136 */ 137 public void setId(Integer value) { 138 this.id = value; 139 } 140 141 /** 142 * Gets the value of the 'salary' field. 143 * @return The value of the 'salary' field. 144 */ 145 public Integer getSalary() { 146 return salary; 147 } 148 149 /** 150 * Sets the value of the 'salary' field. 151 * @param value the value to set. 152 */ 153 public void setSalary(Integer value) { 154 this.salary = value; 155 } 156 157 /** 158 * Gets the value of the 'age' field. 159 * @return The value of the 'age' field. 160 */ 161 public Integer getAge() { 162 return age; 163 } 164 165 /** 166 * Sets the value of the 'age' field. 167 * @param value the value to set. 168 */ 169 public void setAge(Integer value) { 170 this.age = value; 171 } 172 173 /** 174 * Gets the value of the 'address' field. 175 * @return The value of the 'address' field. 176 */ 177 public CharSequence getAddress() { 178 return address; 179 } 180 181 /** 182 * Sets the value of the 'address' field. 
183 * @param value the value to set. 184 */ 185 public void setAddress(CharSequence value) { 186 this.address = value; 187 } 188 189 /** 190 * Creates a new Emp RecordBuilder. 191 * @return A new Emp RecordBuilder 192 */ 193 public static Builder newBuilder() { 194 return new Builder(); 195 } 196 197 /** 198 * Creates a new Emp RecordBuilder by copying an existing Builder. 199 * @param other The existing builder to copy. 200 * @return A new Emp RecordBuilder 201 */ 202 public static Builder newBuilder(Builder other) { 203 return new Builder(other); 204 } 205 206 /** 207 * Creates a new Emp RecordBuilder by copying an existing Emp instance. 208 * @param other The existing instance to copy. 209 * @return A new Emp RecordBuilder 210 */ 211 public static Builder newBuilder(Emp other) { 212 return new Builder(other); 213 } 214 215 /** 216 * RecordBuilder for Emp instances. 217 */ 218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp> 219 implements org.apache.avro.data.RecordBuilder<Emp> { 220 221 private CharSequence name; 222 private int id; 223 private int salary; 224 private int age; 225 private CharSequence address; 226 227 /** Creates a new Builder */ 228 private Builder() { 229 super(SCHEMA$); 230 } 231 232 /** 233 * Creates a Builder by copying an existing Builder. 234 * @param other The existing Builder to copy. 
235 */ 236 private Builder(Builder other) { 237 super(other); 238 if (isValidValue(fields()[0], other.name)) { 239 this.name = data().deepCopy(fields()[0].schema(), other.name); 240 fieldSetFlags()[0] = true; 241 } 242 if (isValidValue(fields()[1], other.id)) { 243 this.id = data().deepCopy(fields()[1].schema(), other.id); 244 fieldSetFlags()[1] = true; 245 } 246 if (isValidValue(fields()[2], other.salary)) { 247 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 248 fieldSetFlags()[2] = true; 249 } 250 if (isValidValue(fields()[3], other.age)) { 251 this.age = data().deepCopy(fields()[3].schema(), other.age); 252 fieldSetFlags()[3] = true; 253 } 254 if (isValidValue(fields()[4], other.address)) { 255 this.address = data().deepCopy(fields()[4].schema(), other.address); 256 fieldSetFlags()[4] = true; 257 } 258 } 259 260 /** 261 * Creates a Builder by copying an existing Emp instance 262 * @param other The existing instance to copy. 263 */ 264 private Builder(Emp other) { 265 super(SCHEMA$); 266 if (isValidValue(fields()[0], other.name)) { 267 this.name = data().deepCopy(fields()[0].schema(), other.name); 268 fieldSetFlags()[0] = true; 269 } 270 if (isValidValue(fields()[1], other.id)) { 271 this.id = data().deepCopy(fields()[1].schema(), other.id); 272 fieldSetFlags()[1] = true; 273 } 274 if (isValidValue(fields()[2], other.salary)) { 275 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 276 fieldSetFlags()[2] = true; 277 } 278 if (isValidValue(fields()[3], other.age)) { 279 this.age = data().deepCopy(fields()[3].schema(), other.age); 280 fieldSetFlags()[3] = true; 281 } 282 if (isValidValue(fields()[4], other.address)) { 283 this.address = data().deepCopy(fields()[4].schema(), other.address); 284 fieldSetFlags()[4] = true; 285 } 286 } 287 288 /** 289 * Gets the value of the 'name' field. 290 * @return The value. 291 */ 292 public CharSequence getName() { 293 return name; 294 } 295 296 /** 297 * Sets the value of the 'name' field. 
298 * @param value The value of 'name'. 299 * @return This builder. 300 */ 301 public Builder setName(CharSequence value) { 302 validate(fields()[0], value); 303 this.name = value; 304 fieldSetFlags()[0] = true; 305 return this; 306 } 307 308 /** 309 * Checks whether the 'name' field has been set. 310 * @return True if the 'name' field has been set, false otherwise. 311 */ 312 public boolean hasName() { 313 return fieldSetFlags()[0]; 314 } 315 316 317 /** 318 * Clears the value of the 'name' field. 319 * @return This builder. 320 */ 321 public Builder clearName() { 322 name = null; 323 fieldSetFlags()[0] = false; 324 return this; 325 } 326 327 /** 328 * Gets the value of the 'id' field. 329 * @return The value. 330 */ 331 public Integer getId() { 332 return id; 333 } 334 335 /** 336 * Sets the value of the 'id' field. 337 * @param value The value of 'id'. 338 * @return This builder. 339 */ 340 public Builder setId(int value) { 341 validate(fields()[1], value); 342 this.id = value; 343 fieldSetFlags()[1] = true; 344 return this; 345 } 346 347 /** 348 * Checks whether the 'id' field has been set. 349 * @return True if the 'id' field has been set, false otherwise. 350 */ 351 public boolean hasId() { 352 return fieldSetFlags()[1]; 353 } 354 355 356 /** 357 * Clears the value of the 'id' field. 358 * @return This builder. 359 */ 360 public Builder clearId() { 361 fieldSetFlags()[1] = false; 362 return this; 363 } 364 365 /** 366 * Gets the value of the 'salary' field. 367 * @return The value. 368 */ 369 public Integer getSalary() { 370 return salary; 371 } 372 373 /** 374 * Sets the value of the 'salary' field. 375 * @param value The value of 'salary'. 376 * @return This builder. 377 */ 378 public Builder setSalary(int value) { 379 validate(fields()[2], value); 380 this.salary = value; 381 fieldSetFlags()[2] = true; 382 return this; 383 } 384 385 /** 386 * Checks whether the 'salary' field has been set. 
387 * @return True if the 'salary' field has been set, false otherwise. 388 */ 389 public boolean hasSalary() { 390 return fieldSetFlags()[2]; 391 } 392 393 394 /** 395 * Clears the value of the 'salary' field. 396 * @return This builder. 397 */ 398 public Builder clearSalary() { 399 fieldSetFlags()[2] = false; 400 return this; 401 } 402 403 /** 404 * Gets the value of the 'age' field. 405 * @return The value. 406 */ 407 public Integer getAge() { 408 return age; 409 } 410 411 /** 412 * Sets the value of the 'age' field. 413 * @param value The value of 'age'. 414 * @return This builder. 415 */ 416 public Builder setAge(int value) { 417 validate(fields()[3], value); 418 this.age = value; 419 fieldSetFlags()[3] = true; 420 return this; 421 } 422 423 /** 424 * Checks whether the 'age' field has been set. 425 * @return True if the 'age' field has been set, false otherwise. 426 */ 427 public boolean hasAge() { 428 return fieldSetFlags()[3]; 429 } 430 431 432 /** 433 * Clears the value of the 'age' field. 434 * @return This builder. 435 */ 436 public Builder clearAge() { 437 fieldSetFlags()[3] = false; 438 return this; 439 } 440 441 /** 442 * Gets the value of the 'address' field. 443 * @return The value. 444 */ 445 public CharSequence getAddress() { 446 return address; 447 } 448 449 /** 450 * Sets the value of the 'address' field. 451 * @param value The value of 'address'. 452 * @return This builder. 453 */ 454 public Builder setAddress(CharSequence value) { 455 validate(fields()[4], value); 456 this.address = value; 457 fieldSetFlags()[4] = true; 458 return this; 459 } 460 461 /** 462 * Checks whether the 'address' field has been set. 463 * @return True if the 'address' field has been set, false otherwise. 464 */ 465 public boolean hasAddress() { 466 return fieldSetFlags()[4]; 467 } 468 469 470 /** 471 * Clears the value of the 'address' field. 472 * @return This builder. 
473 */ 474 public Builder clearAddress() { 475 address = null; 476 fieldSetFlags()[4] = false; 477 return this; 478 } 479 480 481 @SuppressWarnings("unchecked") 482 public Emp build() { 483 try { 484 Emp record = new Emp(); 485 record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]); 486 record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]); 487 record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]); 488 record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]); 489 record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]); 490 return record; 491 } catch (Exception e) { 492 throw new org.apache.avro.AvroRuntimeException(e); 493 } 494 } 495 } 496 497 @SuppressWarnings("unchecked") 498 private static final org.apache.avro.io.DatumWriter<Emp> 499 WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$); 500 501 @Override public void writeExternal(java.io.ObjectOutput out) 502 throws java.io.IOException { 503 WRITER$.write(this, SpecificData.getEncoder(out)); 504 } 505 506 @SuppressWarnings("unchecked") 507 private static final org.apache.avro.io.DatumReader<Emp> 508 READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$); 509 510 @Override public void readExternal(java.io.ObjectInput in) 511 throws java.io.IOException { 512 READER$.read(this, SpecificData.getDecoder(in)); 513 } 514 515 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import java.io.Serializable; 9 10 public class Emp2 implements Serializable { 11 private int id; 12 private String name; 13 private int age; 14 private String address; 15 private int salary; 16 17 public int getId() { 18 return id; 19 } 20 public void setId(int id) { 21 this.id = id; 22 } 23 public String getName() { 24 return name; 25 } 26 public void setName(String name) { 27 this.name = name; 28 } 29 public int getAge() { 30 return age; 31 } 32 public void setAge(int age) { 33 this.age = age; 34 } 35 public String getAddress() { 36 return address; 37 } 38 public void setAddress(String address) { 39 this.address = address; 40 } 41 public int getSalary() { 42 return salary; 43 } 44 public void setSalary(int salary) { 45 this.salary = salary; 46 } 47 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.hadoop.io.Writable; 9 import tutorialspoint.com.Emp; 10 11 import java.io.DataInput; 12 import java.io.DataOutput; 13 import java.io.IOException; 14 15 public class EmpWritable implements Writable { 16 private Emp emp; 17 18 public Emp getEmp() { 19 return emp; 20 } 21 22 public void setEmp(Emp emp) { 23 this.emp = emp; 24 25 } 26 27 28 public EmpWritable() { 29 this.emp = emp; 30 } 31 32 //串行化 33 public void write(DataOutput dataOutput) throws IOException { 34 dataOutput.writeUTF((String)emp.getName()); 35 dataOutput.writeInt(emp.getId()); 36 dataOutput.writeInt(emp.getSalary()); 37 dataOutput.writeInt(emp.getAge()); 38 dataOutput.writeUTF((String)emp.getAddress()); 39 } 40 41 //发串行化,我们测试的是串行化,咱们不需要写反串行化的代码,空实现即可。 42 public void readFields(DataInput dataInput) { 43 } 44 }
/* @author :yinzhengjie Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ EMAIL:y1053419035@qq.com */ package cn.org.yinzhengjie.avro; import org.apache.avro.file.DataFileWriter; import org.apache.avro.io.DatumWriter; import org.apache.avro.specific.SpecificDatumWriter; import tutorialspoint.com.Emp; import java.io.*; public class TestAvro { private static final File avro = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.avroTest"); private static final File java = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.javaTest"); private static final File hadoop = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.hadoopTest"); public static void main(String[] args) throws IOException { testAvroSerial(); testJavaSerial(); testHadoopSerial(); } //测试hadoop的序列化方式 public static void testHadoopSerial() throws IOException { long start = System.currentTimeMillis(); //定义对象,并给对象赋初值 Emp p = new Emp(); p.setId(3); p.setName("jay"); p.setAge(30); p.setSalary(15000); p.setAddress("昌平"); EmpWritable ew = new EmpWritable(); ew.setEmp(p); FileOutputStream fos = new FileOutputStream(hadoop); DataOutputStream dos = new DataOutputStream(fos); for (int i = 0;i <= 10000000;i++){ ew.write(dos); } fos.close(); dos.close(); System.out.printf("这是Hadoop序列化方式: 生成文件大小:[%d],用时:[%d] ",hadoop.length(),System.currentTimeMillis() - start); } //测试Avro序列化 public static void testAvroSerial() throws IOException { long start = System.currentTimeMillis(); //定义对象,并给对象赋初值 Emp yinzhengjie = new Emp(); yinzhengjie.setId(1); yinzhengjie.setName("尹正杰"); yinzhengjie.setAge(18); yinzhengjie.setSalary(80000); yinzhengjie.setAddress("北京"); //初始化writer对象,我习惯称它为写入器 DatumWriter<Emp> dw = new SpecificDatumWriter<Emp>(Emp.class); //初始化文件写入器 DataFileWriter<Emp> dfw = new DataFileWriter<Emp>(dw); //开始序列化文件,这个 Emp2.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。 dfw.create(Emp.SCHEMA$,avro); //在序列化文件中追加对象 for (int i = 0;i <= 10000000;i++){ dfw.append(yinzhengjie); } //释放资源 dfw.close(); 
System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d] ",avro.length(),System.currentTimeMillis() - start); } //测试Java序列化 public static void testJavaSerial() throws IOException { long start = System.currentTimeMillis(); Emp2 p = new Emp2(); p.setId(2); p.setName("tom"); p.setAge(20); p.setSalary(13000); p.setAddress("亦庄"); FileOutputStream fos = new FileOutputStream(java); ObjectOutputStream oos = new ObjectOutputStream(fos); for (int i = 0;i <= 10000000;i++){ oos.writeObject(p); } fos.close(); oos.close(); System.out.printf("这是Java序列化方式: 生成文件大小:[%d],用时:[%d] ",java.length(),(System.currentTimeMillis()-start)); } } /* 以上代码执行结果如下: 这是Avro序列化方式: 生成文件大小:[220072462],用时:[2570] 这是Java序列化方式: 生成文件大小:[50000139],用时:[9876] 这是Hadoop序列化方式: 生成文件大小:[250000025],用时:[126290] */
经过测试,重复序列化同一个对象时,用时最短的是Avro,用时最长的是Hadoop;生成文件最小的是Java,生成文件最大的是Hadoop。
3>.编写反串行化代码
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.avro.file.DataFileReader; 9 import org.apache.avro.file.DataFileWriter; 10 import org.apache.avro.io.DatumReader; 11 import org.apache.avro.io.DatumWriter; 12 import org.apache.avro.specific.SpecificDatumReader; 13 import org.apache.avro.specific.SpecificDatumWriter; 14 import tutorialspoint.com.Emp; 15 16 import java.io.*; 17 18 public class TestAvro { 19 private static final File avro = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.avroTest"); 20 21 public static void main(String[] args) throws IOException { 22 AvroSerial(); 23 AvroDeserial(); 24 } 25 26 //测试Avro序列化 27 public static void AvroSerial() throws IOException { 28 long start = System.currentTimeMillis(); 29 //定义对象,并给对象赋初值 30 Emp yinzhengjie = new Emp(); 31 yinzhengjie.setId(1); 32 yinzhengjie.setName("尹正杰"); 33 yinzhengjie.setAge(18); 34 yinzhengjie.setSalary(80000); 35 yinzhengjie.setAddress("北京"); 36 //初始化writer对象,我习惯称它为写入器 37 DatumWriter<Emp> dw = new SpecificDatumWriter<Emp>(Emp.class); 38 //初始化文件写入器 39 DataFileWriter<Emp> dfw = new DataFileWriter<Emp>(dw); 40 //开始序列化文件,这个 Emp2.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。 41 dfw.create(Emp.SCHEMA$,avro); 42 //在序列化文件中追加对象 43 for (int i = 0;i <= 10000000;i++){ 44 dfw.append(yinzhengjie); 45 } 46 //释放资源 47 dfw.close(); 48 System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d] ",avro.length(),System.currentTimeMillis() - start); 49 } 50 51 52 //测试Avro反序列化 53 public static void AvroDeserial() throws IOException { 54 long start = System.currentTimeMillis(); 55 //初始化reader对象 56 DatumReader<Emp> dr = new SpecificDatumReader<Emp>(Emp.class); 57 //初始化文件阅读器 58 DataFileReader<Emp> dfr = new DataFileReader<Emp>(avro,dr); 59 //遍历阅读器里面的内容 60 while (dfr.hasNext()){ 61 Emp emp = dfr.next(); 62 Integer id = emp.getId(); 63 CharSequence name = emp.getName(); 64 
Integer age = emp.getAge(); 65 Integer salary = emp.getSalary(); 66 CharSequence address = emp.getAddress(); 67 // System.out.println(id + "|" + name + "|" + age + "|" + salary + "|" + address); 不建议打印,因为1000万条数据估计得30~40秒左右才能输出完成。 68 } 69 System.out.printf("这是Avro反序列化方式: 生成文件大小:[%d],用时:[%d] ",avro.length(),System.currentTimeMillis() - start); 70 } 71 72 } 73 74 75 /* 76 以上代码执行结果如下: 77 这是Avro序列化方式: 生成文件大小:[220072462],用时:[2640] 78 这是Avro反序列化方式: 生成文件大小:[220072462],用时:[2866] 79 */
4>.测试java、hadoop、avro对10000000个对象反串行化速度比对
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{"type":"record","name":"Emp","namespace":"tutorialspoint.com","fields":[{"name":"name","type":"string"},{"name":"id","type":"int"},{"name":"salary","type":"int"},{"name":"age","type":"int"},{"name":"address","type":"string"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoder<Emp> ENCODER = 23 new BinaryMessageEncoder<Emp>(MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder<Emp> DECODER = 26 new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder<Emp> getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified {@link SchemaStore}. 37 * @param resolver a {@link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder<Emp> createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder<Emp>(MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use <code>newBuilder()</code>. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 
97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, Object value$) { 99 switch (field$) { 100 case 0: name = (CharSequence)value$; break; 101 case 1: id = (Integer)value$; break; 102 case 2: salary = (Integer)value$; break; 103 case 3: age = (Integer)value$; break; 104 case 4: address = (CharSequence)value$; break; 105 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 106 } 107 } 108 109 /** 110 * Gets the value of the 'name' field. 111 * @return The value of the 'name' field. 112 */ 113 public CharSequence getName() { 114 return name; 115 } 116 117 /** 118 * Sets the value of the 'name' field. 119 * @param value the value to set. 120 */ 121 public void setName(CharSequence value) { 122 this.name = value; 123 } 124 125 /** 126 * Gets the value of the 'id' field. 127 * @return The value of the 'id' field. 128 */ 129 public Integer getId() { 130 return id; 131 } 132 133 /** 134 * Sets the value of the 'id' field. 135 * @param value the value to set. 136 */ 137 public void setId(Integer value) { 138 this.id = value; 139 } 140 141 /** 142 * Gets the value of the 'salary' field. 143 * @return The value of the 'salary' field. 144 */ 145 public Integer getSalary() { 146 return salary; 147 } 148 149 /** 150 * Sets the value of the 'salary' field. 151 * @param value the value to set. 152 */ 153 public void setSalary(Integer value) { 154 this.salary = value; 155 } 156 157 /** 158 * Gets the value of the 'age' field. 159 * @return The value of the 'age' field. 160 */ 161 public Integer getAge() { 162 return age; 163 } 164 165 /** 166 * Sets the value of the 'age' field. 167 * @param value the value to set. 168 */ 169 public void setAge(Integer value) { 170 this.age = value; 171 } 172 173 /** 174 * Gets the value of the 'address' field. 175 * @return The value of the 'address' field. 176 */ 177 public CharSequence getAddress() { 178 return address; 179 } 180 181 /** 182 * Sets the value of the 'address' field. 
183 * @param value the value to set. 184 */ 185 public void setAddress(CharSequence value) { 186 this.address = value; 187 } 188 189 /** 190 * Creates a new Emp RecordBuilder. 191 * @return A new Emp RecordBuilder 192 */ 193 public static Builder newBuilder() { 194 return new Builder(); 195 } 196 197 /** 198 * Creates a new Emp RecordBuilder by copying an existing Builder. 199 * @param other The existing builder to copy. 200 * @return A new Emp RecordBuilder 201 */ 202 public static Builder newBuilder(Builder other) { 203 return new Builder(other); 204 } 205 206 /** 207 * Creates a new Emp RecordBuilder by copying an existing Emp instance. 208 * @param other The existing instance to copy. 209 * @return A new Emp RecordBuilder 210 */ 211 public static Builder newBuilder(Emp other) { 212 return new Builder(other); 213 } 214 215 /** 216 * RecordBuilder for Emp instances. 217 */ 218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<Emp> 219 implements org.apache.avro.data.RecordBuilder<Emp> { 220 221 private CharSequence name; 222 private int id; 223 private int salary; 224 private int age; 225 private CharSequence address; 226 227 /** Creates a new Builder */ 228 private Builder() { 229 super(SCHEMA$); 230 } 231 232 /** 233 * Creates a Builder by copying an existing Builder. 234 * @param other The existing Builder to copy. 
235 */ 236 private Builder(Builder other) { 237 super(other); 238 if (isValidValue(fields()[0], other.name)) { 239 this.name = data().deepCopy(fields()[0].schema(), other.name); 240 fieldSetFlags()[0] = true; 241 } 242 if (isValidValue(fields()[1], other.id)) { 243 this.id = data().deepCopy(fields()[1].schema(), other.id); 244 fieldSetFlags()[1] = true; 245 } 246 if (isValidValue(fields()[2], other.salary)) { 247 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 248 fieldSetFlags()[2] = true; 249 } 250 if (isValidValue(fields()[3], other.age)) { 251 this.age = data().deepCopy(fields()[3].schema(), other.age); 252 fieldSetFlags()[3] = true; 253 } 254 if (isValidValue(fields()[4], other.address)) { 255 this.address = data().deepCopy(fields()[4].schema(), other.address); 256 fieldSetFlags()[4] = true; 257 } 258 } 259 260 /** 261 * Creates a Builder by copying an existing Emp instance 262 * @param other The existing instance to copy. 263 */ 264 private Builder(Emp other) { 265 super(SCHEMA$); 266 if (isValidValue(fields()[0], other.name)) { 267 this.name = data().deepCopy(fields()[0].schema(), other.name); 268 fieldSetFlags()[0] = true; 269 } 270 if (isValidValue(fields()[1], other.id)) { 271 this.id = data().deepCopy(fields()[1].schema(), other.id); 272 fieldSetFlags()[1] = true; 273 } 274 if (isValidValue(fields()[2], other.salary)) { 275 this.salary = data().deepCopy(fields()[2].schema(), other.salary); 276 fieldSetFlags()[2] = true; 277 } 278 if (isValidValue(fields()[3], other.age)) { 279 this.age = data().deepCopy(fields()[3].schema(), other.age); 280 fieldSetFlags()[3] = true; 281 } 282 if (isValidValue(fields()[4], other.address)) { 283 this.address = data().deepCopy(fields()[4].schema(), other.address); 284 fieldSetFlags()[4] = true; 285 } 286 } 287 288 /** 289 * Gets the value of the 'name' field. 290 * @return The value. 291 */ 292 public CharSequence getName() { 293 return name; 294 } 295 296 /** 297 * Sets the value of the 'name' field. 
298 * @param value The value of 'name'. 299 * @return This builder. 300 */ 301 public Builder setName(CharSequence value) { 302 validate(fields()[0], value); 303 this.name = value; 304 fieldSetFlags()[0] = true; 305 return this; 306 } 307 308 /** 309 * Checks whether the 'name' field has been set. 310 * @return True if the 'name' field has been set, false otherwise. 311 */ 312 public boolean hasName() { 313 return fieldSetFlags()[0]; 314 } 315 316 317 /** 318 * Clears the value of the 'name' field. 319 * @return This builder. 320 */ 321 public Builder clearName() { 322 name = null; 323 fieldSetFlags()[0] = false; 324 return this; 325 } 326 327 /** 328 * Gets the value of the 'id' field. 329 * @return The value. 330 */ 331 public Integer getId() { 332 return id; 333 } 334 335 /** 336 * Sets the value of the 'id' field. 337 * @param value The value of 'id'. 338 * @return This builder. 339 */ 340 public Builder setId(int value) { 341 validate(fields()[1], value); 342 this.id = value; 343 fieldSetFlags()[1] = true; 344 return this; 345 } 346 347 /** 348 * Checks whether the 'id' field has been set. 349 * @return True if the 'id' field has been set, false otherwise. 350 */ 351 public boolean hasId() { 352 return fieldSetFlags()[1]; 353 } 354 355 356 /** 357 * Clears the value of the 'id' field. 358 * @return This builder. 359 */ 360 public Builder clearId() { 361 fieldSetFlags()[1] = false; 362 return this; 363 } 364 365 /** 366 * Gets the value of the 'salary' field. 367 * @return The value. 368 */ 369 public Integer getSalary() { 370 return salary; 371 } 372 373 /** 374 * Sets the value of the 'salary' field. 375 * @param value The value of 'salary'. 376 * @return This builder. 377 */ 378 public Builder setSalary(int value) { 379 validate(fields()[2], value); 380 this.salary = value; 381 fieldSetFlags()[2] = true; 382 return this; 383 } 384 385 /** 386 * Checks whether the 'salary' field has been set. 
387 * @return True if the 'salary' field has been set, false otherwise. 388 */ 389 public boolean hasSalary() { 390 return fieldSetFlags()[2]; 391 } 392 393 394 /** 395 * Clears the value of the 'salary' field. 396 * @return This builder. 397 */ 398 public Builder clearSalary() { 399 fieldSetFlags()[2] = false; 400 return this; 401 } 402 403 /** 404 * Gets the value of the 'age' field. 405 * @return The value. 406 */ 407 public Integer getAge() { 408 return age; 409 } 410 411 /** 412 * Sets the value of the 'age' field. 413 * @param value The value of 'age'. 414 * @return This builder. 415 */ 416 public Builder setAge(int value) { 417 validate(fields()[3], value); 418 this.age = value; 419 fieldSetFlags()[3] = true; 420 return this; 421 } 422 423 /** 424 * Checks whether the 'age' field has been set. 425 * @return True if the 'age' field has been set, false otherwise. 426 */ 427 public boolean hasAge() { 428 return fieldSetFlags()[3]; 429 } 430 431 432 /** 433 * Clears the value of the 'age' field. 434 * @return This builder. 435 */ 436 public Builder clearAge() { 437 fieldSetFlags()[3] = false; 438 return this; 439 } 440 441 /** 442 * Gets the value of the 'address' field. 443 * @return The value. 444 */ 445 public CharSequence getAddress() { 446 return address; 447 } 448 449 /** 450 * Sets the value of the 'address' field. 451 * @param value The value of 'address'. 452 * @return This builder. 453 */ 454 public Builder setAddress(CharSequence value) { 455 validate(fields()[4], value); 456 this.address = value; 457 fieldSetFlags()[4] = true; 458 return this; 459 } 460 461 /** 462 * Checks whether the 'address' field has been set. 463 * @return True if the 'address' field has been set, false otherwise. 464 */ 465 public boolean hasAddress() { 466 return fieldSetFlags()[4]; 467 } 468 469 470 /** 471 * Clears the value of the 'address' field. 472 * @return This builder. 
473 */ 474 public Builder clearAddress() { 475 address = null; 476 fieldSetFlags()[4] = false; 477 return this; 478 } 479 480 481 @SuppressWarnings("unchecked") 482 public Emp build() { 483 try { 484 Emp record = new Emp(); 485 record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]); 486 record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]); 487 record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]); 488 record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]); 489 record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]); 490 return record; 491 } catch (Exception e) { 492 throw new org.apache.avro.AvroRuntimeException(e); 493 } 494 } 495 } 496 497 @SuppressWarnings("unchecked") 498 private static final org.apache.avro.io.DatumWriter<Emp> 499 WRITER$ = (org.apache.avro.io.DatumWriter<Emp>)MODEL$.createDatumWriter(SCHEMA$); 500 501 @Override public void writeExternal(java.io.ObjectOutput out) 502 throws java.io.IOException { 503 WRITER$.write(this, SpecificData.getEncoder(out)); 504 } 505 506 @SuppressWarnings("unchecked") 507 private static final org.apache.avro.io.DatumReader<Emp> 508 READER$ = (org.apache.avro.io.DatumReader<Emp>)MODEL$.createDatumReader(SCHEMA$); 509 510 @Override public void readExternal(java.io.ObjectInput in) 511 throws java.io.IOException { 512 READER$.read(this, SpecificData.getDecoder(in)); 513 } 514 515 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import java.io.Serializable; 9 10 public class Emp2 implements Serializable { 11 private int id; 12 private String name; 13 private int age; 14 private String address; 15 private int salary; 16 17 public int getId() { 18 return id; 19 } 20 public void setId(int id) { 21 this.id = id; 22 } 23 public String getName() { 24 return name; 25 } 26 public void setName(String name) { 27 this.name = name; 28 } 29 public int getAge() { 30 return age; 31 } 32 public void setAge(int age) { 33 this.age = age; 34 } 35 public String getAddress() { 36 return address; 37 } 38 public void setAddress(String address) { 39 this.address = address; 40 } 41 public int getSalary() { 42 return salary; 43 } 44 public void setSalary(int salary) { 45 this.salary = salary; 46 } 47 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.hadoop.io.Writable; 9 import tutorialspoint.com.Emp; 10 11 import java.io.DataInput; 12 import java.io.DataOutput; 13 import java.io.IOException; 14 15 public class EmpWritable implements Writable { 16 private Emp emp = new Emp(); 17 18 public Emp getEmp() { 19 return emp; 20 } 21 22 public void setEmp(Emp emp) { 23 this.emp = emp; 24 25 } 26 27 28 public EmpWritable() { 29 } 30 31 //串行化 32 public void write(DataOutput dataOutput) throws IOException { 33 dataOutput.writeUTF((String)emp.getName()); 34 dataOutput.writeInt(emp.getId()); 35 dataOutput.writeInt(emp.getSalary()); 36 dataOutput.writeInt(emp.getAge()); 37 dataOutput.writeUTF((String)emp.getAddress()); 38 } 39 40 //发串行化 41 public void readFields(DataInput dataInput) throws IOException { 42 emp.setName((CharSequence)dataInput.readUTF()); 43 emp.setId(dataInput.readInt()); 44 emp.setSalary(dataInput.readInt()); 45 emp.setAge(dataInput.readInt()); 46 emp.setAddress((CharSequence)dataInput.readUTF()); 47 } 48 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.avro.file.DataFileReader; 9 import org.apache.avro.io.DatumReader; 10 import org.apache.avro.specific.SpecificDatumReader; 11 import tutorialspoint.com.Emp; 12 13 import java.io.*; 14 15 public class test { 16 private static final File avro = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.avroTest"); 17 private static final File java = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.javaTest"); 18 private static final File hadoop = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.hadoopTest"); 19 20 public static void main(String[] args) throws Exception { 21 AvroDeserial(); 22 JavaDeserial(); 23 HadoopDeserial(); 24 } 25 26 //测试hadoop的反序列化方式 27 public static void HadoopDeserial() throws IOException { 28 long start = System.currentTimeMillis(); 29 EmpWritable ew = new EmpWritable(); 30 FileInputStream fis = new FileInputStream(hadoop); 31 DataInputStream dis = new DataInputStream(fis); 32 33 for (int i = 0;i <= 10000000;i++){ 34 ew.readFields(dis); 35 // Emp emp = ew.getEmp(); 36 // System.out.println(emp); 37 } 38 fis.close(); 39 dis.close(); 40 System.out.printf("这是Hadoop反序列化方式: 反序列化文件大小:[%d],用时:[%d] ",hadoop.length(),System.currentTimeMillis() - start); 41 } 42 43 44 //测试Avro反序列化 45 public static void AvroDeserial() throws IOException { 46 long start = System.currentTimeMillis(); 47 //初始化reader对象 48 DatumReader<Emp> dr = new SpecificDatumReader<Emp>(Emp.class); 49 //初始化文件阅读器 50 DataFileReader<Emp> dfr = new DataFileReader<Emp>(avro,dr); 51 //遍历阅读器里面的内容 52 while (dfr.hasNext()){ 53 Emp emp = dfr.next(); 54 Integer id = emp.getId(); 55 CharSequence name = emp.getName(); 56 Integer age = emp.getAge(); 57 Integer salary = emp.getSalary(); 58 CharSequence address = emp.getAddress(); 59 // System.out.println(id + "|" + name + "|" + age + "|" + salary + "|" 
+ address); 不建议打印,因为1000万条数据估计得30~40秒左右才能输出完成。 60 } 61 System.out.printf("这是Avro反序列化方式: 反序列化文件大小:[%d],用时:[%d] ",avro.length(),System.currentTimeMillis() - start); 62 } 63 64 //测试Java反序列化 65 public static void JavaDeserial() throws Exception { 66 long start = System.currentTimeMillis(); 67 68 FileInputStream fis = new FileInputStream(java); 69 ObjectInputStream ois = new ObjectInputStream(fis); 70 for (int i = 0;i <= 10000000;i++){ 71 Emp2 emp = (Emp2) ois.readObject(); 72 } 73 fis.close(); 74 ois.close(); 75 System.out.printf("这是Java反序列化方式: 反序列化文件大小:[%d],用时:[%d] ",java.length(),(System.currentTimeMillis()-start)); 76 } 77 } 78 79 80 /* 81 以上代码执行结果如下: 82 这是Avro反序列化方式: 反序列化文件大小:[220072462],用时:[3479] 83 这是Java反序列化方式: 反序列化文件大小:[50000139],用时:[15677] 84 这是Hadoop反序列化方式: 反序列化文件大小:[250000025],用时:[134628] 85 */
经测试,将一个对象进行1000万次序列化到一个文件后,再反序列化回来,用时最短的依然是Avro,Java用时次之,Hadoop依然用时最长。
四.avro通过不生成代码,直接使用schema的方式对对象进行串行化
通过上面的编程发现,如果想要通过Avro进行序列化很繁琐:在序列化之前需要准备环境,需要通过编译工具生成代码,然后再导入IDE中进行编程。那有没有不生成代码就可以进行序列化操作的方式呢?答案是肯定的,大致流程如下图:
实现代码如下:
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.avro.Schema; 9 import org.apache.avro.file.DataFileReader; 10 import org.apache.avro.file.DataFileWriter; 11 import org.apache.avro.generic.GenericData; 12 import org.apache.avro.generic.GenericRecord; 13 import org.apache.avro.io.DatumReader; 14 import org.apache.avro.io.DatumWriter; 15 import org.apache.avro.specific.SpecificDatumReader; 16 import org.apache.avro.specific.SpecificDatumWriter; 17 18 import java.io.File; 19 import java.io.IOException; 20 21 public class avroSchema { 22 private static final File avscFile = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\emp.avsc"); 23 private static final File avro = new File("D:\10.Java\IDE\yhinzhengjieData\Avro\schema.avro"); 24 public static void main(String[] args) throws IOException { 25 AvroSerial(); 26 AvroDeserial(); 27 } 28 29 //通过Schema的方式序列化 30 private static void AvroSerial() throws IOException { 31 long start = System.currentTimeMillis(); 32 //通过文件解析schema,生成schema对象,因此需要传入emp.avsc的路径 33 Schema schema = new Schema.Parser().parse(avscFile); 34 //将schema变成了类似与map的对象 35 GenericRecord yinzhengjie = new GenericData.Record(schema); 36 yinzhengjie.put("id",1); 37 yinzhengjie.put("name","尹正杰"); 38 yinzhengjie.put("age",18); 39 yinzhengjie.put("salary",80000); 40 yinzhengjie.put("address","北京"); 41 //初始化writer对象,注意,泛型应写为:GenericRecord 42 DatumWriter<GenericRecord> dw = new SpecificDatumWriter<GenericRecord>(schema); 43 //初始化文件写入器,注意,泛型应写为:GenericRecord 44 DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(dw); 45 //定义写入的路径 46 dfw.create(schema,avro); 47 for (int i = 0;i <= 10000000;i++){ 48 dfw.append(yinzhengjie); 49 } 50 System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d] ",avro.length(),System.currentTimeMillis() - start); 51 } 52 53 //通过Schema的方式序列化 54 private static void AvroDeserial() 
throws IOException { 55 long start = System.currentTimeMillis(); 56 //通过文件解析schema,生成schema对象,因此需要传入emp.avsc的路径 57 Schema schema = new Schema.Parser().parse(avscFile); 58 //将schema变成了类似与map的对象 59 GenericRecord yinzhengjie = new GenericData.Record(schema); 60 61 //初始化Reader对象,注意,泛型应写为:GenericRecord 62 DatumReader<GenericRecord> dr = new SpecificDatumReader<GenericRecord>(schema); 63 //初始化文件阅读器,注意,泛型应写为:GenericRecord 64 DataFileReader<GenericRecord> dfr = new DataFileReader<GenericRecord>(avro,dr); 65 //遍历 66 while (dfr.hasNext()){ 67 GenericRecord record = dfr.next(); 68 // System.out.println(record); 69 } 70 System.out.printf("这是Avro反序列化方式: 反序列化文件大小:[%d],用时:[%d] ",avro.length(),System.currentTimeMillis() - start); 71 } 72 } 73 74 /* 75 以上代码执行结果如下: 76 这是Avro序列化方式: 生成文件大小:[220045139],用时:[2408] 77 这是Avro反序列化方式: 反序列化文件大小:[220045139],用时:[2614] 78 */