YARN cluster deployment: a summary of problems encountered

    Versions: Hadoop 2.3.0, Hive 0.11.0

    1. Application Master page inaccessible

        Clicking the Application Master link returns an HTTP 500 error (java.lang.Connect.exception:).
        The cause: when the web UI was configured, the IP for port 50030 was set to 0.0.0.0, so the Application Master link cannot be resolved.

    Fix:
         in yarn-site.xml:
        <property>
            <description>The address of the RM web application.</description>
            <name>yarn.resourcemanager.webapp.address</name>
            <value>xxxxxxxxxx:50030</value>
        </property>
        This is bug 1811 in 2.3.0; it is fixed in 2.4.0.

    2. History UI inaccessible and containers cannot be opened
         Clicking Tracking URL: History fails.
           The cause: the history service was not started.

      Fix:
         Configuration: designate xxxxxxxxxx as the history server and set:
       
        <property>
            <name>yarn.log-aggregation-enable</name>
            <value>true</value>
        </property>
       <property>
            <name>mapreduce.jobhistory.address</name>
            <value>xxxxxxxxxx:10020</value>
        </property>

        <property>
            <name>mapreduce.jobhistory.webapp.address</name>
            <value>xxxxxxxxxx:19888</value>
        </property>

      sbin/mr-jobhistory-daemon.sh start historyserver
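
      To confirm the daemon came up, a quick check (the hostname is the placeholder configured above):

          jps | grep JobHistoryServer
          curl http://xxxxxxxxxx:19888/jobhistory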

    3. Tuning the YARN platform
     
    Set the number of virtual CPU cores:
        <property>
            <name>yarn.nodemanager.resource.cpu-vcores</name>
            <value>23</value> 
        </property>
        Set the memory available on the NodeManager:
        <property>
            <name>yarn.nodemanager.resource.memory-mb</name>
            <value>61440</value>
            <description>the amount of memory on the NodeManager, in MB</description>
        </property>
    Set the maximum memory a single task (container) may request:
        <property>
            <name>yarn.scheduler.maximum-allocation-mb</name>
            <value>49152</value>
        </property>

    4. Running a job reports: Found interface org.apache.hadoop.mapreduce.Counter, but class was expected
    The job was compiled against MR1, where Counter was a class; in Hadoop 2 it is an interface. Update the pom to the 2.3.0 artifacts and run mvn install again:
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.mrunit</groupId>
            <artifactId>mrunit</artifactId>
            <version>1.0.0</version>
            <classifier>hadoop2</classifier>
            <scope>test</scope>
        </dependency>
    Also switch the JDK to 1.7.



    5. Job fails with a shuffle out-of-memory error: Java heap space
    2014-05-14 16:44:22,010 FATAL [IPC Server handler 4 on 44508] org.apache.hadoop.mapred.TaskAttemptListenerImpl: Task: attempt_1400048775904_0006_r_000004_0 - exited : org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in fetcher#3
        at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
    Caused by: java.lang.OutOfMemoryError: Java heap space
        at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:56)
        at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:46)
        at org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput.<init>(InMemoryMapOutput.java:63)
        at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.unconditionalReserve(MergeManagerImpl.java:297)
        at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.reserve(MergeManagerImpl.java:287)
        at org.apache.hadoop.mapreduce.task.reduce.Fetcher.copyMapOutput(Fetcher.java:411)
        at org.apache.hadoop.mapreduce.task.reduce.Fetcher.copyFromHost(Fetcher.java:341)
        at org.apache.hadoop.mapreduce.task.reduce.Fetcher.run(Fetcher.java:165)

    Source: <http://xxxxxxxxxx:19888/jobhistory/logs/ST-L09-05-back-tj-yarn15:8034/container_1400048775904_0006_01_000001/job_1400048775904_0006/hadoop/syslog/?start=0>
     

    Fix:
    Lower mapreduce.reduce.shuffle.memory.limit.percent; the default is 0.25, reduced here to 0.10.
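
    For reference, the corresponding mapred-site.xml entry with the value used above:

        <property>
            <name>mapreduce.reduce.shuffle.memory.limit.percent</name>
            <value>0.10</value>
        </property>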


    Reference:
    http://www.sqlparty.com/yarn%E5%9C%A8shuffle%E9%98%B6%E6%AE%B5%E5%86%85%E5%AD%98%E4%B8%8D%E8%B6%B3%E9%97%AE%E9%A2%98error-in-shuffle-in-fetcher/

    6. Found midway through a reduce task's log:

    2014-05-14 17:51:21,835 WARN [Readahead Thread #2] org.apache.hadoop.io.ReadaheadPool: Failed readahead on ifile
    EINVAL: Invalid argument
        at org.apache.hadoop.io.nativeio.NativeIO$POSIX.posix_fadvise(Native Method)
        at org.apache.hadoop.io.nativeio.NativeIO$POSIX.posixFadviseIfPossible(NativeIO.java:263)
        at org.apache.hadoop.io.nativeio.NativeIO$POSIX$CacheManipulator.posixFadviseIfPossible(NativeIO.java:142)
        at org.apache.hadoop.io.ReadaheadPool$ReadaheadRequestImpl.run(ReadaheadPool.java:206)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)

    Source: <http://xxxxxxxxxx:8042/node/containerlogs/container_1400060792764_0001_01_000726/hadoop/syslog/?start=-4096>
     
    PS: the error has not recurred; no fix for now.


    7. Hive job error

    java.lang.InstantiationException: org.antlr.runtime.CommonToken
    Continuing ...
    java.lang.RuntimeException: failed to evaluate: <unbound>=Class.new();
    Reference: https://issues.apache.org/jira/browse/HIVE-4222

    8. A Hive job runs out of memory while automatically converting a join into a mapjoin. Fix: turn the automatic conversion off. In versions before 0.11 the default is false; in later versions it is true.
    Add to the job script: set hive.auto.convert.join=false;
    or set it to false in hive-site.xml.
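
    The equivalent hive-site.xml entry:

        <property>
            <name>hive.auto.convert.join</name>
            <value>false</value>
        </property>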
    Error log:
    SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
    2014-05-15 02:40:58     Starting to launch local task to process map join;      maximum memory = 1011351552
    2014-05-15 02:41:00     Processing rows:        200000  Hashtable size: 199999  Memory usage:   110092544       rate:   0.109
    2014-05-15 02:41:01     Processing rows:        300000  Hashtable size: 299999  Memory usage:   229345424       rate:   0.227
    2014-05-15 02:41:01     Processing rows:        400000  Hashtable size: 399999  Memory usage:   170296368       rate:   0.168
    2014-05-15 02:41:01     Processing rows:        500000  Hashtable size: 499999  Memory usage:   285961568       rate:   0.283
    2014-05-15 02:41:02     Processing rows:        600000  Hashtable size: 599999  Memory usage:   408727616       rate:   0.404
    2014-05-15 02:41:02     Processing rows:        700000  Hashtable size: 699999  Memory usage:   333867920       rate:   0.33
    2014-05-15 02:41:02     Processing rows:        800000  Hashtable size: 799999  Memory usage:   459541208       rate:   0.454
    2014-05-15 02:41:03     Processing rows:        900000  Hashtable size: 899999  Memory usage:   391524456       rate:   0.387
    2014-05-15 02:41:03     Processing rows:        1000000 Hashtable size: 999999  Memory usage:   514140152       rate:   0.508
    2014-05-15 02:41:03     Processing rows:        1029052 Hashtable size: 1029052 Memory usage:   546126888       rate:   0.54
    2014-05-15 02:41:03     Dump the hashtable into file: file:/tmp/hadoop/hive_2014-05-15_14-40-53_413_3806680380261480764/-local-10002/HashTable-Stage-4/MapJoin-mapfile01--.hashtable
    2014-05-15 02:41:06     Upload 1 File to: file:/tmp/hadoop/hive_2014-05-15_14-40-53_413_3806680380261480764/-local-10002/HashTable-Stage-4/MapJoin-mapfile01--.hashtable File size: 68300588
    2014-05-15 02:41:06     End of local task; Time Taken: 8.301 sec.
    Execution completed successfully
    Mapred Local Task Succeeded . Convert the Join into MapJoin
    Mapred Local Task Succeeded . Convert the Join into MapJoin
    Launching Job 2 out of 2

    Task log with the error:
    2014-05-15 13:52:54,007 FATAL [main] org.apache.hadoop.mapred.YarnChild: Error running child : java.lang.OutOfMemoryError: Java heap space
        at java.io.ObjectInputStream$HandleTable.grow(ObjectInputStream.java:3465)
        at java.io.ObjectInputStream$HandleTable.assign(ObjectInputStream.java:3271)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1789)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
        at java.util.HashMap.readObject(HashMap.java:1183)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1017)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1893)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
        at org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper.initilizePersistentHash(HashMapWrapper.java:128)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator.loadHashTable(MapJoinOperator.java:194)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator.cleanUpInputFileChangedOp(MapJoinOperator.java:212)
        at org.apache.hadoop.hive.ql.exec.Operator.cleanUpInputFileChanged(Operator.java:1377)
        at org.apache.hadoop.hive.ql.exec.Operator.cleanUpInputFileChanged(Operator.java:1381)

    Source: <http://xxxxxxxxxx:19888/jobhistory/logs/ST-L09-10-back-tj-yarn21:8034/container_1400064445468_0013_01_000002/attempt_1400064445468_0013_m_000000_0/hadoop/syslog/?start=0>
     



    9. Hive reports at run time: failed to evaluate: <unbound>=Class.new(); upgrade to 0.13.0
    SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
    SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
    OK
    Time taken: 2.28 seconds
    java.lang.InstantiationException: org.antlr.runtime.CommonToken
    Continuing ...
    java.lang.RuntimeException: failed to evaluate: <unbound>=Class.new();
    Continuing ...
    java.lang.InstantiationException: org.antlr.runtime.CommonToken
    Continuing ...
    java.lang.RuntimeException: failed to evaluate: <unbound>=Class.new();
    Continuing ...
    java.lang.InstantiationException: org.antlr.runtime.CommonToken
    Continuing ...
    java.lang.RuntimeException: failed to evaluate: <unbound>=Class.new();
    Continuing ...
    java.lang.InstantiationException: org.antlr.runtime.CommonToken
    Continuing ...
    java.lang.RuntimeException: failed to evaluate: <unbound>=Class.new();
    Continuing ...
    java.lang.InstantiationException: org.antlr.runtime.CommonToken
    Continuing ...

    Upgrading should fix this, but I do not know why, after upgrading to 0.12.0 and 0.13.0, every run failed with FileNotFoundException: HIVE_PLANxxxxxxxxx (see item 11). It is probably a configuration problem on my side; no fix for now.



    10. Creating a table or database in Hive fails: Couldnt obtain a new sequence (unique id) : You have an error in your SQL syntax
    Fix: the Hive metastore database was named yarn-hive, and the hyphen is not valid in an unquoted SQL identifier, so the generated SQL is malformed. Removing the hyphen from the database name solved the problem.
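
    For example, the metastore connection in hive-site.xml would then point at the renamed database (the host and the name yarn_hive below are assumed placeholders):

        <property>
            <name>javax.jdo.option.ConnectionURL</name>
            <value>jdbc:mysql://metastore-host:3306/yarn_hive?createDatabaseIfNotExist=true</value>
        </property>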
    Error log:
    FAILED: Error in metadata: MetaException(message:javax.jdo.JDOException: Couldnt obtain a new sequence (unique id) : You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '-hive.`SEQUENCE_TABLE` WHERE `SEQUENCE_NAME`='org.apache.hadoop.hive.metastore.m' at line 1
            at org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:596)
            at org.datanucleus.api.jdo.JDOPersistenceManager.jdoMakePersistent(JDOPersistenceManager.java:732)
            at org.datanucleus.api.jdo.JDOPersistenceManager.makePersistent(JDOPersistenceManager.java:752)
            at org.apache.hadoop.hive.metastore.ObjectStore.createTable(ObjectStore.java:643)
            at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
            at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
            at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
            at java.lang.reflect.Method.invoke(Method.java:606)
            at org.apache.hadoop.hive.metastore.RetryingRawStore.invoke(RetryingRawStore.java:111)
            at com.sun.proxy.$Proxy14.createTable(Unknown Source)
            at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_table_core(HiveMetaStore.java:1070)
            at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_table_with_environment_context(HiveMetaStore.java:1103)
            at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
            at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
            at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
            at java.lang.reflect.Method.invoke(Method.java:606)
            at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:103)
            at com.sun.proxy.$Proxy15.create_table_with_environment_context(Unknown Source)
            at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:466)
            at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:455)
            at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
            at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
            at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
            at java.lang.reflect.Method.invoke(Method.java:606)
            at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:74)
            at com.sun.proxy.$Proxy16.createTable(Unknown Source)
            at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:597)
            at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:3777)
            at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:256)
            at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:144)
            at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57)
            at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1362)
            at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1146)
            at org.apache.hadoop.hive.ql.Driver.run(Driver.java:952)
            at shark.SharkCliDriver.processCmd(SharkCliDriver.scala:338)
            at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
            at shark.SharkCliDriver$.main(SharkCliDriver.scala:235)
            at shark.SharkCliDriver.main(SharkCliDriver.scala)
    NestedThrowablesStackTrace:
    com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '-hive.`SEQUENCE_TABLE` WHERE `SEQUENCE_NAME`='org.apache.hadoop.hive.metastore.m' at line 1
            at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
            at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
            at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
            at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
            at com.mysql.jdbc.Util.handleNewInstance(Util.java:406)
            at com.mysql.jdbc.Util.getInstance(Util.java:381)
            at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:1030)
            at com.mysql.jdbc.SQLError.createSQLException(SQLError.java:956)
            at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3558)
            at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3490)
            at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:1959)
            at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2109)
            at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2648)
            at com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2077)
            at com.mysql.jdbc.PreparedStatement.executeQuery(PreparedStatement.java:2228)
            at org.apache.commons.dbcp.DelegatingPreparedStatement.executeQuery(DelegatingPreparedStatement.java:96)
            at org.apache.commons.dbcp.DelegatingPreparedStatement.executeQuery(DelegatingPreparedStatement.java:96)
            at org.datanucleus.store.rdbms.ParamLoggingPreparedStatement.executeQuery(ParamLoggingPreparedStatement.java:381)
            at org.datanucleus.store.rdbms.SQLController.executeStatementQuery(SQLController.java:504)
            at org.datanucleus.store.rdbms.valuegenerator.SequenceTable.getNextVal(SequenceTable.java:197)
            at org.datanucleus.store.rdbms.valuegenerator.TableGenerator.reserveBlock(TableGenerator.java:190)
            at org.datanucleus.store.valuegenerator.AbstractGenerator.reserveBlock(AbstractGenerator.java:305)
            at org.datanucleus.store.rdbms.valuegenerator.AbstractRDBMSGenerator.obtainGenerationBlock(AbstractRDBMSGenerator.java:170)
            at org.datanucleus.store.valuegenerator.AbstractGenerator.obtainGenerationBlock(AbstractGenerator.java:197)
            at org.datanucleus.store.valuegenerator.AbstractGenerator.next(AbstractGenerator.java:105)
            at org.datanucleus.store.rdbms.RDBMSStoreManager.getStrategyValueForGenerator(RDBMSStoreManager.java:2019)
            at org.datanucleus.store.AbstractStoreManager.getStrategyValue(AbstractStoreManager.java:1385)
            at org.datanucleus.ExecutionContextImpl.newObjectId(ExecutionContextImpl.java:3727)
            at org.datanucleus.state.JDOStateManager.setIdentity(JDOStateManager.java:2574)
            at org.datanucleus.state.JDOStateManager.initialiseForPersistentNew(JDOStateManager.java:526)
            at org.datanucleus.state.ObjectProviderFactoryImpl.newForPersistentNew(ObjectProviderFactoryImpl.java:202)
            at org.datanucleus.ExecutionContextImpl.newObjectProviderForPersistentNew(ExecutionContextImpl.java:1326)
            at org.datanucleus.ExecutionContextImpl.persistObjectInternal(ExecutionContextImpl.java:2123)
            at org.datanucleus.ExecutionContextImpl.persistObjectWork(ExecutionContextImpl.java:1972)
            at org.datanucleus.ExecutionContextImpl.persistObject(ExecutionContextImpl.java:1820)
            at org.datanucleus.ExecutionContextThreadedImpl.persistObject(ExecutionContextThreadedImpl.java:217)
            at org.datanucleus.api.jdo.JDOPersistenceManager.jdoMakePersistent(JDOPersistenceManager.java:727)
            at org.datanucleus.api.jdo.JDOPersistenceManager.makePersistent(JDOPersistenceManager.java:752)
            at org.apache.hadoop.hive.metastore.ObjectStore.createTable(ObjectStore.java:643)


    11. After installing Hive 0.12 and 0.13, running a job fails with FileNotFoundException: HIVE_PLAN
    Fix: possibly a Hive bug, possibly a misconfiguration somewhere; still unresolved.

    Error log:

    2014-05-16 10:27:07,896 INFO [main] org.apache.hadoop.mapred.MapTask: Processing split: Paths:/user/hive/warehouse/game_predata.db/game_login_log/dt=0000-00-00/000000_0:201326592+60792998,/user/hive/warehouse/game_predata.db/game_login_log/dt=0000-00-00/000001_0_copy_1:201326592+58503492,/user/hive/warehouse/game_predata.db/game_login_log/dt=0000-00-00/000001_0_copy_2:67108864+67108864,/user/hive/warehouse/game_predata.db/game_login_log/dt=0000-00-00/000001_0_copy_2:134217728+67108864,/user/hive/warehouse/game_predata.db/game_login_log/dt=0000-00-00/000002_0_copy_1:67108864+67108864InputFormatClass: org.apache.hadoop.mapred.TextInputFormat
     
    2014-05-16 10:27:07,954 WARN [main] org.apache.hadoop.mapred.YarnChild: Exception running child : java.lang.RuntimeException: java.io.FileNotFoundException: HIVE_PLAN14c8af69-0156-4633-9273-6a812eb91a4c (No such file or directory)
        at org.apache.hadoop.hive.ql.exec.Utilities.getMapRedWork(Utilities.java:230)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.init(HiveInputFormat.java:255)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.pushProjectionsAndFilters(HiveInputFormat.java:381)
        at org.apache.hadoop.hive.ql.io.HiveInputFormat.pushProjectionsAndFilters(HiveInputFormat.java:374)
        at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:540)
        at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:168)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:409)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:342)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:415)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
    Caused by: java.io.FileNotFoundException: HIVE_PLAN14c8af69-0156-4633-9273-6a812eb91a4c (No such file or directory)
        at java.io.FileInputStream.open(Native Method)
        at java.io.FileInputStream.<init>(FileInputStream.java:146)
        at java.io.FileInputStream.<init>(FileInputStream.java:101)
        at org.apache.hadoop.hive.ql.exec.Utilities.getMapRedWork(Utilities.java:221)
        ... 12 more
     
    2014-05-16 10:27:07,957 INFO [main] org.apache.hadoop.mapred.Task: Runnning cleanup for the task

    Source: <http://sxxxxxxxxxx:19888/jobhistory/logs/ST-L10-10-back-tj-yarn10:8034/container_1400136017046_0026_01_000030/attempt_1400136017046_0026_m_000000_0/hadoop>
     

    12. java.lang.OutOfMemoryError: GC overhead limit exceeded
    Analysis: this error type was introduced in JDK 6. It is a protective mechanism that fires when the GC spends a large amount of time reclaiming very little memory. The workaround is to disable the check by adding the JVM startup flag -XX:-UseGCOverheadLimit.
    Where to add it: a new mapred-site.xml entry named mapred.child.java.opts with the value -XX:-UseGCOverheadLimit.
    See item 14.
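
    For reference, the mapred-site.xml entry (per-task mapreduce.map.java.opts / mapreduce.reduce.java.opts, as in item 14, take precedence over this if set):

        <property>
            <name>mapred.child.java.opts</name>
            <value>-XX:-UseGCOverheadLimit</value>
        </property>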


    13. For efficiency, Hive 0.10.0 does not run map/reduce for simple queries, i.e. a bare select with no count, sum, or group by; it reads the HDFS files directly and filters them. The benefit is that no new MR job is launched, which speeds things up considerably; the drawback is an unfriendly user interface: with a large data set you may still wait a very long time with no feedback at all.

    This is very easy to change: hive-site.xml has a configuration parameter named

    hive.fetch.task.conversion

    Set it to more and simple queries skip map/reduce; set it to minimal and even trivial selects go through map/reduce.
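
    A minimal hive-site.xml entry for the fast path:

        <property>
            <name>hive.fetch.task.conversion</name>
            <value>more</value>
        </property>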


    See item 14.

    14. Running an MR job reports the following:

    Error log:
    Container [pid=30486,containerID=container_1400229396615_0011_01_000012] is running beyond physical memory limits. Current usage: 1.0 GB of 1 GB physical memory used; 1.7 GB of 2.1 GB virtual memory used. Killing container.
    Dump of the process-tree for container_1400229396615_0011_01_000012 :
    |- PID PPID PGRPID SESSID CMD_NAME USER_MODE_TIME(MILLIS) SYSTEM_TIME(MILLIS) VMEM_USAGE(BYTES) RSSMEM_USAGE(PAGES) FULL_CMD_LINE
    |- 30501 30486 30486 30486 (java) 3924 322 1720471552 262096 /opt/jdk1.7.0_55/bin/java -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN -Xmx1024m -XX:-UseGCOverheadLimit -Djava.io.tmpdir=/home/nodemanager/local/usercache/hadoop/appcache/application_1400229396615_0011/container_1400229396615_0011_01_000012/tmp -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/home/hadoop/logs/nodemanager/logs/application_1400229396615_0011/container_1400229396615_0011_01_000012 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA org.apache.hadoop.mapred.YarnChild 30.30.30.39 47925 attempt_1400229396615_0011_m_000000_0 12
    |- 30486 12812 30486 30486 (bash) 0 0 108642304 302 /bin/bash -c /opt/jdk1.7.0_55/bin/java -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN -Xmx1024m -XX:-UseGCOverheadLimit -Djava.io.tmpdir=/home/nodemanager/local/usercache/hadoop/appcache/application_1400229396615_0011/container_1400229396615_0011_01_000012/tmp -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/home/hadoop/logs/nodemanager/logs/application_1400229396615_0011/container_1400229396615_0011_01_000012 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA org.apache.hadoop.mapred.YarnChild 30.30.30.39 47925 attempt_1400229396615_0011_m_000000_0 12 1>/home/hadoop/logs/nodemanager/logs/application_1400229396615_0011/container_1400229396615_0011_01_000012/stdout 2>/home/hadoop/logs/nodemanager/logs/application_1400229396615_0011/container_1400229396615_0011_01_000012/stderr
    Container killed on request. Exit code is 143
    Container exited with a non-zero exit code 143
    


     
    Fix:

    The parameters below control memory for MapReduce task execution. If particular jobs need more, they can be set per job; otherwise configure them cluster-wide. If containers are being killed, raise them as appropriate (a consolidated snippet follows this list):
    mapreduce.map.memory.mb        maximum memory for a map task
    mapreduce.map.java.opts        -Xmx1024M    JVM options for map tasks
    mapreduce.reduce.memory.mb     maximum memory for a reduce task
    mapreduce.reduce.java.opts     -Xmx2560M    JVM options for reduce tasks
    mapreduce.task.io.sort.mb      512          higher memory limit while sorting data, for efficiency
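
    Put together in mapred-site.xml; the -Xmx values are the ones listed above, while the *.memory.mb container sizes are assumptions here (the container must be somewhat larger than the task heap):

        <property>
            <name>mapreduce.map.memory.mb</name>
            <value>1536</value> <!-- assumed: container larger than the 1024M map heap -->
        </property>
        <property>
            <name>mapreduce.map.java.opts</name>
            <value>-Xmx1024M</value>
        </property>
        <property>
            <name>mapreduce.reduce.memory.mb</name>
            <value>3072</value> <!-- assumed: container larger than the 2560M reduce heap -->
        </property>
        <property>
            <name>mapreduce.reduce.java.opts</name>
            <value>-Xmx2560M</value>
        </property>
        <property>
            <name>mapreduce.task.io.sort.mb</name>
            <value>512</value>
        </property>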

     
    Disabling the memory check:
    I never did figure out why some tasks using only 200-odd MB of physical memory showed virtual memory soaring to 2.7 GB; the memory checker seemed suspect, and some of my jobs genuinely need large amounts of memory, so for the sake of progress I simply turned the checks off, which cleared every memory problem at once:
    yarn.nodemanager.pmem-check-enabled false
    yarn.nodemanager.vmem-check-enabled false
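
    In yarn-site.xml:

        <property>
            <name>yarn.nodemanager.pmem-check-enabled</name>
            <value>false</value>
        </property>
        <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
        </property>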


    15. Adjustments to the YARN web UI:


    1. On the cluster page, application StartTime and FinishTime are shown in UTC; change them to UTC+8, i.e. Beijing time.

    The file is webapps/static/yarn.dt.plugins.js inside ./share/hadoop/yarn/hadoop-yarn-common-2.3.0.jar,

    or, in the source tree, /hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/yarn.dt.plugins.js

    Add this code:
    Date.prototype.Format = function (fmt) { // author: meizz
        var o = {
            "M+": this.getMonth() + 1, // month
            "d+": this.getDate(), // day
            "h+": this.getHours(), // hours
            "m+": this.getMinutes(), // minutes
            "s+": this.getSeconds(), // seconds
            "q+": Math.floor((this.getMonth() + 3) / 3), // quarter
            "S": this.getMilliseconds() // milliseconds
        };
        if (/(y+)/.test(fmt)) fmt = fmt.replace(RegExp.$1, (this.getFullYear() + "").substr(4 - RegExp.$1.length));
        for (var k in o)
            if (new RegExp("(" + k + ")").test(fmt)) fmt = fmt.replace(RegExp.$1, (RegExp.$1.length == 1) ? (o[k]) : (("00" + o[k]).substr(("" + o[k]).length)));
        return fmt;
    };



    At the same time, modify the renderHadoopDate function as follows:
    function renderHadoopDate(data, type, full) {
        if (type === 'display' || type === 'filter') {
            if (data === '0') { return "N/A"; }
            return new Date(parseInt(data)).Format("yyyy-MM-dd hh:mm:ss");
        }
        return data; // other render types (e.g. sorting) keep the raw value
    }




    16. Jobs using DistributedCache failed after migrating from MR1 to MR2. I had been using file names to tell the cache files apart, but after MR2 distributes the files only the bare file name is kept, e.g.:
    application_xxxxxxx/container_14xxxx/part-m-00000
    application_xxxxxxx/container_14xxxx/part-m-00001
    application_xxxxxxx/container_14xxxx/00000_0



    Fix: attach a symlink to each cache file, named parent-directory name + file name:
    DistributedCache.addCacheFile(new URI(path.toString() + "#" + path.getParent().getName() + "_" + path.getName()), configuration);


    This way the distributed cache files keep a distinguishing name.
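
    A fuller sketch of the pattern (the class and method names below are illustrative, not from the original job; DistributedCache.addCacheFile(URI, Configuration) is the Hadoop 2.x API used above, and the same #link fragment also works with the newer Job.addCacheFile):

        import java.io.BufferedReader;
        import java.io.File;
        import java.io.FileReader;
        import java.net.URI;

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.filecache.DistributedCache;
        import org.apache.hadoop.fs.Path;

        public class CacheNaming {
            // Driver side: register each cache file under a symlink that keeps the
            // parent directory name, so two part-m-00000 files stay distinguishable.
            public static void addNamedCacheFile(Path path, Configuration conf) throws Exception {
                String link = path.getParent().getName() + "_" + path.getName();
                DistributedCache.addCacheFile(new URI(path.toString() + "#" + link), conf);
            }

            // Task side: the symlink appears in the container's working directory
            // under the same composed name, so it can be opened directly.
            public static BufferedReader openNamedCacheFile(Path path) throws Exception {
                String link = path.getParent().getName() + "_" + path.getName();
                return new BufferedReader(new FileReader(new File(link)));
            }
        }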





    To be continued.
