zoukankan      html  css  js  c++  java
  • Hue 工具使用


    Hue 是一个 Web 接口的 Hadoop 分析数据工具,由 Cloudera 公司开源

    官方网址

    Github 地址 -> 安装方法

    文档地址

    一.Build

    1.ubuntu安装所需环境(以Github为准)

    # JDK
    # maven
    # 其他环境
    $ sudo apt-get install git ant gcc g++ libffi-dev libkrb5-dev libmysqlclient-dev libsasl2-dev libsasl2-modules-gssapi-mit libsqlite3-dev libssl-dev libxml2-dev libxslt-dev make maven libldap2-dev python-dev python-setuptools libgmp3-dev
    

    2.build

    $ make apps
    

    二.配置

    1.基础配置(位于官方文档3.1节)

    secret_key=jFE93j;2[290-eiw.KEiwN2s3['d;/.q[eIW^y#e=+Iei*@Mn<qW5o
    
    http_host=cen-ubuntu
    http_port=8888
    
    time_zone=Asia/Shanghai
    

    2.WebHDFS 配置

    # hdfs-site.xml(默认为true)
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    
    # core-site.xml 配置代理
    <property>
        <name>hadoop.proxyuser.hue.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hue.groups</name>
        <value>*</value>
    </property>
    
    # hue.ini 配置 3 处,若配置 HA 需要配置 logical_name 
    [hadoop]
    
      # Configuration for HDFS NameNode
      # ------------------------------------------------------------------------
      [[hdfs_clusters]]
        # HA support by using HttpFs
    
        [[[default]]]
          # Enter the filesystem uri
          fs_defaultfs=hdfs://cen-ubuntu:8020
    
          # NameNode logical name.
          ## logical_name=
    
          # Use WebHdfs/HttpFs as the communication mechanism.
          # Domain should be the NameNode or HttpFs host.
          # Default port is 14000 for HttpFs.
          webhdfs_url=http://cen-ubuntu:50070/webhdfs/v1
    
          # Change this if your HDFS cluster is Kerberos-secured
          ## security_enabled=false
    
          # In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
          # have to be verified against certificate authority
          ## ssl_cert_ca_verify=True
    
          # Directory of the Hadoop configuration
          hadoop_conf_dir=/opt/cdh5.3.6/hadoop-2.6.0-cdh5.12.0/etc/hadoop
    

    3.YARN 配置

    # hue.ini
    [[yarn_clusters]]
    
      [[[default]]]
        # Enter the host on which you are running the ResourceManager
        resourcemanager_host=cen-ubuntu
    
        # The port where the ResourceManager IPC listens on
        resourcemanager_port=8032
    
        # Whether to submit jobs to this cluster
        submit_to=True
    
        # Resource Manager logical name (required for HA)
        ## logical_name=
    
        # Change this if your YARN cluster is Kerberos-secured
        ## security_enabled=false
    
        # URL of the ResourceManager API
        resourcemanager_api_url=http://cen-ubuntu:8088
    
        # URL of the ProxyServer API
        proxy_api_url=http://cen-ubuntu:8088
    
        # URL of the HistoryServer API
        history_server_api_url=http://cen-ubuntu:19888
    
        # URL of the Spark History Server
        ## spark_history_server_url=http://localhost:18088
    
        # In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
        # have to be verified against certificate authority
        ## ssl_cert_ca_verify=True
    

    4.临时文件目录

    [filebrowser]
      # Location on local filesystem where the uploaded archives are temporarily stored.
      archive_upload_tempdir=/tmp
    

    5.Hive 配置(需要先启动 HiveServer2 服务和 Hive metastore 服务)

    # hive-site.xml
    <!-- 配置server2 的地址和端口 -->
    <property>
      <name>hive.server2.thrift.port</name>
      <value>10000</value>
      <description>Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'.</description>
    </property>
    
    <property>
      <name>hive.server2.thrift.bind.host</name>
      <value>cen-ubuntu</value>
      <description>Bind host on which to run the HiveServer2 Thrift service.</description>
    </property>
    
    # 启动hiveserver2
    $ bin/hiveserver2 
    
    # hive-site.xml
    <!-- 配置远程 remote metastore 的uri 见hive官方文档-->
    <property>
      <name>hive.metastore.uris</name>
      <value>thrift://cen-ubuntu:9083</value>
    </property>
      
    # 启动 metastore server
    hive --service metastore
    
    # hue.ini
    [beeswax]
    
      # Host where HiveServer2 is running.
      # If Kerberos security is enabled, use fully-qualified domain name (FQDN).
      hive_server_host=cen-ubuntu
    
      # Port where HiveServer2 Thrift server runs on.
      hive_server_port=10000
    
      # Hive configuration directory, where hive-site.xml is located
      hive_conf_dir=/opt/cdh5.3.6/hive-1.1.0-cdh5.12.0/conf
    
      # Timeout in seconds for thrift calls to Hive service
      server_conn_timeout=120
    

    6.database 连接管理关系型数据库(SQLite3 是 Hue 自带的数据库)(注意:需要删除[[[xxx]]]前注释)

    ###########################################################################
    # Settings for the RDBMS application
    ###########################################################################
    
    [librdbms]
      # The RDBMS app can have any number of databases configured in the databases
      # section. A database is known by its section name
      # (IE sqlite, mysql, psql, and oracle in the list below).
    
      [[databases]]
        # sqlite configuration.
        [[[sqlite]]]
          # Name to show in the UI.
          nice_name=SQLite
    
          # For SQLite, name defines the path to the database.
          name=/opt/cdh5.3.6/hue-3.9.0-cdh5.12.0/desktop/desktop.db
    
          # Database backend to use.
          engine=sqlite
    
          # Database options to send to the server when connecting.
          # https://docs.djangoproject.com/en/1.4/ref/databases/
          ## options={}
    
        # mysql, oracle, or postgresql configuration.
        [[[mysql]]]
          # Name to show in the UI.
          nice_name="My SQL DB"
    
          # For MySQL and PostgreSQL, name is the name of the database.
          # For Oracle, Name is instance of the Oracle server. For express edition
          # this is 'xe' by default.
          name=mysqldb
    
          # Database backend to use. This can be:
          # 1. mysql
          # 2. postgresql
          # 3. oracle
          engine=mysql
    
          # IP or hostname of the database to connect to.
          host=cen-ubuntu
    
          # Port the database server is listening to. Defaults are:
          # 1. MySQL: 3306
          # 2. PostgreSQL: 5432
          # 3. Oracle Express Edition: 1521
          port=3306
    
          # Username to authenticate with when connecting to the database.
          user=root
    
          # Password matching the username to authenticate with when
          # connecting to the database.
          password=ubuntu
    
          # Database options to send to the server when connecting.
          # https://docs.djangoproject.com/en/1.4/ref/databases/
          ## options={}
    

    7.Oozie 配置

    [liboozie]
      # The URL where the Oozie service runs on. This is required in order for
      # users to submit jobs. Empty value disables the config check.
      oozie_url=http://cen-ubuntu:11000/oozie
    
      # Requires FQDN in oozie_url if enabled
      ## security_enabled=false
    
      # Location on HDFS where the workflows/coordinator are deployed when submitted.
      remote_deployement_dir=/user/cen/examples/apps
    
      [oozie]
        # Location on local FS where the examples are stored.
        local_data_dir=/opt/cdh5.3.6/oozie-4.1.0-cdh5.12.0/examples
    
        # Location on local FS where the data for the examples is stored.
        sample_data_dir=/opt/cdh5.3.6/oozie-4.1.0-cdh5.12.0/examples/input-data
    
        # Location on HDFS where the oozie examples and workflows are stored.
        # Parameters are $TIME and $USER, e.g. /user/$USER/hue/workspaces/workflow-$TIME
        remote_data_dir=/user/cen/examples/apps/
    

    三.运行

    # 0.0.0.0意味着所有ip都能访问,本来是在hue.ini中配置的,但是配置不生效,因此手动设置
    $ build/env/bin/hue runserver 0.0.0.0:8000
  • 相关阅读:
    给出两个 非空 的链表用来表示两个非负的整数。其中,它们各自的位数是按照 逆序 的方式存储的,并且它们的每个节点只能存储 一位 数字。
    11
    实战 迁移学习 VGG19、ResNet50、InceptionV3 实践 猫狗大战 问题
    tx2系统备份与恢复
    如何在Ubuntu 18.04上安装和卸载TeamViewer
    bzoj 3732 Network (kruskal重构树)
    bzoj2152 聪聪可可 (树形dp)
    牛客 216D 消消乐 (二分图最小点覆盖)
    牛客 197E 01串
    Wannafly挑战赛23
  • 原文地址:https://www.cnblogs.com/cenzhongman/p/7261170.html
Copyright © 2011-2022 走看看