# Fetch the Go command-line tools (dep and the ginkgo CLI) ahead of the PXF build.
go get github.com/golang/dep/cmd/dep
go get github.com/onsi/ginkgo/ginkgo
# Unpack the PXF 5.11.2 archive under /opt and enter the extracted source tree.
tar -xzvf /opt/pxf.5.11.2.tar.gz -C /opt
cd /opt/pxf-5.11.2
# Compile PXF.
make
# Run the unit tests.
make test
# Install PXF into /usr/local/greenplum-db-6.7.0/pxf, then initialize and start it.
# Every PXF command below is located through $GPHOME, so stop here when it is unset
# rather than silently operating on /pxf/... paths.
: "${GPHOME:?GPHOME must be set before installing PXF}"
cd /opt/pxf-5.11.2
# NOTE(review): this install prefix is expected to equal $GPHOME/pxf -- confirm for your host.
PXF_HOME=/usr/local/greenplum-db-6.7.0/pxf make install
export PXF_CONF="$GPHOME/pxf/conf"
"$GPHOME/pxf/bin/pxf" cluster init
"$GPHOME/pxf/bin/pxf" cluster start
# Copy the MySQL JDBC driver jar into PXF's lib directory.
# NOTE(review): the jar path is relative, so it must exist in the current
# directory (/opt/pxf-5.11.2 at this point) -- adjust the path if it lives elsewhere.
cp mysql-connector-java.jar "$PXF_CONF/lib"
----------------------------------------------------------------
# Create the JDBC server definition first so that the sync below distributes it.
# NOTE(review): the external tables defined later use SERVER=my_xxgl2 and
# SERVER=pg_xxgl3, while this directory is named xxgl2 -- confirm which server
# name is intended; the SERVER value must match a directory under conf/servers.
mkdir -p "$GPHOME/pxf/conf/servers/xxgl2"
cp "$GPHOME/pxf/conf/templates/jdbc-site.xml" "$GPHOME/pxf/conf/servers/xxgl2/"
# Fill in the connection settings in servers/xxgl2/jdbc-site.xml before continuing.
# 4. Sync the configuration across the cluster
"$GPHOME/pxf/bin/pxf" cluster sync
# 5. Restart PXF
"$GPHOME/pxf/bin/pxf" cluster restart
-- Create the PXF extension in the current database.
-- IF NOT EXISTS makes the statement safe to re-run once the extension is registered.
CREATE EXTENSION IF NOT EXISTS pxf;
-- 6. Create an external table that reads remote data through the PXF Jdbc profile.
-- NOTE(review): the original heading says this reads PostgreSQL data, but the server
-- is named my_xxgl2 and a MySQL driver jar was installed earlier -- presumably this
-- one targets MySQL; confirm.
-- IF EXISTS keeps the statement re-runnable when the table has not been created yet.
DROP EXTERNAL TABLE IF EXISTS my_user_temp;
CREATE EXTERNAL TABLE my_user_temp (
    id   int,
    name char(20)
)
LOCATION ('pxf://user?PROFILE=Jdbc&SERVER=my_xxgl2')
FORMAT 'CUSTOM' (FORMATTER='pxfwritable_import');
-- External table over public.glfmkhjl, read through the Jdbc profile on server pg_xxgl3.
-- IF EXISTS keeps the statement re-runnable when the table has not been created yet.
DROP EXTERNAL TABLE IF EXISTS pg_user_temp;
CREATE EXTERNAL TABLE pg_user_temp (
    id   varchar(7),
    name varchar(20)
)
LOCATION ('pxf://public.glfmkhjl?PROFILE=Jdbc&SERVER=pg_xxgl3')
FORMAT 'CUSTOM' (FORMATTER='pxfwritable_import');
--------------------------------
# 4. Sync the configuration
${GPHOME}/pxf/bin/pxf cluster sync
# 5. Start the service
${GPHOME}/pxf/bin/pxf cluster start
-- 6. Create an external table that reads PostgreSQL data
--    (public.dw_vehicle_gps1_days30 via the Jdbc profile, server pgsrvcfg).
CREATE EXTERNAL TABLE dw_vehicle_gps1_days30_temp (
    vehicle_udid  varchar(64),
    gps_timestamp int,
    accuracy      varchar(64),
    angle         varchar(64),
    lat_lng       varchar(64),
    lat_lng_gps   varchar(64),
    riding_status smallint,
    speed         varchar(64),
    startnum      smallint,
    vehicle_type  smallint,
    collect_fre   smallint,
    fly_spot      varchar(64),
    create_time   int,
    signal_type   smallint,
    vehicle_id    int,
    pos_type      smallint
)
LOCATION ('pxf://public.dw_vehicle_gps1_days30?PROFILE=Jdbc&SERVER=pgsrvcfg')
FORMAT 'CUSTOM' (FORMATTER='pxfwritable_import');
------------------------------------------------------------
# Connecting to Hadoop
# Install the client packages on the hosts listed in seg_hosts:
#   gpssh -e -v -f seg_hosts yum -y install hadoop-client hive hbase
# References:
# 1. https://gpdb.docs.pivotal.io/6-3/pxf/intro_pxf.html
# 2. https://gpdb.docs.pivotal.io/6-3/pxf/access_hdfs.html
-- Hadoop external table: comma-delimited text file read with the hdfs:text profile.
CREATE EXTERNAL TABLE pxf_hdfs_text (
    location    text,
    month       text,
    num_orders  int,
    total_sales double precision
)
LOCATION ('pxf://data/pxf_examples/pxf_hdfs_simple.txt?PROFILE=hdfs:text')
FORMAT 'TEXT' (delimiter=E',');
-- HBase external table over order_info, read with the HBase profile.
-- The column names are double-quoted because they contain a colon.
CREATE EXTERNAL TABLE orderinfo_hbase (
    "product:name"          varchar,
    "shipping_info:zipcode" int
)
LOCATION ('pxf://order_info?PROFILE=HBase')
FORMAT 'CUSTOM' (FORMATTER='pxfwritable_import');
------------------------------------------------------
-- Hive external table example.
-- Run inside Hive: create the table, then load the sample data file into it.
CREATE TABLE sales_info (
    location         STRING,
    month            STRING,
    number_of_orders INT,
    total_sales      DOUBLE
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '/tmp/pxf_hive_datafile.txt'
    INTO TABLE sales_info;
-- Query the loaded data (all four columns, in table order).
SELECT
    location,
    month,
    number_of_orders,
    total_sales
FROM sales_info;
-- Create the PXF external table over Hive table default.sales_info (Hive profile).
CREATE EXTERNAL TABLE salesinfo_hiveprofile (
    location    text,
    month       text,
    num_orders  int,
    total_sales double precision
)
LOCATION ('pxf://default.sales_info?PROFILE=Hive')
FORMAT 'custom' (FORMATTER='pxfwritable_import');