Citus 是一个很强大且成功的pg 扩展,以下是pgspider 集成Citus 的使用说明
docker镜像
# Build stage: compiles the Citus extension from source and installs it via
# `make install`. The final stage copies /usr/local/pgspider out of this stage,
# so that is presumably where the install lands — confirm against the base image.
FROM dalongrong/pgspider:base AS build
WORKDIR /app
# Toolchain and development headers used to configure and build Citus.
RUN apt-get update && apt-get install -y \
        autoconf \
        automake \
        cmake \
        libcurl4-openssl-dev \
        libssl-dev \
        libtool \
        pkg-config
# NOTE(review): this clones the default branch at its current head, so the build
# is not reproducible and may stop compiling against this PostgreSQL version —
# consider pinning a release tag that is known to work.
RUN git clone https://github.com/citusdata/citus.git /app/postgresql-11.6/contrib/citus
# Use WORKDIR instead of `cd` so the build directory is explicit.
WORKDIR /app/postgresql-11.6/contrib/citus
RUN ./configure && make && make install
# Runtime stage: a slim Debian image that receives only the compiled
# /usr/local/pgspider tree from the build stage, then sets up the postgres
# user, locale, data directory and entrypoint.
# NOTE(review): debian:stretch is end-of-life; confirm its apt repositories are
# still reachable before rebuilding. The base is left unchanged here because the
# binaries copied from the build stage are presumably linked against it.
FROM debian:stretch-slim

ENV GOSU_VERSION=1.11

# Base packages: wget for the gosu download below, plus the SSL, curl and
# readline packages (presumably needed at runtime by the copied binaries — TODO confirm).
RUN apt-get update \
    && apt-get install -y wget openssl libcurl4-openssl-dev libreadline-dev \
    && rm -rf /var/lib/apt/lists/*

# explicitly set user/group IDs
RUN set -eux; \
    groupadd -r postgres --gid=999; \
# https://salsa.debian.org/postgresql/postgresql-common/blob/997d842ee744687d99a2b2d95c1083a2615c79e8/debian/postgresql-common.postinst#L32-35
    useradd -r -g postgres --uid=999 --home-dir=/var/lib/postgresql --shell=/bin/bash postgres; \
# also create the postgres user's home directory with appropriate permissions
# see https://github.com/docker-library/postgres/issues/274
    mkdir -p /var/lib/postgresql; \
    chown -R postgres:postgres /var/lib/postgresql

# Download gosu, make it executable, and smoke-test it by running `true` as nobody.
RUN wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)" \
    && chmod +x /usr/local/bin/gosu \
    && gosu nobody true

# make the "en_US.UTF-8" locale so postgres will be utf-8 enabled by default
RUN set -eux; \
    if [ -f /etc/dpkg/dpkg.cfg.d/docker ]; then \
# if this file exists, we're likely in "debian:xxx-slim", and locales are thus being excluded so we need to remove that exclusion (since we need locales)
        grep -q '/usr/share/locale' /etc/dpkg/dpkg.cfg.d/docker; \
        sed -ri '/\/usr\/share\/locale/d' /etc/dpkg/dpkg.cfg.d/docker; \
        ! grep -q '/usr/share/locale' /etc/dpkg/dpkg.cfg.d/docker; \
    fi; \
    apt-get update; apt-get install -y locales; rm -rf /var/lib/apt/lists/*; \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG=en_US.utf8

# install "nss_wrapper" in case we need to fake "/etc/passwd" and "/etc/group" (especially for OpenShift)
# https://github.com/docker-library/postgres/issues/359
# https://cwrap.org/nss_wrapper.html
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends libnss-wrapper; \
    rm -rf /var/lib/apt/lists/*

# Bring in the compiled PGSpider tree produced by the build stage.
COPY --from=build /usr/local/pgspider /usr/local/pgspider

# Make the sample config listen on all interfaces; the grep fails the build if
# the substitution did not produce the expected line.
RUN sed -ri "s!^#?(listen_addresses)\s*=\s*\S+.*!\1 = '*'!" /usr/local/pgspider/share/postgresql/postgresql.conf.sample \
    && grep -F "listen_addresses = '*'" /usr/local/pgspider/share/postgresql/postgresql.conf.sample

# Append the Citus preload setting to the sample config.
RUN echo "shared_preload_libraries='citus'" >> /usr/local/pgspider/share/postgresql/postgresql.conf.sample

RUN mkdir -p /var/run/postgresql && chown -R postgres:postgres /var/run/postgresql && chmod 2777 /var/run/postgresql

ENV PATH=$PATH:/usr/local/pgspider/bin
ENV PGDATA=/var/lib/postgresql/data
# NOTE(review): 777 is very permissive; presumably docker-entrypoint.sh tightens
# it at runtime — verify against the entrypoint script.
RUN mkdir -p "$PGDATA" && chown -R postgres:postgres "$PGDATA" && chmod 777 "$PGDATA"
# Declared after the directory is created and chowned, so those writes persist.
VOLUME /var/lib/postgresql/data

COPY docker-entrypoint.sh /usr/local/bin/
RUN ln -s usr/local/bin/docker-entrypoint.sh / # backwards compat
# NOTE(review): no USER directive — presumably the entrypoint drops privileges
# with gosu; confirm in docker-entrypoint.sh.
ENTRYPOINT ["docker-entrypoint.sh"]

EXPOSE 5432
CMD ["postgres"]
环境准备
- docker-compose 文件
没有使用官方的基于manager 自动添加节点的方式(官方是通过监听socket,根据label 自动处理worker 节点的上线以及下线)
# Two-container Citus setup: one master and one worker, both running the
# pgspider:citus image and sharing ./csvfiles, mounted at /opt/csv.
version: "3"
services:
  # Master node: published on host port 5432 and given a password.
  pg-citus-master:
    container_name: pg-citus-master
    image: dalongrong/pgspider:citus
    volumes:
      - "./csvfiles:/opt/csv"
    ports:
      - "5432:5432"
    environment:
      - "POSTGRES_PASSWORD=dalong"
  # Worker node: published on host port 5433; no password is configured here.
  pg-citus-worker:
    container_name: pg-citus-worker
    image: dalongrong/pgspider:citus
    volumes:
      - "./csvfiles:/opt/csv"
    ports:
      - "5433:5432"
- 启动
docker-compose up -d
使用Citus
注意:因为使用的是社区版本,缺少用户权限管理,简单的做法是只为master 节点配置密码,worker 节点不配置,但这样会有安全风险(最好做好集群的安全控制)
- 创建扩展
master 以及worker 都需要
-- Enable the Citus extension in the current database.
CREATE EXTENSION citus;
- 添加worker
-- Register the worker container (by hostname) and its port with the master.
SELECT master_add_node(
    'pg-citus-worker',
    '5432'
);
- 创建表
-- Schema for the example: companies, their campaigns, and the ads
-- belonging to each campaign.

CREATE TABLE companies (
    id         bigint NOT NULL,
    name       text NOT NULL,
    image_url  text,
    created_at timestamp without time zone NOT NULL,
    updated_at timestamp without time zone NOT NULL
);

CREATE TABLE campaigns (
    id                    bigint NOT NULL,
    company_id            bigint NOT NULL,
    name                  text NOT NULL,
    cost_model            text NOT NULL,
    state                 text NOT NULL,
    monthly_budget        bigint,
    blacklisted_site_urls text[],
    created_at            timestamp without time zone NOT NULL,
    updated_at            timestamp without time zone NOT NULL
);

CREATE TABLE ads (
    id                bigint NOT NULL,
    company_id        bigint NOT NULL,
    campaign_id       bigint NOT NULL,
    name              text NOT NULL,
    image_url         text,
    target_url        text,
    impressions_count bigint DEFAULT 0,
    clicks_count      bigint DEFAULT 0,
    created_at        timestamp without time zone NOT NULL,
    updated_at        timestamp without time zone NOT NULL
);

-- Primary keys; campaigns and ads use a composite key that includes company_id.
ALTER TABLE companies
    ADD PRIMARY KEY (id);
ALTER TABLE campaigns
    ADD PRIMARY KEY (id, company_id);
ALTER TABLE ads
    ADD PRIMARY KEY (id, company_id);
- 下载数据
下载到csvfiles 目录
# Download the tutorial CSV files into ./csvfiles, the directory the compose
# file mounts into both containers at /opt/csv.
mkdir -p csvfiles
# -f makes curl fail on HTTP errors instead of saving an error page as CSV;
# -sS hides the progress meter but keeps error messages; -L follows redirects.
curl -fsSL -o csvfiles/companies.csv https://examples.citusdata.com/tutorial/companies.csv
curl -fsSL -o csvfiles/campaigns.csv https://examples.citusdata.com/tutorial/campaigns.csv
curl -fsSL -o csvfiles/ads.csv https://examples.citusdata.com/tutorial/ads.csv
- 创建分布式表
-- Distribute the tables: companies by its id, campaigns and ads by company_id.
SELECT create_distributed_table(
    'companies',
    'id'
);
SELECT create_distributed_table(
    'campaigns',
    'company_id'
);
SELECT create_distributed_table(
    'ads',
    'company_id'
);
- 导入数据
容器内部操作,master 节点
-- Run in psql inside the master container. \copy is a psql meta-command that
-- reads the file on the psql client side; the absolute path is the directory
-- the compose file mounts at /opt/csv, so it works from any working directory.
\copy companies from '/opt/csv/companies.csv' with csv
\copy campaigns from '/opt/csv/campaigns.csv' with csv
\copy ads from '/opt/csv/ads.csv' with csv
- 数据查询
-- Read back every row of the ads table.
SELECT *
FROM ads;
效果
说明
以上是pgspider 与Citus 简单的集成,完备性测试还没有,不确定是否有bug
参考资料
https://hub.docker.com/r/citusdata/citus
https://github.com/citusdata/citus
https://hub.docker.com/repository/docker/dalongrong/pgspider
https://github.com/rongfengliang/pgspider-docker
https://github.com/rongfengliang/pgspider-citus-learning