zoukankan      html  css  js  c++  java
  • airbyte 基于singer 扩展的EL 平台

    airbyte 是基于singer 协议扩展的EL 平台,支持了不错的可视化操作

    支持的特性

    • 方便的数据操作(配置+api)
    • 方便的connector 构建操作
    • 开源可以私有化部署

    参考docker-compose 运行

    • .env
    # Tag applied to every airbyte/* image in the compose file.
    VERSION=0.14.1-alpha
    # Database name and account handed to the db, scheduler and server services.
    DATABASE_DB=airbyte
    DATABASE_USER=docker
    DATABASE_PASSWORD=docker
    # Container-side mount targets for configuration data and job workspaces.
    CONFIG_ROOT=/data
    WORKSPACE_ROOT=/tmp/workspace
    # Names given to the Docker volumes declared in the compose file.
    DATA_DOCKER_MOUNT=airbyte_data
    DB_DOCKER_MOUNT=airbyte_db
    WORKSPACE_DOCKER_MOUNT=airbyte_workspace
    # TODO(cgardens): once raw directories are mounted instead of named volumes,
    # each *_DOCKER_MOUNT must be identical to the matching *_ROOT.
    # Issue: https://github.com/airbytehq/airbyte/issues/578
    LOCAL_ROOT=/tmp/airbyte_local
    LOCAL_DOCKER_MOUNT=/tmp/airbyte_local
    # Passed through to the scheduler, server and webapp services.
    TRACKING_STRATEGY=segment
    # TODO(cgardens): workaround for a behavior change in docker compose. Every
    # *_PARENT directory MUST already exist on the host filesystem and MUST be
    # a parent of the corresponding *_ROOT.
    # Issue: https://github.com/airbytehq/airbyte/issues/577
    HACK_LOCAL_ROOT_PARENT=/tmp
    • docker-compose 文件
    # Compose file format version used by this stack.
    version: "3.7"
    # Service definitions; ${...} placeholders are filled in from the .env file
    # (or the calling shell's environment).
    services:
      # hook in case we need to add init behavior
      # every root service (no depends_on) should depend on init
      init:
        image: airbyte/init:${VERSION}
        container_name: init
        command: /bin/sh -c "./scripts/create_mount_directories.sh /local_parent ${HACK_LOCAL_ROOT_PARENT} ${LOCAL_ROOT}"
        environment:
          - LOCAL_ROOT=${LOCAL_ROOT}
          - HACK_LOCAL_ROOT_PARENT=${HACK_LOCAL_ROOT_PARENT}
        volumes:
          - ${HACK_LOCAL_ROOT_PARENT}:/local_parent
      db:
        image: airbyte/db:${VERSION}
        container_name: airbyte-db
        environment:
          - POSTGRES_USER=${DATABASE_USER}
          - POSTGRES_PASSWORD=${DATABASE_PASSWORD}
          - POSTGRES_DB=db-${DATABASE_DB}
        volumes:
          - db:/var/lib/postgresql/data
      seed:
        image: airbyte/seed:${VERSION}
        container_name: airbyte-data-seed
        # Pre-populate the volume if it is empty.
        # See: https://docs.docker.com/storage/volumes/#populate-a-volume-using-a-container
        volumes:
          - data:/app/seed
      scheduler:
        image: airbyte/scheduler:${VERSION}
        container_name: airbyte-scheduler
        environment:
          - DATABASE_USER=${DATABASE_USER}
          - DATABASE_PASSWORD=${DATABASE_PASSWORD}
          - DATABASE_URL=jdbc:postgresql://db:5432/${DATABASE_DB}
          - WAIT_BEFORE_HOSTS=5
          - WAIT_HOSTS_TIMEOUT=45
          - WAIT_HOSTS=db:5432
          - WORKSPACE_ROOT=${WORKSPACE_ROOT}
          - WORKSPACE_DOCKER_MOUNT=${WORKSPACE_DOCKER_MOUNT}
          - LOCAL_DOCKER_MOUNT=${LOCAL_DOCKER_MOUNT}
          - CONFIG_ROOT=${CONFIG_ROOT}
          - TRACKING_STRATEGY=${TRACKING_STRATEGY}
          - AIRBYTE_VERSION=${VERSION}
          - AIRBYTE_ROLE=${AIRBYTE_ROLE:-}
        volumes:
          - /var/run/docker.sock:/var/run/docker.sock
          - workspace:${WORKSPACE_ROOT}
          - ${LOCAL_ROOT}:${LOCAL_ROOT}
          - data:${CONFIG_ROOT}
      server:
        image: airbyte/server:${VERSION}
        container_name: airbyte-server
        environment:
          - DATABASE_USER=${DATABASE_USER}
          - DATABASE_PASSWORD=${DATABASE_PASSWORD}
          - DATABASE_URL=jdbc:postgresql://db:5432/${DATABASE_DB}
          - WAIT_BEFORE_HOSTS=5
          - WAIT_HOSTS_TIMEOUT=45
          - WAIT_HOSTS=db:5432
          - CONFIG_ROOT=${CONFIG_ROOT}
          - TRACKING_STRATEGY=${TRACKING_STRATEGY}
          - AIRBYTE_VERSION=${VERSION}
          - AIRBYTE_ROLE=${AIRBYTE_ROLE:-}
          - WORKSPACE_ROOT=${WORKSPACE_ROOT}
        ports:
          - 8001:8001
        volumes:
          - workspace:${WORKSPACE_ROOT}
          - data:${CONFIG_ROOT}
      webapp:
        image: airbyte/webapp:${VERSION}
        container_name: airbyte-webapp
        ports:
          - 8000:80
        environment:
          - AIRBYTE_ROLE=${AIRBYTE_ROLE:-}
          - AIRBYTE_VERSION=${VERSION}
          - API_URL=${API_URL}
          - IS_DEMO=${IS_DEMO:-}
          - PAPERCUPS_STORYTIME=${PAPERCUPS_STORYTIME:-}
          - TRACKING_STRATEGY=${TRACKING_STRATEGY}
    # Named volumes referenced by the services; the actual Docker volume names
    # are taken from the *_DOCKER_MOUNT variables.
    volumes:
      db:
        name: "${DB_DOCKER_MOUNT}"
      data:
        name: "${DATA_DOCKER_MOUNT}"
      workspace:
        name: "${WORKSPACE_DOCKER_MOUNT}"

    运行参考效果

    说明

    以下的一些参考链接很值得学习。airbyte 在 singer 的基础上自己实现了一套协议,基于 singer 的周边 EL 工具也有很多,
    gitlab 基于 singer 扩展的 meltano 就是一个很不错的设计(singer+dbt)。

    参考资料

    https://docs.airbyte.io/integrations
    https://docs.airbyte.io/integrations
    https://gitlab.com/meltano/meltano
    https://meltano.com/docs/production.html
    https://airbyte.io/articles/data-engineering-thoughts/why-you-should-not-build-your-data-pipeline-on-top-of-singer/
    https://airbyte.io/articles/data-engineering-thoughts/how-we-leveraged-singer-for-our-mvp/

  • 相关阅读:
    SQL GROUPING 运算符
    SQL 中各种各样的函数
    SQL 窗口函数简介
    [OpenWrt] 简单的策略路由
    简略讲解OpenWrt的路由配置(单播路由/静态路由、策略路由、IGMP组播路由)
    WPF中XAML中使用String.Format格式化字符串示例
    链接服务器使用OPENQUERY性能提升
    VSCode中不能使用cnpm的解决方案
    SQL执行时间计算常用的两种方法
    C# 实现简体中文和繁体中文的转换
  • 原文地址:https://www.cnblogs.com/rongfengliang/p/14350074.html
Copyright © 2011-2022 走看看