  • Using bloom to accelerate sqler + gitbase code statistics analysis

    gitbase exposes a MySQL-compatible service, which makes querying the repositories easy, but queries that need long computation are painful.
    bloom can solve this purely through configuration; below is a working setup plus fixes for a few problems hit along the way.

    Environment setup

    • docker-compose file
     
    version: "3"
    services: 
      lb: 
        image: openresty/openresty:alpine
        volumes:
        - "./nginx-lb.conf:/usr/local/openresty/nginx/conf/nginx.conf"
        ports:
        - "9000:80"
      bloom: 
        image: dalongrong/bloom:v1.28.0
        volumes: 
        - "./bloom.cfg:/etc/bloom.cfg"
        ports: 
        - "8811:8811"
        - "9001:8080"
      redis:
        image: redis
        ports:
        - "6379:6379"
      gitbase:
        container_name: gitbase
        hostname: gitbase
        image: srcd/gitbase:v0.24.0-rc2
        volumes: 
        - "./git-demos:/opt/repos"
        ports:
        - "3306:3306"
      sqler:
        image: dalongrong/sqler:2.2-pprof
        volumes:
        - "./sqler.hcl:/app/config.example.hcl"
        environment:
        - "DSN=root@tcp(gitbase:3306)/gitbase?multiStatements=true"
        ports:
        - "3678:3678"
        - "8025:8025"
    • How it works
      openresty proxies the API (automatically adding the headers and proxy settings bloom requires), bloom caches the API responses, sqler exposes the REST API,
      and gitbase provides the git SQL engine. A quick client-side sketch of the flow follows.
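
    A minimal client sketch of that flow, assuming the compose stack above is running locally (so the lb listens on localhost:9000). It calls /codecounts twice and prints the latency together with bloom's cache-status response header; the Bloom-Status header name and its HIT/MISS values are taken from bloom's documentation as I remember it, so treat them as an assumption.

    package main

    import (
        "fmt"
        "io"
        "net/http"
        "time"
    )

    // fetch issues one GET and reports status, bloom cache status, body size and latency.
    func fetch(url string) {
        start := time.Now()
        resp, err := http.Get(url)
        if err != nil {
            fmt.Println("request failed:", err)
            return
        }
        defer resp.Body.Close()
        body, _ := io.ReadAll(resp.Body)
        fmt.Printf("status=%d bloom-status=%q bytes=%d took=%s\n",
            resp.StatusCode, resp.Header.Get("Bloom-Status"), len(body), time.Since(start))
    }

    func main() {
        url := "http://localhost:9000/codecounts"
        fetch(url) // first call: expected to miss the cache and go through sqler/gitbase
        fetch(url) // second call: expected to be answered from redis by bloom
    }

    Once the result is in redis, the second call should come back much faster than the first (subject to the ttl_default configured below).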

    Configuration

    • openresty
     
    worker_processes 1;
    user root;  
    events {
        worker_connections 1024;
    }
    http {
        include mime.types;
        default_type application/octet-stream;
        lua_need_request_body on;
        gzip on;
        resolver 127.0.0.11 ipv6=off;          
        real_ip_header X-Forwarded-For;
        real_ip_recursive on;
        server {
            listen 80;
            charset utf-8;
            default_type text/html;
            location / {
                 default_type text/plain; 
                 index index.html;
            }
            location /codecounts {
                proxy_pass http://bloom:8080;
                proxy_set_header Bloom-Request-Shard 0;
                proxy_set_header Host $host;
                proxy_read_timeout 10000;
                proxy_send_timeout 10000;
                proxy_buffer_size 1M; 
                proxy_buffers 8 1M; 
                proxy_busy_buffers_size 1M; 
                proxy_temp_file_write_size 1M;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Host $server_name;
            }
            location /repos {
                proxy_pass http://bloom:8080;
                # key point: dynamically add the shard header bloom requires
                proxy_set_header Bloom-Request-Shard 0;
                proxy_set_header Host $host;
                proxy_read_timeout 10000;
                proxy_send_timeout 10000;
                proxy_buffer_size 1M; 
                proxy_buffers 8 1M; 
                proxy_busy_buffers_size 1M; 
                proxy_temp_file_write_size 1M;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Host $server_name;
            }
            location /apps {
                proxy_pass http://bloom:8080;
                # key point: dynamically add the shard header bloom requires
                proxy_set_header Bloom-Request-Shard 0;
                proxy_set_header Host $host;
                proxy_read_timeout 10000;
                proxy_send_timeout 10000;
                proxy_buffer_size 1M; 
                proxy_buffers 8 1M; 
                proxy_busy_buffers_size 1M; 
                proxy_temp_file_write_size 1M;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Host $server_name;
            }
            error_page 500 502 503 504 /50x.html;
            location = /50x.html {
                root html;
            }
        }
    }
    • bloom
      A TOML-based configuration; shard = 0 below matches the Bloom-Request-Shard header openresty sets, and the proxy shard's host/port point at sqler's REST listener (8025).
     
    [server]
    log_level = "debug"
    inet = "0.0.0.0:8080"
    [control]
    inet = "0.0.0.0:8811"
    tcp_timeout = 900
    [proxy]
    [[proxy.shard]]
    shard = 0
    host = "sqler"
    port = 8025
    [cache]
    ttl_default = 600
    executor_pool = 64
    disable_read = false
    disable_write = false
    compress_body = true
    [redis]
    host = "redis"
    port = 6379
    database = 0
    pool_size = 80
    max_lifetime_seconds = 60
    idle_timeout_seconds = 600
    connection_timeout_seconds = 1
    max_key_size = 256000
    max_key_expiration = 2592000
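
    To confirm bloom is actually writing cache entries into redis, a small sketch like the following can be used; it assumes the compose stack above (redis published on localhost:6379, database 0) and the github.com/go-redis/redis/v8 client. bloom's exact key layout is not documented here, so it simply scans every key and prints its TTL.

    package main

    import (
        "context"
        "fmt"

        "github.com/go-redis/redis/v8"
    )

    func main() {
        ctx := context.Background()
        rdb := redis.NewClient(&redis.Options{Addr: "localhost:6379", DB: 0})

        var cursor uint64
        total := 0
        for {
            // walk the keyspace page by page instead of using KEYS *
            keys, next, err := rdb.Scan(ctx, cursor, "*", 100).Result()
            if err != nil {
                panic(err)
            }
            for _, k := range keys {
                fmt.Printf("%s (ttl=%s)\n", k, rdb.TTL(ctx, k).Val())
            }
            total += len(keys)
            cursor = next
            if cursor == 0 {
                break
            }
        }
        fmt.Println("cached entries:", total)
    }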
     
     
    • sqler code-statistics queries (sqler.hcl)
    codecounts {
        exec = <<SQL
     SET inmemory_joins = 1;
     SET SQL_SELECT_LIMIT=200;
    SELECT
     repo,
     MONTH,
     YEAR,
     DAY,
 SUM( JSON_EXTRACT( info, '$.Code.Additions' ) ) AS code_lines_added,
 SUM( JSON_EXTRACT( info, '$.Code.Deletions' ) ) AS code_lines_removed 
    FROM
     (
     SELECT
      repository_id AS repo,
      commit_stats ( repository_id, commit_hash ) AS info,
      commits.commit_author_when AS commit_when,
      YEAR ( committer_when ) AS YEAR,
      MONTH ( committer_when ) AS MONTH,
      DAY ( committer_when ) AS DAY 
     FROM
      ref_commits
      NATURAL JOIN commits 
     ) a 
    GROUP BY
     repo,
     YEAR,
     MONTH,
        DAY
    ORDER BY
     MONTH,
     YEAR,
     DAY
     limit 500;
    SQL
    }
    cache {
        cron = "* 1 * * *"
        trigger {
            webhook = "http://lb/codecounts"
        }
    }
    apps {
       exec = <<SQL
       SET SQL_SELECT_LIMIT=200; 
          SET inmemory_joins = 1;
         select 1;
       SQL
    }
    repos {
    exec = <<SQL
    SET SQL_SELECT_LIMIT=500; 
    SELECT
        repository_id,
        LANGUAGE(file_path, blob_content) as lang,
        SUM(JSON_EXTRACT(LOC(file_path, blob_content), '$.Code')) as code,
        SUM(JSON_EXTRACT(LOC(file_path, blob_content), '$.Comment')) as comments,
        SUM(JSON_EXTRACT(LOC(file_path, blob_content), '$.Blank')) as blanks,
        COUNT(1) as files
    FROM refs
    NATURAL JOIN commit_files
    NATURAL JOIN blobs
    WHERE ref_name='HEAD'
    GROUP BY lang,repository_id;
    SQL
    }
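
    Each macro above ends up as a REST endpoint (see the /:macro route in server_rest.go below), so the cached results can be consumed like any JSON API. A small consumer sketch, going through the lb on localhost:9000 per the compose file; the exact JSON shape sqler returns is not pinned down here, so it is decoded into a generic value and pretty-printed.

    package main

    import (
        "encoding/json"
        "fmt"
        "net/http"
    )

    func main() {
        resp, err := http.Get("http://localhost:9000/repos")
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()

        // decode into a generic value since the exact row layout is an assumption
        var result interface{}
        if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
            panic(err)
        }

        pretty, _ := json.MarshalIndent(result, "", "  ")
        fmt.Println(string(pretty))
    }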

    A few problems

    • The gzip problem
      bloom does not decompress response bodies, but sqler gzips its responses by default, so the combination breaks in practice. The fix is to build sqler from source with the gzip middleware disabled.
      Reference:
      server_rest.go below. Note that I also added pprof, which makes it easier to analyze performance problems.
     
    // Copyright 2018 The SQLer Authors. All rights reserved.
    // Use of this source code is governed by a Apache 2.0
    // license that can be found in the LICENSE file.
    package main
    import (
     "net/http"
     _ "net/http/pprof"
     "strings"
     "github.com/labstack/echo"
     "github.com/labstack/echo/middleware"
    )
    // initialize RESTful server
    func initRESTServer() error {
     e := echo.New()
     e.HideBanner = true
     e.HidePort = true
     e.Pre(middleware.RemoveTrailingSlash())
     e.Use(middleware.CORS())
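 // the gzip middleware on the next line is intentionally left commented out:
 // bloom does not decompress cached bodies, so sqler must return plain responses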
     // e.Use(middleware.GzipWithConfig(middleware.GzipConfig{Level: 9}))
     e.Use(middleware.Recover())
     e.GET("/", routeIndex)
     e.Any("/:macro", routeExecMacro, middlewareAuthorize)
     e.GET("/debug/pprof/*", echo.WrapHandler(http.DefaultServeMux))
     return e.Start(*flagRESTListenAddr)
    }
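
      Since net/http/pprof registers its handlers on the default mux and the route above exposes that mux through echo, a CPU profile can be captured while a slow macro is running with the standard go tool pprof http://localhost:8025/debug/pprof/profile (8025 being sqler's REST port in the compose file above).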
    • Queries over a lot of code through sqler are very slow
      This looks like a gitbase implementation issue: with a desktop IDE/SQL client the same queries run quickly, but through sqler they are very slow. Limiting the data returned (paging)
      makes them fast again, so pagination is the recommended approach; when the result size can be estimated,
      SET SQL_SELECT_LIMIT=200; takes care of it.

    References

    https://github.com/rongfengliang/bloom-sqler-gitbase
    https://github.com/alash3al/sqler
    https://github.com/valeriansaliou/bloom
