zoukankan      html  css  js  c++  java
  • 解析mysql慢日志

    mysql慢日志太多,需要分析下具体有哪些慢日志

    mysql可以直接记录所有慢日志,现在的问题是将日志文件sql进行去重

    想了老半天该怎样将sql的查询字段去掉进行排序,没有get到重点。后来发现mysql自带提供了mysqldumpslow工具用于解析慢日志
    下面是选项:

    Option Name Description
    -a Do not abstract all numbers to N and strings to 'S'
    -n Abstract numbers with at least the specified digits
    --de bug Write debugging information
    -g Only consider statements that match the pattern
    --he lp Display help message and exit
    -h Host name of the server in the log file name
    -i Name of the server instance
    -l Do not subtract lock time from total time
    -r Reverse the sort order
    -s How to sort output
    -t Display only first num queries
    --verbose Verbose mode

    默认添加-a选项将不替换sql的查询参数,导致相同类型的sql只是查询串不一样也作为两条语句了

    所以-a选项可以做参考,依然会记录很多重复sql

    下面是修改后的文件,当不使用-a选项时添加一个耗时最大的sql作为例子

    #!/usr/bin/perl
    
    # Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
    #
    # This program is free software; you can redistribute it and/or
    # modify it under the terms of the GNU Library General Public
    # License as published by the Free Software Foundation; version 2
    # of the License.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    # Library General Public License for more details.
    #
    # You should have received a copy of the GNU Library General Public
    # License along with this library; if not, write to the Free
    # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
    # MA 02110-1301, USA
    
    # mysqldumpslow - parse and summarize the MySQL slow query log
    
    # Original version by Tim Bunce, sometime in 2000.
    # Further changes by Tim Bunce, 8th March 2001.
    # Handling of strings with  and double '' by Monty 11 Aug 2001.
    
    use strict;
    use Getopt::Long;
    
    # t=time, l=lock time, r=rows
    # at, al, and ar are the corresponding averages
    
    my %opt = (
        s => 'at',
        h => '*',
    );
    
    GetOptions(\%opt,
        'v|verbose+',# verbose
        'help+',	# write usage info
        'd|debug+',	# debug
        's=s',	# what to sort by (al, at, ar, c, t, l, r)
        'r!',	# reverse the sort order (largest last instead of first)
        't=i',	# just show the top n queries
        'a!',	# don't abstract all numbers to N and strings to 'S'
        'n=i',	# abstract numbers with at least n digits within names
        'g=s',	# grep: only consider stmts that include this string
        'h=s',	# hostname of db server for *-slow.log filename (can be wildcard)
        'i=s',	# name of server instance (if using mysql.server startup script)
        'l!',	# don't subtract lock time from total time
    ) or usage("bad option");
    
    $opt{'help'} and usage();
    
    unless (@ARGV) {
        my $defaults   = `my_print_defaults mysqld`;
        my $basedir = ($defaults =~ m/--basedir=(.*)/)[0]
    	or die "Can't determine basedir from 'my_print_defaults mysqld' output: $defaults";
        warn "basedir=$basedir
    " if $opt{v};
    
        my $datadir = ($defaults =~ m/--datadir=(.*)/)[0];
        my $slowlog = ($defaults =~ m/--slow-query-log-file=(.*)/)[0];
        if (!$datadir or $opt{i}) {
    	# determine the datadir from the instances section of /etc/my.cnf, if any
    	my $instances  = `my_print_defaults instances`;
    	die "Can't determine datadir from 'my_print_defaults mysqld' output: $defaults"
    	    unless $instances;
    	my @instances = ($instances =~ m/^--(w+)-/mg);
    	die "No -i 'instance_name' specified to select among known instances: @instances.
    "
    	    unless $opt{i};
    	die "Instance '$opt{i}' is unknown (known instances: @instances)
    "
    	    unless grep { $_ eq $opt{i} } @instances;
    	$datadir = ($instances =~ m/--$opt{i}-datadir=(.*)/)[0]
    	    or die "Can't determine --$opt{i}-datadir from 'my_print_defaults instances' output: $instances";
    	warn "datadir=$datadir
    " if $opt{v};
        }
    
        if ( -f $slowlog ) {
            @ARGV = ($slowlog);
            die "Can't find '$slowlog'
    " unless @ARGV;
        } else {
            @ARGV = <$datadir/$opt{h}-slow.log>;
            die "Can't find '$datadir/$opt{h}-slow.log'
    " unless @ARGV;
        }
    }
    
    warn "
    Reading mysql slow query log from @ARGV
    ";
    
    my @pending;
    my %stmt;
    $/ = ";
    #";		# read entire statements using paragraph mode
    while ( defined($_ = shift @pending) or defined($_ = <>) ) {
        warn "[[$_]]
    " if $opt{d};	# show raw paragraph being read
    
        my @chunks = split /^/.*Version.*started with[00-377]*?Time.*Id.*Command.*Argument.*
    /m;
        if (@chunks > 1) {
    	unshift @pending, map { length($_) ? $_ : () } @chunks;
    	warn "<<".join(">>
    <<",@chunks).">>" if $opt{d};
    	next;
        }
    
        s/^#? Time: d{6}s+d+:d+:d+.*
    //;
        my ($user,$host,$dummy,$thread_id) = s/^#? User@Host:s+(S+)s+@s+(S+)s+S+(s+Id:s+(d+))?.*
    // ? ($1,$2,$3,$4) : ('','','','','');
    
        s/^# Query_time: ([0-9.]+)s+Lock_time: ([0-9.]+)s+Rows_sent: ([0-9.]+).*
    //;
        my ($t, $l, $r) = ($1, $2, $3);
        $t -= $l unless $opt{l};
    
        # remove fluff that mysqld writes to log when it (re)starts:
        s!^/.*Version.*started with:.*
    !!mg;
        s!^Tcp port: d+  Unix socket: S+
    !!mg;
        s!^Time.*Id.*Command.*Argument.*
    !!mg;
    
        s/^use w+;
    //;	# not consistently added
        s/^SET timestamp=d+;
    //;
    
        s/^[ 	]*
    //mg;	# delete blank lines
        s/^[ 	]*/  /mg;	# normalize leading whitespace
        s/s*;s*(#s*)?$//;	# remove trailing semicolon(+newline-hash)
    
        next if $opt{g} and !m/$opt{g}/io;
    
        # 定义eg变量用于保存原始sql,避免被下面语句替换
        my $eg = $_;
    
        unless ($opt{a}) {
    	s/d+/N/g;
    	s/0x[0-9A-Fa-f]+/N/g;
            s/''/'S'/g;
            s/""/"S"/g;
            s/(\')//g;
            s/(\")//g;
            s/'[^']+'/'S'/g;
            s/"[^"]+"/"S"/g;
    	# -n=8: turn log_20001231 into log_NNNNNNNN
    	s/([a-z_]+)(d{$opt{n},})/$1.('N' x length($2))/ieg if $opt{n};
    	# abbreviate massive "in (...)" statements and similar
    	s!(([NS],){100,})!sprintf("$2,{repeated %d times}",length($1)/2)!eg;
        }
    
        my $s = $stmt{$_} ||= { users=>{}, hosts=>{} };
        $s->{c} += 1;
        $s->{t} += $t;
        $s->{l} += $l;
        $s->{r} += $r;
    
        # 选取耗时最大的sql保存在eg变量里面
        $s->{max} = $s->{c}>1?$t>$s->{max}?$t:$s->{max}:$t;
        $s->{eg} = $s->{max}>$t?$s->{eg}:$eg;
    
        $s->{users}->{$user}++ if $user;
        $s->{hosts}->{$host}++ if $host;
    
        warn "{{$_}}
    
    " if $opt{d};	# show processed statement string
    }
    
    foreach (keys %stmt) {
        my $v = $stmt{$_} || die;
        my ($c, $t, $l, $r) = @{ $v }{qw(c t l r)};
        $v->{at} = $t / $c;
        $v->{al} = $l / $c;
        $v->{ar} = $r / $c;
    }
    
    my @sorted = sort { $stmt{$b}->{$opt{s}} <=> $stmt{$a}->{$opt{s}} } keys %stmt;
    @sorted = @sorted[0 .. $opt{t}-1] if $opt{t};
    @sorted = reverse @sorted         if $opt{r};
    
    foreach (@sorted) {
        my $v = $stmt{$_} || die;
        my ($c, $t,$at, $l,$al, $r,$ar,$eg) = @{ $v }{qw(c t at l al r ar eg)};
        my @users = keys %{$v->{users}};
        my $user  = (@users==1) ? $users[0] : sprintf "%dusers",scalar @users;
        my @hosts = keys %{$v->{hosts}};
        my $host  = (@hosts==1) ? $hosts[0] : sprintf "%dhosts",scalar @hosts;
        printf "Count: %d  Time=%.2fs (%ds)  Lock=%.2fs (%ds)  Rows=%.1f (%d), $user@$host
    %s
    ",
    	    $c, $at,$t, $al,$l, $ar,$r, $_;
        # 如果没有使用-a选项打印example作为例子
        printf "Example:
    %s
    ", $eg if not $opt{a};
        printf "
    ";
    }
    
    sub usage {
        my $str= shift;
        my $text= <<HERE;
    Usage: mysqldumpslow [ OPTS... ] [ LOGS... ]
    
    Parse and summarize the MySQL slow query log. Options are
    
      --verbose    verbose
      --debug      debug
      --help       write this text to standard output
    
      -v           verbose
      -d           debug
      -s ORDER     what to sort by (al, at, ar, c, l, r, t), 'at' is default
                    al: average lock time
                    ar: average rows sent
                    at: average query time
                     c: count
                     l: lock time
                     r: rows sent
                     t: query time  
      -r           reverse the sort order (largest last instead of first)
      -t NUM       just show the top n queries
      -a           don't abstract all numbers to N and strings to 'S'
      -n NUM       abstract numbers with at least n digits within names
      -g PATTERN   grep: only consider stmts that include this string
      -h HOSTNAME  hostname of db server for *-slow.log filename (can be wildcard),
                   default is '*', i.e. match all
      -i NAME      name of server instance (if using mysql.server startup script)
      -l           don't subtract lock time from total time
    
    HERE
        if ($str) {
          print STDERR "ERROR: $str
    
    ";
          print STDERR $text;
          exit 1;
        } else {
          print $text;
          exit 0;
        }
    }
    

    可以看到上面的perl脚本很简单,添加example也很简单。之前打算用python来做,是我想复杂了。直接将数字替换为N,引号里面的字符替换成S就可以了。
    这个还有一个问题是where后面的条件顺序也会影响,不过这个影响不大

    如下面的情况(只是作为示例),不使用-a时正常只显示第一行,现在将显示第一行和执行第2,3,4行sql时耗时最大的一条sql作为示例以便用户分析

    select * from mysql.user where N=N;
    select * from mysql.user where 1=1;
    select * from mysql.user where 2=2;
    select * from mysql.user where 3=3;
    
  • 相关阅读:
    GO语言系列- 结构体和接口
    GO语言系列- 高级数据类型之数组、切片、map
    tomcat8开启APR模式
    python中的subprocess.Popen()使用详解---以及注意的问题(死锁)
    linux dig 命令使用
    究竟什么时候该使用MQ?
    Python 列表(List) 的三种遍历(序号和值)方法
    python函数里引用全局变量
    python(xlsxwriter模块使用)
    Linux下生成patch和打patch
  • 原文地址:https://www.cnblogs.com/mikeguan/p/11496736.html
Copyright © 2011-2022 走看看