#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

# This is mysql-deadlock-logger, a program that extracts and saves a summary of
# the last deadlock recorded in MySQL.
#
# This program is copyright (c) 2007 Baron Schwartz.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.

use strict;
use warnings FATAL => 'all';

use DBI;
use English qw(-no_match_vars);
use Getopt::Long;
use List::Util qw(max);
use Socket qw(inet_aton);
use Term::ReadKey;

our $VERSION = '1.0.3';
our $DISTRIB = '848';
our $SVN_REV = sprintf("%d", q$Revision: 782 $ =~ m/(\d+)/g);

# ############################################################################
# Get configuration information.
# ############################################################################

# Define cmdline args; each is GetOpt::Long spec, whether required,
# human-readable description.  Add more hash entries as needed.
my @opt_spec = (
   { s => 'askpass',      d => 'Prompt for password for connections' },
   { s => 'collapse|c!',  d => 'Collapse whitespace in queries' },
   { s => 'columns|C=s',  d => 'Comma-separated list of columns to output' },
   { s => 'dest|d=s',     d => 'Server in which to store info' },
   { s => 'help',         d => 'Show this help message' },
   { s => 'numip|n',      d => 'Express IP addresses as integers' },
   { s => 'print|p',      d => 'Print to STDOUT' },
   { s => 'source|s=s',   d => 'Server to check for deadlock' },
   { s => 'tab|t',        d => 'Output tab-separated' },
   { s => 'version',      d => 'Output version information and exit' },
);

# This is the container for the command-line options' values to be stored in
# after processing.  Initial values are defaults.
my %opts = (
   s => '',
   d => '',
   C => '',
);
# Post-process...
my %opt_seen;
foreach my $spec ( @opt_spec ) {
   my ( $long, $short ) = $spec->{s} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
   $spec->{k} = $short || $long;
   $spec->{l} = $long;
   $spec->{t} = $short;
   $spec->{n} = $spec->{s} =~ m/!/;
   $opts{$spec->{k}} = undef unless defined $opts{$spec->{k}};
   die "Duplicate option $spec->{k}" if $opt_seen{$spec->{k}}++;
}

Getopt::Long::Configure('no_ignore_case', 'bundling');
GetOptions( map { $_->{s} => \$opts{$_->{k}} } @opt_spec) or $opts{help} = 1;

if ( $opts{version} ) {
   print "mysql-deadlock-logger  Ver $VERSION Distrib $DISTRIB Changeset $SVN_REV\n";
   exit(0);
}

# Process options...
$opts{c} = $opts{p} unless defined $opts{c};
$opts{C} = $opts{C} ? { map { $_ => 1 } $opts{C} =~ m/(\w+)/g } : 0;

# ############################################################################
# Parse arguments saying where to connect.  If the script doesn't have
# everything it needs, show help text.
# ############################################################################
my %conn   = (
   h => 'host',
   P => 'port',
   S => 'mysql_socket',
   u => 'user',
   p => 'pass',
   F => 'mysql_read_default_file',
   D => 'database',
   t => 'table',
);

my $source = parse_dsn($opts{s}, {}      );
my $dest   = parse_dsn($opts{d}, $source );

if (!$opts{p} && !$dest->{t}) {
   warn "You need to specify what to do (print or save).\n";
   $opts{help} = 1;
}

if ( $opts{help} ) {
   print "Usage: mysql-deadlock-logger <options>\n\n";
   my $maxw = max(map { length($_->{l}) + ($_->{n} ? 4 : 0)} @opt_spec);
   foreach my $spec ( sort { $a->{l} cmp $b->{l} } @opt_spec ) {
      my $long  = $spec->{n} ? "[no]$spec->{l}" : $spec->{l};
      my $short = $spec->{t} ? "-$spec->{t}" : '';
      printf("  --%-${maxw}s %-4s %s\n", $long, $short, $spec->{d});
   }
   print <<USAGE;

mysql-deadlock-logger extracts and saves information about the most recent
deadlock in a MySQL server.  You need to specify whether to print the output or
save it in a database.  The --source and --dest arguments use a special format.
For more details, please read the documentation:

   perldoc mysql-deadlock-logger

USAGE
   exit(0);
}

# ############################################################################
# Configuration info.
# ############################################################################

# Some common patterns and variables
my $MAX_ULONG = 4294967296;                   # 2^32
my $d         = qr/(\d+)/;                    # Digit
my $t         = qr/(\d+ \d+)/;                # Transaction ID
my $i         = qr/((?:\d{1,3}\.){3}\d+)/;    # IP address
my $n         = qr/([^`\s]+)/;                # MySQL object name
my $w         = qr/(\w+)/;                    # Words
my $s         = qr/(\d{6} .\d:\d\d:\d\d)/;    # InnoDB timestamp

# A thread's proc_info can be at least 98 different things I've found in the
# source.  Fortunately, most of them begin with a gerunded verb.  These are
# the ones that don't.
my %is_proc_info = (
   'After create'                 => 1,
   'Execution of init_command'    => 1,
   'FULLTEXT initialization'      => 1,
   'Reopen tables'                => 1,
   'Repair done'                  => 1,
   'Repair with keycache'         => 1,
   'System lock'                  => 1,
   'Table lock'                   => 1,
   'Thread initialized'           => 1,
   'User lock'                    => 1,
   'copy to tmp table'            => 1,
   'discard_or_import_tablespace' => 1,
   'end'                          => 1,
   'got handler lock'             => 1,
   'got old table'                => 1,
   'init'                         => 1,
   'key cache'                    => 1,
   'locks'                        => 1,
   'malloc'                       => 1,
   'query end'                    => 1,
   'rename result table'          => 1,
   'rename'                       => 1,
   'setup'                        => 1,
   'statistics'                   => 1,
   'status'                       => 1,
   'table cache'                  => 1,
   'update'                       => 1,
);

# ############################################################################
# Start working.
# ############################################################################
my $dbh = get_dbh($source);

# Since the user might not have specified hostname for connection, try to
# extract it from $dbh
if ( !$source->{h} ) {
   ($source->{h}) = $dbh->{mysql_hostinfo} =~ m/(\w+) via/;
}

my $text = $dbh->selectrow_hashref("SHOW INNODB STATUS")->{Status};

# Pull out the deadlock section
my $dl_text;
my @matches = $text =~ m#\n(---+)\n([A-Z /]+)\n\1\n(.*?)(?=\n(---+)\n[A-Z /]+\n\4\n|$)#gs;
while ( my ( $start, $name, $text, $end ) = splice(@matches, 0, 4) ) {
   next unless $name eq 'LATEST DETECTED DEADLOCK';
   $dl_text = $text;
}

exit(0) unless $dl_text;

my @sections
   = $dl_text
   =~ m{
      ^\*{3}\s([^\n]*)  # *** (1) WAITING FOR THIS...
      (.*?)             # Followed by anything, non-greedy
      (?=(?:^\*{3})|\z) # Followed by another three-stars or EOF
   }gmsx;

# Loop through each section.  There are no assumptions about how many
# there are, who holds and wants what locks, and who gets rolled back.
my %txns;
while ( my ($header, $body) = splice(@sections, 0, 2) ) {
   my ( $txn_id, $what ) = $header =~ m/^\($d\) (.*):$/m;
   next unless $txn_id;
   $txns{$txn_id} ||= { id => $txn_id };
   my $hash = $txns{$txn_id};

   if ( $what eq 'TRANSACTION' ) {
      @{$hash}{qw(txn_time)} = $body =~ m/ACTIVE $d sec/;

      # Parsing the line that begins 'MySQL thread id' is complicated.  The only
      # thing always in the line is the thread and query id.  See function
      # innobase_mysql_print_thd in InnoDB source file sql/ha_innodb.cc.
      my ( $thread_line ) = $body =~ m/^(MySQL thread id .*)$/m;
      my ( $mysql_thread_id, $query_id, $hostname, $ip, $user, $query_status );

      if ( $thread_line ) {
         # These parts can always be gotten.
         ( $mysql_thread_id, $query_id ) = $thread_line =~ m/^MySQL thread id $d, query id $d/m;

         # If it's a master/slave thread, "Has (read|sent) all" may be the thread's
         # proc_info.  In these cases, there won't be any host/ip/user info
         ( $query_status ) = $thread_line =~ m/(Has (?:read|sent) all .*$)/m;
         if ( defined($query_status) ) {
            $user = 'system user';
         }

         # It may be the case that the query id is the last thing in the line.
         elsif ( $thread_line =~ m/query id \d+ / ) {
            # The IP address is the only non-word thing left, so it's the most
            # useful marker for where I have to start guessing.
            ( $hostname, $ip ) = $thread_line =~ m/query id \d+(?: ([A-Za-z]\S+))? $i/m;
            if ( defined $ip ) {
               ( $user, $query_status ) = $thread_line =~ m/$ip $w(?: (.*))?$/;
            }
            else { # OK, there wasn't an IP address.
               # There might not be ANYTHING except the query status.
               ( $query_status ) = $thread_line =~ m/query id \d+ (.*)$/;
               if ( $query_status !~ m/^\w+ing/ && !exists($is_proc_info{$query_status}) ) {
                  # The remaining tokens are, in order: hostname, user, query_status.
                  # It's basically impossible to know which is which.
                  ( $hostname, $user, $query_status ) = $thread_line
                     =~ m/query id \d+(?: ([A-Za-z]\S+))?(?: $w(?: (.*))?)?$/m;
               }
               else {
                  $user = 'system user';
               }
            }
         }
      }

      my ( $query_text ) = $body =~ m/\nMySQL thread id .*\n((?s).*)/;
      $query_text =~ s/\s+$//;
      $query_text =~ s/\s+/ /g if $opts{c};

      @{$hash}{qw(thread hostname ip user query)}
         = ($mysql_thread_id, $hostname, $ip, $user, $query_text);
      foreach my $key ( keys %$hash ) {
         if ( !defined $hash->{$key} ) {
            $hash->{$key} = '';
         }
      }

   }
   else {
      # Prefer information about locks waited-for over locks-held.
      if ( $what eq 'WAITING FOR THIS LOCK TO BE GRANTED' || !$hash->{lock_type} ) {
         $hash->{wait_hold} = $what eq 'WAITING FOR THIS LOCK TO BE GRANTED' ? 'w' : 'h';
         @{$hash}{ qw(lock_type idx db tbl txn_id lock_mode) }
            = $body
            =~ m{^(RECORD|TABLE) LOCKS? (?:space id \d+ page no \d+ n bits \d+ index `?$n`? of )?table `$n(?:/|`\.`)$n` trx id $t lock.mode (\S+)}m;
         if ( $hash->{txn_id} ) {
            my ( $high, $low ) = $hash->{txn_id} =~ m/^(\d+) (\d+)$/;
            $hash->{txn_id} = $high ? ( $low + $high * $MAX_ULONG ) : $low;
         }
      }
   }

   # Ensure all values are defined
   map { $hash->{$_} = 0 unless defined $hash->{$_} }
      qw(thread txn_id txn_time);
   map { $hash->{$_} = '' unless defined $hash->{$_} }
      qw(user hostname db tbl idx lock_type lock_mode query);
}

my @cols = qw( server ts thread txn_id txn_time user hostname ip db tbl idx
               lock_type lock_mode wait_hold victim query );
if ( $opts{C} ) {
   @cols = grep { $opts{C}->{$_} } @cols;
}

# Extract some miscellaneous data from the deadlock.
my ( $ts ) = $dl_text =~ m/^$s$/m;
my ( $year, $mon, $day, $hour, $min, $sec ) = $ts =~ m/^(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)$/;
$ts = sprintf('%02d-%02d-%02dT%02d:%02d:%02d', $year + 2000, $mon, $day, $hour, $min, $sec);
my ( $victim ) = $dl_text =~ m/^\*\*\* WE ROLL BACK TRANSACTION \((\d+)\)$/m;
$victim ||= 0;

# Stick the misc data into the transactions.
foreach my $txn ( values %txns ) {
   $txn->{victim} = $txn->{id} == $victim ? 1 : 0;
   $txn->{ts}     = $ts;
   $txn->{server} = $source->{h} || '';
   $txn->{ip}     = inet_aton($txn->{ip}) if $opts{n};
}

if ( $dest && $dest->{t} ) {
   my $db_tbl = 
      join('.',
      map  {  $dbh->quote_identifier($_) }
      grep { $_ }
      ( $dest->{D}, $dest->{t} ));

   my $dbh2  = get_dbh($dest);
   my $cols  = join(',', map { $dbh->quote_identifier($_) } @cols);
   my $parms = join(',', map { '?' } @cols);
   my $query = $dbh2->prepare("INSERT IGNORE INTO $db_tbl($cols) VALUES($parms)");
   foreach my $txn ( sort { $a->{thread} <=> $b->{thread} } values %txns ) {
      $query->execute(@{$txn}{@cols});
   }
}

if ( $opts{p} ) {
   my $sep = $opts{t} ? "\t" : ' ';
   print join($sep, @cols), "\n";
   foreach my $txn ( sort { $a->{thread} <=> $b->{thread} } values %txns ) {
      # If $opts{c} is on, it's already been taken care of, but if it's unset,
      # by default strip whitespace.
      if ( !defined $opts{c} ) {
         $txn->{query} =~ s/\s+/ /g;
      }
      print join($sep, map { $txn->{$_} } @cols), "\n";
   }
}

# ############################################################################
# Subroutines
# ############################################################################
sub parse_dsn {
   my ($dsn, $prev) = @_;
   return unless $dsn;
   $prev ||= {};

   my %vals;
   if ( $dsn =~ m/=/ ) {
      my %hash = map { m/^(.)=(.*)$/g } split(/,/, $dsn);
      %vals = map  { $_ => $hash{$_} } keys %conn;
   }
   else {
      $vals{h} = $dsn;
   }
   map { $vals{$_} ||= $prev->{$_} } keys %conn;
   die "Missing host (h) part in $dsn\n" unless $vals{h};
   return \%vals;
}

sub get_dbh {
   my ( $info ) = @_;
   my $db_options = {
      AutoCommit => !$opts{k} && !$opts{F} && !$opts{1},
      RaiseError => 1,
      PrintError => 0,
   };

   if ( !$info->{p} && $opts{askpass} ) {
      print "Enter password: ";
      ReadMode('noecho');
      chomp($info->{p} = <STDIN>);
      ReadMode('normal');
      print "\n";
   }

   my $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
      . join(';', map  { "$conn{$_}=$info->{$_}" } grep { defined $info->{$_} } qw(F h P S))
      . ';mysql_read_default_group=mysql';
   return DBI->connect($dsn, @{$info}{qw(u p)}, $db_options);
}

# ############################################################################
# Documentation
# ############################################################################
=pod

=head1 NAME

mysql-deadlock-logger - Extract and log MySQL deadlock information.

=head1 SYNOPSIS

   mysql-deadlock-logger --source u=user,p=pass,h=server --dest D=test,t=deadlocks

=head1 DESCRIPTION

mysql-deadlock-logger extracts deadlock data from a MySQL server (currently only
InnoDB deadlock information is available).  You can print it to standard output
or save it in a database table.  By default it does neither.

=head1 OPTIONS

Some options are negatable with --no.

=over

=item --askpass

Prompt for password for connections.

=item --collapse

Makes mysql-deadlock-logger collapse all whitespace in queries to a single
space.  This might make it easier to inspect on the command line or in a query.
By default, whitespace is collapsed when printing with L<"--print">, but not
modified when storing to L<"--dest">.  (That is, the default is different for
each action).

=item --columns

Makes mysql-deadlock-logger only output and/or save certain columns.  See
L<"OUTPUT"> for more details.

=item --dest

Specifies a server, database and table in which to store deadlock information,
in the same format as L<"--source">.  Missing values are filled in with the same
values as L<"--source">, so you can usually omit most parts of this argument if
you're storing deadlocks on the same server on which they happen.

By default, whitespace in the query column is left intact; use L<"--collapse">
if you want whitespace collapsed.

The following table is suggested if you want to store all the information
mysql-deadlock-logger can extract about deadlocks:

 CREATE TABLE deadlocks (
   server char(20) NOT NULL,
   ts datetime NOT NULL,
   thread int unsigned NOT NULL,
   txn_id bigint unsigned NOT NULL,
   txn_time smallint unsigned NOT NULL,
   user char(16) NOT NULL,
   hostname char(20) NOT NULL,
   ip char(15) NOT NULL, -- alternatively, ip int unsigned NOT NULL
   db char(64) NOT NULL,
   tbl char(64) NOT NULL,
   idx char(64) NOT NULL,
   lock_type char(16) NOT NULL,
   lock_mode char(1) NOT NULL,
   wait_hold char(1) NOT NULL,
   victim tinyint unsigned NOT NULL,
   query text NOT NULL,
   PRIMARY KEY  (server,ts,thread)
 ) ENGINE=InnoDB

If you use L<"--columns">, you can omit whichever columns you don't want to
store.

=item --help

Displays a help message.

=item --numip

Makes mysql-deadlock-logger express IP addresses as integers.

=item --print

Makes mysql-deadlock-logger print results on standard output.  See L<"OUTPUT">
for more.  By default, enables L<"--collapse"> unless you explicitly disable it.

=item --source

Specifies a connection to a server to check for deadlocks.  This argument is
specially formatted as a key=value,key=value string.  Keys are a single letter:

   KEY MEANING
   === =======
   h   Connect to host
   P   Port number to use for connection
   S   Socket file to use for connection
   u   User for login if not current user
   p   Password to use when connecting
   F   Only read default options from the given file
   D   Database to connect to
   t   Table in which to store deadlock information

The 't' part only applies to L<"--dest">.  All parts are optional;
mysql-deadlock-logger will try to read MySQL's option files to determine how
to connect.  If you specify a value without an '=' character,
mysql-deadlock-logger will interpret this as a hostname.

=item --tab

Makes mysql-deadlock-logger output tab-separated columns instead of
whitespace-aligning them.  See L<"OUTPUT"> for more.

=item --version

Output version information and exit.

=back

=head1 INNODB CAVEATS AND DETAILS

InnoDB's output is hard to parse and sometimes there's no way to do it right.

Sometimes not all information (for example, username or IP address) is included
in the deadlock information.  In this case there's nothing for the script to put
in those columns.  It may also be the case that the deadlock output is so long
(because there were a lot of locks) that the whole thing is truncated.

Though there are usually two transactions involved in a deadlock, there are more
locks than that; at a minimum, one more lock than transactions is necessary to
create a cycle in the waits-for graph.  mysql-deadlock-logger prints the
transactions (always two in the InnoDB output, even when there are more
transactions in the waits-for graph than that) and fills in locks.  It prefers
waited-for over held when choosing lock information to output, but you can
figure out the rest with a moment's thought.  If you see one wait-for and one
held lock, you're looking at the same lock, so of course you'd prefer to see
both wait-for locks and get more information.  If the two waited-for locks are
not on the same table, more than two transactions were involved in the deadlock.

=head1 OUTPUT

You can choose which columns are output and/or saved to L<"--dest"> with the
L<"--columns"> argument.  The default columns are as follows:

=over

=item server

The (source) server on which the deadlock occurred.  This might be useful if
you're tracking deadlocks on many servers.

=item ts

The date and time of the last detected deadlock.

=item thread

The MySQL thread number, which is the same as the connection ID in SHOW FULL
PROCESSLIST.

=item txn_id

The InnoDB transaction ID, which InnoDB expresses as two unsigned integers.  I
have multiplied them out to be one number.

=item txn_time

How long the transaction was active when the deadlock happened.

=item user

The connection's database username.

=item hostname

The connection's host.

=item ip

The connection's IP address.  If you specify L<"--numip">, this is converted to
an unsigned integer.

=item db

The database in which the deadlock occurred.

=item tbl

The table on which the deadlock occurred.

=item idx

The index on which the deadlock occurred.

=item lock_type

The lock type the transaction held on the lock that caused the deadlock.

=item lock_mode

The lock mode of the lock that caused the deadlock.

=item wait_hold

Whether the transaction was waiting for the lock or holding the lock.  Usually
you will see the two waited-for locks.

=item victim

Whether the transaction was selected as the deadlock victim and rolled back.

=item query

The query that caused the deadlock.

=back

=head1 SYSTEM REQUIREMENTS

You need Perl, DBI, DBD::mysql, and some core packages that ought to be
installed in any reasonably new version of Perl.

=head1 BUGS

Please use the Sourceforge bug tracker, forums, and mailing lists to request
support or report bugs: L<http://sourceforge.net/projects/mysqltoolkit/>.

=head1 COPYRIGHT, LICENSE AND WARRANTY

This program is copyright (c) 2007 Baron Schwartz.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 AUTHOR

Baron Schwartz.

=head1 VERSION

This manual page documents Ver 1.0.3 Distrib 848 $Revision: 782 $.

=cut
