#!/usr/bin/perl -w
# file KLEIDER/catalog/src/dbdump/dbdump.pl
# MySQL-Datenbank im XML-Format ausgeben
# 2020-11-29, Herbert Schiemann <h.schiemann@herbaer.de>
# GPL Version 2 oder neuer

use utf8;
use Cwd qw(realpath);
use Herbaer::MysqlAccess ;
use Herbaer::Readargs ;
use Herbaer::XMLDataWriter;
use MIME::Base64;

binmode (STDIN,  ":encoding(utf-8)");
binmode (STDOUT, ":encoding(utf-8)");
binmode (STDERR, ":encoding(utf-8)");

=for comment

{database}
-> dbname
-> create      -- die Anweisung zum Erstellen der Datenbank
-> tabspec  [{tabspec}]
-> contents

{tabspec}
-> tabname
-> tabtype
-> create      -- Anweisung zum Erstellen der Tabelle
-> fields  [{fldspec}]
-> index   [{ixspec}]

{fldspec}
-> name
-> type
-> collate
-> null
-> key
-> default
-> extra
-> comment

{ixspec}
-> tabname
-> key
-> seq
-> colname
-> ixtype
-> nonuni
-> collate
-> card
-> subpart
-> packed
-> null
-> comment
-> ixcomm

=cut

# Kommandozeilen-Argumente
my $args = {
   "[cnt]verbose"  => 1,
   "[cnt]spec"     => 1,
   "[cnt]contents" => 1,   
   "dbkey"         => "",    # Schlüssel zu den Datenbank-Zugangsdaten
   "secrets"       => undef, # Eingabedatei (Text)
   "dumpdir"       => undef,
};

# gibt die Version nach STDOUT aus
sub version {
   print <<'VERSION' ;
dbdump.pl
MySQL-Datenbank im XML-Format
2020-11-29 Herbert Schiemann <h.schiemann@herbaer.de>
VERSION
}
$args -> {"[sr]version"} = sub { version (); exit 0; };

$args -> {"[sr]help"} = sub {
   set_defaults ($args);
   version ();
   print_message_with_values (<<"HELP", $args);
$0 OPTION ...
--[no_]verbose     Umfang der Ausgabe \${[cnt]verbose}
--[no_]spec        Datenbank-Struktur ausgeben \${[cnt]spec}
--[no_]contents    Datenbank-Inhalt ausgeben \${[cnt]contents}
--dbkey   DBKEY    Schlüssel zum Datenbankzugang \${dbkey}
--dumpdir DUMPDIR  optional: Ausgabeverzeichnis
                   \${dumpdir}
--secrets SECRETS  optional: Pfad der Geheimnis-Datei
                   \${secrets}
HELP
   exit 0;
};

sub set_defaults {
   my $args = shift;
   my $b = realpath ($0);
   $b =~ s/\/catalog\/.*//;
   $args -> {"dumpdir"} ||= "$b/catalog/dbdump";
}; # set_defaults

read_args ($args);
set_defaults ($args);

my $data = {
   "dbh"         => undef,    # Datenbank-Handle
   "xmlwriter"   => undef,
   "database"    => {},       # Datenbanken s.o.
};

sub get_dbh {
   my ($args, $data) = @_;
   my $ar = get_database (
      $args -> {"dbkey"},
      $args -> {"secrets"},
      $args -> {"[cnt]verbose"}
   );
   if ($ar) {
      $data -> {"dbh"} = $ar -> [0];
      $data -> {"database"} -> {"dbname"} = $ar -> [1];
      return 1;
   }
   else {
      return 0;
   }
} # get_dbh

sub get_create_database {
   my ($args, $data) = @_;
   my $dbh    = $data  -> {"dbh"};
   my $dbase  = $data  -> {"database"};
   my $dbname = $dbase -> {"dbname"};
   my $sh  = $dbh -> prepare ("SHOW CREATE DATABASE $dbname");
   $sh -> execute ();
   my $ar = $sh -> fetchrow_arrayref();
   if ($ar && @$ar > 1) {
      $dbase -> {"create"} = $ar -> [1];
   }
   1;
} # get_create_database

sub get_tables {
   my ($args, $data) = @_;
   my $verbose = $args -> {"[cnt]verbose"};
   my $dbh   = $data  -> {"dbh"};
   my $dbase = $data  -> {"database"};
   my $ts    = $dbase -> {"tabspec"} ||= [];
   my $sh  = $dbh -> prepare ("SHOW FULL TABLES");
   my $ar;
   $sh -> execute ();
   my $tabname;
   my $rv = 0;
   while ($ar = $sh -> fetchrow_arrayref()) {
      $tabname = $ar -> [0];
      $tabtype = $ar -> [1];
      push (@$ts, {
         "tabname" => $tabname,
         "tabtype" => $tabtype,
      });
      ++$rv;
      print "Table $tabname: $tabtype\n" if $verbose;
   }
   $rv;
} # get_tables

sub get_table_status {
   my ($args, $data) = @_;
   my $dbh   = $data  -> {"dbh"};
   my $tstat = {};  # Tabellenname als Schlüssel
   my $tsd   = {};  # Daten zu einer Tabelle aus SHOW TABLE STATUS

   # Felder aus SHOW TABLE STATUS
   # https://mariadb.com/kb/en/show-table-status/
   my $ts_name;    # Name: Table name
   my $ts_engine;  # Engine: Table storage engine
   my $ts_version; # Version: Version number from the table's .frm file.
   my $ts_rf;      # Row_format: Row format (see InnoDB, Aria and MyISAM row formats)
                   # hier nicht benutzt
   my $ts_rows;    # Rows: Number of rows in the table.
                   # Some engines, such as XtraDB and InnoDB may store an estimate.
   my $ts_arl;     # Avg_row_length: Average row length in the table
                   # hier nicht benutzt
   my $ts_datalen; # Data_length
=for comment
For InnoDB/XtraDB, the index size, in pages, multiplied by the page size.
For Aria and MyISAM, length of the data file, in bytes.
For MEMORY, the approximate allocated memory.
=cut
   my $ts_maxdl;   # Max_data_length: hier nicht benutzt
=for comment
Maximum length of the data file,
ie the total number of bytes that could be stored in the table.
Not used in XtraDB and InnoDB.
=cut
   my $ts_ixlen;   # Index_length: Length of the index file.
   my $ts_df;      # Data_free
=for comment
Bytes allocated but unused.
For InnoDB tables in a shared tablespace,
the free space of the shared tablespace with small safety margin.
An estimate in the case of partitioned tables - see the PARTITIONS table.
=cut
   my $ts_autoinc; # Auto_increment: Next AUTO_INCREMENT value.
   my $ts_cretime; # Create_time: Time the table was created.
   my $ts_updtime; # Update_time:
=for comment
Time the table was last updated.
On Windows, the timestamp is not updated on update,
so MyISAM values will be inaccurate.
In InnoDB, if shared tablespaces are used, will be NULL,
while buffering can also delay the update,
so the value will differ from the actual time of the last UPDATE, INSERT or DELETE.
=cut
   my $ts_chktime; # Check_time:
=for comment
Time the table was last checked.
Not kept by all storage engines, in which case will be NULL.
=cut
   my $ts_collate; # Collation: Character set and collation.
   my $ts_chksum;  # Checksum: Live checksum value, if any.
                   # hier nicht benutzt
   my $ts_creopt;  # Create_options: Extra CREATE TABLE options.
   my $ts_comment; # Comment: Table comment provided when MariaDB created the table.
=for comment
Weitere Felder werden hier nicht benutzt:
Max_index_length:
Maximum index length (supported by MyISAM and Aria tables). Added in MariaDB 10.3.5.
Temporary:
Placeholder to signal that a table is a temporary table.
Currently always "N",
except "Y" for generated information_schema tables and NULL for views.
Added in MariaDB 10.3.5.   
=cut
   my $sh  = $dbh -> prepare ("SHOW TABLE STATUS");
   my $ar;
   $sh -> execute ();

   while ($ar = $sh -> fetchrow_arrayref()) {
      (
       $ts_name,    # Name: Table name
       $ts_engine,  # Engine: Table storage engine
       $ts_version, # Version: Version number from the table's .frm file.
       $ts_rf,      # Row_format: Row format (see InnoDB, Aria and MyISAM row formats)
       $ts_rows,    # Rows: Number of rows in the table.
       $ts_arl,     # Avg_row_length: Average row length in the table
       $ts_datalen, # Data_length
       $ts_maxdl,   # Max_data_length: hier nicht benutzt
       $ts_ixlen,   # Index_length: Length of the index file.
       $ts_df,      # Data_free
       $ts_autoinc, # Auto_increment: Next AUTO_INCREMENT value.
       $ts_cretime, # Create_time: Time the table was created.
       $ts_updtime, # Update_time:
       $ts_chktime, # Check_time:
       $ts_collate, # Collation: Character set and collation.
       $ts_chksum,  # Checksum: Live checksum value, if any.
       $ts_creopt,  # Create_options: Extra CREATE TABLE options.
       $ts_comment, # Comment: Table comment provided when MariaDB created the table.
      ) = (@$ar);
      $tsd -> {$ts_name} = {
         "engine"  => $ts_engine,
         "version" => $ts_version,
         "rows"    => $ts_rows,
         "datalen" => $ts_datalen,
         "ixlen"   => $ts_ixlen,   
         "df"      => $ts_df,
         "autoinc" => $ts_autoinc,
         "cretime" => $ts_cretime,
         "updtime" => $ts_updtime,
         "chktime" => $ts_chktime,
         "collate" => $ts_collate,
         "creopt"  => $ts_creopt,
         "comment" => $ts_comment,
      };
   }
   my $dbase = $data  -> {"database"};
   my $ts    = $dbase -> {"tabspec"};
   my $td;
   my ($k, $v);
   for $tab (@$ts) {
      $td = $tsd -> {$tab -> {"tabname"}};
      while ( ($k, $v) = each %$td) {
         $tab -> {$k} = $v if $v;
      }
   }
} # get_table_status

# Spalten der Datenbanktabellen
sub get_columns {
   my ($args, $data) = @_;
   my $verbose = $args -> {"[cnt]verbose"};
   my $dbh   = $data  -> {"dbh"};
   my $dbase = $data  -> {"database"};
   my $ts    = $dbase -> {"tabspec"};
   my $tab;
   my $tabname;
   my $flds;
   my $fld;
   my $sh;
   my $ar;
   my $rv = 1;   # 1 ok, 0 Problem / eine Tabelle ohne Felder
   my $fcnt;     # Zahl der Felder einer Tabelle
   
   # https://mariadb.com/kb/en/show-columns/
   my $fnam;    # Field / name
                # indicates the column name.
   my $ftyp;    # Type / type
                # indicates the column data type.
   my $fcoll;   # Collation / collate
                # indicates the collation for non-binary string columns,
                # or NULL for other columns.
                # NULL wird nicht gespeichert
   my $fnull;   # Null
                # contains YES if NULL values can be stored in the column, NO if not.
                # Nur im Falle YES wird 1 gespeichert.
   my $key;     # Key
                # indicates whether the column is indexed:
                # gespeichert wird p für PRI, u für UNI, m für MUL

=for comment
If Key is empty, the column either is not indexed
or is indexed only as a secondary column in a multiple-column, non-unique index.
If Key is PRI, the column is a PRIMARY KEY
or is one of the columns in a multiple-column PRIMARY KEY.
If Key is UNI, the column is the first column of a unique-valued index
that cannot contain NULL values.
If Key is MUL, multiple occurrences of a given value are allowed within the column.
The column is the first column of a non-unique index
or a unique-valued index that can contain NULL values. 

If more than one of the Key values applies to a given column of a table,
Key displays the one with the highest priority, in the order PRI, UNI, MUL.

A UNIQUE index may be displayed as PRI
if it cannot contain NULL values and there is no PRIMARY KEY in the table.
A UNIQUE index may display as MUL if several columns form a composite UNIQUE index;
although the combination of the columns is unique,
each column can still hold multiple occurrences of a given value.
=cut

   my $fdef;    # Default
                # indicates the default value that is assigned to the column.
   my $fx;      # Extra

=for comment   
Any additional information that is available about a given column.

AUTO_INCREMENT    The column was created with the AUTO_INCREMENT keyword.
PERSISTENT        The column was created with the PERSISTENT keyword. (New in 5.3)
VIRTUAL           The column was created with the VIRTUAL keyword. (New in 5.3)
CURRENT_TIMESTAMP The column is a TIMESTAMP column that is automatically updated on INSERT and UPDATE.
=cut

   my $fpriv;   # Privileges
                # the privileges you have for the column.
   my $fcomm;   # Comment
                # any comment the column has.
   
   for $tab (@$ts) {
      $tabname = $tab -> {"tabname"};
      $flds = [];
      $tab -> {"fields"} = $flds;
      $sh  = $dbh -> prepare ("SHOW FULL COLUMNS FROM $tabname");
      $sh -> execute ();
      $fcnt = 0;
      while ($ar = $sh -> fetchrow_arrayref()) {
         ++$fcnt;
         ($fnam, $ftyp, $fcoll, $fnull, $fkey, $fdef, $fx, $fpriv, $fcomm) = (@$ar);
         $fld = {
            "name" => $fnam,
            "type" => lc ($ftyp),
         };
         push (@$flds, $fld);
         $fld -> {"collate"} = $fcoll  if $fcoll && $fcoll ne "NULL";
         $fld -> {"null"}    = 1       if $fnull eq "YES";
         if ($fkey) {
            $fld -> {"key"}  = "p"     if $fkey  eq "PRI";
            $fld -> {"key"}  = "u"     if $fkey  eq "UNI";
            $fld -> {"key"}  = "m"     if $fkey  eq "MUL";
         }
         $fld -> {"default"} = $fdef   if $fdef  && $fdef  ne "NULL";
         $fld -> {"extra"}   = lc($fx) if $fx;
         $fld -> {"comment"} = $fcomm  if $fcomm;
         # Datentypen s. https://mariadb.com/kb/en/data-types/
         # "encode" wird nur intern benutzt (evtl. auch ausgeben?)
         if ( $ftyp =~ /blob|binary/i ) {
            $fld -> {"encode"} = "b64";  # Base64-Kodierung für Binär-Daten
         }
         elsif ( $ftyp =~ /text|char/i ) {
            $fld -> {"encode"} = "esc";  # reservierte XML-Zeichen schützen
         }
      }
      $rv = 0 if !$fcnt;
   }
   $rv;
} # get_columns

# SQL-Anweisung zum Erstellen der Tabellen
sub get_create_table {
   my ($args, $data) = @_;
   my $dbh   = $data  -> {"dbh"};
   my $dbase = $data  -> {"database"};
   my $ts    = $dbase -> {"tabspec"};
   my $tab;
   my $sh;
   my $ar;
   for $tab (@$ts) {
      next if $tab -> {"tabtype"} ne "BASE TABLE";
      $tabname = $tab -> {"tabname"};
      $sh  = $dbh -> prepare ("SHOW CREATE TABLE $tabname");
      $sh -> execute ();
      $ar = $sh -> fetchrow_arrayref();
      if ($ar && @$ar > 1) {
         $tab -> {"create"} = $ar -> [1];
      }
   }
   1;
} # get_create_table

sub get_index {
   my ($args, $data) = @_;
   my $dbh   = $data  -> {"dbh"};
   my $dbase = $data  -> {"database"};
   my $ts    = $dbase -> {"tabspec"};
   my $tab;
   my $sh;
   my $ar;
   my $ixs;     # Liste der Index-Einträge
   my $ix;      # ein Index-Eintrag
   
                # https://mariadb.com/kb/en/show-index/
   my $tabname; # Table
                # Table name
   my $nonuni;  # Non_unique
                # 1 if the index permits duplicate values, 0 if values must be unique.
                # Nur 1 wird gespeichert
   my $key;     # Key_name
                # Index name. The primary key is always named PRIMARY.
   my $seq;     # Seq_in_index
                # The column's sequence in the index, beginning with 1.
   my $colname; # Column_name
                # Column name.
   my $collate; # Collation
                # Either A, if the column is sorted in ascending order in the index,
                # or NULL if it's not sorted.
   my $card;    # Cardinality
                # Estimated number of unique values in the index.
                # The cardinality statistics are calculated at various times,
                # and can help the optimizer make improved decisions.
   my $subpart; # Sub_part
                # NULL if the entire column is included in the index,
                # or the number of included characters if not.
                # NULL wird nicht gespeichert
   my $packed;  # Packed
                # NULL if the index is not packed, otherwise how the index is packed.
                # NULL wird nicht gespeichert
   my $null;    # Null
                # NULL if NULL values are permitted in the column,
                # an empty string if NULL's are not permitted.
                # Im Falle NULL wird 1 gespeichert, sonst nichts.
   my $ixtype;  # Index_type
                # The index type, which can be BTREE, FULLTEXT, HASH or RTREE.
                # See Storage Engine Index Types.
   my $comment; # Comment
                # Other information, such as whether the index is disabled.
                # Wird nur gespeichert, wenn nicht leer.
   my $ixcomm;  # Index_comment
                # Contents of the COMMENT attribute when the index was created.
                # Wird nur gespeichert, wenn nicht leer.

   for $tab (@$ts) {
      $ixs = $tab -> {"index"} ||= [];
      # next if $tab -> {"tabtype"} ne "BASE TABLE";
      $tabname = $tab -> {"tabname"};
      $sh  = $dbh -> prepare ("SHOW INDEX FROM $tabname");
      $sh -> execute ();
      while ($ar = $sh -> fetchrow_arrayref()) {
         ($tabname,
          $nonuni,
          $key,
          $seq,
          $colname,
          $collate,
          $card,
          $subpart,
          $packed,
          $null,
          $ixtype,
          $comment,
          $ixcomm) = (@$ar);
         $ix = {
            "tabname" => $tabname,
            "key"     => $key,
            "seq"     => $seq,
            "colname" => $colname,
            "ixtype"  => $ixtype,   
         };
         $ix -> {"nonuni"}  = $nonuni  if $nonuni;
         $ix -> {"collate"} = $collate if $collate;
         $ix -> {"card"}    = $card    if $card;
         $ix -> {"subpart"} = $subpart if $subpart && $subpart ne "NULL";
         $ix -> {"packed"}  = $packed  if $packed  && $packed  ne "NULL";
         $ix -> {"null"}    = 1        if $null    && $null    eq "NULL";
         $ix -> {"comment"} = $comment if $comment;
         $ix -> {"ixcomm"}  = $ixcomm  if $ixcomm;
         push (@$ixs, $ix);
      }
   }
   1;
} # get_index

my $xmlopt = {
   '@fields' => ["", "field"],
   '$encode' => "IGNORE",
};

sub write_spec {
   my ($args, $data) = @_;
   my $verbose = $args -> {"[cnt]verbose"};
   my $dbase   = $data -> {"database"};
   my $dn      = $dbase -> {"dbname"};
   my $file    = join ("", $args -> {"dumpdir"}, "/", "$dn.xml");
   my $xdw     = $data -> {"xmlwriter"} ||= Herbaer::XMLDataWriter -> new ($xmlopt);
   $xdw -> open (
      $file,
      "utf-8", "http://herbaer.de/xmlns/20201201/dbdump",
      "${dn}_ht.xslt"
   ) || do {
      print STDERR "Kann Datei $file nicht erstellen\n" if $verbose;
      return;
   };
   if ($args -> {"[cnt]spec"}) {
      $xdw -> open_element ("dbdump");
      $xdw -> write ("dbname", {}, $dbase -> {"dbname"});
      $xdw -> write ("create", {}, $dbase -> {"create"});
      $xdw -> write ("table",  {}, $dbase -> {"tabspec"});
   }
} # write_spec

sub write_end {
   my ($args, $data) = @_;
   my $xdw     = $data -> {"xmlwriter"};
   $xdw -> close () if $xdw;
} # write_end

# Datenbank-Inhalt lesen und schreiben
sub get_write_contents {
   my ($args, $data) = @_;
   my $dbh    = $data -> {"dbh"};
   my $xdw    = $data -> {"xmlwriter"};
   my $dbase  = $data -> {"database"};
   my $dbname = $dbase -> {"dbname"};
   $xdw -> open_element ("contents");
   $xdw -> open_element (
      $dbname,
      {"xmlns" => "http://herbaer.de/xmlns/20201201/dbcontents/$dbname"}
      );
   my $ts    = $dbase -> {"tabspec"};
   my $tab;
   my $tabname;
   my $fields;
   my $sh;
   my $ar;
   my $i;
   my $d;     # ein Datum
   my $enc;
   for $tab (@$ts) {
      $tabname = $tab -> {"tabname"};
      $fields  = $tab -> {"fields"};
      $sh  = $dbh -> prepare ("SELECT * FROM $tabname");
      $sh -> execute();
      while ($ar = $sh -> fetchrow_arrayref()) {
         $xdw -> open_element ($tabname);
         for ($i = 0; $i < @$ar; ++$i) {
            next unless defined $ar -> [$i];
            $d = $ar -> [$i];
            if ( $enc = $fields -> [$i] -> {"encode"} ) {
               if ($enc eq "b64") {
                  $d = encode_base64 ($d);
               }
               elsif ($enc eq "esc") {
                  $d =~ s/&/&amp;/g;
                  $d =~ s/</&lt;</g;
                  $d =~ s/>/&gt;</g;
               }
            }
            $xdw -> write ($fields -> [$i] -> {"name"}, {}, $d);
         }
         $xdw -> close_element ($tabname);
      }
   }
} # get_write_contents

get_dbh             ($args, $data) || exit 1;
get_create_database ($args, $data);
get_tables          ($args, $data) || exit 2;
get_create_table    ($args, $data);
get_table_status    ($args, $data);
get_columns         ($args, $data) || exit 3;
get_index           ($args, $data);
write_spec          ($args, $data);  # Datenbankstruktur wird ausgegeben
# Datenbank-Inhalte werden gelesen und ausgegeben
get_write_contents  ($args, $data) if $args -> {"[cnt]contents"}; 
write_end           ($args, $data);  # Dump-Datei (XMLWriter) wird geschlossen

# end of file KLEIDER/catalog/src/dbdump/dbdump.pl