Browse Source

Send DB logs into their own file per worker, allow preload-app

George Baugh 7 months ago
parent
commit
8a85cb3a4b
8 changed files with 194 additions and 85 deletions
  1. 1 0
      .gitignore
  2. 1 1
      Makefile
  3. 48 0
      bin/consolidate_logs.pl
  4. 43 29
      lib/TCMS.pm
  5. 53 42
      lib/Trog/Log.pm
  6. 19 8
      lib/Trog/Log/DBI.pm
  7. 21 2
      lib/Trog/Routes/HTML.pm
  8. 8 3
      lib/Trog/SQLite.pm

+ 1 - 0
.gitignore

@@ -25,3 +25,4 @@ node_modules/
 www/statics/
 totp/
 nginx/tcms.conf
+logs/

+ 1 - 1
Makefile

@@ -13,7 +13,7 @@ install:
 	test -d www/statics || mkdir -p www/statics
 	test -d totp/ || mkdir -p totp
 	test -d ~/.tcms || mkdir ~/.tcms
-	test -d /var/log && mkdir /var/log/www; /bin/true
+	test -d logs/db/ && mkdir -p logs/db/; /bin/true
 	$(RM) pod2htmd.tmp;
 
 .PHONY: install-service

+ 48 - 0
bin/consolidate_logs.pl

@@ -0,0 +1,48 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use FindBin::libs;
+use Trog::SQLite;
+use POSIX ":sys_wait_h";
+use Time::HiRes qw{usleep};
+
+# Every recorded request is fully finished, so we can treat them as such.
+my $cons_dbh = Trog::SQLite::dbh( 'schema/log.schema', "logs/consolidated.db" );
+
+opendir(my $dh, "logs/db");
+my @pids;
+foreach my $db (readdir($dh)) {
+    next unless $db =~ m/\.db$/;
+    die "AAAGH" unless -f "logs/db/$db";
+    my $dbh = Trog::SQLite::dbh( 'schema/log.schema', "logs/db/$db" );
+    my $pid = fork();
+    if (!$pid) {
+        do_row_migration($dbh);
+        exit 0;
+    }
+    push(@pids, $pid);
+}
+while (@pids) {
+    my $pid = shift(@pids);
+    my $status = waitpid($pid, WNOHANG);
+    push(@pids, $pid) if $status == 0;
+    usleep(100);
+}
+
+sub do_row_migration {
+    my ($dbh) = @_;
+    my $query = "select * from all_requests";
+    my $sth = $dbh->prepare($query);
+    $sth->execute();
+    while (my @rows = @{ $sth->fetchall_arrayref({}, 100000) || [] }) {
+        my @bind = sort keys(%{$rows[0]});
+        my @rows_bulk = map { my $subj = $_; map { $subj->{$_} } @bind } @rows;
+        Trog::SQLite::bulk_insert($cons_dbh, 'all_requests', \@bind, 'IGNORE', @rows_bulk);
+
+        # Now that we've migrated the rows from the per-fork DBs, murder these rows
+        my $binder = join(',', (map { '?' } @rows));
+        $dbh->do("DELETE FROM requests WHERE uuid IN ($binder)", undef, map { $_->{uuid} } @rows);
+    }
+}

+ 43 - 29
lib/TCMS.pm

@@ -39,29 +39,6 @@ use Trog::FileHandler;
 
 # Troglodyne philosophy - simple as possible
 
-# Import the routes
-my $conf  = Trog::Config::get();
-my $data  = Trog::Data->new($conf);
-my %roots = $data->routes();
-
-my %routes = %Trog::Routes::HTML::routes;
-@routes{ keys(%Trog::Routes::JSON::routes) } = values(%Trog::Routes::JSON::routes);
-@routes{ keys(%roots) }                      = values(%roots);
-
-# Add in global routes, here because they *must* know about all other routes
-# Also, nobody should ever override these.
-$routes{'/robots.txt'} = {
-    method   => 'GET',
-    callback => \&robots,
-};
-my $routes_immutable = clone( \%routes );
-
-my %aliases = $data->aliases();
-
-# XXX this is built progressively across the forks, leading to inconsistent behavior.
-# This should eventually be pre-filled from DB.
-my %etags;
-
 # Wrap app to return *our* error handler instead of Plack::Util::run_app's
 my $cur_query = {};
 
@@ -93,17 +70,29 @@ sub _app {
     # Start the server timing clock
     my $start = [gettimeofday];
 
-    # Don't allow any captured routes to persist past a request in the routing table
-    %routes = %$routes_immutable;
+    # Build the routing table
+    state ($conf, $data, %aliases);
+    
+    $conf  //= Trog::Config::get();
+    $data  //= Trog::Data->new($conf);
+    my %routes = %{_routes($data)};
+    %aliases = $data->aliases() unless %aliases;
+
+    # XXX this is built progressively across the forks, leading to inconsistent behavior.
+    # This should eventually be pre-filled from DB.
+    my %etags;
+
+    # Setup logging
+    log_init();
+    my $requestid = Trog::Utils::uuid();
+    Trog::Log::uuid($requestid);
 
+    # Actually start processing the request
     my $env = shift;
 
     # Discard the path used in the log, it's too long and enough 4xx error code = ban
     return _toolong( { method => $env->{REQUEST_METHOD}, fullpath => '...' } ) if length( $env->{REQUEST_URI} ) > 2048;
 
-    my $requestid = Trog::Utils::uuid();
-    Trog::Log::uuid($requestid);
-
     # Various stuff important for logging requests
     state $domain = $conf->param('general.hostname') || $env->{HTTP_X_FORWARDED_HOST} || $env->{HTTP_HOST} || eval { Sys::Hostname::hostname() };
     my $path   = $env->{PATH_INFO};
@@ -312,6 +301,30 @@ sub _app {
     }
 }
 
+#XXX Return a clone of the routing table ref, because code modifies it later
+sub _routes ($data) {
+    state %routes;
+    return clone(\%routes) if %routes;
+
+    if (!$data) {
+        my $conf = Trog::Config::get();
+        $data    = Trog::Data->new($conf);
+    }
+    my %roots = $data->routes();
+    %routes = %Trog::Routes::HTML::routes;
+    @routes{ keys(%Trog::Routes::JSON::routes) } = values(%Trog::Routes::JSON::routes);
+    @routes{ keys(%roots) }                      = values(%roots);
+
+    # Add in global routes, here because they *must* know about all other routes
+    # Also, nobody should ever override these.
+    $routes{'/robots.txt'} = {
+        method   => 'GET',
+        callback => \&robots,
+    };
+
+    return clone(\%routes);
+}
+
 =head2 robots
 
 Return an appropriate robots.txt
@@ -322,9 +335,10 @@ This is a "special" route as it needs to know about all the routes in order to d
 
 sub robots ($query) {
     state $etag = "robots-" . time();
+    my $routes = _routes();
 
     # If there's a 'capture' route, we need to format it correctly.
-    state @banned = map { exists $routes{$_}{robot_name} ? $routes{$_}{robot_name} : $_ } grep { $routes{$_}{noindex} } sort keys(%routes);
+    state @banned = map { exists $routes->{$_}{robot_name} ? $routes->{$_}{robot_name} : $_ } grep { $routes->{$_}{noindex} } sort keys(%$routes);
 
     return Trog::Renderer->render(
         contenttype => 'text/plain',

+ 53 - 42
lib/Trog/Log.pm

@@ -13,52 +13,62 @@ use Trog::SQLite;
 use Trog::Log::DBI;
 
 use Exporter 'import';
-our @EXPORT_OK   = qw{is_debug INFO DEBUG WARN FATAL};
+our @EXPORT_OK   = qw{log_init is_debug INFO DEBUG WARN FATAL};
 our %EXPORT_TAGS = ( 'all' => \@EXPORT_OK );
 
-my $LOGNAME = -d '/var/log' ? '/var/log/www/tcms.log' : '~/.tcms/tcms.log';
+my $LOGNAME = 'logs/tcms.log';
 $LOGNAME = $ENV{CUSTOM_LOG} if $ENV{CUSTOM_LOG};
 
 my $LEVEL = $ENV{WWW_VERBOSE} ? 'debug' : 'info';
 
-# By default only log requests & warnings.
-# Otherwise emit debug messages.
-my $rotate = Log::Dispatch::FileRotate->new(
-    name      => 'tcms',
-    filename  => $LOGNAME,
-    min_level => $LEVEL,
-    'mode'    => 'append',
-    size      => 10 * 1024 * 1024,
-    max       => 6,
-);
-
-# Only send fatal events/errors to prod-web.log
-my $screen = Log::Dispatch::Screen->new(
-    name      => 'screen',
-    min_level => 'error',
-);
-
-# Send things like requests in to the stats log
-my $dblog = Trog::Log::DBI->new(
-    name => 'dbi',
-    min_level => $LEVEL,
-    dbh  => _dbh(),
-);
-
-our $log = Log::Dispatch->new();
-$log->add($rotate);
-$log->add($screen);
-$log->add($dblog);
-
-uuid("INIT");
-DEBUG("If you see this message, you are running in DEBUG mode.  Turn off WWW_VERBOSE env var if you are running in production.");
-uuid("BEGIN");
+our $log;
+our $user;
+$Trog::Log::user = 'nobody';
+$Trog::Log::ip   = '0.0.0.0';
+
+sub log_init {
+    # By default only log requests & warnings.
+    # Otherwise emit debug messages.
+    my $rotate = Log::Dispatch::FileRotate->new(
+        name      => 'tcms',
+        filename  => $LOGNAME,
+        min_level => $LEVEL,
+        'mode'    => 'append',
+        size      => 10 * 1024 * 1024,
+        max       => 6,
+    );
+
+    # Only send fatal events/errors to prod-web.log
+    my $screen = Log::Dispatch::Screen->new(
+        name      => 'screen',
+        min_level => 'error',
+    );
+
+    # Send things like requests in to the stats log
+    my $dblog = Trog::Log::DBI->new(
+        name => 'dbi',
+        min_level => $LEVEL,
+        dbh  => _dbh(),
+    );
+
+    $log = Log::Dispatch->new();
+    $log->add($rotate);
+    $log->add($screen);
+    $log->add($dblog);
+
+    uuid("INIT");
+    DEBUG("If you see this message, you are running in DEBUG mode.  Turn off WWW_VERBOSE env var if you are running in production.");
+    uuid("BEGIN");
+
+    return 1;
+}
 
 #memoize
 my $rq;
 
 sub _dbh {
-	return Trog::SQLite::dbh( 'schema/log.schema', "data/log.db" );
+    # Too many writers = lock sadness, so just give each fork it's own DBH.
+	return Trog::SQLite::dbh( 'schema/log.schema', "logs/db/$$.db" );
 }
 
 sub is_debug {
@@ -71,13 +81,6 @@ sub uuid {
     $requestid //= return $rq;
 }
 
-#XXX make perl -c quit whining
-BEGIN {
-    our $user;
-    $Trog::Log::user = 'nobody';
-    $Trog::Log::ip   = '0.0.0.0';
-}
-
 sub _log {
     my ( $msg, $level ) = @_;
 
@@ -91,19 +94,27 @@ sub _log {
 }
 
 sub DEBUG {
+    _check_init();
     $log->debug( _log( shift, 'DEBUG' ) );
 }
 
 sub INFO {
+    _check_init();
     $log->info( _log( shift, 'INFO' ) );
 }
 
 sub WARN {
+    _check_init();
     $log->warning( _log( shift, 'WARN' ) );
 }
 
 sub FATAL {
+    _check_init();
     $log->log_and_die( level => 'error', message => _log( shift, 'FATAL' ) );
 }
 
+sub _check_init {
+    die "You must run log_init() before using other Trog::Log methods" unless $log;
+}
+
 1;

+ 19 - 8
lib/Trog/Log/DBI.pm

@@ -5,6 +5,9 @@ use warnings;
 
 use parent qw{Log::Dispatch::DBI};
 
+use Ref::Util qw{is_arrayref};
+use Capture::Tiny qw{capture_merged};
+
 sub create_statement {
     my $self = shift;
 
@@ -17,6 +20,8 @@ sub create_statement {
     return $self->{dbh}->prepare($sql);
 }
 
+my %buffer;
+
 sub log_message {
     my ($self, %params) = @_;
 
@@ -25,20 +30,26 @@ sub log_message {
     my $message;
     my ($date, $uuid, $ip, $user, $method, $code, $route) = $msg =~ m!^([\w|\-|:]+) \[INFO\]: RequestId ([\w|\-]+) From ([\w|\.|:]+) \|(\w+)\| (\w+) (\d+) (.+)!;
 
-    # Otherwise, let's mark it down in the "messages" table.
+    # Otherwise, let's mark it down in the "messages" table.  This will be deferred until the final write.
     if (!$date) {
         ($date, $uuid, $ip, $user, $message) = $msg =~ m!^([\w|\-|:]+) \[\w+\]: RequestId ([\w|\-]+) From ([\w|\.|:]+) \|(\w+)\| (.+)!;
-        # Dummy up the method, code and route, as otherwise we summon complexity demon due to lack of FULL OUTER JOIN.
-        $method = "UNKNOWN";
-        $code   = 100;
-        $route  = "bogus";
+
+        $buffer{$uuid} //= [];
+        push(@{$buffer{$uuid}}, $message);
+        return 1;
     }
 
     # If this is a mangled log, forget it.
-    return unless $date;
+    return unless $date && $uuid;
 
-    my $res = $self->{sth}->execute($uuid, $date, $ip, $user, $method, $route, $code);
-    $self->{sth2}->execute($uuid, $message) if $message;
+    my $res = $self->{sth}->execute($uuid, $date, $ip, $user, $method, $route, $code );
+
+    if (is_arrayref($buffer{$uuid}) && @{$buffer{$uuid}}) {
+        $self->{sth2}->bind_param_array(1, $uuid);
+        $self->{sth2}->bind_param_array(2, $buffer{$uuid});
+        $self->{sth2}->execute_array({});
+        delete $buffer{$uuid};
+    }
 
     return $res;
 }

+ 21 - 2
lib/Trog/Routes/HTML.pm

@@ -230,8 +230,6 @@ if ($Trog::Themes::theme_dir) {
     }
 }
 
-my $data = Trog::Data->new($conf);
-
 =head1 PRIMARY ROUTE
 
 =head2 index
@@ -288,6 +286,9 @@ sub index ( $query, $content = '', $i_styles = [] ) {
         $query->{user_class} =~ tr/ /_/;
     }
 
+    state $data;
+    $data //= Trog::Data->new($conf);
+
     return finish_render(
         $tmpl,
         {
@@ -772,6 +773,9 @@ sub do_totp_clear ($query) {
 }
 
 sub _get_series ( $edit = 0 ) {
+    state $data;
+    $data //= Trog::Data->new($conf);
+
     my @series = $data->get(
         acls  => [qw{public}],
         tags  => [qw{topbar}],
@@ -879,6 +883,9 @@ sub post_save ($query) {
     # Posts will always be GET
     $query->{method} = 'GET';
 
+    state $data;
+    $data //= Trog::Data->new($conf);
+
     $data->add($query) and die "Could not add post";
     return see_also($to);
 }
@@ -917,6 +924,9 @@ sub post_delete ($query) {
     return see_also('/login') unless $query->{user};
     return Trog::Routes::HTML::forbidden($query) unless grep { $_ eq 'admin' } @{ $query->{user_acls} };
 
+    state $data;
+    $data //= Trog::Data->new($conf);
+
     $data->delete($query) and die "Could not delete post";
     return see_also( $query->{to} );
 }
@@ -1159,6 +1169,9 @@ sub posts ( $query, $direct = 0 ) {
     my $edittype = $query->{primary_post} ? $query->{primary_post}->{child_form}          : $query->{form};
     my $tiled    = $query->{primary_post} ? !$is_admin && $query->{primary_post}->{tiled} : 0;
 
+    state $data;
+    $data //= Trog::Data->new($conf);
+
     # Grab the rest of the tags to dump into the edit form
     my @tags_all = $data->tags();
 
@@ -1241,6 +1254,9 @@ sub _themed_title ($path) {
 }
 
 sub _post_helper ( $query, $tags, $acls ) {
+    state $data;
+    $data //= Trog::Data->new($conf);
+
     return $data->get(
         older        => $query->{older},
         newer        => $query->{newer},
@@ -1273,6 +1289,9 @@ Passing compressed=1 will gzip the output.
 
 sub sitemap ($query) {
 
+    state $data;
+    $data //= Trog::Data->new($conf);
+
     state $etag = "sitemap-" . time();
     my ( @to_map, $is_index, $route_type );
     my $warning = '';

+ 8 - 3
lib/Trog/SQLite.pm

@@ -4,7 +4,7 @@ use strict;
 use warnings;
 
 no warnings 'experimental';
-use feature qw{signatures};
+use feature qw{signatures state};
 
 use POSIX qw{floor};
 
@@ -36,12 +36,16 @@ Be careful when first calling, the standard fork-safety concerns with sqlite app
 
 =cut
 
+# We need to make sure this is different across forks, AND consistent within them.
 my $dbh = {};
 
 # Ensure the db schema is OK, and give us a handle
+# WARNING: do not ever call during BEGIN or outside a sub.
+# Otherwise, we can't preload_app.
 sub dbh {
     my ( $schema, $dbname ) = @_;
-    return $dbh->{$schema} if $dbh->{$schema};
+    $dbh //= {};
+    return $dbh->{$dbname} if $dbh->{$dbname};
     File::Touch::touch($dbname) unless -f $dbname;
     die "No such schema file '$schema' !" unless -f $schema;
     my $qq = File::Slurper::read_text($schema);
@@ -49,12 +53,13 @@ sub dbh {
     $db->{sqlite_allow_multiple_statements} = 1;
     $db->do($qq) or die "Could not ensure database consistency: " . $db->errstr;
     $db->{sqlite_allow_multiple_statements} = 0;
-    $dbh->{$schema} = $db;
+    $dbh->{$dbname} = $db;
 
     # Turn on fkeys
     $db->do("PRAGMA foreign_keys = ON")  or die "Could not enable foreign keys";
     # Turn on WALmode
     $db->do("PRAGMA journal_mode = WAL") or die "Could not enable WAL mode";
+
     return $db;
 }