TCMS.pm 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. package TCMS;
  2. use strict;
  3. use warnings;
  4. no warnings 'experimental';
  5. use feature qw{signatures state};
  6. use Date::Format qw{strftime};
  7. use HTTP::Body ();
  8. use URL::Encode ();
  9. use Text::Xslate ();
  10. use Plack::MIME ();
  11. use Mojo::File ();
  12. use DateTime::Format::HTTP();
  13. use CGI::Cookie ();
  14. use File::Basename();
  15. use IO::Compress::Gzip();
  16. use Time::HiRes qw{gettimeofday tv_interval};
  17. use HTTP::Parser::XS qw{HEADERS_AS_HASHREF};
  18. use List::Util;
  19. use lib 'lib';
  20. use Trog::Routes::HTML;
  21. use Trog::Routes::JSON;
  22. use Trog::Auth;
  23. use Trog::Utils;
  24. use Trog::Config;
  25. use Trog::Data;
  26. use Trog::Vars;
  27. # Troglodyne philosophy - simple as possible
  28. # Import the routes. Made extensible by the 'extra_modules' param in config.
  29. # Just add another module in lib/Trog/Routes/ and specify it in config.
  # Site configuration and the data model built on top of it.
  my $conf = Trog::Config::get();
  my $data = Trog::Data->new($conf);

  # Dispatch table: route path (or regex source) => route definition hashref.
  my %routes;
  my @required_modules = qw{HTML JSON Formatted};
  foreach my $route_module ( @required_modules, split( /,/, $conf->{'routes'}{'extra_modules'} || '' ) ) {

      # Tolerate "Foo, Bar" style lists in the config.
      $route_module =~ s/\A\s+|\s+\z//g;
      next unless length $route_module;

      # The name is interpolated into a string eval below, so refuse anything
      # that is not a plain (possibly nested) module name.
      unless ( $route_module =~ m/\A\w+(?:::\w+)*\z/ ) {
          warn "Refusing to load route module with invalid name '$route_module'";
          next;
      }

      my $ns = "Trog::Routes::$route_module";
      eval "require $ns";
      if ($@) {
          warn "Error when loading $ns: $@";
          next;
      }

      # Fetch the table once so the keys and values are guaranteed to come from the same hash.
      my $module_routes = $ns->routes();
      @routes{ keys %$module_routes } = values %$module_routes;
  }

  # Routes stored in the data model are layered over the module-provided ones.
  {
      my %roots = $data->routes();
      @routes{ keys %roots } = values %roots;
  }

  # Alias path => actual path; consulted by app() before dispatch.
  my %aliases = $data->aliases();

  # XXX this is built progressively across the forks, leading to inconsistent behavior.
  # This should eventually be pre-filled from DB.
  my %etags;

  # Buffer size, in bytes, for reading request bodies and streaming files (roughly 1MB).
  my $CHUNK_SIZE = 1024000;

  # Boundary marker used between the parts of multipart/byteranges responses.
  my $CHUNK_SEP = 'tCMSep666YOLO42069';

  # Stuff that isn't in upstream finders
  my %extra_types = (
      '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  );
  58. =head2 app()
  59. Dispatches requests based on %routes built above.
  60. The dispatcher here does *not* enforce authn/authz itself, beyond stripping the 'admin' ACL from requests made by non-admins. It passes the session's identity to routes in the 'user' and 'user_acls' parameters of the query object.
  61. If a path passed is not a defined route (or regex route), but exists as a file under www/, it will be served up immediately.
  62. =cut
  # PSGI entry point. Takes the PSGI environment hashref and returns a PSGI
  # response: either an arrayref [status, headers, body] or, for streamed
  # files, a coderef that is handed the responder.
  #
  # Order of evaluation: URI length guard, conditional-request short circuits,
  # query/POST parsing, alias translation, setup fallback, session lookup,
  # forbidden paths, pre-rendered statics, plain files under www/, and finally
  # the route table (exact match first, then regex routes).
  sub app {

      # Start the server timing clock
      my $start = [gettimeofday];
      my $env   = shift;

      # REQUEST_URI doubles as the eTag cache key below; normalise undef to '' once.
      my $uri = $env->{REQUEST_URI} // '';
      return _toolong() if length($uri) > 2048;

      # Check eTags. If we don't know about it, just assume it's good and lazily fill the cache
      # XXX yes, this allows cache poisoning...but only for logged in users!
      if ( $env->{HTTP_IF_NONE_MATCH} ) {
          return [ 304, [], [''] ] if $env->{HTTP_IF_NONE_MATCH} eq ( $etags{$uri} || '' );
          $etags{$uri} = $env->{HTTP_IF_NONE_MATCH} unless exists $etags{$uri};
      }

      my $last_fetch = 0;
      if ( $env->{HTTP_IF_MODIFIED_SINCE} ) {

          # A malformed date from the client must not abort the request; treat it as absent.
          my $parsed = eval { DateTime::Format::HTTP->parse_datetime( $env->{HTTP_IF_MODIFIED_SINCE} ) };
          $last_fetch = $parsed->epoch() if ref $parsed;
      }

      #XXX Don't use statics anything that has a search query
      # On one hand, I don't want to DOS the disk, but I'd also like some like ?rss...
      # Should probably turn those into aliases.
      my $has_query = !!$env->{QUERY_STRING};
      my $query     = $has_query ? URL::Encode::url_params_mixed( $env->{QUERY_STRING} ) : {};

      #Actually parse the POSTDATA and dump it into the QUERY object if this is a POST
      if ( $env->{REQUEST_METHOD} eq 'POST' ) {
          my $body = HTTP::Body->new( $env->{CONTENT_TYPE}, $env->{CONTENT_LENGTH} );
          while ( $env->{'psgi.input'}->read( my $buf, $CHUNK_SIZE ) ) {
              $body->add($buf);
          }

          # Form fields first, then uploads, so an upload wins over a same-named field.
          foreach my $source ( $body->param, $body->upload ) {
              @$query{ keys %$source } = values %$source;
          }
      }

      # Grab the list of ACLs we want to add to a post, if any.
      $query->{acls} = [ $query->{acls} ] if ( $query->{acls} && ref $query->{acls} ne 'ARRAY' );

      my $path = $env->{PATH_INFO};
      $path = '/index' if $path eq '/';

      # Translate alias paths into their actual path
      $path = $aliases{$path} if exists $aliases{$path};

      # Figure out if we want compression or not
      my $alist = $env->{HTTP_ACCEPT_ENCODING} || '';
      $alist =~ s/\s//g;
      my @accept_encodings = split( /,/, $alist );
      my $deflate          = grep { 'gzip' eq $_ } @accept_encodings;

      # Collapse multiple slashes in the path
      $path =~ s/[\/]+/\//g;

      # Let's open up our default route before we bother to see if users even exist
      return $routes{default}{callback}->($query) unless -f "config/setup";

      my $cookies = {};
      if ( $env->{HTTP_COOKIE} ) {
          $cookies = CGI::Cookie->parse( $env->{HTTP_COOKIE} );
      }

      my $active_user = '';
      if ( exists $cookies->{tcmslogin} ) {
          $active_user = Trog::Auth::session2user( $cookies->{tcmslogin}->value );
      }

      $query->{user_acls} = [];
      $query->{user_acls} = Trog::Auth::acls4user($active_user) // [] if $active_user;

      # Filter out passed ACLs which are naughty
      my $is_admin = grep { $_ eq 'admin' } @{ $query->{user_acls} };
      @{ $query->{acls} } = grep { $_ ne 'admin' } @{ $query->{acls} } unless $is_admin;

      # The error-page helpers read these from the query, so they have to be in
      # place before the first helper that receives $query (_forbidden) can run.
      my $streaming = $env->{'psgi.streaming'};
      $query->{streaming} = $streaming;
      $query->{start}     = $start;
      $query->{deflate}   = $deflate;

      # Disallow any paths that are naughty ( starman auto-removes .. up-traversal)
      if ( index( $path, '/templates' ) == 0 || index( $path, '/statics' ) == 0 || $path =~ m/.*(\.psgi|\.pm)$/i ) {
          return _forbidden($query);
      }

      # If we have a static render, just use it instead (These will ALWAYS be correct, data saves invalidate this)
      # TODO: make this key on admin INSTEAD of active user when we add non-admin users.
      if ( !$active_user && !$has_query ) {
          return _static( "$path.z", $start, $streaming ) if -f "www/statics/$path.z" && $deflate;
          return _static( $path, $start, $streaming ) if -f "www/statics/$path";
      }

      # Handle HTTP range/streaming requests.
      # Each entry of @ranges is the raw [first, last] pair split out of one
      # range spec; either side may be missing and is resolved by the file server.
      my @ranges;
      if ( $env->{HTTP_RANGE} || $env->{HTTP_IF_RANGE} ) {
          my $range = $env->{HTTP_RANGE} || "bytes=0-";
          $range =~ s/bytes=//g;
          $range =~ s/\s+//g;
          @ranges = map { [ split( /-/, $_ ) ] } split( /,/, $range );
      }

      return _serve( "www/$path", $start, $streaming, \@ranges, $last_fetch, $deflate ) if -f "www/$path";

      #Handle regex/capture routes
      # NOTE(review): keys() order is not stable, so when several patterns match
      # the same path the winner can differ between processes - confirm whether
      # route patterns are meant to be mutually exclusive.
      if ( !exists $routes{$path} ) {
          foreach my $pattern ( keys(%routes) ) {
              my @captures = $path =~ m/^$pattern$/;
              next unless @captures;

              # From here on the matched pattern *is* the route key.
              $path = $pattern;
              foreach my $field ( @{ $routes{$path}{captures} } ) {
                  $routes{$path}{data} //= {};
                  $routes{$path}{data}{$field} = shift @captures;
              }
              last;
          }
      }

      $query->{user} = $active_user;

      return _notfound($query) unless exists $routes{$path};
      return _badrequest($query) unless grep { $env->{REQUEST_METHOD} eq $_ } ( $routes{$path}{method} || '', 'HEAD' );

      # Merge the route's fixed/captured data into the query.
      my $route_data = $routes{$path}{'data'};
      @{$query}{ keys %$route_data } = values %$route_data if ref $route_data eq 'HASH' && %$route_data;

      #Set various things we don't want overridden
      $query->{body}         = '';
      $query->{dnt}          = $env->{HTTP_DNT};
      $query->{user}         = $active_user;
      $query->{domain}       = $env->{HTTP_X_FORWARDED_HOST} || $env->{HTTP_HOST};
      $query->{route}        = $path;
      $query->{scheme}       = $env->{'psgi.url_scheme'} // 'http';
      $query->{social_meta}  = 1;
      $query->{primary_post} = {};
      $query->{has_query}    = $has_query;

      #XXX there is a trick to now use strict refs, but I don't remember it right at the moment
      {
          no strict 'refs';
          my $output = $routes{$path}{callback}->($query);

          # Append server-timing headers. A route may hand back a streaming
          # coderef instead of [status, headers, body]; that has no header list to extend.
          if ( ref $output eq 'ARRAY' ) {
              my $tot = tv_interval($start) * 1000;
              push( @{ $output->[1] }, 'Server-Timing' => "app;dur=$tot" );
          }
          return $output;
      }
  }
  # Render one of the canned error pages.
  #   $type  - one of: notfound, forbidden, badrequest, toolong
  #   $query - the query hashref; 'start', 'streaming' and 'deflate' are read from it
  # A pre-rendered static under www/statics/ is preferred; otherwise the
  # matching Trog::Routes::HTML handler renders the page from $query.
  sub _generic($type, $query) {

      # _toolong() hands over an empty query, so make sure the timer exists before _static() reads it.
      $query->{start} //= [gettimeofday];

      # Only offer the .z rendering to clients that asked for gzip, same as app() does for pages.
      return _static( "$type.z", $query->{start}, $query->{streaming} ) if $query->{deflate} && -f "www/statics/$type.z";
      return _static( $type, $query->{start}, $query->{streaming} ) if -f "www/statics/$type";

      my %lookup = (
          notfound   => \&Trog::Routes::HTML::notfound,
          forbidden  => \&Trog::Routes::HTML::forbidden,
          badrequest => \&Trog::Routes::HTML::badrequest,
          toolong    => \&Trog::Routes::HTML::toolong,
      );
      die "No generic page handler for '$type'\n" unless $lookup{$type};
      return $lookup{$type}->($query);
  }
  # "Not found" page for $query, via the shared error-page renderer.
  sub _notfound ($query) {
      return _generic( notfound => $query );
  }
  # "Forbidden" page for $query, via the shared error-page renderer.
  sub _forbidden($query) {
      return _generic( forbidden => $query );
  }
  # "Bad request" page for $query, via the shared error-page renderer.
  sub _badrequest($query) {
      return _generic( badrequest => $query );
  }
  # "URI too long" page. Runs before any query has been parsed, so the
  # renderer is given an empty query.
  sub _toolong() {
      return _generic( toolong => {} );
  }
  # Serve a pre-rendered response stored under www/statics/.
  # The file holds an HTTP status line and headers, one blank line, then the body.
  #   $path       - path relative to www/statics/
  #   $start      - [gettimeofday] timestamp used for the Server-Timing header
  #   $streaming  - true when the server supports the PSGI streaming interface
  #   $last_fetch - epoch of the client's cached copy, 0 when it has none
  # Returns a PSGI response: an arrayref, or a streaming coderef when $streaming is set.
  # An unreadable file yields a 403.
  sub _static($path,$start,$streaming,$last_fetch=0) {

      # XXX because of psgi I can't just vomit the file directly
      open( my $fh, '<', "www/statics/$path" )
        or return [ 403, [ 'Content-Type' => $Trog::Vars::content_types{plain} ], ["STAY OUT YOU RED MENACE"] ];

      # Consume the stored header block. This leaves the file pointer at the
      # first byte of the body, which the reads further down rely on.
      my $headers = '';
      while ( my $line = <$fh> ) {
          last if $line eq "\n";
          $headers .= $line;
      }
      my ( undef, undef, $status, undef, $headers_parsed ) = HTTP::Parser::XS::parse_http_response( "$headers\n", HEADERS_AS_HASHREF );
      $headers_parsed //= {};

      #XXX need to put this into the file itself
      my $mt           = ( stat($fh) )[9];
      my $now_string   = strftime( "%a, %d %b %Y %H:%M:%S GMT", gmtime($mt) );
      my $not_modified = !( $mt > $last_fetch );
      $headers_parsed->{"Last-Modified"} = $now_string;

      # Append server-timing headers. Callers without a running clock get a fresh one rather than garbage.
      $start //= [gettimeofday];
      my $tot = tv_interval($start) * 1000;
      $headers_parsed->{'Server-Timing'} = "static;dur=$tot";

      # The client's copy is current: answer 304 with headers only, never the stored body.
      if ($not_modified) {
          close $fh;
          return [ 304, [%$headers_parsed], [] ];
      }

      #XXX uwsgi just opens the file *again* when we already have a filehandle if it has a path.
      # starman by comparison doesn't violate the principle of least astonishment here.
      # This is probably a performance optimization, but makes the kind of micromanagement I need to do inconvenient.
      # As such, we will just return a stream.
      if ($streaming) {
          return sub {
              my $responder = shift;
              my $writer    = $responder->( [ $status, [%$headers_parsed] ] );
              while ( $fh->read( my $buf, $CHUNK_SIZE ) ) {
                  $writer->write($buf);
              }
              close $fh;
              $writer->close;
          };
      }

      return [ $status, [%$headers_parsed], $fh ];
  }
  # Build a 206 Partial Content response for one or more byte ranges.
  #   $fh      - open, seekable handle on the file being served
  #   $ranges  - arrayref of [first, last] pairs as split from the Range header;
  #              'first' may be empty (suffix form "-N"), 'last' may be missing ("N-")
  #   $sz      - size of the file in bytes
  #   %headers - response headers gathered so far; 'Content-type' is the file's type
  # Returns a PSGI streaming coderef, or a plain 416 arrayref when any range is
  # malformed or lies outside the file. The caller's $ranges is left untouched.
  sub _range ($fh, $ranges, $sz, %headers) {
      my $primary_ct   = "Content-Type: $headers{'Content-type'}";
      my $is_multipart = scalar(@$ranges) > 1;

      # 416 responder: names the real size so the client can retry sensibly.
      my $unsatisfiable = sub {
          $fh->close();
          my %error_headers = (
              %headers,
              'Content-type'  => $Trog::Vars::content_types{plain},
              'Content-Range' => "bytes */$sz",
          );
          return [ 416, [%error_headers], ["Requested range not satisfiable"] ];
      };

      # Resolve every requested range into absolute, in-bounds [first, last] offsets.
      my @spans;
      foreach my $range ( @$ranges ? @$ranges : ( [0] ) ) {
          my ( $first, $last ) = map { ( $_ // '' ) =~ s/\s+//gr } @{$range}[ 0, 1 ];

          # Only digits are acceptable, and at least one side has to be present.
          return $unsatisfiable->() unless "$first$last" ne '' && $first =~ m/\A\d*\z/ && $last =~ m/\A\d*\z/;

          if ( $first eq '' ) {

              # Suffix form "-N": the final N bytes of the file.
              return $unsatisfiable->() if $last == 0;
              $first = List::Util::max( $sz - $last, 0 );
              $last  = $sz - 1;
          }
          elsif ( $last eq '' || $last > $sz - 1 ) {

              # Open-ended, or running past EOF: stop at the last byte that actually exists.
              $last = $sz - 1;
          }

          return $unsatisfiable->() if $first >= $sz || $first > $last;
          push( @spans, [ $first + 0, $last + 0 ] );
      }

      $headers{'Content-type'} = "multipart/byteranges; boundary=$CHUNK_SEP" if $is_multipart;

      # Build the per-part framing once, so the advertised Content-Length and
      # the bytes written later cannot drift apart.
      my @preambles;
      my $epilogue = '';
      if ($is_multipart) {
          my $lead = '';
          foreach my $span (@spans) {
              push( @preambles, "$lead--$CHUNK_SEP\n$primary_ct\nContent-Range: bytes $span->[0]-$span->[1]/$sz\n\n" );
              $lead = "\n";
          }
          $epilogue = "\n--${CHUNK_SEP}--\n";
      }

      my $content_length = length($epilogue);
      $content_length += length($_) for @preambles;
      $content_length += $_->[1] - $_->[0] + 1 for @spans;
      $headers{'Content-Length'} = $content_length;

      # A single range is described in the response headers rather than in a part header.
      $headers{'Content-Range'} = "bytes $spans[0][0]-$spans[0][1]/$sz" unless $is_multipart;

      return sub {
          my $responder = shift;
          my $writer    = $responder->( [ 206, [%headers] ] );

          foreach my $idx ( 0 .. $#spans ) {
              my ( $first, $last ) = @{ $spans[$idx] };
              $writer->write( $preambles[$idx] ) if $is_multipart;

              $fh->seek( $first, 0 );
              my $remaining = $last - $first + 1;
              while ( $remaining > 0 ) {

                  # Count what was really read; a short read must not be treated as a full chunk.
                  my $got = $fh->read( my $buf, List::Util::min( $remaining, $CHUNK_SIZE ) );
                  last unless $got;
                  $writer->write($buf);
                  $remaining -= $got;
              }
          }

          $fh->close();
          $writer->write($epilogue) if $is_multipart;
          $writer->close;
      };
  }
  # Serve a file from disk.
  #   $path       - filesystem path of the file
  #   $start      - [gettimeofday] timestamp used for the Server-Timing header
  #   $streaming  - true when the server supports the PSGI streaming interface
  #   $ranges     - arrayref of requested byte ranges (may be empty)
  #   $last_fetch - epoch of the client's cached copy, 0 when it has none
  #   $deflate    - true when the client accepts gzip
  # Returns a PSGI response: 403 when the file cannot be opened, 304 when the
  # client's copy is current, a streaming coderef for range requests and for
  # files larger than one chunk (when streaming is available), otherwise an
  # arrayref carrying the file, gzipped if the client accepts it.
  sub _serve ($path, $start, $streaming, $ranges, $last_fetch=0, $deflate=0) {
      my $ct = 'Content-type';

      # Open first and stat the handle, so the metadata always describes the bytes we send.
      open( my $fh, '<', $path )
        or return [ 403, [ $ct => $Trog::Vars::content_types{plain} ], ["STAY OUT YOU RED MENACE"] ];
      binmode($fh);

      # Content type: Plack's table, then our local additions, then plain text.
      my $ext = '.' . Mojo::File->new($path)->extname();
      my $ft  = Plack::MIME->mime_type($ext) || $extra_types{ lc $ext } || $Trog::Vars::content_types{plain};

      my ( $sz, $mt ) = ( stat($fh) )[ 7, 9 ];
      my $now_string = strftime( "%a, %d %b %Y %H:%M:%S GMT", gmtime($mt) );

      #TODO use static Cache-Control for everything but JS/CSS?
      my @headers = (
          $ct             => $ft,
          'Cache-control' => $Trog::Vars::cache_control{revalidate},
          'Accept-Ranges' => 'bytes',
          'Last-Modified' => $now_string,
          'Vary'          => 'Accept-Encoding',
      );

      # The client's copy is current: answer 304 with headers only. This is
      # decided before any Range handling, and no body or length is attached.
      unless ( $mt > $last_fetch ) {
          close $fh;
          return [ 304, \@headers, [] ];
      }

      return _range( $fh, $ranges, $sz, @headers ) if @$ranges && $streaming;

      # Files bigger than one chunk are streamed rather than handed over whole.
      if ( $streaming && $sz > $CHUNK_SIZE ) {
          push( @headers, 'Content-Length' => $sz );
          return sub {
              my $responder = shift;
              my $writer    = $responder->( [ 200, \@headers ] );
              while ( $fh->read( my $buf, $CHUNK_SIZE ) ) {
                  $writer->write($buf);
              }
              close $fh;
              $writer->close;
          };
      }

      # Hands the open file to the server unmodified.
      my $send_plain = sub {
          push( @headers, "Content-Length" => $sz );

          # Append server-timing headers
          my $tot = tv_interval($start) * 1000;
          push( @headers, 'Server-Timing' => "file;dur=$tot" );
          return [ 200, \@headers, $fh ];
      };

      #Return data in the event the caller does not support deflate
      return $send_plain->() if !$deflate;

      # Compress in memory and send the result.
      my $dfh;
      unless ( IO::Compress::Gzip::gzip( $fh => \$dfh ) ) {

          # Compression failed: log it and fall back to the uncompressed file
          # instead of sending an empty gzip body.
          warn "gzip of $path failed: $IO::Compress::Gzip::GzipError";
          seek( $fh, 0, 0 );
          return $send_plain->();
      }
      close $fh;

      push( @headers, "Content-Encoding" => "gzip" );
      push( @headers, "Content-Length"   => length($dfh) );

      # Append server-timing headers
      my $tot = tv_interval($start) * 1000;
      push( @headers, 'Server-Timing' => "file;dur=$tot" );
      return [ 200, \@headers, [$dfh] ];
  }
  348. 1;