Pārlūkot izejas kodu

Fix #58: support connecting to remote CDP sessions

George S. Baugh 5 mēneši atpakaļ
vecāks
revīzija
bbb980a8ce
4 mainīti faili ar 106 papildinājumiem un 9 dzēšanām
  1. 4 0
      conf/Changes
  2. 57 2
      example.pl
  3. 30 5
      lib/Playwright.pm
  4. 15 2
      playwright_server

+ 4 - 0
conf/Changes

@@ -1,5 +1,9 @@
 Revision history for Playwright
 
+1.401 2024-02-07 TEODESIAN
+    - Fix ConsoleMessages being broken due to returning out-of-spec data
+    - Add support for connecting to browserless.io playwright hosts
+
 1.324 2023-04-18 TEODESIAN
     - Ensure all PW methods returning Locator objects are correctly blessed
 

+ 57 - 2
example.pl

@@ -4,8 +4,10 @@ use warnings;
 use Data::Dumper;
 use Playwright;
 use Try::Tiny;
+use Net::EmptyPort;
+use Carp::Always;
 
-{
+NORMAL: {
     my $handle = Playwright->new( debug => 1 );
 
     # Open a new chrome instance
@@ -165,7 +167,7 @@ use Try::Tiny;
 }
 
 # Example of using persistent mode / remote hosts
-{
+OPEN: {
     my $handle  = Playwright->new( debug => 1 );
     my $handle2 = Playwright->new( debug => 1, host => 'localhost', port => $handle->{port} );
 
@@ -175,6 +177,59 @@ use Try::Tiny;
 
 }
 
+# Example of connecting to remote CDP sessions
+CDP: {
+    local $SIG{HUP} = 'IGNORE';
+
+    sub kill_krom_and_die {
+        my ($in, $msg) = @_;
+        kill_krom($in);
+        die $msg;
+    }
+
+    sub kill_krom {
+        my ($in) = @_;
+        kill HUP => -getpgrp();
+        close $in;
+    }
+
+    my $port = Net::EmptyPort::empty_port();
+
+    my $pid = fork // die("Could not fork");
+    if (!$pid) {
+        open(my $stdin, '|-', qq{chromium-browser --remote-debugging-port=$port --headless}) or die "Could not open chromium-browser to test!";
+        print "Waiting for cdp server on port $port to come up...\n";
+        Net::EmptyPort::wait_port( $port, 10 )
+          or kill_krom_and_die($stdin, "Server never came up after 10s!");
+        print "done\n";
+
+        my $handle = Playwright->new( debug => 1, cdp_uri => "http://127.0.0.1:$port" );
+
+        # Open a new chrome instance
+        my $browser = $handle->launch( headless => 1, type => 'chrome' );
+
+        # Open a tab therein
+        my $page = $browser->newPage({ videosPath => 'video', acceptDownloads => 1 });
+
+        # Load a URL in the tab
+        my $res = $page->goto('http://troglodyne.net', { waitUntil => 'networkidle' });
+        print Dumper($res->status(), $browser->version());
+
+        $handle->quit();
+
+        #XXX OF COURSE chrome responds correctly to ESPIPE and SIGCHLD, why wouldn't it
+        kill_krom($stdin);
+        exit 0;
+    } else {
+        # If it can't get done in 20s, it ain't getting done
+        foreach (0..20) {
+            last unless waitpid( $pid, 1) == 0;
+            sleep 1;
+        }
+    }
+    print "All Done!\n\n";
+}
+
 # Clean up, since we left survivors
 require './bin/reap_playwright_servers';
 Playwright::ServerReaper::main();

+ 30 - 5
lib/Playwright.pm

@@ -198,6 +198,25 @@ Don't worry though, you can access the parent attribute on most Playwright::* ob
     my $page = $element->{parent};
 
 
+=head2 Chrome Specific features
+
+You can pass the 'cdp_uri' parameter to the constructor to connect to an already-running browser that exposes a Chrome DevTools Protocol (CDP) endpoint.
+Example:
+
+    ws://wegotussomebrowsers.test:666?user=fred&token=Y4BBAD4B3AD00
+
+This appears to be what the large-scale playwright-as-a-service shops use to expose browsers to their customers.
+
+For the curious as to how this actually works:
+
+Similar to the playwright_server binary this module ships, they use some kind of web service to wrap browser.newBrowserCDPSession().
+Alternatively, they wrap running `chromium-browser --remote-debugging-port=7779311` instead of touching pw, because complexity demon BAD.
+See the CDP: block in example.pl with this distribution (read: in the top-level directory of its repo) for how to do precisely that.
+
+It's nothing all that complicated, other than the hulking swarm of services which integrate that into a userland that can charge your credit card!
+Oh, and monitoring/balancing/scaling it all so that it doesn't fall over because you crammed 10 billion clients onto one box.
+Gotta stay right on the edge of utilization madness, that's the sweet spot as far as margin is concerned.
+
 =head2 Firefox Specific concerns
 
 By default, firefox will open PDFs in a pdf.js window.
@@ -452,15 +471,17 @@ sub new ( $class, %options ) {
 
     #XXX yes, this is a race, so we need retries in _start_server
     my $port = $options{port} // Net::EmptyPort::empty_port();
+    my $cdp_uri = $options{cdp_uri} // '';
     my $timeout = $options{timeout} // 30;
     my $self = bless(
         {
             ua      => $options{ua} // LWP::UserAgent->new(),
             host    => $options{host} // 'localhost',
             port    => $port,
+            cdp_uri => $cdp_uri,
             debug   => $options{debug},
             cleanup => ( $options{cleanup} || !$options{port} || !$options{host} ) // 1,
-            pid     => $options{host} ? "REUSE" : _start_server( $port, $timeout, $options{debug}, $options{cleanup} // 1 ),
+            pid     => $options{host} ? "REUSE" : _start_server( $port, $cdp_uri, $timeout, $options{debug}, $options{cleanup} // 1 ),
             parent  => $$ // 'bogus', # Oh lawds, this can be undef sometimes
             timeout => $timeout,
         },
@@ -669,7 +690,7 @@ sub DESTROY ($self) {
     $self->quit();
 }
 
-sub _start_server ( $port, $timeout, $debug, $cleanup ) {
+sub _start_server ( $port, $cdp_uri, $timeout, $debug, $cleanup ) {
     $debug = $debug ? '--debug' : '';
 
     # Check if the port is already live, and short-circuit if this is the case.
@@ -684,12 +705,16 @@ sub _start_server ( $port, $timeout, $debug, $cleanup ) {
     if ($pid) {
         print "Waiting for playwright server on port $port to come up...\n" if $debug;
         Net::EmptyPort::wait_port( $port, $timeout )
-          or confess("Server never came up after 30s!");
+          or confess("Server never came up after ".$timeout."s!");
         print "done\n" if $debug;
 
         return $pid;
     }
 
+    my @args = ( $node_bin, $server_bin, "--port", $port );
+    push(@args, "--cdp", $cdp_uri) if $cdp_uri;
+    push(@args, $debug) if $debug;
+
     # Orphan the process in the event that cleanup => 0
     if (!$cleanup) {
         print "Detaching child process...\n";
@@ -697,10 +722,10 @@ sub _start_server ( $port, $timeout, $debug, $cleanup ) {
         require POSIX;
         die "Cannot detach playwright_server process for persistence" if POSIX::setsid() < 0;
         require Capture::Tiny;
-        capture_merged { exec( $node_bin, $server_bin, "--port", $port, $debug ) };
+        capture_merged { exec( @args ) };
         die("Could not exec!");
     }
-    exec( $node_bin, $server_bin, "--port", $port, $debug );
+    exec( @args );
 }
 
 1;

+ 15 - 2
playwright_server

@@ -69,7 +69,7 @@ if (fix_it) {
 var args = process.argv.slice(2);
 
 if ( args.filter(arg => arg == '--help' || arg == '-h' || arg == '-?' ).length > 0 ) {
-    console.log("Usage:\nplaywright_server [--debug | --check | --port PORT | --help]");
+    console.log("Usage:\nplaywright_server [--debug | --check | --port PORT | --cdp URI --help]");
     exit(0);
 }
 
@@ -91,6 +91,14 @@ if ( args.filter(arg => arg == '--port').length > 0 ) {
     }
 }
 
+var cdp_uri = '';
+if ( args.filter(arg => arg == '--cdp').length > 0 ) {
+    var pos = args.indexOf('--cdp') + 1;
+    if (pos !=0) {
+        cdp_uri = args[pos];
+    }
+}
+
 const app = express();
 const port = got_port;
 
@@ -121,7 +129,12 @@ app.post('/session', async (req, res) => {
         try {
             var browserServer = await browsers[type].launchServer(...args);
             var wsEndpoint = browserServer.wsEndpoint();
-            var browser = await browsers[type].connect({ wsEndpoint });
+            var browser;
+            if (cdp_uri == '') {
+                browser = await browsers[type].connect({ wsEndpoint });
+            } else {
+                browser = await browsers[type].connectOverCDP( cdp_uri );
+            }
             browser.server = browserServer;
             objects[browser._guid] = browser;
             result = { error : false, message : browser };