123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- package Selenium::Specification;
- # ABSTRACT: Module for building a machine readable specification for Selenium
- use strict;
- use warnings;
- use v5.28;
- no warnings 'experimental';
- use feature qw/signatures unicode_strings/;
- use List::Util qw{uniq};
- use HTML::Parser();
- use JSON::MaybeXS();
- use File::HomeDir();
- use File::Slurper();
- use DateTime::Format::HTTP();
- use HTTP::Tiny();
- use File::Path qw{make_path};
- use File::Spec();
- use Encode qw{decode};
- use Unicode::Normalize qw{NFC};
- #TODO make a JSONWire JSON spec since it's not changing
- # URLs and the container ID
- our %spec_urls = (
- unstable => {
- url => 'https://w3c.github.io/webdriver/',
- section_id => 'endpoints',
- },
- draft => {
- url => "https://www.w3.org/TR/webdriver2/",
- section_id => 'endpoints',
- },
- stable => {
- url => "https://www.w3.org/TR/webdriver1/",
- section_id => 'list-of-endpoints',
- },
- );
- our $browser = HTTP::Tiny->new();
- my %state;
- my $parse = [];
- our $method = {};
- =head1 SUBROUTINES
- =head2 read($type STRING, $nofetch BOOL)
- Reads the copy of the provided spec type, and fetches it if a cached version is not available.
- =cut
- sub read($client_dir, $type='stable', $nofetch=1) {
- my $dir = File::Spec->catdir( $client_dir,"specs" );
- my $file = File::Spec->catfile( "$dir","$type.json");
- fetch( once => $nofetch, dir => $dir );
- die "could not write $file: $@" unless -f $file;
- my $buf = File::Slurper::read_binary($file);
- my $array = JSON::MaybeXS->new()->utf8()->decode($buf);
- my %hash;
- @hash{map { $_->{name} } @$array} = @$array;
- return \%hash;
- }
- =head2 fetch(%OPTIONS HASH)
- Builds a spec hash based upon the WC3 specification documents, and writes it to disk.
- =cut
- #TODO needs to grab args and argtypes still
- sub fetch (%options) {
- my $dir = $options{dir};
- my $rc = 0;
- foreach my $spec ( sort keys(%spec_urls) ) {
- make_path( $dir ) unless -d $dir;
- my $file = File::Spec->catfile( "$dir","$spec.json");
- my $last_modified = -f $file ? (stat($file))[9] : undef;
- if ($options{once} && $last_modified) {
- print STDERR "Skipping fetch, using cached result" if $options{verbose};
- next;
- }
- $last_modified = 0 if $options{force};
- my $spc = _build_spec($last_modified, %{$spec_urls{$spec}});
- if (!$spc) {
- print STDERR "Could not retrieve $spec_urls{$spec}{url}, skipping" if $options{verbose};
- $rc = 1;
- next;
- }
- # Second clause is for an edge case -- if the header is not set for some bizarre reason we should obey force still
- if (ref $spc ne 'ARRAY' && $last_modified) {
- print STDERR "Keeping cached result '$file', as page has not changed since last fetch.\n" if $options{verbose};
- next;
- }
- _write_spec($spc, $file);
- print "Wrote $file\n" if $options{verbose};
- }
- return $rc;
- }
- sub _write_spec ($spec, $file) {
- my $spec_json = JSON::MaybeXS->new()->utf8()->encode($spec);
- return File::Slurper::write_binary($file, $spec_json);
- }
- sub _build_spec($last_modified, %spec) {
- my $page = $browser->get($spec{url});
- return unless $page->{success};
- if ($page->{headers}{'last-modified'} && $last_modified ) {
- my $modified = DateTime::Format::HTTP->parse_datetime($page->{headers}{'last-modified'})->epoch();
- return 'cache' if $modified < $last_modified;
- }
- my $html = NFC( decode('UTF-8', $page->{content}) );
- $parse = [];
- %state = ( id => $spec{section_id} );
- my $parser = HTML::Parser->new(
- handlers => {
- start => [\&_handle_open, "tagname,attr"],
- end => [\&_handle_close, "tagname"],
- text => [\&_handle_text, "text"],
- }
- );
- $parser->parse($html);
- # Now that we have parsed the methods, let us go ahead and build the argspec based on the anchors for each endpoint.
- foreach my $m (@$parse) {
- $method = $m;
- %state = ();
- my $mparser = HTML::Parser->new(
- handlers => {
- start => [\&_endpoint_open, "tagname,attr"],
- end => [\&_endpoint_close, "tagname"],
- text => [\&_endpoint_text, "text"],
- },
- );
- $mparser->parse($html);
- }
- return _fixup(\%spec,$parse);
- }
- sub _fixup($spec,$parse) {
- @$parse = map {
- $_->{href} = "$spec->{url}$_->{href}";
- #XXX correct TYPO in the spec
- $_->{uri} =~ s/{sessionid\)/{sessionid}/g;
- @{$_->{output_params}} = grep { $_ ne 'null' } uniq @{$_->{output_params}};
- $_
- } @$parse;
- return $parse;
- }
- sub _handle_open($tag,$attr) {
- if ( $tag eq 'section' && ($attr->{id} || '') eq $state{id} ) {
- $state{active} = 1;
- return;
- }
- if ($tag eq 'tr') {
- $state{method} = 1;
- $state{headers} = [qw{method uri name}];
- $state{data} = {};
- return;
- }
- if ($tag eq 'td') {
- $state{heading} = shift @{$state{headers}};
- return;
- }
- if ($tag eq 'a' && $state{heading} && $attr->{href}) {
- $state{data}{href} = $attr->{href};
- }
- }
- sub _handle_close($tag) {
- if ($tag eq 'section') {
- $state{active} = 0;
- return;
- }
- if ($tag eq 'tr' && $state{active}) {
- if ($state{past_first}) {
- push(@$parse, $state{data});
- }
- $state{past_first} = 1;
- $state{method} = 0;
- return;
- }
- }
- sub _handle_text($text) {
- return unless $state{active} && $state{method} && $state{past_first} && $state{heading};
- $text =~ s/\s//gm;
- return unless $text;
- $state{data}{$state{heading}} .= $text;
- }
- # Endpoint parsers
- sub _endpoint_open($tag,$attr) {
- my $id = $method->{href};
- $id =~ s/^#//;
- if ($attr->{id} && $attr->{id} eq $id) {
- $state{active} = 1;
- }
- if ($tag eq 'ol') {
- $state{in_tag} = 1;
- }
- if ($tag eq 'dt' && $state{in_tag} && $state{last_tag} eq 'dl') {
- $state{in_dt} = 1;
- }
- if ($tag eq 'code' && $state{in_dt} && $state{in_tag} && $state{last_tag} eq 'dt') {
- $state{in_code} = 1;
- }
- $state{last_tag} = $tag;
- }
- sub _endpoint_close($tag) {
- return unless $state{active};
- if ($tag eq 'section') {
- $state{active} = 0;
- $state{in_tag} = 0;
- }
- if ($tag eq 'ol') {
- $state{in_tag} = 0;
- }
- if ($tag eq 'dt') {
- $state{in_dt} = 0;
- }
- if ($tag eq 'code') {
- $state{in_code} = 0;
- }
- }
- sub _endpoint_text($text) {
- if ($state{active} && $state{in_tag} && $state{in_code} && $state{in_dt} && $state{last_tag} eq 'code') {
- $method->{output_params} //= [];
- $text =~ s/\s//gm;
- push(@{$method->{output_params}},$text) if $text;
- }
- }
- 1;
|