Specification.pm 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. package Selenium::Specification;
  2. # ABSTRACT: Module for building a machine readable specification for Selenium
  3. use strict;
  4. use warnings;
  5. use v5.28;
  6. no warnings 'experimental';
  7. use feature qw/signatures unicode_strings/;
  8. use List::Util qw{uniq};
  9. use HTML::Parser();
  10. use JSON::MaybeXS();
  11. use File::HomeDir();
  12. use File::Slurper();
  13. use DateTime::Format::HTTP();
  14. use HTTP::Tiny();
  15. use File::Path qw{make_path};
  16. use File::Spec();
  17. use Encode qw{decode};
  18. use Unicode::Normalize qw{NFC};
  19. #TODO make a JSONWire JSON spec since it's not changing
  20. # URLs and the container ID
  21. our %spec_urls = (
  22. unstable => {
  23. url => 'https://w3c.github.io/webdriver/',
  24. section_id => 'endpoints',
  25. },
  26. draft => {
  27. url => "https://www.w3.org/TR/webdriver2/",
  28. section_id => 'endpoints',
  29. },
  30. stable => {
  31. url => "https://www.w3.org/TR/webdriver1/",
  32. section_id => 'list-of-endpoints',
  33. },
  34. );
  35. our $browser = HTTP::Tiny->new();
  36. my %state;
  37. my $parse = [];
  38. our $method = {};
  39. =head1 SUBROUTINES
  40. =head2 read($type STRING, $nofetch BOOL)
  41. Reads the copy of the provided spec type, and fetches it if a cached version is not available.
  42. =cut
  43. sub read($client_dir, $type='stable', $nofetch=1) {
  44. my $dir = File::Spec->catdir( $client_dir,"specs" );
  45. my $file = File::Spec->catfile( "$dir","$type.json");
  46. fetch( once => $nofetch, dir => $dir );
  47. die "could not write $file: $@" unless -f $file;
  48. my $buf = File::Slurper::read_binary($file);
  49. my $array = JSON::MaybeXS->new()->utf8()->decode($buf);
  50. my %hash;
  51. @hash{map { $_->{name} } @$array} = @$array;
  52. return \%hash;
  53. }
  54. =head2 fetch(%OPTIONS HASH)
  55. Builds a spec hash based upon the WC3 specification documents, and writes it to disk.
  56. =cut
  57. #TODO needs to grab args and argtypes still
  58. sub fetch (%options) {
  59. my $dir = $options{dir};
  60. my $rc = 0;
  61. foreach my $spec ( sort keys(%spec_urls) ) {
  62. make_path( $dir ) unless -d $dir;
  63. my $file = File::Spec->catfile( "$dir","$spec.json");
  64. my $last_modified = -f $file ? (stat($file))[9] : undef;
  65. if ($options{once} && $last_modified) {
  66. print STDERR "Skipping fetch, using cached result" if $options{verbose};
  67. next;
  68. }
  69. $last_modified = 0 if $options{force};
  70. my $spc = _build_spec($last_modified, %{$spec_urls{$spec}});
  71. if (!$spc) {
  72. print STDERR "Could not retrieve $spec_urls{$spec}{url}, skipping" if $options{verbose};
  73. $rc = 1;
  74. next;
  75. }
  76. # Second clause is for an edge case -- if the header is not set for some bizarre reason we should obey force still
  77. if (ref $spc ne 'ARRAY' && $last_modified) {
  78. print STDERR "Keeping cached result '$file', as page has not changed since last fetch.\n" if $options{verbose};
  79. next;
  80. }
  81. _write_spec($spc, $file);
  82. print "Wrote $file\n" if $options{verbose};
  83. }
  84. return $rc;
  85. }
  86. sub _write_spec ($spec, $file) {
  87. my $spec_json = JSON::MaybeXS->new()->utf8()->encode($spec);
  88. return File::Slurper::write_binary($file, $spec_json);
  89. }
  90. sub _build_spec($last_modified, %spec) {
  91. my $page = $browser->get($spec{url});
  92. return unless $page->{success};
  93. if ($page->{headers}{'last-modified'} && $last_modified ) {
  94. my $modified = DateTime::Format::HTTP->parse_datetime($page->{headers}{'last-modified'})->epoch();
  95. return 'cache' if $modified < $last_modified;
  96. }
  97. my $html = NFC( decode('UTF-8', $page->{content}) );
  98. $parse = [];
  99. %state = ( id => $spec{section_id} );
  100. my $parser = HTML::Parser->new(
  101. handlers => {
  102. start => [\&_handle_open, "tagname,attr"],
  103. end => [\&_handle_close, "tagname"],
  104. text => [\&_handle_text, "text"],
  105. }
  106. );
  107. $parser->parse($html);
  108. # Now that we have parsed the methods, let us go ahead and build the argspec based on the anchors for each endpoint.
  109. foreach my $m (@$parse) {
  110. $method = $m;
  111. %state = ();
  112. my $mparser = HTML::Parser->new(
  113. handlers => {
  114. start => [\&_endpoint_open, "tagname,attr"],
  115. end => [\&_endpoint_close, "tagname"],
  116. text => [\&_endpoint_text, "text"],
  117. },
  118. );
  119. $mparser->parse($html);
  120. }
  121. return _fixup(\%spec,$parse);
  122. }
  123. sub _fixup($spec,$parse) {
  124. @$parse = map {
  125. $_->{href} = "$spec->{url}$_->{href}";
  126. #XXX correct TYPO in the spec
  127. $_->{uri} =~ s/{sessionid\)/{sessionid}/g;
  128. @{$_->{output_params}} = grep { $_ ne 'null' } uniq @{$_->{output_params}};
  129. $_
  130. } @$parse;
  131. return $parse;
  132. }
  133. sub _handle_open($tag,$attr) {
  134. if ( $tag eq 'section' && ($attr->{id} || '') eq $state{id} ) {
  135. $state{active} = 1;
  136. return;
  137. }
  138. if ($tag eq 'tr') {
  139. $state{method} = 1;
  140. $state{headers} = [qw{method uri name}];
  141. $state{data} = {};
  142. return;
  143. }
  144. if ($tag eq 'td') {
  145. $state{heading} = shift @{$state{headers}};
  146. return;
  147. }
  148. if ($tag eq 'a' && $state{heading} && $attr->{href}) {
  149. $state{data}{href} = $attr->{href};
  150. }
  151. }
  152. sub _handle_close($tag) {
  153. if ($tag eq 'section') {
  154. $state{active} = 0;
  155. return;
  156. }
  157. if ($tag eq 'tr' && $state{active}) {
  158. if ($state{past_first}) {
  159. push(@$parse, $state{data});
  160. }
  161. $state{past_first} = 1;
  162. $state{method} = 0;
  163. return;
  164. }
  165. }
  166. sub _handle_text($text) {
  167. return unless $state{active} && $state{method} && $state{past_first} && $state{heading};
  168. $text =~ s/\s//gm;
  169. return unless $text;
  170. $state{data}{$state{heading}} .= $text;
  171. }
  172. # Endpoint parsers
  173. sub _endpoint_open($tag,$attr) {
  174. my $id = $method->{href};
  175. $id =~ s/^#//;
  176. if ($attr->{id} && $attr->{id} eq $id) {
  177. $state{active} = 1;
  178. }
  179. if ($tag eq 'ol') {
  180. $state{in_tag} = 1;
  181. }
  182. if ($tag eq 'dt' && $state{in_tag} && $state{last_tag} eq 'dl') {
  183. $state{in_dt} = 1;
  184. }
  185. if ($tag eq 'code' && $state{in_dt} && $state{in_tag} && $state{last_tag} eq 'dt') {
  186. $state{in_code} = 1;
  187. }
  188. $state{last_tag} = $tag;
  189. }
  190. sub _endpoint_close($tag) {
  191. return unless $state{active};
  192. if ($tag eq 'section') {
  193. $state{active} = 0;
  194. $state{in_tag} = 0;
  195. }
  196. if ($tag eq 'ol') {
  197. $state{in_tag} = 0;
  198. }
  199. if ($tag eq 'dt') {
  200. $state{in_dt} = 0;
  201. }
  202. if ($tag eq 'code') {
  203. $state{in_code} = 0;
  204. }
  205. }
  206. sub _endpoint_text($text) {
  207. if ($state{active} && $state{in_tag} && $state{in_code} && $state{in_dt} && $state{last_tag} eq 'code') {
  208. $method->{output_params} //= [];
  209. $text =~ s/\s//gm;
  210. push(@{$method->{output_params}},$text) if $text;
  211. }
  212. }
  213. 1;