summarylogtreecommitdiffstats
path: root/download-resources.pl
blob: 482d262da9b50e1d3862a9e24e79e0c90c09ef08 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
use warnings;
use strict;
use LWP::UserAgent;
use File::Path qw/make_path/;
use Data::Dumper;

=pod

WHATWG.org Local Living HTML Standard

Simon Wilper - 2017-04-09

When you clone the html-build repository from http://github.com/whatwg and
run `build.sh`, you get a clone of the HTML source file, which is then
processed by [WATTSI](https://github.com/whatwg/wattsi).

To my dismay, the style and JavaScript URLs in the resulting HTML files
reference remote URLs. Not cool. You can clone the source and have it
*locally*, but not the CSS?

This script, `download-resources.pl`, does exactly what it says: it examines
the source file, downloads the resource files and replaces the links with
relative URLs into the `resources` subdirectory.

=cut

# Usage: download-resources.pl <input.html> <output.html>
#
# Copies <input.html> to <output.html> line by line. Every
# https://resources.whatwg.org/... URL that is terminated by `)` or `"` is
# downloaded once into the local `resources/` directory (relative to the
# current working directory) and rewritten to the matching relative
# `resources/...` path. The script dies on the first failed download.

@ARGV >= 2
  or die( "Usage: $0 <input.html> <output.html>\n" );
my ( $in_file, $out_file ) = @ARGV;

# Flush STDOUT after every print so the "... " progress prefix is visible
# while the download is still running.
$| = 1;

my $ua = LWP::UserAgent->new;
$ua->timeout(3);
$ua->env_proxy;

# Resource URI => number of times it has been seen in the input so far.
# Used to download every distinct resource only once.
my $resources = {};

# $1 = full resource URI, $2 = path below the host. The terminating `)` or
# `"` is only looked at, not consumed, so adjacent URLs are all found.
my $resource_re = qr{(https://resources\.whatwg\.org/([^)"]+))(?=[)"])};

open( my $fh, '<', $in_file ) or die( "Cannot read '$in_file': $!\n" );
open( my $fhOut, '>', $out_file ) or die( "Cannot write '$out_file': $!\n" );

# make_path() is a no-op when the directory already exists.
make_path( "resources" );

while ( my $line = <$fh> ) {

  # A single line may reference several resources; visit all of them.
  while ( $line =~ /$resource_re/g ) {

    # Copy the captures right away; the matches below would clobber them.
    my ( $res_uri, $res_localfile ) = ( $1, $2 );

    next if $resources->{$res_uri}++;

    # Refuse paths that would escape the resources/ directory.
    if ( grep { $_ eq '..' } split( m{/}, $res_localfile ) ) {
      die( "Refusing unsafe resource path: $res_uri\n" );
    }

    # Create the sub-directory part of the resource path, if there is one.
    my ( $relpath ) = ( $res_localfile =~ m{\A(.+)/[^/]+\z} );
    if ( defined($relpath) ) {
      make_path( "resources/$relpath" );
    }

    my $local_file = "resources/$res_localfile";
    print( "$res_uri -> $local_file ... " );
    my $response = $ua->get( $res_uri, ':content_file' => $local_file );

    # LWP reports a failure inside its content handler (e.g. the local file
    # could not be written) through the X-Died header, not the status code.
    my $died = $response->header('X-Died');
    if ( $response->is_success && !defined($died) ) {
      print( "OK\n" );
    } else {
      print( "FAIL\n" );
      my $reason = defined($died) ? $died : $response->status_line;
      die( "Download failed: " . $reason . "\n\n" );
    }
  }

  # Rewrite every resource URL on the line, not just the first one.
  $line =~ s{$resource_re}{resources/$2}g;
  print {$fhOut} $line or die( "Cannot write '$out_file': $!\n" );
}
close($fh);

# Buffered write errors only surface when the handle is closed.
close($fhOut) or die( "Cannot finish writing '$out_file': $!\n" );