docs/gen-html-index: Extract titles from HTML documents

author Ian Jackson <ian.jackson@eu.citrix.com>

Fri, 6 Apr 2018 18:09:02 +0000 (19:09 +0100)

committer Ian Jackson <Ian.Jackson@eu.citrix.com>

Thu, 12 Apr 2018 15:13:39 +0000 (16:13 +0100)
author Ian Jackson <ian.jackson@eu.citrix.com>
Fri, 6 Apr 2018 18:09:02 +0000 (19:09 +0100)
committer Ian Jackson <Ian.Jackson@eu.citrix.com>
Thu, 12 Apr 2018 15:13:39 +0000 (16:13 +0100)
diff --git a/docs/gen-html-index b/docs/gen-html-index

index e9792bf9372bbddbd2affabe68e55441a03ce8dd..5b43b42a8cb3368dc372ee0c2cde55db84a0afd8 100644 (file)
--- a/docs/gen-html-index
+++ b/docs/gen-html-index
@@ -10,6 +10,7 @@ use warnings;
  use Getopt::Long;
  use IO::File;
  use File::Basename;
+use HTML::TreeBuilder::XPath;
  
  Getopt::Long::Configure('bundling');
  
@@ -64,6 +65,18 @@ sub make_linktext ($) {
      return "$1($2)" if $l =~ m,^man/(.*)\.([0-9].*)\.html,;
      $l =~ s/.(?:html|txt)$//g;
      return $index{$l} if exists $index{$l};
+
+    my $from_html;
+    eval {
+        my $tree = new HTML::TreeBuilder::XPath;
+        my $f = "$outdir/$l.html";
+        open F, '<', $f or die "$l $f $!";
+        $tree->parse_file(\*F) or die;
+        close F;
+        $from_html = $tree->findvalue("/html/head/title");
+    };
+    return $from_html if $from_html;
+
      return basename($l);
  }
author	Ian Jackson <ian.jackson@eu.citrix.com>
	Fri, 6 Apr 2018 18:09:02 +0000 (19:09 +0100)
committer	Ian Jackson <Ian.Jackson@eu.citrix.com>
	Thu, 12 Apr 2018 15:13:39 +0000 (16:13 +0100)