Xen project Mailing List

[Xen-devel] [PATCH 08/11] docs: Provide parse-support-md

From: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>

Date: Thu, 12 Apr 2018 13:28:33 +0100

Cc: Juergen Gross <jgross@xxxxxxxx>, Lars Kurth <lars.kurth@xxxxxxxxxx>, Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>, Wei Liu <wei.liu2@xxxxxxxxxx>, George Dunlap <george.dunlap@xxxxxxxxxx>

Delivery-date: Thu, 12 Apr 2018 12:29:00 +0000

List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

This utility reads json format pandoc output, from parsing one or more SUPPORT.md files, and generates an HTML table element containing the principal version and feature information. This is rather hairier than I anticipated when I started out; hence the 400-odd-line Perl script. Machinery to assemble the appropriate inputs for parse-support-md will be in the next commit. Signed-off-by: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx> Release-acked-by: Juergen Gross <jgross@xxxxxxxx> Acked-by: Lars Kurth <lars.kurth@xxxxxxxxxx> --- v2: New in this version of the series. v3: Refactor to introduce RealSect v3: Add [*] footnote to all applicable entries, not just the last --- docs/parse-support-md | 414 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 414 insertions(+) create mode 100755 docs/parse-support-md diff --git a/docs/parse-support-md b/docs/parse-support-md new file mode 100755 index 0000000..83768cf --- /dev/null +++ b/docs/parse-support-md @@ -0,0 +1,414 @@ +#!/usr/bin/perl -w +# +# Written with reference to pandoc_markdown from Debian jessie +# We require atx-style headers +# +# usage: +# pandoc -t json SUPPORT.md >j-unstable +# git-cat-file ... | pandoc -t json >j-4.10 +# docs/parse-support-md \ +# j-unstable https://xenbits/unstable/SUPPORT.html +# j-4.10 https://xenbits/4.10/SUPPORT.html +# or equivalent + +use strict; +use JSON; +use Tie::IxHash; +use IO::File; +use CGI qw(escapeHTML); +use Data::Dumper; + +#---------- accumulating input/output ---------- + +# This combines information from all of the input files. + +sub new_sectlist () { { } }; +our $toplevel_sectlist = new_sectlist(); +# an $sectlist is +# { } nothing seen yet +# a tied hashref something seen +# (tied $sectlist) is an object of type Tie::IxHash +# $sectlist->{KEY} a $sectnode: +# $sectlist->{KEY}{Status}[VI] = absent or markdown content +# $sectlist->{KEY}{Children} = a further $sectlist +# $sectlist->{KEY}{Key} = KEY +# $sectlist->{KEY}{RealSect} = containing real section in @insections, so +# $sectlist->{KEY}{RealSect}{HasText}[VI] = trueish iff there was a Para +# $sectlist->{KEY}{RealSect}{Anchor} = value for < id="" > in the pandoc html +# A $sectnode represents a single section from the original markdown +# document. Its subsections are in Children. +# +# Also, the input syntax: +# Status, something or other: Supported +# is treated as a $sectnode, is as if it were a subsection - +# one called `something or other'. +# +# KEY is the Anchor, or derived from the `something or other'. +# It is used to match up identical features in different versions. + +#---------- state for this input file ---------- + +our $version_index; +our @version_urls; + +our @insections; +# $insections[]{Key} = string +# $insections[]{Headline} = markdown content +# these next are only defined for real sections, not Status elements +# $insections[]{Anchor} = string +# $insections[]{HasText} = array, $sectlist->{HasText} will refer to this + +our $had_unknown; +# adding new variable ? it must be reset in r_toplevel + +#---------- parsing ---------- + +sub ri_Header { + my ($c) = @_; + my ($level, $infos, $hl) = @$c; +#print STDERR 'RI_HEADER ', Dumper($c, \@c); + my ($id) = @$infos; + die unless $level >= 1; + die unless $level-2 <= $#insections; + $#insections = $level-2; + push @insections, + { + Key => $id, + Anchor => $id, + Headline => $hl, + HasText => [], + }; +#print STDERR Dumper(\@insections); +} + +sub ri_Para { + if (@insections) { + $insections[$#insections]{HasText}[$version_index] = 1; + } +}; + +sub parse_feature_entry ($) { + my ($value) = @_; + die unless @insections; + + my $sectnode; + my $realsect; + foreach my $s (@insections) { + my $sectlist = $sectnode + ? $sectnode->{Children} : $toplevel_sectlist; + my $key = $s->{Key}; + $realsect = $s if $s->{Anchor}; + tie %$sectlist, 'Tie::IxHash' unless tied %$sectlist; +#print STDERR "PARSE_FEATURE_ENTRY ", Dumper($s); + $sectlist->{$key} //= + { + Children => new_sectlist(), + Headline => $s->{Headline}, + Key => $key, + RealSect => $realsect, + }; + $sectnode = $sectlist->{$key}; + } + die unless $sectnode; + $sectnode->{Status}[$version_index] = $value; +} + +sub ri_CodeBlock { + my ($c) = @_; + my ($infos, $text) = @$c; + + if ($text =~ m{^(?: Functional\ completeness + | Functional\ stability + | Interface\ stability + | Security\ supported ) \:}x) { + # ignore this + return; + } + die "$had_unknown / $text ?" if $had_unknown; + + my $toplevel = $text =~ m{^Xen-Version:}; + + foreach my $l (split /\n/, $text) { + $l =~ s/\s*$//; + next unless $l =~ m/\S/; + + my ($descr, $value) = + $toplevel + ? $l =~ m{^([A-Z][-A-Z0-9a-z]+)\:\s+(\S.*)$} + : $l =~ m{^(?:Status|Supported)(?:\,\s*([^:]+))?\:\s+(\S.*)$} + or die ("$text\n^ cannot parse status codeblock line:". + ($toplevel and 'top'). + "\n$l\n ?"); + + die unless @insections; + my $insection = $insections[$#insections]; + + if (length $descr) { + my $key = lc $descr; + $key =~ y/ /-/; + $key =~ y/-0-9A-Za-z//cd; + $key = $insection->{Anchor}.'--'.$key; + push @insections, + { + Key => $key, + Headline => [{ t => 'Str', c => $descr }], + }; + } + parse_feature_entry $value; + if (length $descr) { + pop @insections; + } + } +} + +sub process_unknown { + my ($c, $e) = @_; + $had_unknown = Dumper($e); +} + +sub r_content ($) { + my ($i) = @_; + foreach my $e (@$i) { + my $f = ${*::}{"ri_$e->{t}"}; + $f //= \&process_unknown; + $f->($e->{c}, $e); + } +} + +sub r_toplevel ($) { + my ($i) = @_; + + die unless defined $version_index; + + @insections = (); + $had_unknown = undef; + + foreach my $e (@$i) { + next unless ref $e eq 'ARRAY'; + r_content $e; + } +} + +sub read_inputs () { + $version_index = 0; + + local $/; + undef $/; + + while (my $f = shift @ARGV) { + push @version_urls, shift @ARGV; + eval { + open F, '<', $f or die $!; + my $input_toplevel = decode_json <F>; + r_toplevel $input_toplevel; + }; + die "$@\nwhile processing input file $f\n" if $@; + $version_index++; + } +} + +#---------- reprocessing ---------- + +# variables generated by analyse_reprocess: +our $maxdepth; + +sub pandoc2html_inline ($) { + my ($content) = @_; + + my $json_fh = IO::File::new_tmpfile or die $!; + print $json_fh to_json([ + { unMeta => { } }, + [{ t => 'Para', c => $content }], + ]) or die $!; + flush $json_fh or die $!; + seek $json_fh,0,0 or die $!; + + my $c = open PD, "-|" // die $!; + if (!$c) { + open STDIN, "<&", $json_fh; + exec qw(pandoc -f json) or die $!; + } + + local $/; + undef $/; + my $html = <PD>; + $?=$!=0; + if (!close PD) { + eval { + seek $json_fh,0,0 or die $!; + open STDIN, '<&', $json_fh or die $!; + system 'json_pp'; + }; + die "\n $? $!"; + } + + $html =~ s{^\<p\>}{} or die "$html ?"; + $html =~ s{\</p\>$}{} or die "$html ?"; + $html =~ s{\n$}{}; + return $html; +} + +sub reprocess_sectlist ($$); + +sub reprocess_sectnode ($$) { + my ($sectnode, $d) = @_; + + $sectnode->{Depth} = $d; + + if ($sectnode->{Status}) { + $maxdepth = $d if $d > $maxdepth; + } + + if ($sectnode->{Headline}) { +# print STDERR Dumper($sectnode); + $sectnode->{Headline} = + pandoc2html_inline $sectnode->{Headline}; + } + + reprocess_sectlist $sectnode->{Children}, $d; +} + +sub reprocess_sectlist ($$) { + my ($sectlist, $d) = @_; + $d++; + + foreach my $sectnode (values %$sectlist) { + reprocess_sectnode $sectnode, $d; + } +} + +sub count_rows_sectlist ($); + +sub count_rows_sectnode ($) { + my ($sectnode) = @_; + my $rows = 0; + $rows++ if $sectnode->{Status}; + $rows += count_rows_sectlist $sectnode->{Children}; + $sectnode->{Rows} = $rows; + return $rows; +} + +sub count_rows_sectlist ($) { + my ($sectlist) = @_; + my $rows = 0; + foreach my $sectnode (values %$sectlist) { + $rows += count_rows_sectnode $sectnode; + } + return $rows; +} + +# After reprocess_sectlist, +# ->{Headline} is in html +# ->{Status} is in plain text + +sub analyse_reprocess () { + $maxdepth = 0; + reprocess_sectlist $toplevel_sectlist, 0; +} + +#---------- output ---------- + +sub o { print @_ or die $!; } + +our @pending_headings; + +sub write_output_row ($) { + my ($sectnode) = @_; +# print STDERR 'WOR ', Dumper($d, $sectnode); + o('<tr>'); + my $span = sub { + my ($rowcol, $n) = @_; + o(sprintf ' %sspan="%d"', $rowcol, $n) if $n != 1; + }; + # This is all a bit tricky because (i) the input is hierarchical + # with variable depth, whereas the output has to have a fixed + # number of heading columns on the LHS; (ii) the HTML + # colspan/rowspan system means that when we are writing out, we + # have to not write table elements for table entries which have + # already been written with a span instruction that covers what we + # would write now. + while (my $heading = shift @pending_headings) { + o('<th valign="top"'); + o(sprintf ' id="%s"', $heading->{Key}); + $span->('row', $heading->{Rows}); + $span->('col', $maxdepth - $heading->{Depth} + 1) + if !%{ $heading->{Children} }; + o(' align="left">'); + o($heading->{Headline}); + o('</th>'); + } + if (%{ $sectnode->{Children} }) { + # we suppressed the colspan above, but we do need to make the gap + my $n = $maxdepth - $sectnode->{Depth}; + die 'XX '. Dumper($n, $sectnode) if $n<0; + if ($n) { + o('<td'); + $span->('col', $n); + o('></td>'); + } + } + for (my $i=0; $i<@version_urls; $i++) { + my $st = $sectnode->{Status}[$i]; + $st //= '-'; + o('<td>'); + my $end_a = ''; + if ($sectnode->{Key} eq 'release-support--xen-version') { + o(sprintf '<a href="%s">', $version_urls[$i]); + $end_a = '</a>'; + } + o(escapeHTML($st)); + if ($sectnode->{RealSect}{HasText}[$i] + && $sectnode->{Status}[$i] + && $sectnode->{RealSect}{Anchor}) { + o(sprintf '<a href="%s#%s">[*]</a>', + $version_urls[$i], $sectnode->{RealSect}{Anchor}); + } + o($end_a); + o('</td>'); + } + o("</tr>\n"); +} + +sub write_output_sectlist ($); +sub write_output_sectlist ($) { + my ($sectlist) = @_; + foreach my $key (keys %$sectlist) { + my $sectnode = $sectlist->{$key}; + push @pending_headings, $sectnode; + write_output_row $sectnode if $sectnode->{Status}; + write_output_sectlist $sectnode->{Children}; + } +} + +sub write_output () { + o('<table rules="all">'); + write_output_sectlist $toplevel_sectlist; + o('</table>'); +} + +#---------- main program ---------- + +open DEBUG, '>', '/dev/null' or die $!; +if (@ARGV && $ARGV[0] eq '-D') { + shift @ARGV; + open DEBUG, '>&2' or die $!; +} + +die unless @ARGV; +die if $ARGV[0] =~ m/^-/; +die if @ARGV % 2; + +read_inputs(); + +#use Data::Dumper; +#print DEBUG Dumper($toplevel_sectlist); + +analyse_reprocess(); +# Now Headline is in HTML + +count_rows_sectlist($toplevel_sectlist); + +#use Data::Dumper; +print DEBUG Dumper($toplevel_sectlist); + +write_output(); -- 2.1.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.