#!/usr/bin/env perl
#
# IkiWiki plugin that converts pages to HTML by piping them through pandoc.
#
# Provenance: module body of the patch "Add pandoc.pm"
#   commit fed68baa7cddcbb1f1ffcddeca578683d6e2e030
#   From:  Guilhem Moulin
#   Date:  Mon, 15 Sep 2014 21:46:22 +0200
#   File:  .ikiwiki/IkiWiki/Plugin/pandoc.pm (new file, mode 100755)

package IkiWiki::Plugin::pandoc;

use warnings;
use strict;
use IkiWiki;
use FileHandle;
use IPC::Open2;
use JSON;

# Register the plugin's hooks.
#
# Always registers the "getsetup" hook and one "htmlize" hook per Markdown
# extension.  $config{pandoc_markdown_ext} may be a single extension, a
# comma-separated string of extensions, or an array reference; it defaults
# to "mdwn".  The LaTeX, reStructuredText, Textile and MediaWiki hooks are
# only registered when the matching pandoc_* setting is true.
sub import {
    my $markdown_ext = $config{pandoc_markdown_ext} || "mdwn";

    # Normalise the three accepted shapes into a flat list; empty entries
    # (e.g. from "mdwn,,md") would otherwise register a hook with id "".
    my @markdown_exts =
        grep { defined && length }
        ref($markdown_ext) eq 'ARRAY'
            ? @$markdown_ext
            : split /\s*,\s*/, $markdown_ext;

    hook(type => "getsetup", id => "pandoc", call => \&getsetup);

    foreach my $ext (@markdown_exts) {
        hook(type => "htmlize", id => $ext,
             call => sub { htmlize("markdown", @_) });
    }

    # [ setup key, page extension, pandoc input format ], in the order the
    # hooks have always been registered.
    my @optional_formats = (
        [ pandoc_latex     => "tex",       "latex"     ],
        [ pandoc_rst       => "rst",       "rst"       ],
        [ pandoc_textile   => "textile",   "textile"   ],
        [ pandoc_mediawiki => "mediawiki", "mediawiki" ],
    );
    foreach my $entry (@optional_formats) {
        my ($setting, $ext, $format) = @$entry;
        next unless $config{$setting};
        hook(type => "htmlize", id => $ext,
             call => sub { htmlize($format, @_) });
    }

    return;
}


# Describe the plugin's setup options.
#
# Returns a flat list of (option name => description hash) pairs, starting
# with the "plugin" entry.
sub getsetup () {
    return
        plugin => {
            safe    => 1,
            rebuild => 1,
        },
        pandoc_command => {
            type        => "string",
            example     => "/usr/local/bin/pandoc",
            description => "Path to pandoc executable",
            safe        => 0,
            rebuild     => 0,
        },
        pandoc_markdown_ext => {
            type        => "string",
            example     => "mdwn",
            description => "File extension for Markdown files",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_latex => {
            type        => "boolean",
            example     => 0,
            description => "Enable Pandoc processing of LaTeX documents",
            safe        => 0,
            rebuild     => 1,
        },
        pandoc_rst => {
            type        => "boolean",
            example     => 0,
            description => "Enable Pandoc processing of reStructuredText documents",
            safe        => 0,
            rebuild     => 1,
        },
        pandoc_textile => {
            type        => "boolean",
            example     => 0,
            description => "Enable Pandoc processing of Textile documents",
            safe        => 0,
            rebuild     => 1,
        },
        pandoc_mediawiki => {
            type        => "boolean",
            example     => 0,
            description => "Enable Pandoc processing of MediaWiki documents",
            safe        => 0,
            rebuild     => 1,
        },
        pandoc_smart => {
            type        => "boolean",
            example     => 1,
            description => "Use smart quotes, dashes, and ellipses",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_obfuscate => {
            type        => "boolean",
            example     => 1,
            description => "Obfuscate emails",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_html5 => {
            type        => "boolean",
            example     => 0,
            description => "Generate HTML5",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_ascii => {
            type        => "boolean",
            example     => 0,
            description => "Generate ASCII instead of UTF8",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_numsect => {
            type        => "boolean",
            example     => 0,
            description => "Number sections",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_sectdiv => {
            type        => "boolean",
            example     => 0,
            description => "Attach IDs to section DIVs instead of Headers",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_codeclasses => {
            type        => "string",
            example     => "",
            description => "Classes to use for indented code blocks",
            safe        => 1,
            rebuild     => 1,
        },
        pandoc_math => {
            type        => "string",
            example     => "mathjax",
            description => "Process TeX math using",
            safe        => 0,
            rebuild     => 1,
        },
        pandoc_bibliography => {
            type        => "string",
            example     => "",
            description => "Path to bibliography file",
            safe        => 0,
            rebuild     => 1,
        },
        pandoc_csl => {
            type        => "string",
            example     => "",
            description => "Path to CSL file (for references and bibliography)",
            safe        => 0,
            rebuild     => 1,
        };
}


# Convert a page to HTML with pandoc.
#
# Arguments: the pandoc input format name, followed by the htmlize hook's
# named parameters; "page" and "content" are the ones used here.
#
# The content is first converted to pandoc's JSON representation so that the
# title block can be read, then the JSON is converted to HTML.  When the
# title block yields a title, primary author or date, the corresponding
# $pagestate{$page}{meta} entry is set.
#
# Returns the generated HTML as a decoded character string.  Calls error()
# when pandoc cannot be started, exits unsuccessfully, or emits JSON that
# cannot be parsed.
#
# No prototype on purpose: the hook closures in import() are compiled before
# this definition, so a prototype would only produce a "called too early to
# check prototype" warning.
sub htmlize {
    my $format = shift;
    my %params = @_;
    my $page   = $params{page};

    my $command = $config{pandoc_command} || "/usr/local/bin/pandoc";
    my @args    = _pandoc_options();

    # Workaround for perl bug (#376329)
    require Encode;
    my $source = Encode::encode_utf8($params{content});

    # Convert to intermediate JSON format so that the title block
    # can be parsed out
    my $json_content = _run_pandoc($command, $source,
        '-f', $format, '-t', 'json', @args);

    # $ENV{"LC_ALL"} = "en_US.UTF-8";
    my $html = _run_pandoc($command, $json_content,
        '-f', 'json', '-t', 'html', @args);
    my $content = Encode::decode_utf8($html);

    # Parse the title block out of the JSON and set the meta values
    my %meta = _title_block($json_content);
    foreach my $field (qw(title author date)) {
        # length, not truth: a value of "0" is still a value.
        next unless defined $meta{$field} && length $meta{$field};
        $pagestate{$page}{meta}{$field} = $meta{$field};
    }

    return $content;
}


# Flatten a list of title-block inline elements into a plain string.
#
# Hash elements contribute the value of their "Str" key (hashes without one
# contribute nothing); every non-hash element, such as the string "Space",
# contributes a single space.
sub compile_string {
    my (@uncompiled_string) = @_;

    my $compiled_string = '';
    foreach my $word_or_space (@uncompiled_string) {
        if (ref($word_or_space) eq "HASH") {
            # defined, not truth, so that the word "0" is kept.
            $compiled_string .= $word_or_space->{"Str"}
                if defined $word_or_space->{"Str"};
        }
        else {
            $compiled_string .= ' ';
        }
    }
    return $compiled_string;
}


# Build the pandoc command-line options selected by the pandoc_* settings.
# These are passed to both the to-JSON and the to-HTML invocation.
sub _pandoc_options {
    my @args;

    push @args, '--smart' if $config{pandoc_smart};
    push @args, $config{pandoc_obfuscate}
        ? '--email-obfuscation=references'
        : '--email-obfuscation=none';
    push @args, '--html5'           if $config{pandoc_html5};
    push @args, '--ascii'           if $config{pandoc_ascii};
    push @args, '--number-sections' if $config{pandoc_numsect};
    push @args, '--section-divs'    if $config{pandoc_sectdiv};

    push @args, '--indented-code-classes=' . $config{pandoc_codeclasses}
        if $config{pandoc_codeclasses};
    push @args, '--bibliography=' . $config{pandoc_bibliography}
        if $config{pandoc_bibliography};
    push @args, '--csl=' . $config{pandoc_csl}
        if $config{pandoc_csl};

    my %math_option_for = (
        mathjax     => '--mathjax=/dev/null',
        jsmath      => '--jsmath',
        latexmathml => '--latexmathml',
        mimetex     => '--mimetex',
        mathtex     => '--mimetex=/cgi-bin/mathtex.cgi',
        google      => '--webtex',
        mathml      => '--mathml',
    );
    # pandoc_math may be unset; unknown values select no math option.
    my $math = defined $config{pandoc_math} ? $config{pandoc_math} : '';
    $math =~ s/\n\z//;    # the former /^...$/ matches tolerated one newline
    push @args, $math_option_for{$math} if exists $math_option_for{$math};

    return @args;
}


# Run "$command @args", write $input to its standard input and return
# everything it prints on standard output.  $input and the return value are
# byte strings.  Calls error() if the command cannot be started or finishes
# with a non-zero wait status.
sub _run_pandoc {
    my ($command, $input, @args) = @_;

    # open2 throws on failure instead of returning a false pid.
    my ($from_pandoc, $to_pandoc);
    my $pid = eval { open2($from_pandoc, $to_pandoc, $command, @args) };
    error("Unable to open $command: $@") unless $pid;

    # If the child has already gone away, fail on its exit status below
    # rather than being killed by SIGPIPE while writing.
    local $SIG{PIPE} = 'IGNORE';

    print {$to_pandoc} $input;
    close $to_pandoc;

    my $output = do { local $/; <$from_pandoc> };
    close $from_pandoc;

    waitpid $pid, 0;
    error("$command failed (wait status $?)") if $?;

    return defined $output ? $output : '';
}


# Extract title, primary author and date from pandoc's JSON output.
#
# Expects a top-level array whose first element is a hash with the keys
# docTitle, docAuthors and docDate.  Returns a hash with the keys title,
# author and date; returns an empty list when the first element is not such
# a hash.  Calls error() when the JSON cannot be parsed at all.
sub _title_block {
    my ($json_content) = @_;

    my $document = eval { decode_json($json_content) };
    error("Unable to parse pandoc JSON output: $@") if $@;

    my $header = ref($document) eq 'ARRAY' ? $document->[0] : undef;
    return unless ref($header) eq 'HASH';

    my $authors = $header->{docAuthors};
    my $primary_author =
        (ref($authors) eq 'ARRAY' && @$authors) ? $authors->[0] : undef;

    return (
        title  => compile_string(_elements($header->{docTitle})),
        author => compile_string(_elements($primary_author)),
        date   => compile_string(_elements($header->{docDate})),
    );
}


# Return the elements of an array reference, or nothing for any other value.
sub _elements {
    my ($maybe_list) = @_;
    return ref($maybe_list) eq 'ARRAY' ? @$maybe_list : ();
}

1;