Add pandoc.pm

author: Guilhem Moulin <guilhem.moulin@fripost.org> 2014-09-15 21:46:22 +0200
committer: Guilhem Moulin <guilhem.moulin@fripost.org> 2014-09-15 21:46:22 +0200
commit: fed68baa7cddcbb1f1ffcddeca578683d6e2e030 (patch)
tree: 597e91fe1e095c794d02333841d77ef6e2cdf524 /.ikiwiki
parent: e0441e4abd3869f3a3aa57322dbd7785a9f36bdb (diff)
1 files changed, 329 insertions, 0 deletions
diff --git a/.ikiwiki/IkiWiki/Plugin/pandoc.pm b/.ikiwiki/IkiWiki/Plugin/pandoc.pm
new file mode 100755
index 0000000..ddc5299
--- /dev/null
+++ b/.ikiwiki/IkiWiki/Plugin/pandoc.pm
@@ -0,0 +1,329 @@
+#!/usr/bin/env perl
+
+package IkiWiki::Plugin::pandoc;
+
+use warnings;
+use strict;
+use IkiWiki;
+use FileHandle;
+use IPC::Open2;
+use JSON;
+
+# Plugin entry point: registers the getsetup hook and one htmlize hook for
+# every page extension that should be rendered by pandoc.
+sub import {
+    my $ext_setting = $config{pandoc_markdown_ext} || "mdwn";
+
+    # The setting may be a single extension, a comma-separated string of
+    # extensions, or an arrayref; split the comma-separated form into a list.
+    $ext_setting = [split /\s*,\s*/, $ext_setting] if $ext_setting =~ /,/;
+    my @markdown_exts = ref($ext_setting) eq 'ARRAY' ? @$ext_setting
+                                                     : ($ext_setting);
+
+    hook(type => "getsetup", id => "pandoc", call => \&getsetup);
+
+    # Markdown pages are always rendered by pandoc.
+    for my $ext (@markdown_exts) {
+        hook(type => "htmlize", id => $ext,
+             call => sub { htmlize("markdown", @_) });
+    }
+
+    # The other input formats are opt-in.  Each row is
+    # [ config switch, page extension, pandoc input format ].
+    my @optional_formats = (
+        [ pandoc_latex     => "tex",       "latex"     ],
+        [ pandoc_rst       => "rst",       "rst"       ],
+        [ pandoc_textile   => "textile",   "textile"   ],
+        [ pandoc_mediawiki => "mediawiki", "mediawiki" ],
+    );
+    for my $row (@optional_formats) {
+        my ($switch, $ext, $format) = @$row;
+        next unless $config{$switch};
+
+        hook(type => "htmlize", id => $ext,
+             call => sub { htmlize($format, @_) });
+    }
+}
+
+
+# Describe the plugin and its configuration settings (getsetup hook).
+# Returns a flat list of name => hashref pairs in the order written below.
+sub getsetup () {
+    # Build one setting record from its type, example value and description;
+    # any remaining arguments are its safe/rebuild flags.
+    my $setting = sub {
+        my ($type, $example, $description, %flags) = @_;
+        return {
+            type        => $type,
+            example     => $example,
+            description => $description,
+            %flags,
+        };
+    };
+
+    return (
+        plugin => { safe => 1, rebuild => 1 },
+
+        # Location of the converter.
+        pandoc_command => $setting->(
+            "string", "/usr/local/bin/pandoc",
+            "Path to pandoc executable",
+            safe => 0, rebuild => 0,
+        ),
+
+        # Page types handed to pandoc.
+        pandoc_markdown_ext => $setting->(
+            "string", "mdwn",
+            "File extension for Markdown files",
+            safe => 1, rebuild => 1,
+        ),
+
+        pandoc_latex => $setting->(
+            "boolean", 0,
+            "Enable Pandoc processing of LaTeX documents",
+            safe => 0, rebuild => 1,
+        ),
+
+        pandoc_rst => $setting->(
+            "boolean", 0,
+            "Enable Pandoc processing of reStructuredText documents",
+            safe => 0, rebuild => 1,
+        ),
+
+        pandoc_textile => $setting->(
+            "boolean", 0,
+            "Enable Pandoc processing of Textile documents",
+            safe => 0, rebuild => 1,
+        ),
+
+        pandoc_mediawiki => $setting->(
+            "boolean", 0,
+            "Enable Pandoc processing of MediaWiki documents",
+            safe => 0, rebuild => 1,
+        ),
+
+        # Output tweaks.
+        pandoc_smart => $setting->(
+            "boolean", 1,
+            "Use smart quotes, dashes, and ellipses",
+            safe => 1, rebuild => 1,
+        ),
+
+        pandoc_obfuscate => $setting->(
+            "boolean", 1,
+            "Obfuscate emails",
+            safe => 1, rebuild => 1,
+        ),
+
+        pandoc_html5 => $setting->(
+            "boolean", 0,
+            "Generate HTML5",
+            safe => 1, rebuild => 1,
+        ),
+
+        pandoc_ascii => $setting->(
+            "boolean", 0,
+            "Generate ASCII instead of UTF8",
+            safe => 1, rebuild => 1,
+        ),
+
+        pandoc_numsect => $setting->(
+            "boolean", 0,
+            "Number sections",
+            safe => 1, rebuild => 1,
+        ),
+
+        pandoc_sectdiv => $setting->(
+            "boolean", 0,
+            "Attach IDs to section DIVs instead of Headers",
+            safe => 1, rebuild => 1,
+        ),
+
+        pandoc_codeclasses => $setting->(
+            "string", "",
+            "Classes to use for indented code blocks",
+            safe => 1, rebuild => 1,
+        ),
+
+        # Math rendering and citations.
+        pandoc_math => $setting->(
+            "string", "mathjax",
+            "Process TeX math using",
+            safe => 0, rebuild => 1,
+        ),
+
+        pandoc_bibliography => $setting->(
+            "string", "",
+            "Path to bibliography file",
+            safe => 0, rebuild => 1,
+        ),
+
+        pandoc_csl => $setting->(
+            "string", "",
+            "Path to CSL file (for references and bibliography)",
+            safe => 0, rebuild => 1,
+        ),
+    );
+}
+
+
+# Render one page through pandoc and return the resulting HTML.
+#
+# The page is converted twice: first to pandoc's JSON format, from which the
+# title block is read, then from that JSON to HTML.  A title, primary author
+# and date found in the title block are stored in $pagestate{$page}{meta}.
+#
+# Parameters:
+#   $format - pandoc input format ("markdown", "latex", ...)
+#   %params - named parameters of the htmlize hook; "page" and "content"
+#             are the ones used here
+#
+# Returns the HTML, decoded from UTF-8.  Calls error() if pandoc exits
+# unsuccessfully.
+sub htmlize ($@) {
+    my $format = shift;
+    my %params = @_;
+    my $page = $params{page};
+
+    my $command = $config{pandoc_command} || "/usr/local/bin/pandoc";
+    my @args;
+
+    # Typography and e-mail obfuscation.
+    push @args, '--smart' if $config{pandoc_smart};
+    push @args, '--email-obfuscation='
+              . ($config{pandoc_obfuscate} ? 'references' : 'none');
+
+    # Boolean settings that map directly onto a pandoc switch.
+    my @switches = (
+        [ pandoc_html5   => '--html5'           ],
+        [ pandoc_ascii   => '--ascii'           ],
+        [ pandoc_numsect => '--number-sections' ],
+        [ pandoc_sectdiv => '--section-divs'    ],
+    );
+    for my $switch (@switches) {
+        my ($setting, $option) = @$switch;
+        push @args, $option if $config{$setting};
+    }
+
+    # String settings whose value is appended to a pandoc option.
+    my @valued_options = (
+        [ pandoc_codeclasses  => '--indented-code-classes=' ],
+        [ pandoc_bibliography => '--bibliography='          ],
+        [ pandoc_csl          => '--csl='                   ],
+    );
+    for my $valued (@valued_options) {
+        my ($setting, $option) = @$valued;
+        push @args, $option . $config{$setting} if $config{$setting};
+    }
+
+    # Math rendering.  pandoc_math may be unset, so default it to '' rather
+    # than matching against undef, which would emit a warning.
+    my %math_option = (
+        mathjax     => '--mathjax=/dev/null',
+        jsmath      => '--jsmath',
+        latexmathml => '--latexmathml',
+        mimetex     => '--mimetex',
+        mathtex     => '--mimetex=/cgi-bin/mathtex.cgi',
+        google      => '--webtex',
+        mathml      => '--mathml',
+    );
+    my $math = defined $config{pandoc_math} ? $config{pandoc_math} : '';
+    $math =~ s/\n\z//;    # tolerate one trailing newline in the setting
+    push @args, $math_option{$math} if exists $math_option{$math};
+
+    # Workaround for perl bug (#376329)
+    require Encode;
+    my $content = Encode::encode_utf8($params{content});
+
+    # Convert to intermediate JSON format so that the title block
+    # can be parsed out, then convert that JSON to HTML.
+    my $json_content = _run_pandoc($command, $format, 'json', \@args, $content);
+    my $html = _run_pandoc($command, 'json', 'html', \@args, $json_content);
+    $content = Encode::decode_utf8($html);
+
+    # Parse the title block out of the JSON and set the meta values.  The
+    # title block is read from a hash in element 0 of the top-level array;
+    # anything shaped differently is skipped instead of being dereferenced.
+    my $decoded = decode_json($json_content);
+    my $header = {};
+    if (ref($decoded) eq 'ARRAY' && ref($decoded->[0]) eq 'HASH') {
+        $header = $decoded->[0];
+    }
+
+    my $authors = $header->{docAuthors};
+    my %inlines_for = (
+        title  => $header->{docTitle},
+        # Only the first (primary) author is recorded.
+        author => ref($authors) eq 'ARRAY' ? $authors->[0] : undef,
+        date   => $header->{docDate},
+    );
+    for my $field (qw(title author date)) {
+        my $inlines = $inlines_for{$field};
+        next unless ref($inlines) eq 'ARRAY';
+        my $text = compile_string(@$inlines);
+        $pagestate{$page}{meta}{$field} = $text if length $text;
+    }
+
+    return $content;
+}
+
+# Pipe $input through "$command -f $from -t $to @$args" and return everything
+# the command wrote to its standard output.
+#
+# Parameters:
+#   $command - path of the pandoc executable
+#   $from    - value for pandoc's -f option
+#   $to      - value for pandoc's -t option
+#   $args    - arrayref of further command-line options
+#   $input   - bytes written to the command's standard input
+#
+# Calls error() if the command exits unsuccessfully.
+sub _run_pandoc {
+    my ($command, $from, $to, $args, $input) = @_;
+
+    # open2 raises its own exception when the command cannot be started.
+    my $pid = open2(my $from_pandoc, my $to_pandoc, $command,
+                    '-f', $from,
+                    '-t', $to,
+                    @$args);
+    error("Unable to open $command") unless $pid;
+
+    # NOTE(review): writing all input before reading any output assumes
+    # pandoc consumes its whole input before it starts writing -- confirm.
+    print {$to_pandoc} $input;
+    close $to_pandoc;
+
+    # Slurp the whole output rather than only its first line.
+    my $output = do { local $/; <$from_pandoc> };
+    close $from_pandoc;
+
+    # Reap the child and refuse to continue with the output of a failed run.
+    my $reaped = waitpid $pid, 0;
+    if ($reaped == $pid && $? != 0) {
+        error("$command failed converting $from to $to (wait status $?)");
+    }
+
+    return defined $output ? $output : '';
+}
+
+# Flatten a list of pandoc inline elements into plain text.  Hash elements
+# contribute the value of their "Str" key, if defined; every other element
+# (such as the string "Space") contributes a single space.
+#
+# Returns the assembled string, which is '' for an empty list.
+sub compile_string {
+    my (@uncompiled_string) = @_;
+    my $compiled_string = '';
+    for my $word_or_space (@uncompiled_string) {
+        if (ref($word_or_space) ne "HASH") {
+            $compiled_string .= ' ';
+        }
+        elsif (defined $word_or_space->{"Str"}) {
+            # defined(), not truth, so that the word "0" is kept.
+            $compiled_string .= $word_or_space->{"Str"};
+        }
+    }
+    return $compiled_string;
+}
+
+1
author	Guilhem Moulin <guilhem.moulin@fripost.org>	2014-09-15 21:46:22 +0200
committer	Guilhem Moulin <guilhem.moulin@fripost.org>	2014-09-15 21:46:22 +0200
commit	fed68baa7cddcbb1f1ffcddeca578683d6e2e030 (patch)
tree	597e91fe1e095c794d02333841d77ef6e2cdf524 /.ikiwiki
parent	e0441e4abd3869f3a3aa57322dbd7785a9f36bdb (diff)