#!/usr/bin/perl # # by Thomas A. Alspaugh # # Licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 2.5 License # http://creativecommons.org/licenses/by-nc-sa/2.5/ # use strict; use warnings; # ¤ circle with tabs # ° # · # • sub help { my $cmdname = $0; $cmdname =~ s:.*/::; print "$cmdname - Produce a formatted HTML presentation of bibtex entries.\n"; print "The entries are read from .bib files named on the command line, or from\n"; print "bib files whose keys are named in
elements in a file\n"; print "given in the -filter option. -- represents the standard input.\n"; print ""; print "Each entry is assumed to have each field on a single line.\n"; print ""; print "Annotations may be read from .tex files (-annotation), or linked to in\n"; print "KEY.notes.html files (-notes).\n"; print ""; print "Output for each entry is formatted as a
element.\n"; print "Unless -wrap is given, the output contains only these elements; you\n"; print "provide the rest of the HTML.\n"; print "Output is the formatted bib entry (default); or an index to key files\n"; print "(-index); or an author cross-reference to key files (-authorIndex);\n"; print "or individual key files (-toKeyFile).\n"; print " -dt Make a
KEY for each entry.\n"; print " -wrap Wrap the output with a minimal HTML head and tail.\n"; print " -abstract Include abstracts in the HTML output.\n"; print " -annotation Include annotations (KEY.tex files) in the HTML output.\n"; print " -keywords Include keywords in the HTML output.\n"; print " -notes=NDIR Include links to KEY.notes.html files in NDIR [default '.'].\n"; print " -annotationMark[=MARK] Mark entries that have an annotation.\n"; print " -pdfMark[=MARK] Mark entries that have a pdf file.\n"; print " -pdfDir=PDIR Look for pdf files in PDIR [default '.'].\n"; print " -initials Assume entries are alphabetized and insert 'initials'.\n"; print " -filter=FILE Read from FILE and look for keys to expand.\n"; print " Any of the following are expanded with bib information for KEY:\n"; print "
KEY\n";
print " Each elements must be an entire line and exactly as above.\n";
print " An id=\"KEY\" attribute is added to each element.\n";
print " In addition, if the -linkTo option was given, strings of the form\n";
print " KEY are made into a (plain black) link to KEY\n";
print " in the bib file (BFILE#KEY).\n";
print " -linkTo=BFILE Use BFILE as the bib file name if needed.\n";
print " -toKeyFile=KFDIR Write output from each bib file D/KEY.bib to KFDIR/KEY.html.\n";
print " -keyFileDir=KFDIR Use KFDIR as the key file directory.\n";
print " -index=IFILE Write an index file IFILE with a link to each key file in KFDIR.\n";
print " If IFILE is not given, write to standard output.\n";
print " -css=CNAME or +CNAME Use CNAME as the style file.\n";
print " Create if + is given. If CNAME is absent, default is DIR.css.\n";
print " -authorIndex=AFILE Write an author index to AFILE.\n";
print " If AFILE is not given, write to standard output.\n";
}
# Nothing on the command line means there is nothing to process: print the
# usage text and stop with a success status.
unless (@ARGV) {
    help();
    exit 0;
}
# File-level state shared by the subs below.  Flags are strings: "" means
# off and "yes" means on.
#
# Settings and output-format state.  Those visibly assigned from command-line
# options in the argument loop are $abstractsIn (-abstract), $annotationsIn
# (-annotation), $annotationMark (-annotationMark), $makeAuthorIndex and
# $authorIndexFname (-authorIndex), $cssFname and $cssWrite (-css), and
# $htmlList/$htmlTag/$htmlTag1 (-dt).  -authorIndex also clears $printEntries.
# NOTE(review): the remaining variables in this group are presumably set by
# the other options listed in the help text -- confirm against the full
# argument loop.
my $abstractsIn="";
my $annotationMark="";
my $annotationsIn="";
my $authorIndexFname = "";
my $cssFname = "";
my $cssWrite = "";
my $htmlList = "dl";
my $htmlMark="";
my $htmlTag = "dt";
my $htmlTag1 = "dd";
my $id = "";
my $indexFname = "";
my $keyFileDir = "";
my $keywordsIn="";
my $linkTo="";
my $makeAuthorIndex = "";
my $makeIndex = "";
my $marks = "";
my $pdfDir = ".";
my $pdfMark="";
my $printEntries = "yes";
my $titleLimit = 80;
my $toKeyFile = "";
my $wrap = "";
#
# Working state.  $arg is the loop variable of the @ARGV loop; $bibFileName
# names the bib file being handled and appears in diagnostics; $fromStdin is
# set when -filter is given "--"; $initialPv is the previous key initial that
# the -initials logic compares against (" " until one has been seen).
my $arg;
my $bibFileName = "";
my $count = 0;
my $fromStdin = "";
my $initialPv=" ";
my $initials = "";
# Line counters for the annotation, bib and filter inputs respectively;
# $bLineNo is quoted in the "Unexpected field type" diagnostic when set.
my $aLineNo="";
my $bLineNo="";
my $fLineNo="";
#
# Accumulated tables.  %authorIndex, %authorIndexCounts and %authorName share
# the same author keys (see printAuthorIndex).  %keys maps each bib key to the
# bib file it was seen in, for duplicate detection.  %yearCounts maps a year
# to the number of entries for that year.
my %args = ();
my %authorIndex = ();
my %authorIndexCounts = ();
my %authorName = ();
my %keys = ();
my $summarize = "";
my %yearCounts = ();
#
# The bib entry currently being parsed.  parseBibFileLine sets $entryType and
# $key from the "@" line and clears $entryAlreadyPrinted and every field
# variable below at the start of each entry.
my $entryAlreadyPrinted = "";
my $entryType="";
my $key = "";
my $URL="";
my $abstract="";
my $address="";
my $annotation="";
# $author holds the formatted author list; $author0 keeps the field value as
# it was read.
my $author=""; my $author0="";
my $booktitle="";
my $doi="";
my $editor="";
my $howpublished="";
my $institution="";
my $journal="";
my $keywords="";
my $month="";
my $note="";
my $number="";
my $organization="";
my $pages="";
my $publisher="";
my $school="";
my $title="";
my $type="";
my $volume="";
my $year="";
#
# Notes linking: when $notesIn is set, $notesFname becomes
# "$notesDir/$key.notes.html" if that file exists, and "" otherwise.
my $notesDir="";
my $notesFname="";
my $notesIn="";
foreach $arg (@ARGV) {
# printf STDERR "<$arg>\n";
if ($arg =~ /^-/) {
if ($arg eq "--") {
$bibFileName = $arg;
&handleBibFile(\*STDOUT, $cssFname);
}
elsif ($arg =~ /^-+help$/) {
&help;
exit 0;
}
elsif ($arg =~ /^-+abstract$/) {
$abstractsIn = "yes";
}
elsif ($arg =~ /^-+annotation$/) {
$annotationsIn = "yes";
}
elsif ($arg =~ /^-+annotationMark/) {
if ($arg =~ /^-+annotationMark=(.+)/) {
$arg =~ s/^-+annotationMark=(.+)/$1/;
$annotationMark = $arg;
}
else {
$annotationMark = "¤";
}
}
elsif ($arg =~ /^-+authorIndex(=.+)?$/) {
$makeAuthorIndex = "yes";
$printEntries = "";
if ($arg =~ /^-+authorIndex=(.*)$/) {
$arg =~ s/^-+authorIndex=(.*)$/$1/;
$authorIndexFname = $arg;
}
}
elsif ($arg =~ /^-+css=/) { $cssFname = $arg;
$cssFname =~ s/^-+css=\+?//;
if (!$cssFname) {
if (!$keyFileDir) {
die "-css= without CNAME must be preceded by -toKeyFile=DIR.\n";
}
$cssFname = "$keyFileDir.css";
}
# print STDERR "CSS file name \"$cssFname\".\n";
if ($arg =~ /^-+css=\+/) {
$cssWrite = "yes";
}
}
elsif ($arg =~ /^-+dt$/) {
$htmlList = "dl";
$htmlTag = "dt";
$htmlTag1 = "dd";
}
elsif ($arg =~ /^-+filter=/) {
$arg =~ s/^-+filter=//;
if ($wrap) {
print STDERR "Can't give both -wrap and -filter.\n";
}
$fLineNo = 0;
if ($arg eq "--") {
$fromStdin = "yes";
while ( $count entries.\n";
if (scalar %yearCounts) {
print STDOUT " $1/g; # for multi-paragraph abstracts
}
if ($annotationsIn) {
my $fileKey = $bibFileName;
$fileKey =~ s/\.bib$//;
my $annFile = "$fileKey.tex";
if (-f $annFile) {
print STDERR " Including $annFile\n";
if (open AFILE, "<$annFile") {
$aLineNo = 0;
while ( ";
}
s/%.*//;
$annotation = "$annotation$_\n";
}
close AFILE;
}
else {
die "Can't open \"$annFile\".\n";
}
}
$annotation =~ s/([.:;?!]) ([A-Z])/$1 $2/g;
$annotation =~ s/\\par([ \n])/ $1/g; # for multi-paragraph annotations
}
if ($keywords) {
$keywords =~ s/([.:;?!]) ([A-Z])/$1 $2/g;
}
if ($note) {
$note =~ s/([.:;?!]) ([A-Z])/$1 $2/g;
}
if ($notesIn) {
$notesFname = "$notesDir/$key.notes.html";
if (-f "$notesFname") {
print STDERR " Linking to $notesFname\n";
}
else {
$notesFname = "";
}
}
#
if ($initials) {
my $initial = uc substr($key, 0, 1);
if ($initial ne $initialPv) {
if ($initialPv ne " " && $htmlList) {
print $HOUT "$htmlList>\n\n";
}
print $HOUT " [top]
A B C D
E F G H
I J K L
M N O P
Q R S T
U V W X
Y Z
";
}
if ($htmlList) { print $HOUT "<$htmlList>\n"; }
}
sub wrapTail { # ($HOUT)
my $HOUT = $_[0];
if ($htmlList) { print $HOUT "$htmlList>\n"; }
print $HOUT "\n";
foreach $year (reverse sort keys %yearCounts) {
my $count = $yearCounts{$year};
print STDOUT "
\n";
}
}
# Finish every output stream that is in use by writing the closing text
# produced by wrapTail: standard output when entries were printed with -wrap,
# and the author-index and index handles when those were requested.
&wrapTail(\*STDOUT)   if $printEntries && $wrap;
&wrapTail(\*$HAINDEX) if $makeAuthorIndex;
&wrapTail(\*$HINDEX)  if $makeIndex;
# The index handles are closed only when a file name was given for them.
# NOTE(review): presumably they are STDOUT otherwise -- the open is not
# visible in this part of the file.
# Output is buffered, so a write failure (for example a full disk) may only
# be reported by close; check it so a truncated index is not left unnoticed.
if ($indexFname) {
    close $HINDEX
        or die "Can't close \"$indexFname\": $!\n";
}
if ($authorIndexFname) {
    close $HAINDEX
        or die "Can't close \"$authorIndexFname\": $!\n";
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#
# Handle the -filter option for $_ and print the result.
#
sub filterLine {
if (/<(dt|li|p) class=('bib'|"bib")> *([a-zA-Z]+((\+[a-zA-Z]+){0,3}|\+[a-zA-Z]+\+)[0-9]{4}-[a-z0-9]{1,4}(-[a-z]+)?)/) {
#print;
s/.*<(dt|li|p) class=('bib'|"bib")> *([a-zA-Z]+((\+[a-zA-Z]+){0,3}|\+[a-zA-Z]+\+)[0-9]{4}-[a-z0-9]{1,4}(-[a-z]+)?).*/$3/;
if ($keyFileDir) { $bibFileName = "$keyFileDir/$_.bib"; }
else { $bibFileName = "$_.bib"; }
print "\n";
&handleBibFile(\*STDOUT, "");
print "\n";
}
elsif (/<(a) class=('bib'|"bib")>([a-zA-Z]+((\+[a-zA-Z]+){0,3}|\+[a-zA-Z]+\+)[0-9]{4}-[a-z0-9]{1,4})<\/a>/) {
s/<(a) class=('bib'|"bib")>([a-zA-Z]+((\+[a-zA-Z]+){0,3}|\+[a-zA-Z]+\+)[0-9]{4}-[a-z0-9]{1,4})<\/a>/$3<\/a>/g;
s/(/>/g;
s/\\_/_/g;
s/\\(exist)s/&$1;/g;
s/\\(forall)/&$1;/g;
s/\\Rightarrow/⇒/g;
s/\\diamond/◊/g;
s/\\box/ BOX /g;
s/\\texttrademark/™/g;
s/{\\'(.)}/&$1acute;/g;
s/{\\`(.)}/&$1grave;/g;
s/{\\"(.)}/&$1uml;/g;
s/{\\~(.)}/&$1tilde;/g;
s/{\\c(.)}/&$1cedil;/g;
s/{\\AA}/Å/g;
s/{\\ae}/æ/g;
s/{\\o}/ø/g;
s/{\\O}/Ø/g;
s/\$\\([Aa]lpha)\$/&$1;/g;
s/\$\\([Bb]eta)\$/&$1;/g;
s/\$\\([Gg]amma)\$/&$1;/g;
s/\$\\([Dd]elta)\$/&$1;/g;
s/\$\\([Ee]psilon)\$/&$1;/g;
s/\$\\([Zz]eta)\$/&$1;/g;
s/\$\\([Ee]ta)\$/&$1;/g;
s/\$\\([Tt]heta)\$/&$1;/g;
s/\$\\([Ii]ota)\$/&$1;/g;
s/\$\\([Kk]appa)\$/&$1;/g;
s/\$\\([Ll]ambda)\$/&$1;/g;
s/\$\\([Mm]u)\$/&$1;/g;
s/\$\\([Nn]u)\$/&$1;/g;
s/\$\\([Xx]i)\$/&$1;/g;
s/\$\\([Oo]micron)\$/&$1;/g;
s/\$\\([Pp]i)\$/&$1;/g;
s/\$\\([Rr]ho)\$/&$1;/g;
s/\$\\([Ss]igma)\$/&$1;/g;
s/\$\\([Tt]au)\$/&$1;/g;
s/\$\\([Uu]psilon)\$/&$1;/g;
s/\$\\([Pp]hi)\$/&$1;/g;
s/\$\\([Cc]hi)\$/&$1;/g;
s/\$\\([Pp]si)\$/&$1;/g;
s/\$\\([Oo]mega)\$/&$1;/g;
s/\\emph{([^}]*)}/$1<\/i>/g;
s/\\textit{([^}]*)}/$1<\/i>/g;
s/\\textsf{([^}]*)}/$1/g; # Unable to put class="sf" in the to string.
s/\\begin\{description\}/$year";
print STDOUT " $count";
print STDOUT " ";
my $incr = 2;
$count += $incr - 1;
while (0 < $count) {
print STDOUT "=";
$count -= $incr;
}
print STDOUT "\n";
}
print STDOUT " /g;
s/\\begin\{enumerate\}/
/g;
s/\\begin\{itemize\}/
/g;
s/\\begin\{quote\}/
/g;
s/\\begin\{quotation\}/
/g;
s/\\item *\[([^]]+)\]/
\n";
#
$authorIndex{$a} = $ai;
$count = $authorIndexCounts{$a};
++$count;
$authorIndexCounts{$a} = $count;
#print STDERR "### + indexEntryAuthors '$a' -> '$ai'\n";
}
if ($tmp) {
die "Unexpected author residue '$tmp' for '$key'.\n";
}
}
}
#
# Classify and optionally store data from a line of a bib file.
#
# The line is taken from $_ after fixline has processed it:
#   "@TYPE(KEY,"   starts a new entry; $entryType and $key are set and every
#                  per-entry field variable is cleared.
#   "NAME=VALUE,"  stores VALUE in the variable for NAME (and leaves VALUE in
#                  $_), or is skipped for the fields this script ignores.
#   ")" or ""      is skipped.
# Anything else is reported on STDERR as an unexpected field type.
#
# Argument: the output handle ($HOUT).  It is not used directly here, but
# "&fixline;" (no parentheses) shares this sub's @_ with fixline, as before.
#
sub parseBibFileLine { # ($HOUT)
    &fixline;
    if (/^@/) {
        $entryAlreadyPrinted = "";
        $entryType = $_;
        $entryType =~ s/^@(.*)\(.*/$1/;
        $key = $_;
        $key =~ s/.*\((.*),/$1/;
        # Forget the previous entry.  The statement-modifier "for" aliases $_
        # to each variable in turn and restores $_ afterwards.
        $_ = "" for ($URL, $abstract, $address, $annotation,
                     $author, $author0, $booktitle, $doi, $editor,
                     $howpublished, $institution, $journal, $keywords,
                     $month, $note, $number, $organization, $pages,
                     $publisher, $school, $title, $type, $volume, $year);
        $notesFname = "";
    }
    # Fields that are recognised but deliberately not stored.  ("editor=" is
    # not listed: the /^editor/ test below always claimed those lines.)
    elsif (/^(?:ISBN|LCCN|annotated|annote|bibchecked|bibcreated|chapter|day|edition|key|location|misc|references|series)=/) {}
    # Stored fields.  Tests without "=" also accept "name = value".
    elsif (/^URL=/)         { $URL          = _bibFieldValue(); }
    elsif (/^abstract/)     { $abstract     = _bibFieldValue(); }
    elsif (/^address=/)     { $address      = _bibFieldValue(); }
    elsif (/^annotation=/)  { $annotation   = _bibFieldValue(); }
    elsif (/^author/) {
        $author0 = _bibFieldValue();
        $author  = _bibAuthorList($author0);
    }
    elsif (/^booktitle/)    { $booktitle    = _bibFieldValue(); }
    elsif (/^doi=/)         { $doi          = _bibFieldValue(); }
    elsif (/^editor/)       { $editor       = _bibFieldValue(); }
    elsif (/^howpublished/) { $howpublished = _bibFieldValue(); }
    elsif (/^institution/)  { $institution  = _bibFieldValue(); }
    elsif (/^journal/)      { $journal      = _bibFieldValue(); }
    elsif (/^keywords=/)    { $keywords     = _bibFieldValue(); }
    elsif (/^month/)        { $month        = _bibFieldValue(); }
    elsif (/^note=/)        { $note         = _bibFieldValue(); }
    elsif (/^number/)       { $number       = _bibFieldValue(); }
    elsif (/^organization/) { $organization = _bibFieldValue(); }
    elsif (/^pages/)        { $pages        = _bibFieldValue(); }
    elsif (/^publisher/)    { $publisher    = _bibFieldValue(); }
    elsif (/^school/)       { $school       = _bibFieldValue(); }
    elsif (/^title/)        { $title        = _bibFieldValue(); }
    elsif (/^type/)         { $type         = _bibFieldValue(); }
    elsif (/^volume/)       { $volume       = _bibFieldValue(); }
    elsif (/^year/)         { $year         = _bibFieldValue(); }
    #
    elsif (/^\)/) {}
    elsif (/^$/) {}
    #
    else {
        if ($bLineNo) {
            print STDERR "(line $bLineNo) ";
        }
        print STDERR "Unexpected field type \"$_\" for $bibFileName.\n";
    }
}
#
# Reduce the bib field line in $_ to its value, in place, and return it.
#
# Removes the leading "name =" and the comma that terminates the field.  The
# comma is optional, so the last field of an entry, which may legally omit
# it, no longer keeps its "name=" prefix.  Only a comma at the end of the
# line counts as the terminator, so commas inside the value are kept.  Any
# whitespace after the terminator is preserved.
#
sub _bibFieldValue {
    s/ *[a-zA-Z]+ *=(.*?),?(\s*)$/$1$2/;
    return $_;
}
#
# Turn a bibtex author value ("A and B and C", each name either "First Last"
# or "Last, First") into display form and return it: names become
# "First Last"; two names are joined with " and "; three or more are written
# "A, B, and C".  "#" and "%" are used as temporary separators, so the value
# is assumed not to contain them.
#
sub _bibAuthorList {
    my ($names) = @_;
    $names =~ s/ and /#/g;
    $names =~ s/^(.+)/#$1#/;
    # Each pass turns the next "#name#" into "%name#", swapping
    # "Last, First" to "First Last" where that form is found.
    while ($names =~ /#([^#]+)#/) {
        if ($names =~ /#([^#]+), ([^#,]+)#/) {
            $names =~ s/#([^#]+), ([^#,]+)#/%$2 $1#/;
        }
        else {
            $names =~ s/#([^#]+)#/%$1#/;
        }
    }
    $names =~ s/^%//;
    if ($names =~ /(%.+)%([^%]+)#$/) {
        # Three or more names.
        $names =~ s/(%.+)%([^%]+)#$/$1, and $2/;
        $names =~ s/%/, /g;
    }
    elsif ($names =~ /%([^%]+)#$/) {
        # Exactly two names.
        $names =~ s/%([^%]+)#/ and $1/;
    }
    else {
        # A single name (or nothing): just drop the separators.
        $names =~ s/^#//;
        $names =~ s/#$//;
    }
    return $names;
}
#
# Print the author index.
#
# For every author key of %authorIndex, in sorted key order, writes to the
# given handle a heading line with the name from %authorName and the count
# from %authorIndexCounts, then a line opening the $htmlTag1 element, then the
# text stored for that author in %authorIndex, verbatim.
#
sub printAuthorIndex { # ($HOUT)
    my ($HOUT) = @_;
    for my $authorKey (sort keys %authorIndex) {
        my $displayName = $authorName{$authorKey};
        my $entryCount  = $authorIndexCounts{$authorKey};
        my $entryText   = $authorIndex{$authorKey};
        print {$HOUT} " <$htmlTag>$displayName ($entryCount)\n";
        print {$HOUT} " <$htmlTag1>\n";
        print {$HOUT} $entryText;
    }
}
#
# Handle an entry whose parts are in the various variables.
#
sub printBibFileEntry { # ($HOUT)
my $HOUT = $_[0];
if (exists $keys{$key}) {
print STDERR "Duplicated key $key in $keys{$key}, $bibFileName.\n";
}
$keys{$key} = $bibFileName;
#
if ($year) {
my $y = $year;
$y =~ s/-.*//;
$y =~ s/^([0-9]+) B.C./-$1/;
my $yearCount;
if (exists $yearCounts{$y}) {
$yearCount = 1 + $yearCounts{$y};
}
else {
$yearCount = 1;
}
$yearCounts{$y} = $yearCount;
}
#
if ($month =~ /#/) {
$month =~ s/ # //g;
}
if ($month) {
$month =~ s/\bjan\b/Jan./;
$month =~ s/\bfeb\b/Feb./;
$month =~ s/\bmar\b/Mar./;
$month =~ s/\bapr\b/Apr./;
$month =~ s/\bmay\b/May/;
$month =~ s/\bjun\b/June/;
$month =~ s/\bjul\b/July/;
$month =~ s/\baug\b/Aug./;
$month =~ s/\bsep\b/Sep./;
$month =~ s/\boct\b/Oct./;
$month =~ s/\bnov\b/Nov./;
$month =~ s/\bdec\b/Dec./;
}
&setMarks;
if ($abstract) {
$abstract =~ s/([.:;?!]) ([A-Z])/$1 $2/g;
$abstract =~ s/\\par([ \n])/