// m2html - convert mail/news/babyl files to HTML format // David Eppstein, UC Irvine, 12 Apr 1996 #include "lineb.h" #include #include // Page break is automatic paragraph break // include ^_ as EOP as well for recognizing Babyl files inline int pagebreak(char v) { return (v == '\014' || v == '\037'); } // Read char into variable, returning nz if end of line or end of page. inline int eolgetc(char & v, istream & i) { i.get(v); return (!i.eof() && (v == '\n' || pagebreak(v))); } // Add a char to the save buffer. inline void reset(line_buffer & lb) { lb.set_buffer(0); } void add(line_buffer & lb, char c) { lb.replace(lb.textlen(), &c, 1, 0); } // Complain about inappropriate args extern "C" void exit(int); const char * progname = "m2html"; void usage() { cerr << "usage: " << progname << " < mailfile > htmlfile\n"; exit (1); } // shared variables between process() and main() static enum { filestart, msgheader, msgtext } filestate = filestart; static line_buffer para; // process a line of text // input = pointer to start of line // output = pointer to start of next line // // does untabification, < > conversion, @ and URL HREFification static void outword(const char * w, const char * s) { while (w != s) { if (*w == '<') cout << "<"; else if (*w == '>') cout << ">"; else if (*w == '&') cout << "&"; else cout << *w; w++; } } static const char * processline(const char * s) { int inword = 1; // for HREF const char * w = s; // word start for HREF while (*s != '\0' && *s != '\n') { switch(*s) { case '<': case '>': case ',': case ' ': case '(': case ')': case '[': case ']': case '\t': case '"': case '\'': case ';': // no colon tho because of http: etc outword(w,s+1); // punctuation, output w = s+1; // including punc char itself inword = 1; break; case '@': if (inword && w != s && ((w - para.text()) < 12 || strncmp(w-12,"In article <",12) != 0)) { // email address? while ((*++s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || (*s >= '0' && *s <= '9') || (*s == '.' 
|| *s == '-')) ; // find end of hostname while (*(s-1) == '.') s--; // back over period cout << ""; outword(w,s); cout << ""; w = s; s--; // make up for extra s++ below break; } // atsign after nonword, treat as default nonword char inword = 0; break; case '$': // this character shows up in many msg-ids and few real email addrs inword = 0; break; case ':': // maybe URL? if (inword && (strncmp(w,"mailto:",7) == 0 || strncmp(s,"://",3) == 0)) { while (*++s != '<' && *s != '>' && *s != ' ' && *s != '(' && *s != ')' && *s != '\t' && *s != ',' && *s != '\n' && *s != '\0') ; // find end of URL by looking for break char while (*(s-1) == '.') s--; // back over period cout << ""; outword(w,s); cout << ""; w = s; s--; // make up for extra s++ below break; } // colon after nonword, treat as default nonword char inword = 0; break; default: // any other character break; } s++; } outword(w,s); // flush final word cout << "\n"; if (*s == '\n') s++; return s; } // check if a header line should be output // if yes, output it and return nonzero static int testhead(const char * s, const char * t) { int l = strlen(t); if (strncmp(s,t,l) != 0) return 0; // not that header, give up cout << "" << t << ""; cout << " "+l; // pad to 16th col so tabs line up s += l; // skip past header while (*s == ' ' || *s == '\t') s++; // skip past whitespace processline(s); // output header contents return 1; } // process a paragraph known to be message header static void processheader() { const char * s = para.text(); int cont = 0; while (*s != '\0') { switch(*s) { case ' ': case '\t': if (cont) testhead(s,""); break; case 'D': cont = testhead(s,"Date:"); break; case 'F': cont = testhead(s,"From:"); break; case 'K': cont = testhead(s,"Keywords:"); break; case 'N': cont = testhead(s,"Newsgroups:"); break; case 'O': cont = testhead(s,"Organization:"); case 'R': cont = ( testhead(s,"Reply-to:") || testhead(s,"Reply-To:") ); case 'S': cont = (testhead(s,"Subject:") || testhead(s,"Summary:")); break; case 
'T': cont = testhead(s,"To:"); break; default: cont = 0; } while (*++s != '\n' && *s != '\0') ; // find eol if (*s == '\n') s++; // and skip to next line } } // process a paragraph known to be message text static void processtext() { const char * s = para.text(); while (*s != '\0') s = processline(s); } // process a paragraph of text static void process() { if (para.textlen() == 0) return; // nothing to process // check trailing newline if (para.text()[para.textlen() - 1] != '\n') add(para,'\n'); // handle Babyl format junk // ignore file header, saved full headers, start of abbrev headers if (strncmp(para.text(), "1,,\n", 4) == 0) { // DE 9 Jun 1999: VM doesn't leave blank line before *** EOOH *** // so look for it in here rather than immediately returning int i; const char * s = para.text(); for (i = 0; i < para.textlen() - 14; i++) if (strncmp(para.text()+i,"\n*** EOOH ***\n", 14) == 0) break; if (i < para.textlen()) para.replace(0, 0, 0, i+14); else return; } if (strncmp(para.text(), "BABYL OPTIONS:", 14) == 0) return; // DE 1 Aug 1996: remove newline from string // since emacs 19 adds " -*- rmail -*-" on same line if (strncmp(para.text(), "*** EOOH ***\n", 13) == 0) para.replace(0, 0, 0, 13); // Babyl real hdrs // test if header by searching for "From: " line int is_header = 0; char * s = (char *) para.text(); while (*s != '\0') { if (strncmp(s, "From:", 5) == 0) { is_header = 1; break; } while (*s != '\n' && *s != '\0') s++; if (*s == '\n') s++; } // output glue from old state switch (filestate) { case filestart: if (!is_header) cerr << "Warning: file does not begin w/headers\n"; cout << "

\n";
	break;

     case msgheader:
	if (is_header) cerr << "Warning: two headers in a row\n";
	cout << "

\n";
	break;

     case msgtext:
	if (is_header) cout << "

\n";
	else cout << "\n";
	break;

     default:
	cerr << "Error: bad state in process glue\n";
    }

    // set new state and handle para
    if (is_header) {
	filestate = msgheader;
	processheader();
    } else {
	filestate = msgtext;
	processtext();
    }
}

// The main program.
// Reads the mail file from standard input one character at a time,
// groups the characters into paragraphs in the shared buffer para, and
// hands each paragraph that contains a non-blank character to process().
// A paragraph ends at a page-break character, at a second consecutive
// line end, or at the end of a line holding only spaces/tabs.
// Takes no command-line arguments; any argument prints usage and exits.
int main (int argc, char ** argv)
{
    (void) argv;                // only the argument count is checked
    if (argc > 1) usage();

    int was_eol = 1;
    int have_nonspace = 0;

    for (;;) {                  // looping over chars in file
        char inchar = '\0';     // defined value even if the read fails
        if (eolgetc(inchar, cin)) { // end of line?
            if (was_eol || pagebreak(inchar) || !have_nonspace) {
                // found paragraph break
                if (have_nonspace) process(); // output old paragraph
                reset(para);    // and start new one
                have_nonspace = 0;
            }
            else {
                was_eol = 1;    // one newline, remember the next
                add(para, '\n'); // but assume internal to para for now
            }
            continue;           // back to top of loop
        }
        if (!cin) {
            // eolgetc() above hit eof (or a read error, which would
            // otherwise loop forever since it never sets eof)
            if (have_nonspace) process(); // process final paragraph
            // NOTE(review): this literal spanned raw line breaks in the
            // reviewed text, which suggests closing HTML markup was lost
            // from it; the visible characters are kept -- confirm.
            cout << "\n\n\n";   // finish output
            break;              // break out of loop
        }
        was_eol = 0;            // if we got here, have non-line-break char
        if (inchar != ' ' && inchar != '\t') have_nonspace = 1;
        add(para, inchar);      // add it to buffer
    }                           // end loop over chars in file

    return 0;
}