/*
* htmlise.c:
* Turn text into HTML.
*
* Copyright (c) 2003 Chris Lightfoot. All rights reserved.
* Email: chris@ex-parrot.com; WWW: http://www.ex-parrot.com/~chris/
*
*/
/* RCS/CVS Id keyword string recording the revision of this file. */
static const char rcsid[] = "$Id: htmlise.c,v 1.5 2003/03/07 18:53:54 chris Exp $";
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "htmlise.h"
/* get_line STREAM
 * Returns the next line read from STREAM, or NULL on end-of-file or error
 * (including failure to allocate the line buffer). The returned string
 * includes the trailing \n, unless the line is the last part of a file which
 * doesn't end \n. The string is held in a static buffer which is reused by
 * the next call, so callers must copy anything they want to keep. */
char *get_line(FILE *fp) {
    static char *buf;
    static size_t buflen;
    int c;
    size_t i = 0;

    if (!buf) {
        buf = malloc(1024);
        if (!buf)
            return NULL;
        buflen = 1024;
    }

    while ((c = getc(fp)) != EOF) {
        buf[i++] = (char)c;
        /* Grow as soon as the buffer is full, so that there is always room
         * for the terminating NUL written below. */
        if (i == buflen) {
            char *bigger = realloc(buf, buflen * 2);
            if (!bigger)
                return NULL;    /* buf is untouched and usable by later calls */
            buf = bigger;
            buflen *= 2;
        }
        if (c == '\n')
            break;
    }

    /* Nothing at all was read: end-of-file or an immediate read error. */
    if (i == 0)
        return NULL;

    /* NUL-terminate the string. */
    buf[i] = 0;

    /* A line without a trailing \n is only returned when it was cut short by
     * end-of-file; after a read error the partial data is discarded. */
    if (c == EOF && !feof(fp))
        return NULL;
    return buf;
}
/* is_blank STRING
 * Returns 1 if STRING contains nothing but spaces, tabs and newlines (an
 * empty STRING counts as blank), and 0 otherwise. */
int is_blank(const char *s) {
    const char *cursor;

    for (cursor = s; *cursor != '\0'; ++cursor) {
        switch (*cursor) {
            case ' ':
            case '\t':
            case '\n':
                /* Still blank; look at the next character. */
                continue;
            default:
                return 0;
        }
    }
    return 1;
}
/* expand_tabs STRING
 * Return a copy of STRING in which every tab has been replaced by enough
 * spaces to reach the next multiple of TABSIZE columns. The result is held in
 * a static buffer which is overwritten by the next call. Returns NULL if the
 * buffer cannot be allocated. */
char *expand_tabs(const char *string) {
    static char *buf;
    static size_t buflen;
    size_t len, i, j, ntabs, needed;

    len = strlen(string);
    for (i = 0, ntabs = 0; i < len; ++i)
        if (string[i] == '\t') ++ntabs;

    /* Worst case: every tab turns into TABSIZE spaces. The bound is derived
     * from TABSIZE itself so the buffer stays large enough whatever the tab
     * width is. */
    needed = len + 1 + ntabs * TABSIZE;
    if (!buf || buflen < needed) {
        char *bigger = realloc(buf, needed);
        if (!bigger)
            return NULL;    /* the old buffer, if any, is left intact */
        buf = bigger;
        buflen = needed;
    }

    for (i = 0, j = 0; i < len; ++i) {
        if (string[i] != '\t')
            buf[j++] = string[i];
        else {
            /* Pad with spaces up to the next tab stop; a tab always produces
             * at least one space. */
            size_t nexttabstop;
            nexttabstop = (j / TABSIZE + 1) * TABSIZE;
            while (j < nexttabstop)
                buf[j++] = ' ';
        }
    }
    buf[j] = 0;

    return buf;
}
/* classify_paragraph PARAGRAPH
 * Look at the first line of PARAGRAPH, and classify it as bulleted or
 * numbered if it starts with a list leader; otherwise the paragraph's type is
 * left as it was. Any leader found is recorded in PARAGRAPH and overwritten
 * with spaces in the line itself. The paragraph's indent is also settled
 * here. A paragraph with no lines is left untouched. */
void classify_paragraph(struct paragraph *P) {
    char *p;
    size_t n;

    if (P->nlines < 1) return;

    /* Now it's necessary to classify the paragraph by looking for a leader in
     * the first line. Skip its leading spaces; their count is the column at
     * which any leader sits. */
    p = P->lines[0] + (P->ldrindent = strspn(P->lines[0], " "));

    /* strchr() also matches the terminating NUL of the set it searches, so a
     * first line with nothing after its leading spaces must be excluded
     * explicitly; otherwise *(p + 1) would be read past the end of the
     * line. */
    if (*p != '\0') {
        if (strchr(BULLET_CHARS, *p) && *(p + 1) == ' ') {
            /* A bulleted list is indicated by a leading character from
             * BULLET_CHARS followed by whitespace. */
            P->type = bullet;
            P->leader = *p;
            P->indent = P->ldrindent + 1 + strspn(p + 1, " ");
            *p = ' ';
        } else if (strchr("0123456789", *p)) {
            /* A numbered list is indicated by a decimal number followed by an
             * optional . and then whitespace. */
            char *q;
            q = p + strspn(p, "0123456789");
            if (*q == ' ' || (*q == '.' && *(q + 1) == ' ')) {
                P->type = number;
                P->leader = atoi(p);
                P->indent = P->ldrindent + (q - p) + (*q == '.' ? 1 + strspn(q + 1, " ") : strspn(q, " "));
                /* Blank out the digits and any '.'; the test above
                 * guarantees that a space follows them. */
                while (*p != ' ')
                    *p++ = ' ';
            }
        }
    }

    /*
     * Also figure out the indent. We want to deal with different styles, like
     * this:
     *
     *   Indent leader       Block leader
     *   ---------------     ---------------
     *   - foo bar           - foo bar
     *     baz quux          baz quux
     *
     *     more stuff        more stuff
     *
     * So, if the paragraph has more than one line, we find the minimum indent
     * of any line. Otherwise use the indent of the text following the leader
     * on the first line, and try to fix it up later if we're wrong.
     */
    for (n = 1; n < P->nlines; ++n) {
        size_t m;
        if ((m = strspn(P->lines[n], " ")) < P->indent)
            P->indent = m;
    }
}
/* read_paragraph STREAM
 * Read a paragraph from STREAM, returning it as a pointer to struct
 * paragraph allocated on the heap. Leading blank lines are skipped and the
 * paragraph ends at the next blank line or at end-of-file. Each stored line
 * has its tabs expanded and its trailing \n removed, and is a separate heap
 * copy. On error (including failure to allocate memory) or if there was no
 * paragraph before end-of-file, returns NULL. */
struct paragraph *read_paragraph(FILE *fp) {
    struct paragraph *P;
    char *line, **lines;
    size_t linenum = 0, nlinesalloc = 16;

    if (feof(fp))
        return NULL;

    /* Skip any leading blank lines. */
    while ((line = get_line(fp)) && is_blank(line));
    if (!line) return NULL;

    alloc_struct(paragraph, P);
    /* Start from a value larger than any indent expected in practice; it is
     * replaced when the first line is measured below. */
    P->indent = 1000000;

    lines = malloc(nlinesalloc * sizeof *lines);
    if (!lines)
        goto fail;

    do {
        int i;
        size_t len;
        char *copy;

        /* Want this in true white space with tabs removed. */
        line = expand_tabs(line);
        if (!line)
            goto fail;
        len = strlen(line);

        /* A paragraph is ended by a blank line. */
        if (is_blank(line))
            break;

        /* Measure the indent of the paragraph. For a one-line paragraph, we
         * use the indent of the first line; for a two-line paragraph, the
         * indent of the second line, to cope with indented paragraphs and
         * bulleted or numbered lists. Otherwise we use the smallest indent
         * we discover. */
        i = strspn(line, " \t");
        if (linenum <= 1 || i < P->indent)
            P->indent = i;

        /* Remove any trailing \n and save the line. The line is not blank,
         * so len is at least 1 here. */
        if (line[len - 1] == '\n')
            line[--len] = 0;
        copy = strdup(line);
        if (!copy)
            goto fail;
        lines[linenum++] = copy;

        if (linenum == nlinesalloc) {
            char **bigger = realloc(lines, nlinesalloc * 2 * sizeof *lines);
            if (!bigger)
                goto fail;
            lines = bigger;
            nlinesalloc *= 2;
        }
    } while ((line = get_line(fp)));

    /* If an error occurred or we hit EOF before reading anything, abort. */
    if (!line && (ferror(fp) || (feof(fp) && linenum == 0)))
        goto fail;

    P->lines = lines;
    P->nlines = linenum;

    classify_paragraph(P);

    return P;

fail:
    /* Release every line copied so far, not just the array holding them. */
    while (linenum > 0)
        free(lines[--linenum]);
    free(lines);
    free(P);
    return NULL;
}
/* write_html STREAM PARAGRAPHS NO_P
 * Write PARAGRAPHS out to STREAM, enclosing the text of each paragraph in
 * <p>...</p> unless NO_P is true. A container paragraph is written as an
 * opening tag, its contents (recursively) and a closing tag. A two-line
 * paragraph whose second line underlines the first with =, - or ~ is written
 * as an h1, h2 or h3 heading respectively, whatever the value of NO_P. */
void write_html(FILE *fp, const struct paragraph *paras, const int nopp) {
    const struct paragraph *P;

    for (P = paras; P; P = P->next) {
        if (P->container) {
            fprintf(fp, "<%s>\n", P->container);
            if (P->contents)
                /* Container may be empty. A container holding a single
                 * paragraph has that paragraph written without <p>. */
                write_html(fp, P->contents, P->contents->next == NULL);
            fprintf(fp, "</%s>\n", P->container);
        } else if (P->nlines > 0) {
            const char *enclosing = NULL;

            if (P->nlines == 2) {
                /* The second line counts as an underline when it starts with
                 * a run of one underline character exactly as long as the
                 * first line. The underline is then cut down to an empty
                 * string, presumably so that emit_as_html does not output
                 * it. */
                size_t len;
                len = strlen(P->lines[0]);
                if (len == strspn(P->lines[1], "=")) {
                    P->lines[1][0] = 0;
                    enclosing = "h1";
                } else if (len == strspn(P->lines[1], "-")) {
                    P->lines[1][0] = 0;
                    enclosing = "h2";
                } else if (len == strspn(P->lines[1], "~")) {
                    P->lines[1][0] = 0;
                    enclosing = "h3";
                }
            }

            if (!enclosing && !nopp)
                enclosing = "p";

            if (enclosing) fprintf(fp, "<%s>", enclosing);
            emit_as_html(fp, P);
            if (enclosing) fprintf(fp, "</%s>\n", enclosing);
        }
    }
}
/* main ARGC ARGV
 * Entry point. Reads paragraphs from standard input until read_paragraph
 * returns NULL, links them into a list, passes the list to process_markup
 * and writes the result to standard output as HTML. The command-line
 * arguments are not used. Returns 1 if reading standard input failed, and 0
 * otherwise (including when there was no input). */
int main(int argc, char *argv[]) {
    struct paragraph pp, *ppstart, *ppend;

    (void)argc;
    (void)argv;

    /* pp is only a dummy list head; just its next pointer is used. */
    ppend = &pp;
    while ((ppend->next = read_paragraph(stdin))) {
        ppend->next->prev = ppend;
        ppend = ppend->next;
    }

    /* Check for a read error before anything else, so that a failure which
     * happens before the first paragraph is not mistaken for empty input. */
    if (ferror(stdin)) {
        fprintf(stderr, "htmlise: standard input: %s\n", strerror(errno));
        return 1;
    }

    ppstart = pp.next;

    /* Maybe no input. */
    if (!ppstart)
        return 0;

    /* Detach the first paragraph from the dummy head. */
    ppstart->prev = NULL;

    process_markup(ppstart);

    write_html(stdout, ppstart, 0);

    return 0;
}