/* * htmlise.c: * Turn text into HTML. * * Copyright (c) 2003 Chris Lightfoot. All rights reserved. * Email: chris@ex-parrot.com; WWW: http://www.ex-parrot.com/~chris/ * */ static const char rcsid[] = "$Id: htmlise.c,v 1.5 2003/03/07 18:53:54 chris Exp $"; #include #include #include #include #include "htmlise.h" /* get_line STREAM * Returns the next line read from STREAM, or NULL on end-of-file or error. * The returned string includes the trailing \n, unless the line is the last * part of a file which doesn't end \n. */ char *get_line(FILE *fp) { static char *buf; static size_t buflen; int c; size_t i = 0; if (!buf) buf = malloc(buflen = 1024); while ((c = getc(fp)) != EOF) { buf[i++] = (char)c; if (i == buflen) buf = realloc(buf, buflen *= 2); if (c == '\n') break; } /* NULL-terminate the string. */ if (i > 0) buf[i] = 0; if (c == -1) { if (feof(fp) && i > 0) return buf; else return NULL; } else return buf; } /* is_blank STRING * Does STRING consist only of blanks? */ int is_blank(const char *s) { return s[strspn(s, " \t\n")] == 0; } /* expand_tabs STRING * Return STRING with tabs expanded. */ char *expand_tabs(const char *string) { static char *buf; static size_t buflen; size_t len, i, j, ntabs; len = strlen(string); for (i = 0, ntabs = 0; i < len; ++i) if (string[i] == '\t') ++ntabs; if (!buf || buflen < len + 1 + ntabs * 8) buf = realloc(buf, (buflen = len + 1 + ntabs * 8)); for (i = 0, j = 0; i < len; ++i) { if (string[i] != '\t') buf[j++] = string[i]; else { size_t nexttabstop; nexttabstop = (j / TABSIZE + 1) * TABSIZE; while (j < nexttabstop) buf[j++] = ' '; } } buf[j] = 0; return buf; } /* classify_paragraph PARAGRAPH * Look at the first line of PARAGRAPH, and classify it as * normal/numbered/bulleted. Also remove any leader, if present. */ void classify_paragraph(struct paragraph *P) { char *p; size_t n; if (P->nlines < 1) return; /* Now it's necessary to classify the paragraph by looking for a leader in * the first paragraph. */ p = P->lines[0] + (P->ldrindent = strspn(P->lines[0], " ")); /* A bulleted list is indicated by a leading -, *, + or . followed by * whitespace. */ if (strchr(BULLET_CHARS, *p) && *(p + 1) == ' ') { P->type = bullet; P->leader = *p; P->indent = P->ldrindent + 1 + strspn(p + 1, " "); *p = ' '; } /* A numbered list is indicated by a decimal number followed by an * optional . and then whitespace. */ if (strchr("0123456789", *p)) { char *q; q = p + strspn(p, "0123456789"); if (*q == ' ' || (*q == '.' && *(q + 1) == ' ')) { P->type = number; P->leader = atoi(p); P->indent = P->ldrindent + (q - p) + (*q == '.' ? 1 + strspn(q + 1, " ") : strspn(q, " ")); while (*p != ' ') *p++ = ' '; } } /* * Also figure out the indent. We want to deal with different styles, like * this: * * Indent leader Block leader * --------------- --------------- * - foo bar - foo bar * baz quux baz quux * * more stuff more stuff * * So, if the paragraph has more than one line, we find the minimum indent * of any line. Otherwise use the indent of the text following the leader * on the first line, and try to fix it up later if we're wrong. */ for (n = 1; n < P->nlines; ++n) { size_t m; if ((m = strspn(P->lines[n], " ")) < P->indent) P->indent = m; } } /* read_paragraph STREAM * Read a paragraph from STREAM, returning it as a pointer to struct * paragraph allocated on the heap. On error or if there was no paragraph * before end-of-file, returns NULL. */ struct paragraph *read_paragraph(FILE *fp) { struct paragraph *P; char *line, **lines; size_t linenum = 0, nlinesalloc; int readsomething = 0; if (feof(fp)) return NULL; /* Skip any leading blank lines. */ while ((line = get_line(fp)) && is_blank(line)); if (!line) return NULL; alloc_struct(paragraph, P); P->indent = 1000000; lines = malloc((nlinesalloc = 16) * sizeof *lines); do { int i; size_t len; /* Want this in true white space with tabs removed. */ line = expand_tabs(line); len = strlen(line); /* A paragraph is ended by a blank line. */ if (is_blank(line)) break; readsomething = 1; /* Measure the indent of the paragraph. For a one-line paragraph, we * use the indent of the first line; for a two-line paragraph, the * indent of the second line, to cope with indented paragraphs and * bulleted or numbered lists. Otherwise we use the smallest indent * we discover. */ i = strspn(line, " \t"); if (linenum <= 1 || i < P->indent) P->indent = i; /* Remove any trailing \n and save the line. */ if (line[len - 1] == '\n') line[--len] = 0; lines[linenum++] = strdup(line); if (linenum == nlinesalloc) lines = realloc(lines, (nlinesalloc *= 2) * sizeof *lines); } while ((line = get_line(fp))); /* If an error occurred or we hit EOF before reading anything, abort. */ if (!line && (ferror(fp) || (feof(fp) && linenum == 0))) { free(lines); free(P); return NULL; } P->lines = lines; P->nlines = linenum; classify_paragraph(P); return P; } /* write_html STREAM PARAGRAPHS NO_P * Write PARAGRAPHS out to STREAM, enclosing the text of each paragraph in *

unless NO_P is true. */ void write_html(FILE *fp, const struct paragraph *paras, const int nopp) { const struct paragraph *P; for (P = paras; P; P = P->next) { if (P->container) { fprintf(fp, "<%s>\n", P->container); if (P->contents) /* Container may be empty. */ write_html(fp, P->contents, P->contents->next == NULL); fprintf(fp, "\n", P->container); } else if (P->nlines > 0) { char *enclosing = NULL; if (P->nlines == 2) { size_t len; len = strlen(P->lines[0]); if (len == strspn(P->lines[1], "=")) { P->lines[1][0] = 0; enclosing = "h1"; } else if (len == strspn(P->lines[1], "-")) { P->lines[1][0] = 0; enclosing = "h2"; } else if (len == strspn(P->lines[1], "~")) { P->lines[1][0] = 0; enclosing = "h3"; } } if (!enclosing && !nopp) enclosing = "p"; if (enclosing) fprintf(fp, "<%s>", enclosing); emit_as_html(fp, P); if (enclosing) fprintf(fp, "\n", enclosing); } } } /* main ARGC ARGV * Entry point. */ int main(int argc, char *argv[]) { struct paragraph pp, *ppstart, *ppend; ppend = &pp; while ((ppend->next = read_paragraph(stdin))) { ppend->next->prev = ppend; ppend = ppend->next; } ppstart = pp.next; /* Maybe no input. */ if (!ppstart) return 0; ppstart->prev = NULL; if (ferror(stdin)) { fprintf(stderr, "htmlise: standard input: %s\n", strerror(errno)); return 1; } process_markup(ppstart); write_html(stdout, ppstart, 0); return 0; }