scribble

0.1.0-SNAPSHOT


A Racket's Scribble sub-language implementation in Clojure

dependencies

org.clojure/clojure
1.5.1
org.clojure/math.combinatorics
0.0.4
chiara
0.2.0-SNAPSHOT



(this space intentionally left almost blank)
 

Containts the main part of the API.

See the GitHub repo for the usage information.

(ns scribble.core
  (:use [chiara.reader.hacking :only [with-reader-macro]]
        [chiara.reader.macros :only [use-reader-macros]])
  (:require [scribble.reader :refer :all]
            [scribble.settings :refer :all]))

Enables the Scribble reader macro in the current namespace.

(defn use-scribble
  ([]
    (use-scribble default-settings))
  ([settings]
    (use-reader-macros {:char (entry-char settings)
                        :reader (partial read-entry settings)})))

Temporarily enables the Scribble reader macro with custom settings.

(defmacro with-scribble-settings
  [settings & exprs]
 `(with-reader-macro
    (entry-char ~settings)
    (partial read-entry ~settings)
    (do ~@exprs)))

Temporarily enables the Scribble reader macro with default settings.

(defmacro with-scribble
  [& exprs]
 `(with-scribble-settings default-settings ~@exprs))
 

Contains postprocessing functions for the body part, which remove redundant whitespace.

(ns scribble.postprocess
  (:require [scribble.types :refer :all])
  (:import [scribble.types BodyToken]))

Converts the body part to the final container that is going to be emitted by the reader.

(defn- body-part-finalize
  [body-part]
  (mapv
    (fn [^BodyToken token] (.contents token))
    body-part))
(defn- whitespace-or-newline?
  [^BodyToken token]
  (or
    (.newline? token)
    (.leading-ws? token)
    (.trailing-ws? token)))

Returns true if body-part contains only \newlines and strings of whitespace characters.

(defn- whitespace-only?
  "Returns `true` if `body-part` contains only `\\newline`s
  and strings of whitespace characters."
  [body-part]
  (every? whitespace-or-newline? body-part))

Trim (a possible whitespace string and) a newline string from body-part, if they are present.

(defn- trim-leading-newline
  [body-part]
  (if (empty? body-part)
    body-part
    (let [^BodyToken t-first (nth body-part 0)]
      (if (= (count body-part) 1)
        (if (.newline? t-first)
          []
          body-part)
        (let [^BodyToken t-second (nth body-part 1)]
          (cond
            (.newline? t-first)
              (subvec body-part 1)
            (and (.leading-ws? t-first)
                 (.newline? t-second))
              (subvec body-part 2)
            :else
              body-part))))))

Trim a newline string (and a possible whitespace string) from body-part, if they are present.

(defn- trim-trailing-newline
  [body-part]
  (if (empty? body-part)
    body-part
    (let [n-last (dec (count body-part))
          ^BodyToken t-last (nth body-part n-last)]
      (if (= (count body-part) 1)
        (if (.newline? t-last)
          []
          body-part)
        (let [n-prev (dec n-last)
              ^BodyToken t-prev (nth body-part n-prev)]
          (cond
            (.newline? t-last)
              (subvec body-part 0 n-last)
            (and (.leading-ws? t-last)
                 (.newline? t-prev))
              (subvec body-part 0 n-prev)
            :else
              body-part))))))

The predicate for trim-whitespace. Checks if the contents of the token are leading whitespace and trims them by the value of indent. Returns nil, if the leading whitespace is too small, these will be cleaned up by trim-whitespace.

(defn- trim-leading-whitespace-pred
  [indent ^BodyToken token]
  (when-not (.trailing-ws? token)
    (if (.leading-ws? token)
      (when (> (count (.contents token)) indent)
        (make-body-token (subs (.contents token) indent) :leading-ws :true))
      token)))

Trims leading indent characters from leading whitespace in body-part

(defn- trim-leading-whitespace
  [body-part indent]
  (filterv #(not (nil? %))
    (map (partial trim-leading-whitespace-pred indent) body-part)))
(defn- zip
  [seq1 seq2]
  (map (fn [a b] [a b]) seq1 seq2))

Of all leading whitespace tokens, which are:

  • not last,
  • have something non-whitespace after them,

    we select the smallest one.

(defn- find-starting-indent
  [body-part]
  (let [ws-candidates (subvec body-part 0 (dec (count body-part)))
        next-elems (subvec body-part 1)
        ; Filter all leading whitespaces followed by something significant
        filter-pred
          (fn [[^BodyToken elem ^BodyToken next-elem]]
            (and (.leading-ws? elem)
                 (not (.newline? next-elem))))
        ws-pairs (filter filter-pred (zip ws-candidates next-elems))
        ; Extract their lengths
        map-pred
          (fn [[^BodyToken elem _]]
            (count (.contents elem)))
        ws-lengths (map map-pred ws-pairs)]
    (if (empty? ws-lengths)
      0
      (reduce min ws-lengths))))

Returns the common indentation for body-part that can be trimmed.

(defn- get-common-indent
  [body-part starting-indent]
  (if (< (count body-part) 2)
    starting-indent
    (let [t0 ^BodyToken (nth body-part 0)
          t1 ^BodyToken (nth body-part 1)]
      ; If the body part starts from something significant and not
      ; just whitespace and newline, we take indent of that.
      ; Otherwise we have to search for it in the whole body-part.
      (if (or (.newline? t0)
              (and (.leading-ws? t0)
                   (.newline? t1)))
        (find-starting-indent body-part)
        starting-indent))))

If body-part starts with a leading whitespace and a non-whitespace string, merges them together.

(defn- merge-starting-whitespace
  [body-part]
  (if (< (count body-part) 2)
    body-part
    (let [t0 ^BodyToken (nth body-part 0)
          t1 ^BodyToken (nth body-part 1)]
      (if (and (.leading-ws? t0)
               (string? (.contents t1))
               (not (.newline? t1)))
        (assoc (subvec body-part 1)
               0
               (make-body-token (str (.contents t0) (.contents t1))))
        body-part))))

If body-part ends with a non-whitespace string and a trailing whitespace, merges them together.

(defn- merge-ending-whitespace
  [body-part]
  (if (< (count body-part) 2)
    body-part
    (let [n-last (dec (count body-part))
          n-prev (dec n-last)
          t-last ^BodyToken (nth body-part n-last)
          t-prev ^BodyToken (nth body-part n-prev)]
      (if (and (.trailing-ws? t-last)
               (string? (.contents t-prev))
               (not (.newline? t-prev)))
        (conj
          (subvec body-part 0 n-prev)
          (make-body-token (str (.contents t-prev) (.contents t-last))))
        body-part))))

Removes excessive whitespace according to the following rules:

  • If the body part starts with a \newline, the size of the maximum common whitespace is used instead of starting-indent.
  • If the body part only contains \newlines and whitespace, everything except \newlines is discarded.
  • Otherwise if it starts with (maybe some whitespace and) \newline, or ends with \newline (and maybe some whitespace), these are discarded.
  • Any whitespace right before a \newline is discarded.
  • Any whitespace after a \newline with more than starting-indent characters is truncated by this amount, otherwise it is discarded completely.
(defn- trim-whitespace
  "Removes excessive whitespace according to the following rules:
  - If the body part starts with a `\\newline`, the size of the
    maximum common whitespace is used instead of `starting-indent`.
  - If the body part only contains `\\newline`s and whitespace,
    everything except `\\newline`s is discarded.
  - Otherwise if it starts with (maybe some whitespace and) `\\newline`,
    or ends with `\\newline` (and maybe some whitespace), these are discarded.
  - Any whitespace right before a `\\newline` is discarded.
  - Any whitespace after a `\\newline` with more than `starting-indent`
    characters is truncated by this amount,
    otherwise it is discarded completely."
  [body-part starting-indent]
    (let [common-indent (get-common-indent body-part starting-indent)]
      (cond
        (empty? body-part) body-part
        (whitespace-only? body-part)
          (filterv (fn [^BodyToken token] (.newline? token)) body-part)
        :else (-> body-part
          (trim-leading-whitespace common-indent)
          trim-leading-newline
          trim-trailing-newline))))

Postprocess body-accum and return the final body part container.

(defn body-postprocess
  [body-accum starting-indent]
  (-> body-accum
    body-accum-finalize
    merge-starting-whitespace
    merge-ending-whitespace
    (trim-whitespace starting-indent)
    body-part-finalize))
 

Contains the main reader macro and readers for nested special forms.

(ns scribble.reader
  (:use [clojure.set :only [intersection]])
  (:require [chiara.reader.utils :as reader-methods]
            [scribble.types :refer :all]
            [scribble.postprocess :refer :all]
            [scribble.settings :refer :all])
  (:import [scribble.settings Settings]))

Throws an ExceptionInfo with the given message. If reader provides line/column metadata, it will be included in the exception.

(defn- reader-error
  [reader & msg]
  (throw
    (ex-info
      (clojure.string/join msg)
      (merge {:type :reader-exception}
             (when-let [[l c] (reader-methods/reader-position reader)]
               {:line l :column c})))))

Creates beginning and ending here-string markers for usage in the body part reader.

(defn- here-markers
  [here-str]
  (if (nil? here-str)
    [[] []]
    ; Additional symbols allow us to avoid `dec`s in comparisons.
    [(str \space here-str)
     (str \space (inverse-str here-str))]))

The following two functions could be joined into one, but that would require creation and destructuring of a vector [body-accum str-accum], and which noticeably affects read-body performance.

Updates a body part accumulator when a non-whitespace character is encountered

(defn- dump-nonws-char-body
  [body-accum str-accum leading-ws]
  (if leading-ws
    (dump-leading-ws body-accum str-accum)
    body-accum))

Updates a string accumulator when a non-whitespace character is encountered

(defn- dump-nonws-char-str
  [str-accum c leading-ws]
  (if leading-ws
    (make-str-accum c)
    (str-accum-push str-accum c)))
(declare read-entry)

Returns a body-accum containing tokens with strings and nested forms. The strings are separated as [leading whitespace, contents, trailing whitespace, newline] (for the ease of further processing).

(defn- read-body
  [^Settings settings reader here-str]
  (let [[here-start here-end] (here-markers here-str)
        here-marker-len (int (count here-start))
        escaped (not (nil? here-str))
        entry-char (.entry-char settings)
        body-start-char (.body-start-char settings)
        body-end-char (.body-end-char settings)
        escape-start-char (.escape-start-char settings)
        escape-end-char (.escape-end-char settings)]
    (loop [body-accum (make-body-accum)
           str-accum (make-str-accum)
           ; Indicates the leading whitespace reading mode.
           leading-ws true
           ; Using privitive integers gets us some speed-up.
           brace-level (int 0)
           ; If the position is positive, we are reading the starting marker,
           ; if it is negative, we are reading the ending marker.
           here-str-pos (int 0)]
      (let [c (reader-methods/read-1 reader)]
        (cond
          ; Catches the body part ending character
          ; (or the escape ending one, if we are in escaped mode)
          (or (and escaped
                   (= c escape-end-char)
                   (== (- here-str-pos) here-marker-len))
              (and (not escaped)
                   (= c body-end-char)))
            (if (zero? brace-level)
              (if leading-ws
                (dump-leading-ws body-accum str-accum)
                (dump-string
                  body-accum
                  (str-accum-pop str-accum here-marker-len)))
              (recur
                (dump-nonws-char-body body-accum str-accum leading-ws)
                (dump-nonws-char-str str-accum c leading-ws)
                false
                (dec brace-level)
                (int 0)))
          ; Catches the body part starting character
          ; (or the escape starting one, if we are in escaped mode)
          (or (and escaped
                   (= c escape-start-char))
              (and (= c body-start-char)
                   (== here-str-pos here-marker-len)))
            (recur
              (dump-nonws-char-body body-accum str-accum leading-ws)
              (dump-nonws-char-str str-accum c leading-ws)
              false
              ; The body-start-char will be at the end of the here-string
              ; sequence in escaped mode, so we can safely increase
              ; the brace level regardless of `escaped`.
              (if (= c body-start-char)
                (inc brace-level)
                brace-level)
              ; If we are in escaped mode and caught the escape starting,
              ; just increase the here-string position.
              (if (= c escape-start-char)
                (int 1)
                (int 0)))
          ; In escaped mode, body-end-char only means that
          ; we should start comparing the following characters with
          ; the ending here-string marker.
          (and escaped
               (= c body-end-char))
            (recur
              (dump-nonws-char-body body-accum str-accum leading-ws)
              (dump-nonws-char-str str-accum c leading-ws)
              false
              brace-level
              (int -1))
          ; Entry character encountered,
          ; and we are at the end of the starting here-string marker.
          (and (= c entry-char)
               (== here-str-pos here-marker-len))
            (let [nested-form (read-entry settings reader c)
                  ; Pop here-string marker from the accumulator
                  str-accum (str-accum-pop str-accum here-marker-len)
                  [body-accum str-accum]
                    ; Check if the nested form was a comment
                    (if (identical? nested-form reader)
                      [body-accum str-accum]
                      (dump-nested-form
                        body-accum str-accum nested-form leading-ws))]
              (recur
                body-accum
                str-accum
                false
                brace-level
                (int 0)))
          ; Reading the starting here-string marker,
          ; and the current character is correct.
          (and (pos? here-str-pos)
               (< here-str-pos here-marker-len)
               (= c (nth here-start here-str-pos)))
            (recur
              body-accum
              (str-accum-push str-accum c)
              leading-ws
              brace-level
              (inc here-str-pos))
          ; Reading the ending here-string marker,
          ; and the current character is correct.
          (and (neg? here-str-pos)
               (< (- here-str-pos) here-marker-len)
               (= c (nth here-end (- here-str-pos))))
            (recur
              body-accum
              (str-accum-push str-accum c)
              leading-ws
              brace-level
              (dec here-str-pos))
          ; Unexpected EOF
          (nil? c)
            (reader-error reader "Unexpected EOF while reading a body part")
          ; Newline encountered: dump accumulator,
          ; turn the leading whitespace mode on
          (= c \newline)
            (let [body-accum
                   (-> body-accum
                     (dump-string str-accum)
                     push-newline)]
              (recur
                body-accum
                (make-str-accum)
                true
                brace-level
                (int 0)))
          ; In leading whitespace mode, a whitespace character encountered
          (and (whitespace? c) leading-ws)
            (recur
              body-accum
              (str-accum-push str-accum c)
              true
              brace-level
              (int 0))
          ; A normal character or a whitespace character
          ; out of the leading whitespace mode
          :else
            (recur
              (dump-nonws-char-body body-accum str-accum leading-ws)
              (dump-nonws-char-str str-accum c leading-ws)
              false
              brace-level
              (int 0)))))))

Checks that here-str does not contain escape-start/end chars, entry char, or body-end char (it does not contain the body-start char because of the way it was read). This makes it easier to watch for it when reading the body part.

(defn- validate-here-str
  [^Settings settings reader here-str]
  (if (nil? here-str)
    here-str
    (let [prohibited-chars (set [(.entry-char settings)
                                 (.body-end-char settings)
                                 (.escape-start-char settings)
                                 (.escape-end-char settings)])
          here-str-chars (set here-str)]
      (if (empty? (intersection prohibited-chars here-str-chars))
        here-str
        (reader-error reader "Here-string contains invalid characters")))))

Reads a body part, escaped by here-str (i.e. looking like `<here-str>{text here}<inverse-here-str>`). If here-str is nil, the body part is considered to be non-escaped.

(defn- read-body-part
  [^Settings settings reader here-str]
  (let [here-str (validate-here-str settings reader here-str)
        [_ c] (reader-methods/reader-position reader)
        column (if (nil? c) 0 c)
        body-accum (read-body settings reader here-str)
        body-part (body-postprocess body-accum column)]
    body-part))

Reads and returns a string until (and not including) delim.

(defn- read-until
  [reader delim]
  (loop [str-accum (make-str-accum)]
    (let [c (reader-methods/read-1 reader)]
      (cond
        (nil? c) (str-accum-finalize str-accum)
        (= delim c)
          (do
            (reader-methods/unread reader c)
            (str-accum-finalize str-accum))
        :else (recur (str-accum-push str-accum c))))))

Reads datum and body parts of the form, until EOF or whitespace is encountered.

(defn- read-parts
  [^Settings settings reader]
  (let [body-start-char (.body-start-char settings)
        datum-start-char (.datum-start-char settings)
        datum-end-char (.datum-end-char settings)
        escape-start-char (.escape-start-char settings)]
    (loop [forms-read []
           ; We want to make a difference between `@foo[]`
           ; (reads as `'(foo)`) and `@foo` (reads as `'foo`).
           ; This flag will be set to `true` when
           ; either datum or body part is encountered.
           forms-present false]
      (let [c (reader-methods/read-1 reader)]
        (cond
          ; Simple escaped string, e.g. the one starting with just
          ; escape start char + body start char
          (and (= c escape-start-char)
               (= (reader-methods/peek reader) body-start-char))
            (do
              (reader-methods/read-1 reader)
              (recur
                (conj forms-read (read-body-part settings reader ""))
                true))
          ; An escaped string with non-empty here-string
          (= c escape-start-char)
            (let [here-str (read-until reader body-start-char)
                  next-c (reader-methods/read-1 reader)]
              (if (nil? next-c)
                (reader-error reader
                  "Unexpected EOF while reading a here-string")
                (let [body-part (read-body-part settings reader here-str)]
                  (recur
                    (conj forms-read body-part)
                    true))))
          ; A simple body part
          (= c body-start-char)
            (recur
              (conj forms-read (read-body-part settings reader nil))
              true)
          ; A datum part
          (= c datum-start-char)
            (let [forms (reader-methods/read-delimited-list
                           datum-end-char reader)]
              (recur (vec (concat forms-read forms)) true))
          (nil? c)
            (if forms-present
              (list* forms-read)
              reader)
          :else
            (do
              (reader-methods/unread reader c)
              (if forms-present
                (list* forms-read)
                reader)))))))

Reads from reader until \newline or EOF is encountered (the final \newline is not consumed). Returns nil.

(defn- skip-to-newline
  "Reads from `reader` until `\\newline` or `EOF` is encountered
  (the final `\\newline` is not consumed).
  Returns `nil`."
  [reader]
  (loop []
    (let [c (reader-methods/read-1 reader)]
      (cond
        (nil? c) nil
        (= \newline c) (do (reader-methods/unread reader c) nil)
        :else (recur)))))

Reads from reader until \newline is encountered and then until the first non-whitespace character is encountered (the final character is not consumed), or until EOF is encountered. Returns nil.

(defn- skip-to-meaningful-char
  "Reads from `reader` until `\\newline` is encountered
  and then until the first non-whitespace character is encountered
  (the final character is not consumed), or until `EOF` is encountered.
  Returns `nil`."
  [reader]
  (loop [newline-encountered false]
    (let [c (reader-methods/read-1 reader)]
      (cond
        (nil? c) nil
        (and newline-encountered (not (whitespace? c)))
          (do (reader-methods/unread reader c) nil)
        (= \newline c) (recur true)
        :else (recur newline-encountered)))))

The entry point of the reader macro.

(defn read-entry
  [^Settings settings reader _]
  (let [body-start-char (.body-start-char settings)
        datum-start-char (.datum-start-char settings)
        escape-start-char (.escape-start-char settings)
        escape-end-char (.escape-end-char settings)
        comment-char (.comment-char settings)
        c (reader-methods/read-1 reader)]
    (cond
      ; No command, body or datum starts right away.
      ; Pass the execution to the parts reader.
      (or (= c body-start-char)
          (= c datum-start-char))
        (do
          (reader-methods/unread reader c)
          (read-parts settings reader))
      ; A comment form
      (= c comment-char)
        (let [next-c (reader-methods/peek reader)]
          (condp = next-c
            comment-char (skip-to-meaningful-char reader)
            body-start-char (read-parts settings reader)
            (skip-to-newline reader))
          ; By convention, if the reader function has read nothing,
          ; it returns the reader.
          reader)
      (whitespace? c)
        (reader-error reader
          "Unexpected whitespace at the start of a Scribble form")
      (nil? c)
        (reader-error reader
          "Unexpected EOF at the start of a Scribble form")
      ; An escaped (spliced) form, or an escaped string.
      (= c escape-start-char)
        (let [next-c (reader-methods/peek reader)]
          (if (= next-c escape-start-char)
            (read-parts settings reader)
            ; If it is a spliced form, mark it with metadata,
            ; so that it could be spliced in the parent reader.
            (mark-for-splice
              (reader-methods/read-delimited-list
                escape-end-char reader))))
      :else
        (do
          (reader-methods/unread reader c)
          (let [command (reader-methods/read-next reader)
                forms (read-parts settings reader)]
            (cond
              (identical? reader forms) command
              (empty? forms) (list command)
              :else (cons command forms)))))))
 

Contains functions that allow you to tune the characters used for different purposes in the reader.

(ns scribble.settings
  (:use [clojure.math.combinatorics :only [combinations]]
        [clojure.set :only [intersection]]))
(deftype Settings [
  ^Character entry-char
  ^Character body-start-char
  ^Character body-end-char
  ^Character datum-start-char
  ^Character datum-end-char
  ^Character escape-start-char
  ^Character escape-end-char
  ^Character comment-char])
(defn- assert-not-intersect
  [set1 set2 message]
  (when (seq (intersection set1 set2))
    (throw (ex-info message {:set1 set1 :set2 set2}))))

Checks the reader settings for conflicts.

(defn- validate-settings
  [^Settings settings]
  (let [entry-char (.entry-char settings)
        body-start-char (.body-start-char settings)
        body-end-char (.body-end-char settings)
        datum-start-char (.datum-start-char settings)
        datum-end-char (.datum-end-char settings)
        escape-start-char (.escape-start-char settings)
        escape-end-char (.escape-end-char settings)
        comment-char (.comment-char settings)
        all-chars (set [entry-char
                        body-start-char
                        body-end-char
                        datum-start-char
                        datum-end-char
                        escape-start-char
                        escape-end-char
                        comment-char])
        entry-chars #{entry-char}
        body-chars (set [body-start-char body-end-char])
        datum-chars (set [datum-start-char datum-end-char])
        escape-chars (set [escape-start-char escape-end-char])
        comment-chars #{comment-char}
        char-sets [entry-chars body-chars datum-chars
                   escape-chars comment-chars]]
    (assert-not-intersect all-chars #{\space \tab \newline}
      "None of the characters can be whitespace or newlines")
    (doseq [[set1 set2] (combinations char-sets 2)]
      (assert-not-intersect set1 set2
        "Characters used for different purposes must be different"))
    settings))

Creates a reader settings structure, and checks the characters used for conflicts. Parameters:

  • entry-char: the entry point to our custom reader. Warning: any default reader macro for this symbol will be unusable when Scribble is enabled.
  • body-start-char, body-end-char: mark the body part. Ideally these occur rarely in the text, otherwise a lot of escaping will have to be used.
  • datum-start-char, datum-end-char: mark the datum part. datum-end-char must be a macro-terminating character (if it's not, it has to be preceeded by whitespace when it's used to terminate the datum part).
  • escape-start-char, escape-end-char: used for escaping body part and splicing forms. If escape-start-char has a reader macro assigned, this macro will be unusable in the first form of the splice (because two escaping characters mark the beginning of an escaped body part). escape-end-char must be a macro-terminating character (if it's not, it has to be preceeded by whitespace when it's used to terminate the spliced form).
  • comment-char is used (with a preceding entry-char) to start line comments, newline-consuming comments, and multiline comments.

    Characters in every group may be equal, but should differ from all characters in other groups.

(defn make-settings
  [entry-char
   body-start-char
   body-end-char
   datum-start-char
   datum-end-char
   escape-start-char
   escape-end-char
   comment-char]
  (validate-settings
    (Settings. entry-char
               body-start-char
               body-end-char
               datum-start-char
               datum-end-char
               escape-start-char
               escape-end-char
               comment-char)))
(def default-settings (make-settings \@ \{ \} \[ \] \` \` \;))
(defn whitespace?
  [c]
  (or (= c \space) (= c \tab)))
(defn entry-char
  [^Settings settings]
  (.entry-char settings))
(defn inverse-char
  [c]
  (case c
    \( \)
    \) \(
    \[ \]
    \] \[
    \< \>
    \> \<
    c))
(defn inverse-str
  [s]
  (clojure.string/join
    (mapv inverse-char (reverse s))))
 

Contains methods for the string accumulator, the body part accumulator, and the body part token. This way the underlying data structures can be changed easily if needed, and the complexity requirements can be assessed.

(ns scribble.types)

String accumulator methods

String accumulator is used when the reader reads and collects characters from the stream.

Creates an empty string accumulator.

(defn make-str-accum
  ([]
    [])
  ([c]
    [c]))

Removes the last n characters from str-accum.

(defn str-accum-pop
  [str-accum n]
  (if (zero? n)
    str-accum
    (subvec str-accum 0 (- (count str-accum) n))))

Adds a character c to the end of str-accum.

(defn str-accum-push
  [str-accum c]
  (conj str-accum c))

Returns a string representing the contents of the accumulator.

(defn str-accum-finalize
  [str-accum]
  (clojure.string/join str-accum))

Body part token methods

When the body part is read, it is organized into tokens containing strings or arbitrary forms, and some metadata, which is readily available at read time, but would require O(n) time to obtain during the postprocessing.

(deftype BodyToken [
  contents
  ; `true` if `contents` is a string "\n"
  ^boolean newline?
  ; `true` if `contents` is a string of whitespace characters,
  ; representing the leading whitespace in the body part.
  ^boolean leading-ws?
  ; `true` if `contents` is a string of whitespace characters,
  ; representing the trailing whitespace in the body part.
  ^boolean trailing-ws?])

Creates a BodyToken with an optional metadata.

(defn make-body-token
  [contents & {:keys [newline leading-ws trailing-ws]
               :or {newline false
                    leading-ws false
                    trailing-ws false}}]
  (BodyToken. contents
              (boolean newline)
              (boolean leading-ws)
              (boolean trailing-ws)))

Body part accumulator methods

Body part accumulator is used to collect BodyTokens while reading the body part.

Creates an empty body part accumulator.

(defn make-body-accum
  []
  [])

Adds a token to the end of the accumulator.

(defn- body-accum-push
  [body-accum token]
  (conj body-accum token))

Converts the accumulator to a data structure more suitable for postprocessing.

(defn body-accum-finalize
  [body-accum]
  body-accum)

Body accumulator helpers

Wraps a string of trailing whitespace in a token and adds it to the accumulator.

(defn- push-trailing-ws
  [body-accum s]
  (if (empty? s)
    body-accum
    (body-accum-push body-accum (make-body-token s :trailing-ws true))))

If the given string is non-empty, wraps it in a token and adds it to the accumulator.

(defn- push-string
  [body-accum s]
  (if (empty? s)
    body-accum
    (body-accum-push body-accum (make-body-token s))))

Wraps an arbitrary form in a token and adds it to the accumulator.

(defn- push-form
  [body-accum f]
  (body-accum-push body-accum (make-body-token f)))

Wraps a newline in a token and adds it to the accumulator.

(defn push-newline
  [body-accum]
  (body-accum-push body-accum (make-body-token "\n" :newline true)))

Body- and string-accumulator combined updaters

Finalizes a string accumulator containing leading whitespace and pushes it to the body accumulator.

(defn dump-leading-ws
  [body-accum str-accum]
  (body-accum-push
    body-accum
    (make-body-token (str-accum-finalize str-accum) :leading-ws true)))

Finalizes a string accumulator containing an arbitrary string and pushes it to the body accumulator.

(defn- dump-string-verbatim
  [body-accum str-accum]
  (push-string body-accum (str-accum-finalize str-accum)))

Splits the string into two strings containing the trailing whitespace and the remaining part. Returns a vector [main-part trailing-ws].

(defn- split-trimr
  [s]
  (let [trimmed-s (clojure.string/trimr s)
        count-full (count s)
        count-trimmed (count trimmed-s)]
    (if (= count-full count-trimmed)
      [s ""]
      [trimmed-s (subs s count-trimmed)])))

Finalizes a string accumulator containing an arbitrary string and pushes it to the body accumulator. If str-accum is empty, body-accum is returned unchanged. Otherwise, the string constructed from str-accum is split into the main part and the trailing whitespace part before the attachment to body-accum.

(defn dump-string
  [body-accum str-accum]
  (let [[main-part trailing-ws] (split-trimr (str-accum-finalize str-accum))]
    (-> body-accum
      (push-string main-part)
      (push-trailing-ws trailing-ws))))

Marks the list to be spliced in the body part in the parent call to read-body.

(defn mark-for-splice
  [l]
  (with-meta l {::splice true}))

Pushes an arbitrary form to the accumulator, possibly finalizing and pushing the string accumulator first. Splices the list into the body accumulator, if it is marked with the corresponding meta tag.

(defn dump-nested-form
  [body-accum str-accum nested-form leading-ws]
  (cond
    leading-ws
      (dump-nested-form
        (dump-leading-ws body-accum str-accum)
        (make-str-accum)
        nested-form
        false)
    ; it was a string: special case, append it to the accumulator
    (string? nested-form)
      ; FIXME: prepending is O(n)
      [body-accum (vec (concat str-accum nested-form))]
    ; an actual form
    :else
      (let [body-accum-with-str (dump-string-verbatim body-accum str-accum)
            body-accum
              (if (::splice (meta nested-form))
                (reduce push-form body-accum-with-str nested-form)
                (push-form body-accum-with-str nested-form))]
        [body-accum (make-str-accum)])))