(ns outliner.model.core.markdown (:require [clojure.string :as string] [outliner.model.core.tree :as tree] [outliner.model.core.text-ops :as text-ops] [outliner.model.core.annotations :as annotations])) (def completed-markdown-prefix "[COMPLETE] ") (declare parse-markdown) (defn- find-best-match [text patterns] (reduce (fn [best p] (if-let [m (re-find (:regex p) text)] (let [full-match (if (string? m) m (first m)) idx (string/index-of text full-match)] (if (or (nil? best) (< idx (:idx best))) {:type (:type p) :match m :idx idx :full-match full-match} best)) best)) nil patterns)) (defn- make-markdown-part [match] (let [{:keys [type match full-match]} match] (case type :horizontal-line {:type :horizontal-line :value full-match} :bold-italic {:type :bold-italic :value (nth match 1) :children (parse-markdown (nth match 1))} :bold {:type :bold :value (nth match 1) :children (parse-markdown (nth match 1))} :italic {:type :italic :value (nth match 1) :children (parse-markdown (nth match 1))} :highlight {:type :highlight :value (nth match 1) :children (parse-markdown (nth match 1))} :code {:type :code :value (nth match 1) :children [{:type :text :value (nth match 1)}]} :markdown-link {:type :markdown-link :value full-match :display-text (nth match 1) :url (nth match 2) :children (parse-markdown (nth match 1))} :link {:type :link :value full-match} {:type :text :value full-match}))) (defn parse-markdown [text] (if (string/blank? text) [] (let [patterns [{:type :horizontal-line :regex #"^(\-\-\-|\*\*\*|\_\_\_)$"} {:type :bold-italic :regex #"\*\*\*(.*?)\*\*\*"} {:type :bold :regex #"\*\*(.*?)\*\*"} {:type :italic :regex #"\*(.*?)\*"} {:type :highlight :regex #"==(.+?)=="} {:type :code :regex #"`(.*?)`"} {:type :markdown-link :regex #"\[(.*?)\]\(((?:https?://|node-id://)[^)]+)\)"} {:type :link :regex #"(https?://[^\s]+)"}]] (loop [remaining text acc []] (if-let [best (find-best-match remaining patterns)] (let [{:keys [idx full-match]} best before (subs remaining 0 idx) after (subs remaining (+ idx (count full-match))) acc (cond-> acc (seq before) (conj {:type :text :value before}))] (recur after (conj acc (make-markdown-part best)))) (cond-> acc (seq remaining) (conj {:type :text :value remaining}))))))) (declare parse-parts-to-annotated) (defn- parse-style-to-annotated [part pos] (let [type (:type part) res (parse-parts-to-annotated (:children part) pos) inner-text (:text res) inner-annotations (:annotations res) end (+ pos (count inner-text)) style-annotations (if (= type :bold-italic) [{:range-start pos :range-end end :type :style :style :bold} {:range-start pos :range-end end :type :style :style :italic}] [{:range-start pos :range-end end :type :style :style type}])] {:text inner-text :annotations (into inner-annotations style-annotations)})) (defn- parse-md-link-to-annotated [part pos] (let [label-start (inc pos) res (parse-parts-to-annotated (:children part) label-start) inner-annotations (:annotations res) end (+ pos (count (:value part)))] {:text (:value part) :annotations (conj inner-annotations {:range-start pos :range-end end :type :markdown-link :url (:url part) :display-text (:display-text part)})})) (defn- parse-simple-to-annotated [part pos] (let [type (:type part) value (:value part) end (+ pos (count value)) annotation (cond (= type :link) {:range-start pos :range-end end :type :link :url value} (= type :code) {:range-start pos :range-end end :type :style :style :code} (= type :horizontal-line) {:range-start pos :range-end end :type :horizontal-line} :else nil)] {:text value :annotations (cond-> [] annotation (conj annotation))})) (defn- parse-parts-to-annotated [parts base-pos] (loop [remaining parts pos base-pos pieces [] all-annotations []] (if (seq remaining) (let [part (first remaining) type (:type part) res (cond (#{:bold :italic :bold-italic :highlight} type) (parse-style-to-annotated part pos) (= type :markdown-link) (parse-md-link-to-annotated part pos) :else (parse-simple-to-annotated part pos))] (recur (rest remaining) (+ pos (count (:text res))) (conj pieces (:text res)) (into all-annotations (:annotations res)))) {:text (apply str pieces) :annotations all-annotations}))) (defn markdown-to-annotated [text] (parse-parts-to-annotated (parse-markdown text) 0)) (defn- get-style-marker [style] (case style :bold "**" :italic "*" :code "`" :highlight "==" "")) (defn annotated-to-markdown [text annotations] (let [boundaries (->> annotations (mapcat (fn [a] [(:range-start a) (:range-end a)])) (concat [0 (count text)]) (filter #(<= 0 % (count text))) distinct sort) segments (loop [bs boundaries segments []] (if (next bs) (let [start (first bs) end (second bs) content (subs text start end) applicable-annotations (filter (fn [a] (and (<= (:range-start a) start) (>= (:range-end a) end))) annotations)] (recur (rest bs) (conj segments {:text content :annotations applicable-annotations}))) segments))] (apply str (map (fn [seg] (let [segment-annotations (:annotations seg) v (:text seg)] (reduce (fn [inner annotation] (case (:type annotation) :style (let [m (get-style-marker (:style annotation))] (str m inner m)) :link inner :markdown-link inner :horizontal-line inner inner)) v (annotations/sort-annotations segment-annotations)))) segments)))) (defn- marker-size [type] (case type :bold 2 :italic 1 :bold-italic 3 :highlight 2 :code 1 0)) (defn- adjust-position "Given an original-text position and sorted removal ranges, return the position in the cleaned text after those ranges have been removed." [p sorted-removals] (loop [rems sorted-removals shift 0] (if-not (seq rems) (- p shift) (let [[rs re] (first rems)] (cond (< p rs) (- p shift) (< p re) (- rs shift) :else (recur (rest rems) (+ shift (- re rs)))))))) (declare collect-removals-and-annotations) (defn- collect-style-removals [part orig-pos ms] (let [type (:type part) content-start (+ orig-pos ms) res (collect-removals-and-annotations (:children part) content-start) content-end (:orig-end res) close-end (+ content-end ms) style-annotations (if (= type :bold-italic) [{:range-start content-start :range-end content-end :type :style :style :bold} {:range-start content-start :range-end content-end :type :style :style :italic}] [{:range-start content-start :range-end content-end :type :style :style type}])] {:end close-end :removals (conj (into [] (:removals res)) [orig-pos content-start] [content-end close-end]) :annotations (into (:new-annotations res) style-annotations)})) (defn- collect-md-link-removals [part orig-pos] (let [value (:value part) end (+ orig-pos (count value)) res (collect-removals-and-annotations (:children part) (inc orig-pos)) annotation {:range-start orig-pos :range-end end :type :markdown-link :url (:url part) :display-text (:display-text part)}] {:end end :removals (:removals res) :annotations (conj (:new-annotations res) annotation)})) (defn- collect-simple-removals [part orig-pos] (let [type (:type part) value (:value part) end (+ orig-pos (count value)) annotation (case type :horizontal-line {:range-start orig-pos :range-end end :type :horizontal-line} :link {:range-start orig-pos :range-end end :type :link :url value} nil)] {:end end :removals [] :annotations (cond-> [] annotation (conj annotation))})) (defn- collect-removals-and-annotations "Walk parsed markdown parts, tracking original positions. Returns marker removal ranges and new annotations derived from the markdown syntax." ([parts] (collect-removals-and-annotations parts 0)) ([parts base-orig-pos] (loop [remaining parts, orig-pos base-orig-pos, removals [], new-annotations []] (if-not (seq remaining) {:removals removals :new-annotations new-annotations :orig-end orig-pos} (let [part (first remaining) type (:type part) ms (marker-size type) res (cond (pos? ms) (collect-style-removals part orig-pos ms) (= type :markdown-link) (collect-md-link-removals part orig-pos) :else (collect-simple-removals part orig-pos))] (recur (rest remaining) (:end res) (into removals (:removals res)) (into new-annotations (:annotations res)))))))) (defn- strip-marker-ranges "Build a new string from text by omitting the character ranges in sorted-removals." [text sorted-removals] (apply str (loop [pos 0, rems sorted-removals, pieces []] (if (seq rems) (let [[rs re] (first rems)] (recur re (rest rems) (conj pieces (subs text pos rs)))) (conj pieces (subs text pos)))))) (defn- adjust-annotations "Shift annotation positions to account for removed marker ranges." [annotations sorted-removals] (mapv (fn [a] (assoc a :range-start (adjust-position (:range-start a) sorted-removals) :range-end (adjust-position (:range-end a) sorted-removals))) annotations)) (defn create-node-in-doc [doc id text] (let [id (or id (str #?(:clj (java.util.UUID/randomUUID) :cljs (random-uuid)))) {:keys [text annotations]} (cond (map? text) text (string? text) (markdown-to-annotated text) :else {:text "" :annotations []})] [(assoc-in doc [:nodes id] {:id id :text text :annotations annotations :children [] :parent nil}) id])) (declare normalize-markdown) (defn sanitize-text [node] (let [trimmed (text-ops/trim-node-text node) result (normalize-markdown (:text trimmed) (:annotations trimmed []))] (assoc trimmed :text (:text result) :annotations (annotations/merge-annotations (:annotations result))))) (defn normalize-markdown "Scan text for markdown syntax, convert found patterns to annotations, strip marker characters from the text, and adjust existing annotation offsets. Returns {:text ... :annotations ...} with combined (but not yet merged) annotations. Links and horizontal lines keep their text as-is." [text existing-annotations] (if (string/blank? text) {:text (or text "") :annotations (vec existing-annotations)} (let [parts (parse-markdown text) {:keys [removals new-annotations]} (collect-removals-and-annotations parts) sorted-removals (sort-by first removals) new-text (strip-marker-ranges text sorted-removals) adjusted-existing (adjust-annotations existing-annotations sorted-removals) adjusted-new (adjust-annotations new-annotations sorted-removals) all-annotations (->> (into adjusted-existing adjusted-new) (filter #(< (:range-start %) (:range-end %))) vec)] {:text new-text :annotations (text-ops/merge-style-annotations all-annotations)}))) (defn parse-multi-line-markdown [text] (let [lines (string/split-lines text)] (keep (fn [line] (let [[_ indent bullet content] (re-find #"^(\s*)([-*+]\s+)?(.*)$" line) level (quot (count (string/replace indent "\t" " ")) 2) complete? (and (string? content) (string/starts-with? content completed-markdown-prefix)) clean-text (if complete? (subs content (count completed-markdown-prefix)) (or content ""))] {:text clean-text :level level :has-bullet? (some? bullet) :blank? (string/blank? line) :complete? complete?})) lines))) (defn- find-pasted-parent-id [level-to-id rel-level] (loop [l (dec rel-level)] (if (< l -1) nil (if (contains? level-to-id l) (get level-to-id l) (recur (dec l)))))) (defn- attach-pasted-node [doc new-id rel-level prev-level last-id level-to-id anchor-id] (let [parent-id (if (> rel-level prev-level) last-id (find-pasted-parent-id level-to-id rel-level))] (if-let [prev-at-level (get level-to-id rel-level)] [new-id (tree/attach-node doc new-id prev-at-level :after)] (if (some? parent-id) [new-id (tree/attach-node doc new-id parent-id :last-child)] [new-id (tree/attach-node doc new-id anchor-id :after)])))) (defn- handle-pasted-anchor [doc node id before before-annotations first-line lines] (if (:has-bullet? first-line) [(assoc-in doc [:nodes id] (assoc node :text before :annotations before-annotations)) lines {-1 (tree/get-parent-id doc id)} -1] (let [annotation-line (markdown-to-annotated (:text first-line)) new-text (str before (:text annotation-line)) new-annotations (annotations/merge-annotations (into before-annotations (map #(-> % (update :range-start + (count before)) (update :range-end + (count before))) (:annotations annotation-line))))] [(assoc-in doc [:nodes id] (assoc node :text new-text :annotations new-annotations :complete? (:complete? first-line))) (rest lines) {-1 id} -1]))) (defn- attach-remaining-text [doc id after after-annotations] (let [node (tree/get-node doc id) text (:text node) new-text (str text after) new-annotations (into (:annotations node []) (map #(-> % (update :range-start + (count text)) (update :range-end + (count text))) after-annotations))] (assoc-in doc [:nodes id] (sanitize-text (assoc node :text new-text :annotations new-annotations))))) (defn- insert-remaining-lines [doc lines base-level anchor-id level-to-id prev-level] (loop [doc doc remaining lines last-id anchor-id level-to-id level-to-id prev-level prev-level] (if (seq remaining) (let [line (first remaining)] (if (:blank? line) (recur doc (rest remaining) last-id level-to-id prev-level) (let [rel-level (- (:level line) base-level) [doc new-id] (create-node-in-doc doc nil (:text line)) doc (cond-> (update-in doc [:nodes new-id] #(sanitize-text %)) (:complete? line) (assoc-in [:nodes new-id :complete?] true)) [new-node-id updated-doc] (attach-pasted-node doc new-id rel-level prev-level last-id level-to-id anchor-id) new-level-to-id (assoc (into {} (filter (fn [[k v]] (<= k rel-level)) level-to-id)) rel-level new-node-id)] (recur updated-doc (rest remaining) new-node-id new-level-to-id rel-level)))) [doc last-id]))) (defn insert-markdown-nodes [doc view sys id pos text] (let [lines (parse-multi-line-markdown text)] (if (seq lines) (let [node (tree/get-node doc id) node-text (:text node) node-annotations (:annotations node []) before (subs node-text 0 pos) after (subs node-text pos) [before-annotations after-annotations] (text-ops/split-annotations node-annotations pos) first-line (first lines) base-level (:level first-line) [doc remaining level-to-id anchor-prev-level] (handle-pasted-anchor doc node id before before-annotations first-line lines) [doc last-id] (insert-remaining-lines doc remaining base-level id level-to-id anchor-prev-level) doc (attach-remaining-text doc last-id after after-annotations)] {:doc doc :view (assoc view :focused-id last-id :preserved-caret-position (count (:text (tree/get-node doc last-id))))}) {:doc doc :view view}))) (defn node-to-markdown ([doc id] (node-to-markdown doc id 0)) ([doc id level] (let [node (tree/get-node doc id) indent (apply str (repeat (* level 2) " ")) text (:text node) annotations (:annotations node []) md-text (annotated-to-markdown text annotations) prefix (if (:complete? node) completed-markdown-prefix "") children (:children node) bullet (if (zero? level) "" "- ") children-md (string/join (map #(node-to-markdown doc % (inc level)) children)) separator (if (and (zero? level) (seq children)) "\n" "")] (str indent bullet prefix md-text "\n" separator children-md))))