(ns metabase.util.markdown (:require [clojure.edn :as edn] [clojure.java.io :as io] [clojure.string :as str] [clojure.walk :as walk] [metabase.public-settings :as public-settings] [metabase.util :as u]) (:import (com.vladsch.flexmark.ast AutoLink BlockQuote BulletList BulletListItem Code Emphasis FencedCodeBlock HardLineBreak Heading HtmlBlock HtmlCommentBlock HtmlEntity HtmlInline HtmlInlineBase HtmlInlineComment HtmlInnerBlockComment Image ImageRef IndentedCodeBlock Link LinkRef MailLink OrderedList OrderedListItem Paragraph Reference SoftLineBreak StrongEmphasis Text ThematicBreak) (com.vladsch.flexmark.ext.autolink AutolinkExtension) (com.vladsch.flexmark.html HtmlRenderer LinkResolver LinkResolverFactory) (com.vladsch.flexmark.html.renderer LinkResolverBasicContext LinkStatus) (com.vladsch.flexmark.parser Parser) (com.vladsch.flexmark.util.ast Document Node) (com.vladsch.flexmark.util.data MutableDataSet) (java.net URI))) | |
(set! *warn-on-reflection* true) | |
+----------------------------------------------------------------------------------------------------------------+ | Markdown parsing | +----------------------------------------------------------------------------------------------------------------+ | |
An instance of a Flexmark parser | (def ^:private parser (let [options (.. (MutableDataSet.) (set Parser/EXTENSIONS [(AutolinkExtension/create)]))] (.build (Parser/builder options)))) |
Mappings from Flexmark AST nodes to keyword tags | (def ^:private node-to-tag-mapping {Document :document Paragraph :paragraph ThematicBreak :horizontal-line HardLineBreak :hard-line-break SoftLineBreak :soft-line-break Heading :heading StrongEmphasis :bold Emphasis :italic OrderedList :ordered-list BulletList :unordered-list OrderedListItem :list-item BulletListItem :list-item Code :code FencedCodeBlock :codeblock IndentedCodeBlock :codeblock BlockQuote :blockquote Link :link Reference :reference LinkRef :link-ref ImageRef :image-ref Image :image AutoLink :auto-link MailLink :mail-link HtmlEntity :html-entity HtmlBlock :html-block HtmlInline :html-inline HtmlCommentBlock :html-comment-block HtmlInlineBase :html-inline-base HtmlInlineComment :html-inline-comment HtmlInnerBlockComment :html-inner-block-comment}) |
(defn- node-to-tag [node] (node-to-tag-mapping (type node))) | |
(defprotocol ^:private ASTNode (to-clojure [this])) | |
(defn- convert-children [node] (map to-clojure (.getChildren ^Node node))) | |
(extend-protocol ASTNode Node (to-clojure [this] {:tag (node-to-tag this) :attrs {} :content (convert-children this)}) Text (to-clojure [this] (str (.getChars this))) FencedCodeBlock (to-clojure [this] {:tag (node-to-tag this) :attrs {} :content (str (.getContentChars this))}) IndentedCodeBlock (to-clojure [this] {:tag (node-to-tag this) :attrs {} :content (str (.getContentChars this))}) Link (to-clojure [this] {:tag (node-to-tag this) :attrs {:href (str (.getUrl this)) :title (not-empty (str (.getTitle this)))} :content (convert-children this)}) Reference (to-clojure [this] {:tag (node-to-tag this) :attrs {:title (not-empty (str (.getTitle this))) :label (str (.getReference this)) :url (str (.getUrl this))}}) LinkRef (to-clojure [this] {:tag (node-to-tag this) :attrs {:reference (-> (.getDocument this) (.get Parser/REFERENCES) (get (u/lower-case-en (str (.getReference this)))) to-clojure)} :content (convert-children this)}) ImageRef (to-clojure [this] {:tag (node-to-tag this) :attrs {:reference (-> (.getDocument this) (.get Parser/REFERENCES) (get (u/lower-case-en (str (.getReference this)))) to-clojure)} :content (convert-children this)}) Image (to-clojure [this] {:tag (node-to-tag this) :attrs {:src (str (.getUrl this)) :alt (str (.getText this)) :title (not-empty (str (.getTitle this)))}}) AutoLink (to-clojure [this] {:tag (node-to-tag this) :attrs {:href (str (.getUrl this))}}) MailLink (to-clojure [this] {:tag (node-to-tag this) :attrs {:address (str (.getText this))}}) HtmlEntity (to-clojure [this] {:tag (node-to-tag this) :content (str (.getChars this))}) HtmlBlock (to-clojure [this] (str (.getChars this))) HtmlInline (to-clojure [this] (str (.getChars this))) HtmlCommentBlock (to-clojure [this] (str (.getChars this))) HtmlInlineComment (to-clojure [this] (str (.getChars this))) nil (to-clojure [_this] nil)) | |
+----------------------------------------------------------------------------------------------------------------+ | Slack markup generation | +----------------------------------------------------------------------------------------------------------------+ | |
(def ^:private html-entities (delay (edn/read-string (slurp (io/resource "html-entities.edn"))))) | |
(def ^:private escaped-chars-regex #"\\[\\/*_`'\[\](){}<>#+-.!$@%^&=|\?~]") | |
Insert zero-width characters before and after certain characters that are escaped in the Markdown (or are otherwise parsed as plain text) to prevent them from being parsed as formatting in Slack. | (defn- escape-text [string] (-> string ;; First, remove backslashes from escaped formatting characters since they're not removed during Markdown parsing (str/replace escaped-chars-regex #(str (second %1))) ;; Add a soft hyphen around certain chars to avoid triggering formatting in Slack (str/replace "&" "\u00ad&\u00ad") (str/replace ">" "\u00ad>\u00ad") (str/replace "<" "\u00ad<\u00ad") (str/replace "*" "\u00ad*\u00ad") (str/replace "_" "\u00ad_\u00ad") (str/replace "`" "\u00ad`\u00ad") (str/replace "~" "\u00ad~\u00ad"))) |
If the provided URI is a relative path, resolve it relative to the site URL so that links work correctly in Slack/Email. | (defn- resolve-uri [^String uri] (letfn [(ensure-slash [s] (when s (cond-> s (not (str/ends-with? s "/")) (str "/"))))] (when uri (if-let [^String site-url (ensure-slash (public-settings/site-url))] (.. (URI. site-url) (resolve uri) toString) uri)))) |
Given the value from the :content field of a Markdown AST node, and a keyword representing a tag type, converts all
instances of the tag in the content to | (defn- ^:private strip-tag [content tag] (walk/postwalk (fn [node] (if (and (map? node) (= (:tag node) tag)) (assoc node :tag :default) node)) content)) |
Takes an AST representing Markdown input, and converts it to a string that will render nicely in Slack. Some of the differences to Markdown include:
* All headers are just rendered as bold text.
* Ordered and unordered lists are printed in plain text.
* Inline images are rendered as text that links to the image source, e.g. | (defmulti ast->slack {:arglists '([ast-map])} :tag) |
Given the value from the :content field of a Markdown AST node, recursively resolves subnodes into a nested list of strings. | (defn ^:private resolved-content [content] (if (string? content) (escape-text content) (map #(if (string? %) (escape-text %) (ast->slack %)) content))) |
Given the resolved content of a Markdown AST node, converts it into a single flattened string. This is used for rendering a couple specific types of nodes, such as list items. | (defn ^:private resolved-content-string [resolved-content] (-> resolved-content flatten str/join)) |
Given the value from the :content field of a Markdown AST node, recursively resolves it and returns a list of strings corresponding to individual lines in the result. | (defn ^:private resolved-lines [content] (-> content resolved-content resolved-content-string str/split-lines)) |
(defmethod ast->slack :default [{content :content}] (resolved-content content)) | |
(defmethod ast->slack :document [{content :content}] (resolved-content content)) | |
(defmethod ast->slack :paragraph [{content :content}] [(resolved-content content) "\n"]) | |
(defmethod ast->slack :soft-line-break [_] " ") | |
(defmethod ast->slack :hard-line-break [_] "\n") | |
(defmethod ast->slack :horizontal-line [_] "\nāāāāāāāāāāāāāāāāāāā\n") | |
(defmethod ast->slack :heading [{content :content}] ["*" (resolved-content content) "*\n"]) | |
(defmethod ast->slack :bold [{content :content}] ["*" (resolved-content (strip-tag content :bold)) "*"]) | |
(defmethod ast->slack :italic [{content :content}] ["_" (resolved-content (strip-tag content :italic)) "_"]) | |
(defmethod ast->slack :code [{content :content}] ["`" (resolved-content content) "`"]) | |
(defmethod ast->slack :codeblock [{content :content}] ["```\n" (resolved-content content) "```"]) | |
(defmethod ast->slack :blockquote [{content :content}] (let [lines (resolved-lines content)] (interpose "\n" (map (fn [line] [">" line]) lines)))) | |
(defmethod ast->slack :link [{:keys [content attrs]}] (let [resolved-uri (resolve-uri (:href attrs)) resolved-content (resolved-content content)] (if (contains? #{:image :image-ref} (:tag (first content))) ;; If this is a linked image, add link target on separate line after image placeholder [resolved-content "\n(" resolved-uri ")"] ["<" resolved-uri "|" resolved-content ">"]))) | |
(defmethod ast->slack :link-ref [{:keys [content attrs]}] (let [resolved-uri (resolve-uri (-> attrs :reference :attrs :url)) resolved-content (resolved-content content)] (if resolved-uri ["<" resolved-uri "|" resolved-content ">"] ;; If this was parsed as a link-ref but has no reference, assume it was just a pair of square brackets and ;; restore them. This is a known discrepency between flexmark-java and Markdown rendering on the frontend. ["[" resolved-content "]"]))) | |
(defmethod ast->slack :auto-link [{{href :href} :attrs}] ["<" href ">"]) | |
(defmethod ast->slack :mail-link [{{address :address} :attrs}] ["<mailto:" address "|" address ">"]) | |
(defmethod ast->slack :list-item [{content :content}] (let [resolved-content (resolved-content content) ;; list items might have nested lists or other elements, which should have their indentation level increased indented-content (->> (rest resolved-content) resolved-content-string str/split-lines (map #(str " " %)) (str/join "\n"))] (if-not (str/blank? indented-content) [(first resolved-content) indented-content "\n"] resolved-content))) | |
(defmethod ast->slack :unordered-list [{content :content}] (map (fn [list-item] ["ā¢ " list-item]) (resolved-content content))) | |
(defmethod ast->slack :ordered-list [{content :content}] (map-indexed (fn [idx list-item] [(inc idx) ". " list-item]) (resolved-content content))) | |
(defmethod ast->slack :image [{{:keys [src alt]} :attrs}] ;; Replace images with text that links to source, including alt text if available (if (str/blank? alt) ["<" src "|[Image]>"] ["<" src "|[Image: " alt "]>"])) | |
(defmethod ast->slack :image-ref [{:keys [content attrs]}] (let [src (-> attrs :reference :attrs :url) alt (-> content resolved-content resolved-content-string)] (if (str/blank? alt) ["<" src "|[Image]>"] ["<" src "|[Image: " alt "]>"]))) | |
(defmethod ast->slack :html-entity [{content :content}] (some->> content (get @html-entities) (:characters))) | |
Returns true if this node was parsed as a link ref, but has no references. This probably means the original text was just a pair of square brackets, and not an actual link ref. This is a known discrepency between flexmark-java and Markdown rendering on the frontend. | (defn- empty-link-ref? [^Node node] (and (instance? LinkRef node) (-> (.getDocument node) (.get Parser/REFERENCES) empty?))) |
An instance of a Flexmark HTML renderer | (def ^:private renderer (let [options (.. (MutableDataSet.) (set HtmlRenderer/ESCAPE_HTML true) (toImmutable)) lr-factory (reify LinkResolverFactory (^LinkResolver apply [_this ^LinkResolverBasicContext _context] (reify LinkResolver (resolveLink [_this node _context link] (if-let [url (cond (instance? MailLink node) (.getUrl link) (empty-link-ref? node) nil :else (resolve-uri (.getUrl link)))] (.. link (withStatus LinkStatus/VALID) (withUrl url)) link)))))] (.build (.linkResolverFactory (HtmlRenderer/builder options) lr-factory)))) |
Converts a markdown string from a virtual card into a form that can be sent to a channel (Slack's markup language, or HTML for email). | (defmulti process-markdown {:arglists '([markdown channel-type])} (fn [_markdown channel-type] channel-type)) |
(defmethod process-markdown :slack [markdown _channel-type] (-> (.parse ^Parser parser ^String markdown) to-clojure ast->slack flatten str/join str/trim)) | |
(defmethod process-markdown :html [markdown _channel-type] (let [ast (.parse ^Parser parser ^String markdown)] (.render ^HtmlRenderer renderer ^Document ast))) | |