Macaw

0.0.1-SNAPSHOT


A Clojure wrapper for JSqlParser 🦜




(this space intentionally left almost blank)
 
(ns macaw.collect
  (:require
   [clojure.string :as str]
   [macaw.util :as u]
   [macaw.walk :as mw])
  (:import
   (com.metabase.macaw AstWalker$Scope)
   (java.util.regex Pattern)
   (net.sf.jsqlparser.expression Alias)
   (net.sf.jsqlparser.schema Column Table)
   (net.sf.jsqlparser.statement Statement)
   (net.sf.jsqlparser.statement.select AllTableColumns SelectItem)))
(set! *warn-on-reflection* true)
(defn- conj-to
  ([key-name]
   (conj-to key-name identity))
  ([key-name xf]
   (fn item-conjer [results component context]
     (update results key-name conj {:component (xf component)
                                    :context   context}))))
(defn- query->raw-components
  [^Statement parsed-ast]
  (mw/fold-query parsed-ast
                 {:alias            (conj-to :aliases (fn [^SelectItem item]
                                                        {:alias      (.getName (.getAlias item))
                                                         :expression (.getExpression item)}))
                  :column           (conj-to :columns)
                  :column-qualifier (conj-to :qualifiers)
                  :mutation         (conj-to :mutation-commands)
                  :pseudo-table     (conj-to :pseudo-tables)
                  :table            (conj-to :tables)
                  :table-wildcard   (conj-to :table-wildcards)
                  :wildcard         (conj-to :has-wildcard? (constantly true))}
                 {:aliases           #{}
                  :columns           #{}
                  :has-wildcard?     #{}
                  :mutation-commands #{}
                  :pseudo-tables     #{}
                  :tables            #{}
                  :table-wildcards   #{}}))

tables

(def ^:private quotes (map str "`\"["))
(def ^:private closing {"[" "]"})
(defn- quoted? [s]
  (some (fn [q]
          (and (str/starts-with? s q)
               (str/ends-with? s (closing q q))))
        quotes))
(defn- strip-quotes [s]
  (subs s 1 (dec (count s))))
(defn- setting->relax-case [{:keys [case-insensitive]}]
  (when case-insensitive
    (case case-insensitive
      :lower str/lower-case
      :upper str/upper-case
      ;; This will work for replace, but not for analyzing where we need a literal to accumulate
      :agnostic (fn [s] (re-pattern (str "(?i)" (Pattern/quote s)))))))

Normalize a schema, table, column, etc. references so that we can match them regardless of syntactic differences.

(defn normalize-reference
  [s {:keys [preserve-identifiers? quotes-preserve-case?] :as opts}]
  (if preserve-identifiers?
    s
    (when s
      (let [quoted     (quoted? s)
            relax-case (when-not (and quotes-preserve-case? quoted)
                         (setting->relax-case opts))]
        (cond-> s
          quoted strip-quotes
          relax-case relax-case)))))
(defn- find-table [{:keys [alias->table name->table keep-internal-tables?] :as opts} ^Table t]
  (let [n      (normalize-reference (.getName t) opts)
        schema (normalize-reference (.getSchemaName t) opts)]
    (or (get alias->table n)
        (:component (last (u/find-relevant name->table {:table n :schema schema} [:table :schema])))
        (when keep-internal-tables?
          {:table n, :schema schema, :internal? true}))))
(defn- find-qualifier-table [opts ^Table q _ctx]
  (when-let [table (find-table opts q)]
    (cond-> table
      (:with-instance opts) (assoc :instances [q]))))
(defn- make-table [{:keys [with-instance qualifier? alias->table] :as opts} ^Table t _ctx]
  (if (and qualifier?
           (get alias->table (.getName t)))
    (get alias->table (.getName t))
    (u/strip-nils
     {:table     (normalize-reference (.getName t) opts)
      :schema    (normalize-reference (.getSchemaName t) opts)
      :instances (when with-instance [t])})))
(defn- alias-mapping
  [opts ^Table table ctx]
  (when-let [^Alias table-alias (.getAlias table)]
    [(.getName table-alias) (make-table opts table ctx)]))

JSQLParser can't tell whether the f in select f.* refers to a real table or an alias. Therefore, we have to disambiguate them based on our own map of aliases->table names. So this function will return the real name of the table referenced in a table-wildcard (as far as can be determined from the query).

(defn- resolve-table-name
  [opts ^AllTableColumns atc _ctx]
  (find-table opts (.getTable atc)))

columns

(defn- maybe-column-table [alias? {:keys [name->table] :as opts} ^Column c]
  (if-let [t (.getTable c)]
    (find-table opts t)
    ;; if we see only a single table, we can safely say it's the table of that column
    (when (and (= (count name->table) 1) (not (alias? (.getColumnName c))))
      (:component (val (first name->table))))))
(defn- scope-type [^AstWalker$Scope s] (keyword (.getType s)))
(defn- make-column [aliases opts ^Column c ctx]
  (let [{:keys [schema table]} (maybe-column-table aliases opts c)]
    (u/strip-nils
     {:schema    schema
      :table     table
      :column    (normalize-reference (.getColumnName c) opts)
      :alias     (when-let [s (first ctx)]
                   (when (= :alias (scope-type s))
                     (.getLabel ^AstWalker$Scope s)))
      :instances (when (:with-instance opts) [c])})))

get them together

(defn- only-query-context [ctx]
  (filter #(= (scope-type %) :query) ctx))
(def ^:private strip-non-query-contexts
  (map #(update % :context only-query-context)))
(defn- update-components
  ([f]
   (map #(update % :component f (:context %))))
  ([f components]
   (eduction (update-components f) components)))

Merge two nodes, keeping the union of their instances.

(defn- merge-with-instances
  [a b]
  (let [cs-a (-> a :component :instances)]
    (cond-> (merge a b)
      cs-a (update-in [:component :instances] into cs-a))))
(defn- literal? [{:keys [column]}]
  ;; numbers and strings are already handled by JSQLParser
  (#{"true" "false"} (str/lower-case column)))

Remove any unqualified references that would resolve to an alias or qualified reference

(defn- remove-redundant-columns
  [alias? column-set]
  (let [{qualified true, unqualified false} (group-by (comp boolean :table) column-set)
        ;; Get all the bindings introduced by qualified columns
        has-qualification? (into #{} (mapcat #(keep % qualified)) [:column :alias])]
    (into qualified
          (remove (comp (some-fn alias? has-qualification?)
                        :column))
          unqualified)))

If a column reference is qualified by a table that we don't know about, its probably not a real field. See the :generate_series fixture.

(defn- remove-phantom-table-columns
  [table-map columns]
  (let [known-table-name? (into #{} (map (comp :table :component val)) table-map)]
    (filter (fn [{s :schema, t :table}]
              (or (nil? t) (some? s) (known-table-name? t)))
            columns)))
(defn- infer-table-schema [columns node]
  (update node :component
          #(let [{:keys [schema table] :as element} %]
             (if schema
               element
               (if-let [schema' (->> columns
                                     (filter (comp #{table} :table))
                                     (some :schema))]
                 (assoc element :schema schema')
                 element)))))

See macaw.core/query->components doc.

(defn query->components
  [^Statement parsed-ast & {:as opts}]
  (let [{:keys [aliases
                columns
                qualifiers
                has-wildcard?
                mutation-commands
                pseudo-tables
                tables
                table-wildcards]} (query->raw-components parsed-ast)
        alias-map                 (into {} (map #(-> % :component ((partial alias-mapping opts) (:context %))) tables))
        ;; we're parsing qualifiers here for a single purpose - rewrite uses instances to find tables for renaming
        table-map                 (->> (update-components (partial make-table opts) tables)
                                       (u/group-with #(select-keys (:component %) [:schema :table])
                                                     merge-with-instances))
        pseudo-table-names        (into #{} (comp (map :component)
                                                  (map (fn [^Alias a] (.getName a))))
                                        pseudo-tables)
        table-map                 (into (empty table-map)
                                        (remove (comp pseudo-table-names :table :component val))
                                        table-map)
        ;; we need both aliases and tables for columns
        opts                      (assoc opts
                                         :alias->table alias-map
                                         :name->table table-map)
        qualifier-map             (->> (update-components (partial find-qualifier-table opts) qualifiers)
                                       (u/group-with #(select-keys (:component %) [:schema :table])
                                                     merge-with-instances))
        alias?                    (into #{} (keep (comp :alias :component)) aliases)
        all-columns               (into #{}
                                        (comp (update-components (partial make-column alias?
                                                                          (assoc opts :keep-internal-tables? true)))
                                              strip-non-query-contexts)
                                        columns)
        strip-alias               (fn [c] (dissoc c :alias))
        source-columns            (->> (map :component all-columns)
                                       (remove-redundant-columns alias?)
                                       (remove literal?)
                                       (remove-phantom-table-columns table-map)
                                       (into #{}
                                             (comp (remove (comp pseudo-table-names :table))
                                                   (remove :internal?)
                                                   (map strip-alias))))
        table-map                 (update-vals table-map (partial infer-table-schema source-columns))]
    {:columns           all-columns
     :source-columns    source-columns
     ;; result-columns ... filter out the elements (and wildcards) in the top level scope only.
     :has-wildcard?     (into #{} strip-non-query-contexts has-wildcard?)
     :mutation-commands (into #{} mutation-commands)
     :tables            (into #{} (comp (map val)
                                        (remove (comp :internal? :component))
                                        strip-non-query-contexts)
                              table-map)
     :tables-superset   (into #{}
                              (comp (map val) strip-non-query-contexts)
                              (merge-with merge-with-instances qualifier-map table-map))
     :table-wildcards   (into #{}
                              (comp strip-non-query-contexts
                                    (update-components (partial resolve-table-name opts)))
                              table-wildcards)}))
 
(ns macaw.config
  (:require
   [clojure.string :as str]
   [environ.core :as env]))

Default configuration; could be overridden by Java properties, environment variables, etc.

(def ^:private app-defaults
  {:macaw-run-mode "prod"})
(defn- config-value
  [k]
  (let [env-val (get env/env k)]
    (or (when-not (str/blank? env-val) env-val)
        (get app-defaults k))))

The mode (dev/test/prod) in which Macaw is being run.

(def run-mode 
  (some-> :macaw-run-mode config-value keyword))

Is Macaw running in dev mode (i.e., in a REPL)?

(defn is-dev?
  []
  (= run-mode :dev))

Is Macaw running in test mode?

(defn is-test?
  []
  (= run-mode :test))

Is Macaw running in prod mode (i.e., from a JAR)?

(defn is-prod?
  []
  (= run-mode :prod))
 
(ns macaw.core
  (:require
   [clojure.string :as str]
   [clojure.walk :as walk]
   [macaw.collect :as collect]
   [macaw.rewrite :as rewrite]
   [macaw.types :as m.types]
   [macaw.util.malli :as mu])
  (:import
   (com.metabase.macaw AnalysisError AstWalker$Scope BasicTableExtractor CompoundTableExtractor)
   (java.util.function Consumer)
   (net.sf.jsqlparser JSQLParserException)
   (net.sf.jsqlparser.parser CCJSqlParser CCJSqlParserUtil)
   (net.sf.jsqlparser.parser.feature Feature)
   (net.sf.jsqlparser.schema Table)
   (net.sf.jsqlparser.statement Statement)))
(set! *warn-on-reflection* true)
(defn- escape-keywords ^String [sql keywords]
  (reduce
   (fn [sql k]
     (str/replace sql (re-pattern (str "(?i)\\b(" (name k) ")\\b")) "$1____escaped____"))
   sql
   keywords))
(defn- unescape-keywords [sql _keywords]
  (str/replace sql "____escaped____" ))
(def ^:private features
  {:backslash-escape-char  Feature/allowBackslashEscapeCharacter
   :complex-parsing        Feature/allowComplexParsing
   :postgres-syntax        Feature/allowPostgresSpecificSyntax
   :square-bracket-quotes  Feature/allowSquareBracketQuotation
   :unsupported-statements Feature/allowUnsupportedStatements})
(defn- ->Feature ^Feature [k]
  (get features k))
(def ^:private default-timeout-seconds 5)
(defn- ->parser-fn ^Consumer [opts]
  (reify Consumer
    (accept [_this parser]
      (let [^long timeout-ms (:timeout opts (* default-timeout-seconds 1000))]
        (.withFeature ^CCJSqlParser parser Feature/timeOut timeout-ms))
      (doseq [[f ^boolean v] (:features opts)]
        (.withFeature ^CCJSqlParser parser (->Feature f) v)))))

Main entry point: takes a string query and returns a Statement object that can be handled by the other functions.

(defn parsed-query
  [^String query & {:as opts}]
  (try
    (-> query
        ;; Dialects like SQLite and Databricks treat consecutive blank lines as implicit semicolons.
        ;; JSQLParser, as a polyglot parser, always has this behavior, and there is no option to disable it.
        ;; This utility pre-processed the query to remove any such blank lines.
        (CCJSqlParserUtil/sanitizeSingleSql)
        (escape-keywords (:non-reserved-words opts))
        (CCJSqlParserUtil/parse (->parser-fn opts)))
    (catch JSQLParserException e
      {:error   :macaw.error/unable-to-parse
       :context {:cause e}})))

A unique identifier for the given scope.

(defn scope-id
  [^AstWalker$Scope s]
  (.getId s))

The type of scope we're talking about e.g., a top-level SELECT.

(defn scope-label
  [^AstWalker$Scope s]
  (.getLabel s))
(defn- ->macaw-error [^AnalysisError analysis-error]
  {:error (keyword "macaw.error" (-> (.-errorType analysis-error)
                                     str/lower-case
                                     (str/replace #"_" "-")))})
(mu/defn query->components :- [:or m.types/error-result m.types/components-result]
  "Given a parsed query (i.e., a [subclass of] `Statement`) return a map with the elements found within it.
  (Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in
  the query'; this function doesn't do additional inference work to find out a table's schema.)"
  [parsed       :- [:or m.types/error-result [:fn #(instance? Statement %)]]
   & {:as opts} :- [:maybe m.types/options-map]]
  ;; By default, we will preserve identifiers verbatim, to be agnostic of casing and quoting.
  ;; This may result in duplicate components, which are left to the caller to deduplicate.
  ;; In Metabase's case, this is done during the stage where the database metadata is queried.
  (try
    (if (map? parsed)
      parsed
      (->> (collect/query->components parsed (merge {:preserve-identifiers? true} opts))
           (walk/postwalk (fn [x]
                            (if (string? x)
                              (unescape-keywords x (:non-reserved-words opts))
                              x)))))
    (catch AnalysisError e
      (->macaw-error e))))
(defn- raw-components [xs]
  (into (empty xs) (keep :component) xs))

Given a table object, return a map with the schema and table names.

(defn- table->identifier
  [^Table t]
  (if (.getSchemaName t)
    {:schema (.getSchemaName t)
     :table  (.getName t)}
    {:table (.getName t)}))
(defn- tables->identifiers [expr]
  {:tables (set (map table->identifier expr))})
(mu/defn query->tables :- [:or m.types/error-result m.types/tables-result]
  "Given a parsed query (i.e., a [subclass of] `Statement`) return a set of all the table identifiers found within it."
  [sql :- :string & {:keys [mode] :as opts} :- [:maybe m.types/options-map]]
  (try
    (let [parsed (parsed-query sql opts)]
      (if (map? parsed)
        parsed
        (case mode
          :ast-walker-1 (-> (query->components parsed opts) :tables raw-components (->> (hash-map :tables)))
          :basic-select (-> (BasicTableExtractor/getTables parsed) tables->identifiers)
          :compound-select (-> (CompoundTableExtractor/getTables parsed) tables->identifiers))))
    (catch AnalysisError e
      (->macaw-error e))))
(mu/defn replace-names :- :string
  "Given an SQL query, apply the given table, column, and schema renames.
  Supported options:
  - case-insensitive: whether to relax the comparison
    - :upper    - identifiers are implicitly case to uppercase, as per the SQL-92 standard.
    - :lower    - identifiers are implicitly cast to lowercase, as per Postgres et al.
    - :agnostic - case is ignored when comparing identifiers in code to replacement \"from\" strings.
  - quotes-preserve-case: whether quoted identifiers should override the previous option."
  [sql          :- :string
   renames      :- :map
   & {:as opts} :- [:maybe m.types/options-map]]
  ;; We need to pre-sanitize the SQL before its analyzed so that the AST token positions match up correctly.
  ;; Currently, we use a more complex and expensive sanitization method, so that it's reversible.
  ;; If we decide that it's OK to normalize whitespace etc. during replacement, then we can use the same helper.
  (let [sql'     (-> (str/replace sql #"(?m)^\n" " \n")
                     (escape-keywords (:non-reserved-words opts)))
        opts'    (select-keys opts [:case-insensitive :quotes-preserve-case? :allow-unused?])
        renames' (walk/postwalk (fn [x]
                                  (if (string? x)
                                    (escape-keywords x (:non-reserved-words opts))
                                    x))
                                renames)
        parsed   (parsed-query sql' opts)]
    (-> (rewrite/replace-names sql' parsed renames' opts')
        (str/replace #"(?m)^ \n" "\n")
        (unescape-keywords (:non-reserved-words opts)))))
 
(ns macaw.rewrite
  (:require
   [macaw.collect :as collect]
   [macaw.util :as u]
   [macaw.walk :as mw])
  (:import
   (net.sf.jsqlparser.parser ASTNodeAccess SimpleNode)
   (net.sf.jsqlparser.schema Column Table)))
(set! *warn-on-reflection* true)
(defn- index-of-nth [^String haystack ^String needle n]
  (assert (not (neg? n)))
  (if (zero? n)
    -1
    (loop [n   n
           idx 0]
      (let [next-id (.indexOf haystack needle idx)]
        (cond
          (= 1 n) next-id
          (neg? next-id) next-id
          :else (recur (dec n) (inc next-id)))))))
(defn- ->idx [^String sql line col]
  ;; The second `dec` on `line` is a workaround for what appears to be an off-by-one error in this JSQLParser version.
  (+ col (index-of-nth sql "\n" (dec (dec line)))))

Find the start and end index of the underlying tokens for a given AST node from a given SQL string.

(defn- node->idx-range
  [^SimpleNode node sql]
  (let [first-token (.jjtGetFirstToken node)
        last-token  (.jjtGetLastToken node)
        first-idx   (->idx sql
                           (.-beginLine first-token)
                           (.-beginColumn first-token))
        last-idx    (->idx sql
                           (.-endLine last-token)
                           (.-endColumn last-token))]
    [first-idx last-idx]))
(defn- splice-replacements [^String sql replacements]
  (let [sb     (StringBuilder.)
        append #(.append sb %)]
    (loop [start 0
           [[[first-idx last-idx] value] & rst] replacements]
      (if (nil? last-idx)
        (when (< start (count sql))
          (append (.substring sql start)))
        (do (append (.substring sql start first-idx))
            (append value)
            (recur (inc ^long last-idx) rst))))
    (str sb)))

Emit a SQL string for an updated AST, preserving the comments and whitespace from the original SQL.

(defn- update-query
  [updated-ast updated-nodes sql & {:as _opts}]
  (let [updated-node? (set (map first updated-nodes))
        replacement   (fn [->text visitable]
                        (let [ast-node  (.getASTNode ^ASTNodeAccess visitable)
                              idx-range (node->idx-range ast-node sql)
                              node-text (->text visitable)]
                          [idx-range node-text]))
        replace-name  (fn [->text]
                        (fn [acc visitable _ctx]
                          (cond-> acc
                            (updated-node? visitable)
                            (conj (replacement ->text visitable)))))]
    (splice-replacements
     sql
     (mw/fold-query
      updated-ast
      {:table  (replace-name #(let [fqn     (.getFullyQualifiedName ^Table %)
                                    t-alias (.getAlias ^Table %)]
                                (if t-alias
                                  (str fqn " " (.getName t-alias))
                                  fqn)))
       :column (replace-name #(.getFullyQualifiedName ^Column %))}
      []))))
(defn- rename-table
  [updated-nodes table-renames schema-renames known-tables opts ^Table t _ctx]
  (when-let [rename (u/find-relevant table-renames (get known-tables t) [:table :schema])]
    ;; Handle both raw string renames, as well as more precise element based ones.
    (vswap! updated-nodes conj [t rename])
    (let [identifier (as-> (val rename) % (:table % %))]
      (.setName t identifier)))
  (let [raw-schema-name (.getSchemaName t)
        schema-name     (collect/normalize-reference raw-schema-name opts)]
    (when-let [schema-rename (u/seek (comp (partial u/match-component schema-name) key) schema-renames)]
      (vswap! updated-nodes conj [raw-schema-name schema-rename])
      (let [identifier (as-> (val schema-rename) % (:table % %))]
        (.setSchemaName t identifier)))))
(defn- rename-column
  [updated-nodes column-renames known-columns ^Column c _ctx]
  (when-let [rename (u/find-relevant column-renames (get known-columns c) [:column :table :schema])]
    ;; Handle both raw string renames, as well as more precise element based ones.
    (vswap! updated-nodes conj [c rename])
    (let [identifier (as-> (val rename) % (:column % %))]
      (.setColumnName c identifier))))
(defn- alert-unused! [updated-nodes renames]
  (let [known-rename? (set (map second updated-nodes))]
    (doseq [[k items] renames]
      (when-let [unknown (first (remove known-rename? items))]
        (throw (ex-info (str "Unknown rename: " unknown) {:type   k
                                                          :rename unknown}))))))
(defn- index-by-instances [xs]
  (into {} (for [x xs
                 :let [c (:component x)]
                 i (:instances c)]
             [i c])))

Given a SQL query and its corresponding (untransformed) AST, apply the given table and column renames.

(defn replace-names
  [sql parsed-ast renames & {:as opts}]
  (let [{schema-renames :schemas
         table-renames  :tables
         column-renames :columns} renames
        comps          (collect/query->components parsed-ast (assoc opts :with-instance true))
        columns        (index-by-instances (:columns comps))
        tables         (index-by-instances (:tables-superset comps))
        ;; execute rename
        updated-nodes  (volatile! [])
        rename-table*  (partial rename-table updated-nodes table-renames schema-renames tables opts)
        rename-column* (partial rename-column updated-nodes column-renames columns)
        res            (-> parsed-ast
                           (mw/walk-query
                            {:table            rename-table*
                             :column-qualifier rename-table*
                             :column           rename-column*})
                           (update-query @updated-nodes sql opts))]
    (when-not (:allow-unused? opts)
      (alert-unused! @updated-nodes renames))
    res))
 
(ns macaw.types)

The different analyzer strategies that Macaw supports.

(def modes
  [:ast-walker-1
   :basic-select
   :compound-select])

The shape of the options accepted by our API

(def options-map
  [:map
   [:mode                  {:optional true} (into [:enum] modes)]
   [:non-reserved-words    {:optional true} [:seqable :keyword]]
   [:allow-unused?         {:optional true} :boolean]
   [:case-insensitive      {:optional true} [:enum :upper :lower :agnostic]]
   [:quotes-preserve-case? {:optional true} :boolean]])

The different types of errors that Macaw can return.

(def error-types
  [:macaw.error/analysis-error
   :macaw.error/illegal-expression
   :macaw.error/invalid-query
   :macaw.error/unable-to-parse
   :macaw.error/unsupported-expression])

A map indicating that we were not able to parse the query.

(def error-result
  [:map
   [:error (into [:enum] error-types)]])
(def ^:private table-ident
  [:map
   [:schema {:optional true} :string]
   [:table                   :string]])
(def ^:private column-ident
  [:map
   [:schema {:optional true} :string]
   [:table  {:optional true} :string]
   [:column                  :string]])
(defn- with-context [t]
  [:map
   [:component t]
   [:context :any]])

A map holding all the components that we were able to parse from a query

(def components-result
  [:map {:closed true}
   [:tables         [:set (with-context table-ident)]]
   [:columns        [:set (with-context column-ident)]]
   [:source-columns [:set column-ident]]
   ;; TODO Unclear why we would want to wrap any of these.
   [:table-wildcards   [:set (with-context table-ident)]]
   ;; This :maybe would be a problem, if anything actually used this value.
   [:tables-superset   [:set (with-context [:maybe table-ident])]]
   ;; Unclear why we need a collection here
   [:has-wildcard?     [:set (with-context :boolean)]]
   [:mutation-commands [:set (with-context :string)]]])

A map holding the tables that we were able to parse from a query

(def tables-result
  [:map
   [:tables [:set table-ident]]])
 
(ns macaw.util
  (:require
   [clojure.string :as str])
  (:import (java.util.regex Pattern)))

Generalized group-by, where you can supply your own reducing function (instead of usual conj).

https://ask.clojure.org/index.php/12319/can-group-by-be-generalized

(defn group-with
  [kf rf coll]
  (persistent!
   (reduce
    (fn [ret x]
      (let [k (kf x)]
        (assoc! ret k (rf (get ret k) x))))
    (transient {})
    coll)))

Like (first (filter ... )), but doesn't realize chunks of the sequence. Returns the first item in coll for which pred returns a truthy value, or nil if no such item is found.

(defn seek
  [pred coll]
  (reduce
   (fn [acc x] (if (pred x) (reduced x) acc))
   nil
   coll))

A hack around the fact that we don't (yet) track what columns are exposed by given sentinels.

(defn non-sentinel
  [s]
  (when s
    (nil? (str/index-of s "_sentinel_"))))

Check whether the given literal matches the expected literal or pattern.

(defn match-component
  [expected actual]
  (when expected
    (if (instance? Pattern expected)
      (boolean (re-find expected actual))
      (= expected actual))))
(defn- match-prefix [element ks-prefix]
  (let [expected (map element ks-prefix)]
    (fn [entry]
      (every? true? (map match-component expected (map (key entry) ks-prefix))))))

Search the given map for the entry corresponding to [[element]], considering only the relevant keys. The relevant keys are obtained by ignoring any suffix of [[ks]] for which [[element]] has nil or missing values. We require that there is at least one relevant key to find a match.

(defn find-relevant
  [m element ks]
  (when element
    ;; Strip off keys from right-to-left where they are nil, and relax search to only consider these keys.
    ;; We need at least one non-generate key to remain for the search.
    ;; NOTE: we could optimize away calling `non-sentinel` twice in this function, but for now just keeping it simple.
    (when-let [ks-prefix (->> ks reverse (drop-while (comp not non-sentinel element)) reverse seq)]
      (seek (match-prefix element ks-prefix) m))))
(def ^:private nil-val? (comp nil? val))

Remove any keys corresponding to nil values from the given map.

(defn strip-nils
  [m]
  (if (some nil-val? m)
    (with-meta (into {} (remove nil-val?) m) (meta m))
    m))
 
(ns macaw.util.malli
  (:refer-clojure :exclude [defn defn-])
  (:require
   [macaw.util.malli.defn :as mu.defn]
   [potemkin :as p]))
(p/import-vars
 [mu.defn defn defn-])
 
(ns macaw.util.malli.defn
  (:refer-clojure :exclude [defn defn-])
  (:require
   [clojure.core :as core]
   [clojure.string :as str]
   [macaw.util.malli.fn :as mu.fn]
   [malli.destructure]))
(set! *warn-on-reflection* true)

TODO -- this should generate type hints from the schemas and from the return type as well.

(core/defn- deparameterized-arglist [{:keys [args]}]
  (-> (malli.destructure/parse args)
      :arglist
      (with-meta (let [args-meta    (meta args)
                       tag          (:tag args-meta)
                       resolved-tag (when (symbol? tag)
                                      (let [resolved (ns-resolve *ns* tag)]
                                        (when (class? resolved)
                                          (symbol (.getName ^Class resolved)))))]
                   (cond-> args-meta
                     resolved-tag (assoc :tag resolved-tag))))))
(core/defn- deparameterized-arglists [{:keys [arities], :as _parsed}]
  (let [[arities-type arities-value] arities]
    (case arities-type
      :single   (list (deparameterized-arglist arities-value))
      :multiple (map deparameterized-arglist (:arities arities-value)))))

Generate a docstring with additional information about inputs and return type using a parsed fn tail (as parsed by [[mx/SchematizedParams]]).

(core/defn- annotated-docstring
  [{original-docstring           :doc
    [arities-type arities-value] :arities
    :keys                        [return]
    :as                          _parsed}]
  (str/trim
   (str "Inputs: " (case arities-type
                     :single   (pr-str (:args arities-value))
                     :multiple (str "("
                                    (str/join "\n           "
                                              (map (comp pr-str :args)
                                                   (:arities arities-value)))
                                    ")"))
        "\n  Return: " (str/replace (:schema return :any) ; used to be a pprint
                                    "\n"
                                    (str "\n          "))
        (when (not-empty original-docstring)
          (str "\n\n  " original-docstring)))))

Implementation of [[metabase.util.malli/defn]] taken from Metabase. Like [[schema.core/defn]], but for Malli.

See notes/justification in the main Metabase repo.

(defmacro defn
  [& [fn-name :as fn-tail]]
  (let [parsed           (mu.fn/parse-fn-tail fn-tail)
        cosmetic-name    (gensym (munge (str fn-name)))
        {attr-map :meta} parsed
        attr-map         (merge
                          {:arglists (list 'quote (deparameterized-arglists parsed))
                           :schema   (mu.fn/fn-schema parsed {:target :target/metadata})}
                          attr-map)
        docstring        (annotated-docstring parsed)
        instrument?      (mu.fn/instrument-ns? *ns*)]
    (if-not instrument?
      `(def ~(vary-meta fn-name merge attr-map)
         ~docstring
         ~(mu.fn/deparameterized-fn-form parsed cosmetic-name))
      `(def ~(vary-meta fn-name merge attr-map)
         ~docstring
         ~(let [error-context {:fn-name (list 'quote fn-name)}]
            (mu.fn/instrumented-fn-form error-context parsed cosmetic-name))))))

Same as defn, but creates a private def.

(defmacro defn-
  [fn-name & fn-tail]
  `(defn
     ~(with-meta fn-name (assoc (meta fn-name) :private true))
     ~@fn-tail))
 
(ns macaw.util.malli.fn
  (:refer-clojure :exclude [fn namespace])
  (:require
   [clojure.core :as core]
   [macaw.config :as config]
   [macaw.util.malli.registry :as mr]
   [malli.core :as mc]
   [malli.destructure :as md]
   [malli.error :as me]))
(set! *warn-on-reflection* true)

Malli normally generates wacky default schemas when you use destructuring in an argslist; this never seems to work correctly, so just add default schemas manually to circumvent Malli's weird behavior.

(add-default-schemas '[x {:keys [y]}]) ;; => [x {:keys [y]} :- [:maybe :map]]

(defn- add-default-schemas
  [args]
  (if (empty? args)
    args
    (loop [acc [], [x & [y z :as more]] args]
      (let [schema (when (= y :-) z)
            more   (if schema
                     (drop 2 more)
                     more)
            schema (cond
                     schema
                     schema
                     (and (or (map? x)
                              (sequential? x))
                          (= (last acc) '&))
                     [:* :any]
                     (map? x)
                     [:maybe :map]
                     (sequential? x)
                     [:maybe [:sequential :any]])
            acc    (concat acc (if schema
                                 [x :- schema]
                                 [x]))]
        (if (seq more)
          (recur acc more)
          acc)))))

Given a fn arity as parsed by [[SchematizedParams]] an return-schema, return an appropriate :=> schema for the arity.

(defn- arity-schema
  [{:keys [args], :as _arity} return-schema {:keys [target], :as _options}]
  (let [parsed       (md/parse (add-default-schemas args))
        varargs-info (get-in parsed [:parsed :rest :arg :arg])
        varargs-type (cond
                       (= (first varargs-info) :map) :varargs/map
                       (seq varargs-info)            :varargs/sequential)
        schema       (case target
                       :target/metadata        (if (= varargs-type :varargs/map)
                                                 (vec (concat (butlast (:schema parsed)) [[:* :any]]))
                                                 (:schema parsed))
                       :target/instrumentation (:schema parsed))]
    [:=>
     (cond-> schema
       varargs-type (vary-meta assoc :varargs/type varargs-type))
     return-schema]))

This is exactly the same as [[malli.experimental/SchematizedParams]], but it preserves metadata from the arglists.

(def ^:private SchematizedParams
  (mc/schema
   [:schema
    {:registry {"Schema"    any?
                "Separator" [:= :-]
                "Args"      vector? ; [:vector :any] loses metadata, but vector? keeps it :shrug:
                "PrePost"   [:map
                             [:pre {:optional true} [:sequential any?]]
                             [:post {:optional true} [:sequential any?]]]
                "Arity"     [:catn
                             [:args "Args"]
                             [:prepost [:? "PrePost"]]
                             [:body [:* :any]]]
                "Params"    [:catn
                             [:name symbol?]
                             [:return [:? [:catn
                                           [:- "Separator"]
                                           [:schema "Schema"]]]]
                             [:doc [:? string?]]
                             [:meta [:? :map]]
                             [:arities [:altn
                                        [:single "Arity"]
                                        [:multiple [:catn
                                                    [:arities [:+ [:schema "Arity"]]]
                                                    [:meta [:? :map]]]]]]]}}
    "Params"]))
(def ^:private ^{:arglists '([fn-tail])} parse-SchematizedParams
  (mc/parser SchematizedParams))

Parse a parameterized fn tail with the [[SchematizedParams]] schema. Throw an exception if it cannot be parsed.

(defn parse-fn-tail
  [fn-tail]
  (let [parsed (parse-SchematizedParams (if (symbol? (first fn-tail))
                                          fn-tail
                                          (cons '&f fn-tail)))]
    (when (= parsed ::mc/invalid)
      (let [error     (mc/explain SchematizedParams fn-tail)
            humanized (me/humanize error)]
        (throw (ex-info (format "Invalid function tail: %s" humanized)
                        {:fn-tail   fn-tail
                         :error     error
                         :humanized humanized}))))
    parsed))

Implementation for [[fn]] and [[macaw.util.malli.defn/defn]]. Given an unparsed parametered fn tail, extract the annotations and return a :=> or :function schema.

options can contain :target which is either

  • :target/metadata: generate the schema to attach to the metadata for a [[macaw.util.malli.defn/defn]]. For key-value varargs like & {:as kvs} get a schema like [:* :any] in this case since the args aren't parsed to a map yet

  • :target/instrumentation: generate a schema for use in generating the instrumented fn form. & {:as kvs} can have a real map schema here.

(defn fn-schema
  ([parsed]
   (fn-schema parsed {:target :target/instrumentation}))
  ([parsed options]
   (let [{:keys [return arities]}     parsed
         return-schema                (:schema return :any)
         [arities-type arities-value] arities]
     (case arities-type
       :single   (arity-schema arities-value return-schema options)
       :multiple (into [:function]
                       (for [arity (:arities arities-value)]
                         (arity-schema arity return-schema options)))))))
(defn- deparameterized-arity [{:keys [body args prepost], :as _arity}]
  (concat
   [(:arglist (md/parse args))]
   (when prepost
     [prepost])
   body))

Generate a deparameterized fn tail (the contents of a fn form after the fn symbol).

(defn deparameterized-fn-tail
  [{[arities-type arities-value] :arities, :as _parsed}]
  (let [body (case arities-type
               :single   (deparameterized-arity arities-value)
               :multiple (for [arity (:arities arities-value)]
                           (deparameterized-arity arity)))]
    body))

Impl for [[macaw.util.malli.fn/fn]] and [[macaw.util.malli.defn/defn]]. Given a parsed fn tail (as parsed by [[parsed-fn-tail]]), return a [[clojure.core.fn]] form with the parameters stripped out.

(deparameterized-fn-form (parse-fn-tail '[:- :int [x :- :int] (inc x)])) ;; => (fn [x] (inc x))

(defn deparameterized-fn-form
  [parsed & [fn-name]]
  `(core/fn ~@(when fn-name [fn-name]) ~@(deparameterized-fn-tail parsed)))

Whether [[validate-input]] and [[validate-output]] should validate things or not.

(def ^:dynamic *enforce*
  true)
(defn- validate [error-context schema value error-type]
  (when *enforce*
    (when-let [error (mr/explain schema value)]
      (let [humanized (me/humanize error {:wrap (core/fn humanize-include-value
                                                  [{:keys [value message]}]
                                                  (str message ", got: " (pr-str value)))})
            details   (merge
                       {:type      error-type
                        :error     error
                        :humanized humanized
                        :schema    schema
                        :value     value}
                       error-context)]
        (when-not (config/is-prod?)
          (throw (ex-info (case error-type
                            ::invalid-input  (format "Invalid input: %s" (pr-str humanized))
                            ::invalid-output (format "Invalid output: %s" (pr-str humanized)))
                          details)))))))

Impl for [[macaw.util.malli.fn/fn]]; validates an input argument with value against schema using a cached explainer and throws an exception if the check fails.

(defn validate-input
  [error-context schema value]
  (validate error-context schema value ::invalid-input))

Impl for [[macaw.util.malli.fn/fn]]; validates function output value against schema using a cached explainer and throws an exception if the check fails. Returns validated value.

(defn validate-output
  [error-context schema value]
  (validate error-context schema value ::invalid-output)
  value)
(defn- varargs-type [input-schema]
  (-> input-schema meta :varargs/type))
(defn- input-schema-arg-names [[_cat & args :as input-schema]]
  (let [the-varargs-type (varargs-type input-schema)
        normal-args      (if the-varargs-type
                           (butlast args)
                           args)]
    (concat
     (for [n (range (count normal-args))]
       (symbol (str (char (+ (int \a) n)))))
     (case the-varargs-type
       :varargs/sequential ['more]
       :varargs/map        ['kvs]
       nil))))
(defn- input-schema->arglist [input-schema]
  (let [arg-names (input-schema-arg-names input-schema)]
    (vec (if-let [the-varargs-type (varargs-type input-schema)]
           (concat (butlast arg-names) ['& (case the-varargs-type
                                             :varargs/sequential (last arg-names)
                                             :varargs/map        {:as (last arg-names)})])
           arg-names))))
(defn- input-schema->validation-forms [error-context [_cat & schemas :as input-schema]]
  (let [arg-names (input-schema-arg-names input-schema)
        schemas   (if (= (varargs-type input-schema) :varargs/sequential)
                    (concat (butlast schemas) [[:maybe (last schemas)]])
                    schemas)]
    (->> (map (core/fn [arg-name schema]
                ;; 1. Skip checks against `:any` schema, there is no situation where it would fail.
                ;;
                ;; 2. Skip checks against the default varargs schemas, there is no situation where [:maybe [:* :any]] is
                ;; going to fail.
                (when-not (= schema (condp = arg-name
                                      'more [:maybe [:* :any]]
                                      'kvs  [:* :any]
                                      :any))
                  `(validate-input ~error-context ~schema ~arg-name)))
              arg-names
              schemas)
         (filter some?))))
(defn- input-schema->application-form [input-schema]
  (let [arg-names (input-schema-arg-names input-schema)]
    (if (= (varargs-type input-schema) :varargs/sequential)
      (list* `apply '&f arg-names)
      (list* '&f arg-names))))

If exception is thrown from the [[validate]] machinery, remove those stack trace elements so the top of the stack is the calling function.

(defn fixup-stacktrace
  [^Exception e]
  (if (#{::invalid-input ::invalid-output} (-> e ex-data :type))
    (let [trace (.getStackTrace e)
          cleaned (when trace
                    (into-array StackTraceElement
                                (drop-while (comp #{(.getName (class validate))
                                                    (.getName (class validate-input))
                                                    (.getName (class validate-output))}
                                                  #(.getClassName ^StackTraceElement %))
                                            trace)))]
      (doto e
        (.setStackTrace cleaned)))
    e))
(defn- instrumented-arity [error-context [_=> input-schema output-schema]]
  (let [input-schema           (if (= input-schema :cat)
                                 [:cat]
                                 input-schema)
        arglist                (input-schema->arglist input-schema)
        input-validation-forms (input-schema->validation-forms error-context input-schema)
        result-form            (input-schema->application-form input-schema)
        result-form            (if (and output-schema
                                        (not= output-schema :any))
                                 `(->> ~result-form
                                       (validate-output ~error-context ~output-schema))
                                 result-form)]
    `(~arglist
      (try
        ~@input-validation-forms
        ~result-form
        (catch Exception ~'error
          (throw (fixup-stacktrace ~'error)))))))
(defn- instrumented-fn-tail [error-context [schema-type :as schema]]
  (case schema-type
    :=>
    [(instrumented-arity error-context schema)]
    :function
    (let [[_function & schemas] schema]
      (for [schema schemas]
        (instrumented-arity error-context schema)))))

Given a fn-tail like

([x :- :int y] (+ 1 2))

and parsed by [[parsed-fn-tail]],

return an unevaluated instrumented [[fn]] form like

(mc/-instrument {:schema [:=> [:cat :int :any] :any]} (fn [x y] (+ 1 2)))

(defn instrumented-fn-form
  [error-context parsed & [fn-name]]
  `(let [~'&f ~(deparameterized-fn-form parsed fn-name)]
     (core/fn ~@(instrumented-fn-tail error-context (fn-schema parsed)))))

------------------------------ Skipping Namespace Enforcement in prod ------------------------------

Returns true if mu.fn/fn and mu/defn in a namespace should be instrumented with malli schema validation.

(defn instrument-ns?
  [namespace]
  (or (true? (:instrument/always (meta namespace)))
      (not (config/is-prod?))))

Malli version of [[schema.core/fn]].

Unless it's in a skipped namespace during prod, a form like:

(fn :- :int [x :- :int] (inc x))

compiles to something like

(let [&f (fn [x] (inc x))] (fn [a] (validate-input {} :int a) (validate-output {} :int (&f a))))

The map arg here is additional error context; for something like [[macaw.util.malli/defn]], it will be something like

{:fn-name 'metabase.lib.field/resolve-field-id}

If compiled in a namespace in [[namespaces-toskip]], during config/is-prod?, it will be emitted as a vanilla clojure.core/fn form.

Known issue: this version of fn does not capture the optional function name and make it available, e.g. you can't do

(mu/fn my-fn ([x] (my-fn x 1)) ([x y :- :int] (+ x y)))

If we were to include my-fn in the uninstrumented fn form, then it would bypass schema checks when you call another arity:

(let [&f (fn my-fn ([x] (my-fn x 1)) ([x y] (+ x y)))] (fn ([a] (&f a)) ([a b] (validate-input {} :int b) (&f a b))))

;; skips the :- :int check on y in the 2-arity (my-fn 1.0) ;; => 2.0

Since this is a big gotcha, we are currently not including the optional function name my-fn in the generated output. We can probably fix this with [[letfn]], since it allows mutually recursive function calls, but that's a problem for another day. The passed function name comes back from [[mc/parse]] as :name if we want to attempt to fix this later.

(defmacro fn
  [& fn-tail]
  (let [parsed (parse-fn-tail fn-tail)
        instrument? (instrument-ns? *ns*)]
    (if-not instrument?
      (deparameterized-fn-form parsed)
      (let [error-context (if (symbol? (first fn-tail))
                            ;; We want the quoted symbol of first fn-tail:
                            {:fn-name (list 'quote (first fn-tail))} {})]
        (instrumented-fn-form error-context parsed)))))
 
(ns macaw.util.malli.registry
  (:refer-clojure :exclude [declare def type])
  (:require
   [malli.core :as mc]
   [malli.experimental.time :as malli.time]
   [malli.registry]
   [malli.util :as mut]))
(defonce ^:private cache (atom {}))

Get a cached value for k + schema. Cache is cleared whenever a schema is (re)defined with [[macaw.util.malli.registry/def]]. If value doesn't exist, value-thunk is used to calculate (and cache) it.

You generally shouldn't use this outside of this namespace unless you have a really good reason to do so! Make sure you used namespaced keys if you are using it elsewhere.

(defn cached
  [k schema value-thunk]
  (or (get (get @cache k) schema) ; get-in is terribly inefficient
      (let [v (value-thunk)]
        (swap! cache assoc-in [k schema] v)
        v)))

Fetch a cached [[mc/validator]] for schema, creating one if needed. The cache is flushed whenever the registry changes.

(defn validator
  [schema]
  (cached :validator schema #(mc/validator schema)))

[[mc/validate]], but uses a cached validator from [[validator]].

(defn validate
  [schema value]
  ((validator schema) value))

Fetch a cached [[mc/explainer]] for schema, creating one if needed. The cache is flushed whenever the registry changes.

(defn explainer
  [schema]
  (letfn [(make-explainer []
            #_{:clj-kondo/ignore [:discouraged-var]}
            (let [validator* (mc/validator schema)
                  explainer* (mc/explainer schema)]
              ;; for valid values, it's significantly faster to just call the validator. Let's optimize for the 99.9%
              ;; of calls whose values are valid.
              (fn schema-explainer [value]
                (when-not (validator* value)
                  (explainer* value)))))]
    (cached :explainer schema make-explainer)))

[[mc/explain]], but uses a cached explainer from [[explainer]].

(defn explain
  [schema value]
  ((explainer schema) value))
(defonce ^:private registry*
  (atom (merge (mc/default-schemas)
               (mut/schemas)
               (malli.time/schemas))))
(defonce ^:private registry (malli.registry/mutable-registry registry*))

Register a spec with our Malli spec registry.

(defn register!
  [schema definition]
  (swap! registry* assoc schema definition)
  (reset! cache {})
  nil)

Get the Malli schema for type from the registry.

(defn schema
  [type]
  (malli.registry/schema registry type))

Add a :doc/message option to a schema. Tries to merge it in existing vector schemas to avoid unnecessary indirection.

(defn -with-doc
  [the-schema docstring]
  (cond
    (and (vector? the-schema)
         (map? (second the-schema)))
    (let [[tag opts & args] the-schema]
      (into [tag (assoc opts :doc/message docstring)] args))
    (vector? the-schema)
    (let [[tag & args] the-schema]
      (into [tag {:doc/message docstring}] args))
    :else
    [:schema {:doc/message docstring} the-schema]))

Like [[clojure.spec.alpha/def]]; add a Malli schema to our registry.

(defmacro def
  ([type the-schema]
   `(register! ~type ~the-schema))
  ([type docstring the-schema]
   `(macaw.util.malli.registry/def ~type
      (-with-doc ~the-schema ~docstring))))

For REPL/test usage: get the definition of a registered schema from the registry.

(defn resolve-schema
  [the-schema]
  (mc/deref-all (mc/schema the-schema)))
 
(ns macaw.walk
  (:import
   (com.metabase.macaw AstWalker AstWalker$CallbackKey)))
(set! *warn-on-reflection* true)

keyword->key map for the AST-folding callbacks.

(def ->callback-key
  ;; TODO: Move this to a Malli schema to simplify the indirection
  {:alias            AstWalker$CallbackKey/ALIAS
   :column           AstWalker$CallbackKey/COLUMN
   :column-qualifier AstWalker$CallbackKey/COLUMN_QUALIFIER
   :mutation         AstWalker$CallbackKey/MUTATION_COMMAND
   :pseudo-table     AstWalker$CallbackKey/PSEUDO_TABLES
   :table            AstWalker$CallbackKey/TABLE
   :table-wildcard   AstWalker$CallbackKey/ALL_TABLE_COLUMNS
   :wildcard         AstWalker$CallbackKey/ALL_COLUMNS})

Lift a side effecting callback so that it preserves the accumulator.

(defn- preserve
  [f]
  (fn [acc & args]
    (apply f args)
    acc))

work around ast walker repeatedly visiting the same expressions (bug ?!)

(defn- deduplicate-visits [f]
  (let [seen (volatile! #{})]
    (fn [& [acc visitable & _ :as args]]
      (if (contains? @seen visitable)
        acc
        (do (vswap! seen conj visitable)
            (apply f args))))))
(defn- update-keys-vals [m key-f val-f]
  (let [ret (persistent!
             (reduce-kv (fn [acc k v]
                          (assoc! acc (key-f k) (val-f v)))
                        (transient {})
                        m))]
    (with-meta ret (meta m))))

Walk over the query's AST, using the callbacks for their side effects, for example to mutate the AST itself.

(defn walk-query
  [parsed-query callbacks]
  (let [callbacks (update-keys-vals callbacks ->callback-key (comp deduplicate-visits preserve))]
    (.walk (AstWalker. callbacks ::ignored) parsed-query)))

Fold over the query's AST, using the callbacks to update the accumulator.

(defn fold-query
  [parsed-query callbacks init-val]
  (let [callbacks (update-keys-vals callbacks ->callback-key deduplicate-visits)]
    (.fold (AstWalker. callbacks init-val) parsed-query)))
 
(ns macaw.acceptance-test
  (:require
   [clojure.java.io :as io]
   [clojure.set :as set]
   [clojure.string :as str]
   [clojure.test :refer :all]
   [macaw.core :as m]
   [macaw.core-test :as ct]
   [macaw.types])
  (:import
   (java.io File)))
(set! *warn-on-reflection* true)
(defn- fixture-analysis [fixture]
  (some-> fixture (ct/fixture->filename "acceptance" ".analysis.edn") io/resource slurp read-string))
(defn- fixture-renames [fixture]
  (some-> fixture (ct/fixture->filename "acceptance" ".renames.edn") io/resource slurp read-string))
(defn- fixture-rewritten [fixture]
  (some-> fixture (ct/fixture->filename "acceptance" ".rewritten.sql") io/resource slurp str/trim))
(defn- get-component [cs k]
  (case k
    :source-columns (get cs k)
    :columns-with-scope (ct/contexts->scopes (get cs :columns))
    (ct/raw-components (get cs k))))
(def ^:private test-modes
  (set macaw.types/modes))
(def override-hierarchy
  (-> (make-hierarchy)
      (derive :basic-select :select-only)
      (derive :compound-select :select-only)))
(defn- lineage [h k]
  (when k
    (assert (<= (count (parents h k)) 1) "Multiple inheritance not supported for override hierarchy.")
    (cons k (lineage h (first (parents h k))))))
(def global-overrides
  {})
(def ns-overrides
  {:select-only  {"mutation" :macaw.error/invalid-query
                  "dynamic"  :macaw.error/invalid-query}
   :basic-select {"compound" :macaw.error/unsupported-expression}})
(def ^:private merged-fixtures-file "test/resources/acceptance/queries.sql")

TODO generically detect queries..sql files

(def ^:private dynamic-fixtures-file "test/resources/acceptance/queries.dynamic.sql")
(defn- read-merged [file]
  (->> (str/split (slurp file) #"-- FIXTURE: ")
       (keep (fn [named-query]
               (when-not (str/blank? named-query)
                 (let [[nm qry] (.split ^String named-query "\n" 2)]
                   [(keyword nm) (str/trim qry)]))))
       (into {})))

The fixtures in merged fixtures file, mapped by their identifiers.

(defn- merged-fixtures
  []
  (merge (read-merged merged-fixtures-file)
         (update-keys
          (read-merged dynamic-fixtures-file)
          #(keyword "dynamic" (name %)))))
(defn- validate-analysis [correct override actual]
  (let [expected (or override correct)]
    (when override
      (testing "Override is still needed"
        (if (and (vector? correct) (not (keyword actual)))
          (is (not= correct (ct/sorted actual)))
          (is (not= correct actual)))))
    (if (and (vector? expected) (not (keyword actual)))
      (is (= expected (ct/sorted actual)))
      (when (not= expected actual)
        (is (= expected actual))))))
(defn- when-keyword [x]
  (when (keyword? x)
    x))
(defn- get-override* [expected-cs mode fixture ck]
  (or (get-in expected-cs [:overrides mode :error])
      (get-in expected-cs [:overrides mode ck])
      (when-keyword (get-in expected-cs [:overrides mode]))
      (get-in ns-overrides [mode (namespace fixture)])
      (get global-overrides mode)))
(defn- get-override [expected-cs mode fixture ck]
  (or
   (some #(get-override* expected-cs % fixture ck)
         (lineage override-hierarchy mode))
   (get-in expected-cs [:overrides :error])
   (get-in expected-cs [:overrides ck])
   (when-keyword (get expected-cs :overrides))))

Test that we can parse a given fixture, and compare against expected analysis and rewrites, where they are defined.

(defn- test-fixture
  [fixture]
  (let [prefix      (str "(fixture: " (subs (str fixture) 1) ")")
        merged      (merged-fixtures)
        sql         (or (ct/query-fixture fixture) (get merged fixture))
        expected-cs (fixture-analysis fixture)
        renames     (fixture-renames fixture)
        expected-rw (fixture-rewritten fixture)
        base-opts   {:non-reserved-words [:final], :allow-unused? true}
        opts-mode   (fn [mode] (assoc base-opts :mode mode))]
    (assert sql "Fixture exists")
    (doseq [m test-modes
            :let [opts (opts-mode m)]]
      (if (= m :ast-walker-1)
        ;; Legacy testing path for `components`, which only supports the original walker, and throws exceptions.
        (let [cs (testing (str prefix " analysis does not throw")
                   (is (ct/components sql opts)))]
          (doseq [[ck cv] (dissoc expected-cs :overrides :error :skip)]
            (testing (str prefix " analysis is correct: " (name ck))
              (let [actual-cv (:error cs (get-component cs ck))
                    override  (get-override expected-cs m fixture ck)]
                (validate-analysis cv override actual-cv)))))
        ;; Testing path for newer modes.
        (let [correct  (:error expected-cs (:tables expected-cs))
              override (cond
                         (str/includes? sql "-- BROKEN")
                         :macaw.error/unable-to-parse
                         (str/includes? sql "-- UNSUPPORTED")
                         :macaw.error/unsupported-expression
                         :else
                         (get-override expected-cs m fixture :tables))
              ;; For now, we only support (and test) :tables
              tables   (testing (str prefix " table analysis does not throw for mode " m)
                         (is (ct/tables sql opts)))]
          (if (and (nil? correct) (nil? override))
            (testing "Must define expected tables, or explicitly skip analysis"
              (is (:skip expected-cs)))
            (testing (str prefix " table analysis is correct for mode " m)
              (validate-analysis correct override tables))))))
    (when renames
      (let [broken?   (:broken? renames)
            rewritten (testing (str prefix " rewriting does not throw")
                        (is (str/trim (m/replace-names sql (dissoc renames :broken?) base-opts))))]
        (when expected-rw
          (testing (str prefix " rewritten SQL is correct")
            (if broken?
              (is (not= expected-rw rewritten))
              (is (= expected-rw rewritten)))))))))

Find all the fixture symbols for stand-alone sql files within our test resources.

(defn isolated-fixtures
  []
  (->> (io/resource "acceptance")
       io/file
       file-seq
       (keep #(when (.isFile ^File %)
                (let [n (.getName ^File %)]
                  (when (.endsWith n ".sql")
                    (str/replace n #"\.sql$" "")))))
       (remove #(.contains ^String % "."))
       (remove #{"queries"})
       (map ct/stem->fixture)
       (sort-by str)))
(defn- all-fixtures []
  (let [isolated (isolated-fixtures)
        merged   (keys (merged-fixtures))]
    (assert (empty? (set/intersection (set isolated) (set merged)))
            "No fixtures should be in both the isolated and merged files")
    (sort-by str (distinct (concat isolated merged)))))

Find all the fixture files and for each of them run all the tests we can construct from the related files.

(defmacro create-fixture-tests!
  []
  (let [fixtures (all-fixtures)]
    (cons 'do
          (for [f fixtures
                :let [test-name (symbol (str/replace (ct/fixture->filename f "-test") #"(?<!_)_(?!_)" "-"))]]
            `(deftest ^:parallel ~test-name
               (test-fixture ~f))))))
(create-fixture-tests!)
(comment
  ;; Unload all the tests, useful for flushing stale fixture tests
  (doseq [[sym ns-var] (ns-interns *ns*)]
    (when (:test (meta ns-var))
      (ns-unmap *ns* sym)))
  (merged-fixtures)
  ;; Append all the isolated fixtures to the merged file.
  ;; For now, we keep the stress-testing fixtures separate, because OH LAWDY they HUGE.
  (spit merged-fixtures-file
        (str/join "\n\n"
                  (for [fixture (isolated-fixtures)]
                    (str "-- FIXTURE: "
                         (when-let [nms (namespace fixture)]
                           (str nms "/"))
                         (name fixture) "\n"
                         (str/trim
                          (ct/query-fixture fixture))))))
  (deftest ^:parallel single-test
    (test-fixture :compound/nested-cte-sneaky))
  (test-fixture :compound/cte)
  (test-fixture :compound/cte-nonambiguous)
  (test-fixture :literal/with-table)
  (test-fixture :literal/without-table)
  (test-fixture :interpolation/crosstab)
  (test-fixture :broken/filter-where))
 
(ns ^:parallel macaw.core-test
  (:require
   [clojure.java.io :as io]
   [clojure.string :as str]
   [clojure.test :refer [deftest is testing]]
   [clojure.walk :as walk]
   [macaw.core :as m]
   [macaw.test.util :refer [ws=]]
   [macaw.walk :as mw]
   [mb.hawk.assert-exprs])
  (:import
   (clojure.lang ExceptionInfo)
   (net.sf.jsqlparser.schema Table)))
(set! *warn-on-reflection* true)
(defn- non-empty-and-truthy [xs]
  (if (seq xs)
    (every? true? xs)
    false))
(defn components [sql & {:as opts}]
  (m/query->components (m/parsed-query sql opts) opts))
(defn tables [sql & {:as opts}]
  (let [opts   (update opts :mode #(or % :ast-walker-1))
        result (m/query->tables sql opts)]
    (or (:error result)
        (:tables result))))
(def raw-components #(let [xs (empty %)] (into xs (keep :component) %)))
(def columns        (comp raw-components :columns components))
(def source-columns (comp :source-columns components))
(def has-wildcard?  (comp non-empty-and-truthy raw-components :has-wildcard? components))
(def mutations      (comp raw-components :mutation-commands components))
(def table-wcs      (comp raw-components :table-wildcards components))

Strip the scope id from the context stacks, to get deterministic values for testing.

(defn- strip-context-ids
  [m]
  (walk/prewalk
   (fn [x]
     (if (:context x)
       (update x :context (partial mapv m/scope-label))
       x))
   m))
(defn scope->vec [s]
  [(m/scope-label s) (m/scope-id s)])

Replace full context stack with a reference to the local scope, only.

(defn contexts->scopes
  [m]
  (walk/prewalk
   (fn [x]
     (if-let [context (:context x)]
       (-> x (dissoc :context) (assoc :scope (scope->vec (first context))))
       x))
   m))
(defn column-qualifiers
  [query]
  (mw/fold-query (m/parsed-query query)
                 {:column-qualifier (fn [acc tbl _ctx] (conj acc (.getName ^Table tbl)))}
                 #{}))

See [[macaw.core/parsed-query]] and https://github.com/JSQLParser/JSqlParser/issues/1988 for more details.

(def ^:private implicit-semicolon
  "select id
from foo")
(defn- ->windows [sql]
  (str/replace sql "\n" "\r\n"))
(deftest three-or-more-line-breaks-test
  (doseq [f [identity ->windows]
          :let [query (f implicit-semicolon)]]
    (testing (if (= ->windows f) "windows" "unix")
      (is (= (-> query (str/replace "id" "pk") (str/replace "foo" "bar"))
             (m/replace-names query
                              {:columns {{:table "foo" :column "id"} "pk"}
                               :tables  {{:table "foo"} "bar"}}))))))
(deftest query->tables-test
  (testing "Simple queries"
    (is (= #{{:table "core_user"}}
           (tables "SELECT * FROM core_user;")))
    (is (= #{{:table "core_user"}}
           (tables "SELECT id, email FROM core_user;"))))
  (testing "With a schema (Postgres)" ;; TODO: only run this against supported DBs
    (is (= #{{:table "core_user" :schema "the_schema_name"}}
           (tables "SELECT * FROM the_schema_name.core_user;")))
    (is (= #{{:table "orders" :schema "public"}
             {:table "orders" :schema "private"}}
           (tables "SELECT a.x FROM public.orders a, private.orders"))))
  (testing "Sub-selects"
    (is (= #{{:table "core_user"}}
           (tables "SELECT * FROM (SELECT DISTINCT email FROM core_user) q;")))))
(deftest tables-with-complex-aliases-issue-14-test
  (testing "With an alias that is also a table name"
    (is (= #{{:table "user"}
             {:table "user2_final"}}
           (tables
            "SELECT legacy_user.id AS old_id,
                    user.id AS new_id
             FROM user AS legacy_user
             OUTER JOIN user2_final AS user
             ON legacy_user.email = user2_final.email;")))))
(deftest column-qualifier-test
  (testing "column-qualifiers works with tables and aliases"
    (is (= #{"user" "legacy_user"}
           (column-qualifiers "SELECT
                                 user.id AS user_id,
                                 legacy_user.id AS old_id
                               FROM user
                               OUTER JOIN user as legacy_user
                               ON user.email = user.electronic_mail_address
                               JOIN unrelated_table on foo = user.unrelated_id;")))))
(deftest query->columns-test
  (testing "Simple queries"
    (is (= #{{:column "foo"}
             {:column "bar"}
             {:column "id" :table "quux"}
             {:column "quux_id" :table "baz"}}
           (columns "SELECT foo, bar FROM baz INNER JOIN quux ON quux.id = baz.quux_id"))))
  (testing "'group by' columns present"
    (is (= #{{:column "id" :table "orders"}
             {:column "user_id" :table "orders"}}
           (columns "SELECT id FROM orders GROUP BY user_id"))))
  (testing "table alias present"
    (is (= #{{:column "id" :table "orders" :schema "public"}}
           (columns "SELECT o.id FROM public.orders o"))))
  (testing "schema is determined correctly"
    (is (= #{{:column "x" :table "orders" :schema "public"}}
           (columns "SELECT public.orders.x FROM public.orders, private.orders"))))
  (testing "quotes are retained"
    (is (= #{{:column "`x`" :table "`orders`" :schema "`public`"}}
           (columns "SELECT `public`.`orders`.`x` FROM `public`.`orders`, `private`.`orders`"))))
  (testing "quotes and case are not interpreted"
    (is (= #{{:column "x" :table "ORDERS" :schema "`public`"}
             {:column "X" :table "ORDERS" :schema "`public`"}
             {:column "`x`" :table "ORDERS" :schema "`public`"}
             {:column "`X`" :table "ORDERS" :schema "`public`"}}
           (columns "SELECT x, X, `x`, `X` FROM `public`.ORDERS")))))
(def ^:private heavily-quoted-query
  "SELECT raw, \"foo\", \"dong\".\"bar\", `ding`.`dong`.`fee` FROM `ding`.dong")
(def ^:private heavily-quoted-query-rewritten
  "SELECT flaw, glue, long.lark, king.long.flee FROM king.long")
(def ^:private heavily-quoted-query-rewrites
  {:schemas {"ding" "king"}
   :tables  {{:schema "ding" :table "dong"} "long"}
   :columns {{:schema "ding" :table "dong" :column "raw"} "flaw"
             {:schema "ding" :table "dong" :column "fee"} "flee"
             {:schema "ding" :table "dong" :column "bar"} "lark"
             {:schema "ding" :table "dong" :column "foo"} "glue"}})
(defn normalized-components [sql]
  (m/query->components (m/parsed-query sql) {:preserve-identifiers? false}))
(def normalized-columns (comp raw-components :columns normalized-components))
(def normalized-tables (comp raw-components :tables normalized-components))
(deftest quotes-test
  (is (= #{{:column "raw", :table "dong", :schema "ding"}
           {:column "foo", :table "dong", :schema "ding"}
           {:column "bar", :table "dong", :schema "ding"}
           {:column "fee", :table "dong", :schema "ding"}}
         (normalized-columns heavily-quoted-query)))
  (is (= #{{:table "dong", :schema "ding"}}
         (normalized-tables heavily-quoted-query)))
  (is (= heavily-quoted-query-rewritten
         (m/replace-names heavily-quoted-query heavily-quoted-query-rewrites))))
(deftest case-sensitive-test
  (is (= "SELECT X.Y, X.y, x.Z, x.z FROM X LEFT JOIN x ON X.Y=x.Z"
         (m/replace-names "SELECT a.b, a.B, A.b, A.B FROM a LEFT JOIN A ON a.b=A.b"
                          {:tables  {{:table "a"} "X"
                                     {:table "A"} "x"}
                           :columns {{:table "a" :column "b"} "Y"
                                     {:table "a" :column "B"} "y"
                                     {:table "A" :column "b"} "Z"
                                     {:table "A" :column "B"} "z"}}))))
(deftest case-insensitive-test
  ;; In the future, we might try to be smarter and preserve case.
  (is (= "SELECT cats.meow FROM cats"
         (m/replace-names "SELECT DOGS.BaRk FROM dOGS"
                          {:tables  {{:table "dogs"} "cats"}
                           :columns {{:table "dogs" :column "bark"} "meow"}}
                          {:case-insensitive :lower})))
  (is (= "SELECT meow FROM private.cats"
         (m/replace-names "SELECT bark FROM PUBLIC.dogs"
                          {:schemas {"public" "private"}
                           :tables  {{:schema "public" :table "dogs"} "cats"}
                           :columns {{:schema "public" :table "dogs" :column "bark"} "meow"}}
                          {:case-insensitive :lower})))
  (is (= "SELECT id, meow FROM private.cats"
         (m/replace-names "SELECT id, bark FROM PUBLIC.dogs"
                          {:schemas {"public" "private"}
                           :tables  {{:schema "public" :table "DOGS"} "cats"}
                           :columns {{:schema "PUBLIC" :table "dogs" :column "bark"} "meow"}}
                          {:case-insensitive :agnostic
                           :allow-unused?    true}))))
(def ^:private heavily-quoted-query-mixed-case
  "SELECT RAW, \"Foo\", \"doNg\".\"bAr\", `ding`.`doNg`.`feE` FROM `ding`.`doNg`")
(deftest case-and-quotes-test
  (testing "By default, quoted references are also case insensitive"
    (is (= heavily-quoted-query-rewritten
           (m/replace-names heavily-quoted-query-mixed-case
                            heavily-quoted-query-rewrites
                            :case-insensitive :lower))))
  (testing "One can opt-into ignoring case only for unquoted references\n"
    (testing "None of the quoted identifiers with different case will be matched"
      (is (thrown-with-msg? ExceptionInfo
                            #"Unknown rename: .* \"(dong)|(bar)|(foo)|(fee)\
                            (m/replace-names heavily-quoted-query-mixed-case
                                             heavily-quoted-query-rewrites
                                             :case-insensitive :agnostic
                                             :quotes-preserve-case? true))))
    (testing "The query is unchanged when allowed to run partially"
      (is (= heavily-quoted-query-mixed-case
             (m/replace-names heavily-quoted-query-mixed-case
                              heavily-quoted-query-rewrites
                              {:case-insensitive      :agnostic
                               :quotes-preserve-case? true
                               :allow-unused?         true}))))))
(def ^:private ambiguous-case-replacements
  {:columns {{:schema "public" :table "DOGS" :column "BARK"}  "MEOW"
             {:schema "public" :table "dogs" :column "bark"}  "meow"
             {:schema "public" :table "dogs" :column "growl"} "purr"
             {:schema "public" :table "dogs" :column "GROWL"} "PuRr"
             {:schema "public" :table "DOGS" :column "GROWL"} "PURR"}})
(deftest ambiguous-case-test
  (testing "Correctly handles flexibility around the case of the replacements"
    (doseq [[case-insensitive expected] {:lower    "SELECT meow, PuRr FROM DOGS"
                                         :upper    "SELECT MEOW, PURR FROM DOGS"
                                         ;; Not strictly deterministic, depends on map ordering.
                                         :agnostic "SELECT MEOW, PuRr FROM DOGS"}]
      (is (= expected
             (m/replace-names "SELECT bark, `GROWL` FROM DOGS"
                              ambiguous-case-replacements
                              {:case-insensitive      case-insensitive
                               :quotes-preserve-case? true
                               :allow-unused?         true}))))))
(deftest infer-test
  (testing "We can infer a column through a few hoops"
    (is (= #{{:column "amount" :table "orders"}}
           (columns "SELECT amount FROM (SELECT amount FROM orders)")))
    (is (= #{{:column "amount" :alias "cost" :table "orders"}
             ;; We preserve this  for now, which has its scope to differentiate it from the qualified element.
             ;; Importantly, we do not infer it as coming from the orders table, despite that being the only table.
             {:column "cost"}}
           (columns "SELECT cost FROM (SELECT amount AS cost FROM orders)")))
    (testing "We do not expose phantom columns due to references to aliases"
      (is (= #{{:column "amount" :table "orders"}}
             (source-columns "SELECT cost FROM (SELECT amount AS cost FROM orders)"))))))
(deftest infer-from-schema-test
  (is (= #{{:schema "public" :table "towns"}}
         (tables "select public.towns.id from towns"))))
(deftest mutation-test
  (is (= #{"alter-sequence"}
         (mutations "ALTER SEQUENCE serial RESTART WITH 42")))
  (is (= #{"alter-session"}
         (mutations "ALTER SESSION SET foo = 'bar'")))
  (is (= #{"alter-system"}
         (mutations "ALTER SYSTEM RESET ALL")))
  (is (= #{"alter-table"}
         (mutations "ALTER TABLE orders ADD COLUMN email text")))
  (is (= #{"alter-view"}
         (mutations "ALTER VIEW foo AS SELECT bar;")))
  (is (= #{"create-function"}           ; Postgres syntax
         (mutations "CREATE FUNCTION multiply(integer, integer) RETURNS integer AS 'SELECT $1 * $2;' LANGUAGE SQL
         IMMUTABLE RETURNS NULL ON NULL INPUT;")))
  (is (= #{"create-function"}           ; Conventional syntax
         (mutations "CREATE FUNCTION multiply(a integer, b integer) RETURNS integer LANGUAGE SQL IMMUTABLE RETURNS
         NULL ON NULL INPUT RETURN a + b;")))
  (is (= #{"create-index"}
         (mutations "CREATE INDEX idx_user_id ON orders(user_id);")))
  (is (= #{"create-schema"}
         (mutations "CREATE SCHEMA perthshire")))
  (is (= #{"create-sequence"}
         (mutations "CREATE SEQUENCE users_seq START WITH 42 INCREMENT BY 17")))
  (is (= #{"create-synonym"}
         (mutations "CREATE SYNONYM folk FOR people")))
  (is (= #{"create-table"}
         (mutations "CREATE TABLE poets (name text, id integer)")))
  (is (= #{"create-view"}
         (mutations "CREATE VIEW folk AS SELECT * FROM people WHERE id > 10")))
  (is (= #{"delete"}
         (mutations "DELETE FROM people")))
  (is (= #{"drop"}
         (mutations "DROP TABLE people")))
  (is (= #{"grant"}
         (mutations "GRANT SELECT, UPDATE, INSERT ON people TO myself")))
  (is (= #{"insert"}
         (mutations "INSERT INTO people(name, source) VALUES ('Robert Fergusson', 'Twitter'), ('Robert Burns',
         'Facebook')")))
  (is (= #{"purge"}
         (mutations "PURGE TABLE people")))
  (is (= #{"rename-table"}
         (mutations "RENAME TABLE people TO folk")))
  (is (= #{"truncate"}
         (mutations "TRUNCATE TABLE people")))
  (is (= #{"update"}
         (mutations "UPDATE people SET name = 'Robert Fergusson' WHERE id = 23"))))
(deftest complicated-mutations-test
  ;; https://github.com/metabase/macaw/issues/18
  #_(is (= #{"delete" "insert"}
           (mutations "WITH outdated_orders AS (
                       DELETE FROM orders
                       WHERE
                         date <= '2018-01-01'
                       RETURNING *)
                     INSERT INTO order_log
                     SELECT * from outdated_orders;")))
  (is (= #{"insert"}
         (mutations "WITH outdated_orders AS (
                       SELECT * from orders)
                     INSERT INTO order_log
                     SELECT * from outdated_orders;"))))
(deftest alias-inclusion-test
  (testing "Aliases are not included"
    (is (= #{{:table "orders"} {:table "foo"}}
           (tables "SELECT id, o.id FROM orders o JOIN foo ON orders.id = foo.order_id")))))
(deftest select-*-test
  (is (true? (has-wildcard? "SELECT * FROM orders")))
  (is (true? (has-wildcard? "SELECT id, * FROM orders JOIN foo ON orders.id = foo.order_id"))))
(deftest table-wildcard-test-without-aliases
  (is (= #{{:table "orders"}}
         (table-wcs "SELECT orders.* FROM orders JOIN foo ON orders.id = foo.order_id")))
  (is (= #{{:table "foo" :schema "public"}}
         (table-wcs "SELECT foo.* FROM orders JOIN public.foo f ON orders.id = foo.order_id"))))
(deftest table-star-test-with-aliases
  (is (= #{{:table "orders"}}
         (table-wcs "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id")))
  (is (= #{{:table "foo"}}
         (table-wcs "SELECT f.* FROM orders o JOIN foo f ON orders.id = foo.order_id"))))
(deftest context-test
  (testing "Sub-select with outer wildcard"
    ;; TODO we should test the source and result columns too
    (is (=? {:columns
             #{{:component {:column "total" :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}
               {:component {:column "id"    :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}
               {:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}},
             :has-wildcard?     #{{:component true, :context ["SELECT"]}},
             :mutation-commands #{},
             :tables            #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
             :table-wildcards   #{}}
            (strip-context-ids (components "SELECT * FROM (SELECT id, total FROM orders) WHERE total > 10")))))
  (testing "Sub-select with inner wildcard"
    (is (=? {:columns
             #{{:component {:column "id"    :table "orders"}, :context ["SELECT"]}
               {:component {:column "total" :table "orders"}, :context ["SELECT"]}
               {:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}},
             :has-wildcard?     #{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
             :mutation-commands #{},
             :tables            #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
             :table-wildcards   #{}}
            (strip-context-ids (components "SELECT id, total FROM (SELECT * FROM orders) WHERE total > 10")))))
  (testing "Sub-select with dual wildcards"
    (is (=? {:columns           #{{:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}},
             :has-wildcard?
             #{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}
               {:component true, :context ["SELECT"]}},
             :mutation-commands #{},
             :tables            #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
             :table-wildcards   #{}}
            (strip-context-ids (components "SELECT * FROM (SELECT * FROM orders) WHERE total > 10")))))
  (testing "Join; table wildcard"
    (is (=? {:columns           #{{:component {:column "order_id" :table "foo"}, :context ["JOIN" "SELECT"]}
                                  {:component {:column "id" :table "orders"}, :context ["JOIN" "SELECT"]}},
             :has-wildcard?     #{},
             :mutation-commands #{},
             :tables            #{{:component {:table "foo"}, :context ["FROM" "JOIN" "SELECT"]}
                                  {:component {:table "orders"}, :context ["FROM" "SELECT"]}},
             :table-wildcards   #{{:component {:table "orders"}, :context ["SELECT"]}}}
            (strip-context-ids (components "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id"))))))
(deftest replace-names-test
  (is (= "SELECT aa.xx, b.x, b.y FROM aa, b;"
         (m/replace-names "SELECT a.x, b.x, b.y FROM a, b;"
                          {:tables  {{:schema "public" :table "a"} "aa"}
                           :columns {{:schema "public" :table "a" :column "x"} "xx"}})))
  (testing "Handle fully qualified replacement targets"
    ;; Giving Macaw more context could make it easier to
    ;; In any case, this is trivial for Metabase to provide.
    (is (= "SELECT aa.xx, b.x, b.y FROM aa, b;"
           (m/replace-names "SELECT a.x, b.x, b.y FROM a, b;"
                            {:tables  {{:schema "public" :table "a"} "aa"}
                             :columns {{:schema "public" :table "a"  :column "x"}
                                       {:schema "public" :table "aa" :column "xx"}}}))))
  ;; To consider - we could avoid splitting up the renames into column and table portions in the client, as
  ;; qualified targets would allow us to infer such changes. Partial qualification could also work fine where there
  ;; is no ambiguity - even if this is just a nice convenience for testing.
  #_(is (= "SELECT aa.xx, b.x, b.y FROM aa, b;"
           (m/replace-names "SELECT a.x, b.x, b.y FROM a, b;"
                            {:columns {{:schema "public" :table "a" :column "x"}
                                       {:table "aa" :column "xx"}}})))
  (is (= "SELECT qwe FROM orders"
         (m/replace-names "SELECT id FROM orders"
                          {:columns {{:schema "public" :table "orders" :column "id"} "qwe"}})))
  (is (= "SELECT p.id, q.id FROM public.whatever p join private.orders q"
         (m/replace-names "SELECT p.id, q.id FROM public.orders p join private.orders q"
                          {:tables {{:schema "public" :table "orders"} "whatever"}})))
  (is (ws= "SELECT SUM(public.orders.total) AS s,
            MAX(orders.total) AS max,
            MIN(total) AS min
            FROM public.orders"
           (m/replace-names
            "SELECT SUM(public.orders.amount) AS s,
             MAX(orders.amount) AS max,
             MIN(amount) AS min
             FROM public.orders"
            {:columns {{:schema "public" :table "orders" :column "amount"} "total"}})))
  (is (ws= "SELECT *, sturmunddrang
                    , oink AS oink
            FROM /* /* lore */
                 floor_muser,
                 user,  /* more */ vigilant_user ;"
           (m/replace-names
            "SELECT *, boink
                     , yoink AS oink
             FROM /* /* lore */
                  core_user,
                  bore_user,  /* more */ snore_user ;"
            {:tables  {{:schema "public" :table "core_user"}  "floor_muser"
                       {:schema "public" :table "bore_user"}  "user"
                       {:schema "public" :table "snore_user"} "vigilant_user"}
             :columns {{:schema "public" :table "core_user" :column "boink"}  "sturmunddrang"
                       {:schema "public" :table "snore_user" :column "yoink"} "oink"}}))))
(deftest replace-schema-test
  ;; Somehow we broke renaming the `x` in the WHERE clause.
  #_(is (= "SELECT totally_private.purchases.xx FROM totally_private.purchases, private.orders WHERE xx = 1"
           (m/replace-names "SELECT public.orders.x FROM public.orders, private.orders WHERE x = 1"
                            {:schemas {"public" "totally_private"}
                             :tables  {{:schema "public" :table "orders"} "purchases"}
                             :columns {{:schema "public" :table "orders" :column "x"} "xx"}}))))
(deftest allow-unused-test
  (is (thrown-with-msg?
       Exception #"Unknown rename"
       (m/replace-names "SELECT 1" {:tables {{:schema "public" :table "a"} "aa"}})))
  (is (= "SELECT 1"
         (m/replace-names "SELECT 1" {:tables {{:schema "public" :table "a"} "aa"}}
                          {:allow-unused? true}))))
(deftest model-reference-test
  (is (= "SELECT subtotal FROM metabase_sentinel_table_154643 LIMIT 3"
         (m/replace-names "SELECT total FROM metabase_sentinel_table_154643 LIMIT 3"
                          {:columns {{:table "orders" :column "total"} "subtotal"}
                           :tables  {{:table "orders"} "purchases"}}
                          {:allow-unused? true}))))
(defn- name-seq [seq-type]
  (let [prefix (str seq-type "_")]
    (rest (iterate (fn [_] (str (gensym prefix))) nil))))
(defn fixture->filename
  ([fixture suffix]
   (fixture->filename fixture nil suffix))
  ([fixture path suffix]
   (as-> fixture %
     [(namespace %) (name %)]
     (remove nil? %)
     (str/join "__" %)
     (str/replace % "-" "_")
     (if path (str path "/" %) %)
     (str % suffix))))
(defn stem->fixture [stem]
  (let [[x y] (map #(str/replace % "_" "-") (str/split stem #"__"))]
    (if y
      (keyword x y)
      (keyword x))))
(def ^:private fixture-paths
  #{nil "acceptance"})
(defn query-fixture
  ([fixture]
   (let [paths (map #(fixture->filename fixture % ".sql") fixture-paths)]
     (when-let [r (some io/resource paths)]
       (slurp r)))))
(defn- anonymize-query [query]
  (let [m (components query)
        ts (raw-components (:tables m))
        cs (raw-components (:columns m))
        ss (transduce (keep :schema) conj #{} (concat ts cs))]
    (m/replace-names query
                     {:schemas (zipmap ss (name-seq "schema"))
                      :tables  (zipmap ts (name-seq "table"))
                      :columns (zipmap cs (name-seq "column"))}
                     ;; nothing should be unused... but we currently get some junk from analysis, sadly
                     {:allow-unused? true})))

Read a fixture, anonymize the identifiers, write it back out again.

(defn- anonymize-fixture
  [fixture]
  (let [filename (fixture->filename fixture ".sql")]
    (spit (str "test/resources/" filename)
          (anonymize-query (query-fixture fixture)))))
(def ^:private alias-shadow-query
  "SELECT people.*, orders.a as foo
   FROM orders
   JOIN people
   ON
   people.foo = orders.foo_id")
(deftest alias-shadow-replace-test
  (testing "Aliases are not replaced, but real usages are"
    (is (ws= (str/replace alias-shadow-query "people.foo" "people.bar")
             (m/replace-names alias-shadow-query
                              {:columns {{:table "people" :column "foo"} "bar"}}
                              {:allow-unused? true})))))
(def ^:private cte-query
  "WITH engineering_employees AS (
       SELECT id, name, department, favorite_language
       FROM employees
       WHERE department = 'Engineering')
   SELECT id, name, favorite_language as fave_lang
   FROM engineering_employees
   WHERE favorite_language in ('mandarin clojure', 'middle javascript');")
(def ^:private sub-select-query
  "SELECT id, name, favorite_language as fave_lang
   FROM (
       SELECT id, name, department, favorite_language
       FROM employees
       WHERE department = 'Engineering') as engineering_employees
   WHERE favorite_language in ('mandarin clojure', 'middle javascript');")
(deftest cte-propagate-test
  (testing "Transitive references are tracked to their source when replacing columns in queries with CTEs."
    (is (= (str/replace cte-query "favorite_language" "first_language")
           (m/replace-names cte-query
                            {:columns {{:table "employees", :column "favorite_language"} "first_language"}})))))
(deftest sub-select-propagate-test
  (testing "Transitive references are tracked to their source when replacing columns in queries with sub-selects."
    (is (= (str/replace sub-select-query "favorite_language" "first_language")
           (m/replace-names sub-select-query
                            {:columns {{:table "employees", :column "favorite_language"} "first_language"}})))))

A transformation to help write tests, where hawk would face limitations on predicates within sets.

(defn sorted
  [element-set]
  (sort-by (comp (juxt :schema :table :column :scope) #(:component % %)) element-set))
(deftest count-field-test
  (testing "COUNT(*) does not actually read any columns"
    (is (empty? (columns "SELECT COUNT(*) FROM users")))
    (is (false? (has-wildcard? "SELECT COUNT(*) FROM users")))
    (is (empty? (table-wcs "SELECT COUNT(*) FROM users"))))
  (testing "COUNT(1) does not actually read any columns"
    (is (empty? (columns "SELECT COUNT(1) FROM users")))
    (is (false? (has-wildcard? "SELECT COUNT(1) FROM users")))
    (is (empty? (table-wcs "SELECT COUNT(1) FROM users"))))
  (testing "We do care about explicitly referenced fields in a COUNT"
    (is (= #{{:table "users" :column "id"}}
           (source-columns "SELECT COUNT(id) FROM users"))))
  (testing "We do care about deeply referenced fields in a COUNT however"
    (is (= #{{:table "users" :column "id"}}
           (source-columns "SELECT COUNT(DISTINCT(id)) FROM users")))))
(deftest reserved-word-test
  (testing "We can opt-out of reserving specific keywords"
    (is (= #{{:schema "serial" :table "limit" :column "final"}}
           (source-columns "SELECT limit.final FROM serial.limit" :non-reserved-words [:final :serial :limit]))))
  (testing "We can replace with and from non-reserved keywords"
    (is (= "SELECT y FROM final"
           (m/replace-names "SELECT final FROM x"
                            {:tables  {{:table "x"} "final"}
                             :columns {{:table "x" :column "final"} "y"}}
                            {:non-reserved-words [:final]})))))
(deftest square-bracket-test
  (testing "We can opt into allowing square brackets to quote things"
    (is (=? {:tables  #{{:schema "s" :table "t"}}
             :columns #{{:schema "s" :table "t" :column "f"}}}
            (update-vals
             (components "SELECT [f] FROM [s].[t]"
                         {:features              {:square-bracket-quotes true}
                          :preserve-identifiers? false})
             raw-components)))))
(comment
  (require 'user) ;; kondo, really
  (require '[clj-async-profiler.core :as prof])
  (prof/serve-ui 8080)
  (defn- simple-benchmark []
    (source-columns "SELECT x FROM t"))
  (defn- complex-benchmark []
    (count
     (source-columns
      (query-fixture :snowflake))))
  (user/time+ (simple-benchmark))
  (prof/profile {:event :alloc}
                (dotimes [_ 1000] (simple-benchmark)))
  (user/time+ (complex-benchmark))
  (prof/profile {:event :alloc}
                (dotimes [_ 100] (complex-benchmark)))
  (anonymize-query "SELECT x FROM a")
  (anonymize-fixture :snowflakelet)
  (require 'virgil)
  (require 'clojure.tools.namespace.repl)
  (virgil/watch-and-recompile ["java"] :post-hook clojure.tools.namespace.repl/refresh-all))
 
(ns macaw.test.util
  (:require
   [clojure.string :as str]
   [clojure.test :refer :all]
   [clojure.walk :as walk]))
(defn- indentation [s]
  (count (re-find #"^\s*" s)))
(defn- trim-indent* [margin s]
  (if (< (count s) margin)
    (subs s margin)))

Given a multi-line string, remove the common margin from the remaining lines. Used so that strings with significant whitespace may be visually aligned.

(defn trim-indent
  [s]
  (let [lines  (str/split-lines s)
        margin (->> (rest lines)
                    (remove str/blank?)
                    (transduce (map indentation) min Integer/MAX_VALUE))]
    (str/join "\n" (cons (first lines) (map (partial trim-indent* margin) (rest lines))))))

Trim the extra indentation from all string literals before evaluation a given equality form.

(defmacro ws=
  [& xs]
  `(= ~@(walk/postwalk #(cond-> % (string? %) trim-indent) xs)))
 
(ns macaw.test.util-test
  (:require
   [clojure.string :as str]
   [clojure.test :refer :all]
   [macaw.test.util :refer [ws=]]))
(deftest ^:parallel ws=-test
  (testing "Code indentation is ignored"
    (is (ws= "ABC
                DEF
              XXX"
             (str/replace "ABC
                             DEF
                           GHI"
                          "GHI" "XXX"))))
  (testing "Comparison is still whitespace sensitive"
    (is (not (ws= "A    B" "A B")))
    (is (not (ws= "A
                   B
                   C"
                  "A
                    B
                   C")))))
 
(ns macaw.util-test
  (:require
   [clojure.test :refer :all]
   [macaw.util :as u]))
(def ^:private haystack
  {{:a 3 :b 2 :c 1}   2
   {:a 3 :b nil :c 1} 3
   {:a 1 :b 2 :c 3}   1})
(deftest ^:parallel relevant-find-test
  (testing "We ignore any suffix of degenerate keys"
    (doseq [x [{:a 1}
               {:a 1 :b nil}
               {:a 1 :b 2}
               {:a 1 :b 2 :c nil}
               {:a 1 :b 2 :c 3}
               {:a 1 :b 2 :c 3 :d 4}]]
      (is (= [{:a 1 :b 2 :c 3} 1]
             (u/find-relevant haystack x [:a :b :c])))))
  (testing "We need at least one non-degenerate key"
    (is (nil? (u/find-relevant haystack {} [:a :b :c]))))
  (testing "We don't ignore non-suffix degenerate keys"
    (doseq [x [{:a nil :b 2}
               {:a 1 :b nil :c 3}
               {:a nil :b 2 :c 3}]]
      (is (nil?
           (u/find-relevant haystack x [:a :b :c]))))))
 
(ns macaw.util.malli-test
  (:require
   [clojure.test :refer :all]
   [macaw.util.malli :as mu]))
(mu/defn enhance :- :int
  [x :- :int, fail-deliberately? :- :boolean]
  (if fail-deliberately?
    :poppycock
    (int (+ x 43))))
(deftest ^:parallel defn-good-in-good-out-test
  (is (= 50 (enhance 7 false))))
(deftest ^:parallel defn-good-in-bad-out-test
  (is (thrown-with-msg? Exception
                        #"Invalid output.*"
                        (enhance 7 true))))
(deftest ^:parallel defn-bad-in-good-out-test
  (is (thrown-with-msg? Exception
                        #"Invalid input.*"
                        (enhance 7.3 false))))
(deftest ^:parallel defn-bad-in-bad-out-test
  (is (thrown-with-msg? Exception
                        #"Invalid input.*"
                        (enhance 7.3 true))))