Macaw0.0.1-SNAPSHOTA Clojure wrapper for JSqlParser 🦜 | (this space intentionally left almost blank) |
(ns macaw.collect (:require [clojure.string :as str] [macaw.util :as u] [macaw.walk :as mw]) (:import (com.metabase.macaw AstWalker$Scope) (java.util.regex Pattern) (net.sf.jsqlparser.expression Alias) (net.sf.jsqlparser.schema Column Table) (net.sf.jsqlparser.statement Statement) (net.sf.jsqlparser.statement.select AllTableColumns SelectItem))) | |
(set! *warn-on-reflection* true) | |
(defn- conj-to ([key-name] (conj-to key-name identity)) ([key-name xf] (fn item-conjer [results component context] (update results key-name conj {:component (xf component) :context context})))) | |
(defn- query->raw-components [^Statement parsed-ast] (mw/fold-query parsed-ast {:alias (conj-to :aliases (fn [^SelectItem item] {:alias (.getName (.getAlias item)) :expression (.getExpression item)})) :column (conj-to :columns) :column-qualifier (conj-to :qualifiers) :mutation (conj-to :mutation-commands) :pseudo-table (conj-to :pseudo-tables) :table (conj-to :tables) :table-wildcard (conj-to :table-wildcards) :wildcard (conj-to :has-wildcard? (constantly true))} {:aliases #{} :columns #{} :has-wildcard? #{} :mutation-commands #{} :pseudo-tables #{} :tables #{} :table-wildcards #{}})) | |
tables | |
(def ^:private quotes (map str "`\"[")) | |
(def ^:private closing {"[" "]"}) | |
(defn- quoted? [s] (some (fn [q] (and (str/starts-with? s q) (str/ends-with? s (closing q q)))) quotes)) | |
(defn- strip-quotes [s] (subs s 1 (dec (count s)))) | |
(defn- setting->relax-case [{:keys [case-insensitive]}] (when case-insensitive (case case-insensitive :lower str/lower-case :upper str/upper-case ;; This will work for replace, but not for analyzing where we need a literal to accumulate :agnostic (fn [s] (re-pattern (str "(?i)" (Pattern/quote s))))))) | |
Normalize a schema, table, column, etc. references so that we can match them regardless of syntactic differences. | (defn normalize-reference [s {:keys [preserve-identifiers? quotes-preserve-case?] :as opts}] (if preserve-identifiers? s (when s (let [quoted (quoted? s) relax-case (when-not (and quotes-preserve-case? quoted) (setting->relax-case opts))] (cond-> s quoted strip-quotes relax-case relax-case))))) |
(defn- find-table [{:keys [alias->table name->table keep-internal-tables?] :as opts} ^Table t] (let [n (normalize-reference (.getName t) opts) schema (normalize-reference (.getSchemaName t) opts)] (or (get alias->table n) (:component (last (u/find-relevant name->table {:table n :schema schema} [:table :schema]))) (when keep-internal-tables? {:table n, :schema schema, :internal? true})))) | |
(defn- find-qualifier-table [opts ^Table q _ctx] (when-let [table (find-table opts q)] (cond-> table (:with-instance opts) (assoc :instances [q])))) | |
(defn- make-table [{:keys [with-instance qualifier? alias->table] :as opts} ^Table t _ctx] (if (and qualifier? (get alias->table (.getName t))) (get alias->table (.getName t)) (u/strip-nils {:table (normalize-reference (.getName t) opts) :schema (normalize-reference (.getSchemaName t) opts) :instances (when with-instance [t])}))) | |
(defn- alias-mapping [opts ^Table table ctx] (when-let [^Alias table-alias (.getAlias table)] [(.getName table-alias) (make-table opts table ctx)])) | |
JSQLParser can't tell whether the | (defn- resolve-table-name [opts ^AllTableColumns atc _ctx] (find-table opts (.getTable atc))) |
columns | |
(defn- maybe-column-table [alias? {:keys [name->table] :as opts} ^Column c] (if-let [t (.getTable c)] (find-table opts t) ;; if we see only a single table, we can safely say it's the table of that column (when (and (= (count name->table) 1) (not (alias? (.getColumnName c)))) (:component (val (first name->table)))))) | |
(defn- scope-type [^AstWalker$Scope s] (keyword (.getType s))) | |
(defn- make-column [aliases opts ^Column c ctx] (let [{:keys [schema table]} (maybe-column-table aliases opts c)] (u/strip-nils {:schema schema :table table :column (normalize-reference (.getColumnName c) opts) :alias (when-let [s (first ctx)] (when (= :alias (scope-type s)) (.getLabel ^AstWalker$Scope s))) :instances (when (:with-instance opts) [c])}))) | |
get them together | |
(defn- only-query-context [ctx] (filter #(= (scope-type %) :query) ctx)) | |
(def ^:private strip-non-query-contexts (map #(update % :context only-query-context))) | |
(defn- update-components ([f] (map #(update % :component f (:context %)))) ([f components] (eduction (update-components f) components))) | |
Merge two nodes, keeping the union of their instances. | (defn- merge-with-instances [a b] (let [cs-a (-> a :component :instances)] (cond-> (merge a b) cs-a (update-in [:component :instances] into cs-a)))) |
(defn- literal? [{:keys [column]}] ;; numbers and strings are already handled by JSQLParser (#{"true" "false"} (str/lower-case column))) | |
Remove any unqualified references that would resolve to an alias or qualified reference | (defn- remove-redundant-columns [alias? column-set] (let [{qualified true, unqualified false} (group-by (comp boolean :table) column-set) ;; Get all the bindings introduced by qualified columns has-qualification? (into #{} (mapcat #(keep % qualified)) [:column :alias])] (into qualified (remove (comp (some-fn alias? has-qualification?) :column)) unqualified))) |
If a column reference is qualified by a table that we don't know about, its probably
not a real field.
See the | (defn- remove-phantom-table-columns [table-map columns] (let [known-table-name? (into #{} (map (comp :table :component val)) table-map)] (filter (fn [{s :schema, t :table}] (or (nil? t) (some? s) (known-table-name? t))) columns))) |
(defn- infer-table-schema [columns node] (update node :component #(let [{:keys [schema table] :as element} %] (if schema element (if-let [schema' (->> columns (filter (comp #{table} :table)) (some :schema))] (assoc element :schema schema') element))))) | |
See macaw.core/query->components doc. | (defn query->components [^Statement parsed-ast & {:as opts}] (let [{:keys [aliases columns qualifiers has-wildcard? mutation-commands pseudo-tables tables table-wildcards]} (query->raw-components parsed-ast) alias-map (into {} (map #(-> % :component ((partial alias-mapping opts) (:context %))) tables)) ;; we're parsing qualifiers here for a single purpose - rewrite uses instances to find tables for renaming table-map (->> (update-components (partial make-table opts) tables) (u/group-with #(select-keys (:component %) [:schema :table]) merge-with-instances)) pseudo-table-names (into #{} (comp (map :component) (map (fn [^Alias a] (.getName a)))) pseudo-tables) table-map (into (empty table-map) (remove (comp pseudo-table-names :table :component val)) table-map) ;; we need both aliases and tables for columns opts (assoc opts :alias->table alias-map :name->table table-map) qualifier-map (->> (update-components (partial find-qualifier-table opts) qualifiers) (u/group-with #(select-keys (:component %) [:schema :table]) merge-with-instances)) alias? (into #{} (keep (comp :alias :component)) aliases) all-columns (into #{} (comp (update-components (partial make-column alias? (assoc opts :keep-internal-tables? true))) strip-non-query-contexts) columns) strip-alias (fn [c] (dissoc c :alias)) source-columns (->> (map :component all-columns) (remove-redundant-columns alias?) (remove literal?) (remove-phantom-table-columns table-map) (into #{} (comp (remove (comp pseudo-table-names :table)) (remove :internal?) (map strip-alias)))) table-map (update-vals table-map (partial infer-table-schema source-columns))] {:columns all-columns :source-columns source-columns ;; result-columns ... filter out the elements (and wildcards) in the top level scope only. :has-wildcard? (into #{} strip-non-query-contexts has-wildcard?) :mutation-commands (into #{} mutation-commands) :tables (into #{} (comp (map val) (remove (comp :internal? :component)) strip-non-query-contexts) table-map) :tables-superset (into #{} (comp (map val) strip-non-query-contexts) (merge-with merge-with-instances qualifier-map table-map)) :table-wildcards (into #{} (comp strip-non-query-contexts (update-components (partial resolve-table-name opts))) table-wildcards)})) |
(ns macaw.config (:require [clojure.string :as str] [environ.core :as env])) | |
Default configuration; could be overridden by Java properties, environment variables, etc. | (def ^:private app-defaults {:macaw-run-mode "prod"}) |
(defn- config-value [k] (let [env-val (get env/env k)] (or (when-not (str/blank? env-val) env-val) (get app-defaults k)))) | |
The mode (dev/test/prod) in which Macaw is being run. | (def run-mode (some-> :macaw-run-mode config-value keyword)) |
Is Macaw running in | (defn is-dev? [] (= run-mode :dev)) |
Is Macaw running in | (defn is-test? [] (= run-mode :test)) |
Is Macaw running in | (defn is-prod? [] (= run-mode :prod)) |
(ns macaw.core (:require [clojure.string :as str] [clojure.walk :as walk] [macaw.collect :as collect] [macaw.rewrite :as rewrite] [macaw.types :as m.types] [macaw.util.malli :as mu]) (:import (com.metabase.macaw AnalysisError AstWalker$Scope BasicTableExtractor CompoundTableExtractor) (java.util.function Consumer) (net.sf.jsqlparser JSQLParserException) (net.sf.jsqlparser.parser CCJSqlParser CCJSqlParserUtil) (net.sf.jsqlparser.parser.feature Feature) (net.sf.jsqlparser.schema Table) (net.sf.jsqlparser.statement Statement))) | |
(set! *warn-on-reflection* true) | |
(defn- escape-keywords ^String [sql keywords] (reduce (fn [sql k] (str/replace sql (re-pattern (str "(?i)\\b(" (name k) ")\\b")) "$1____escaped____")) sql keywords)) | |
(defn- unescape-keywords [sql _keywords] (str/replace sql "____escaped____" )) | |
(def ^:private features {:backslash-escape-char Feature/allowBackslashEscapeCharacter :complex-parsing Feature/allowComplexParsing :postgres-syntax Feature/allowPostgresSpecificSyntax :square-bracket-quotes Feature/allowSquareBracketQuotation :unsupported-statements Feature/allowUnsupportedStatements}) | |
(defn- ->Feature ^Feature [k] (get features k)) | |
(def ^:private default-timeout-seconds 5) | |
(defn- ->parser-fn ^Consumer [opts] (reify Consumer (accept [_this parser] (let [^long timeout-ms (:timeout opts (* default-timeout-seconds 1000))] (.withFeature ^CCJSqlParser parser Feature/timeOut timeout-ms)) (doseq [[f ^boolean v] (:features opts)] (.withFeature ^CCJSqlParser parser (->Feature f) v))))) | |
Main entry point: takes a string query and returns a | (defn parsed-query [^String query & {:as opts}] (try (-> query ;; Dialects like SQLite and Databricks treat consecutive blank lines as implicit semicolons. ;; JSQLParser, as a polyglot parser, always has this behavior, and there is no option to disable it. ;; This utility pre-processed the query to remove any such blank lines. (CCJSqlParserUtil/sanitizeSingleSql) (escape-keywords (:non-reserved-words opts)) (CCJSqlParserUtil/parse (->parser-fn opts))) (catch JSQLParserException e {:error :macaw.error/unable-to-parse :context {:cause e}}))) |
A unique identifier for the given scope. | (defn scope-id [^AstWalker$Scope s] (.getId s)) |
The type of scope we're talking about e.g., a top-level SELECT. | (defn scope-label [^AstWalker$Scope s] (.getLabel s)) |
(defn- ->macaw-error [^AnalysisError analysis-error] {:error (keyword "macaw.error" (-> (.-errorType analysis-error) str/lower-case (str/replace #"_" "-")))}) | |
(mu/defn query->components :- [:or m.types/error-result m.types/components-result] "Given a parsed query (i.e., a [subclass of] `Statement`) return a map with the elements found within it. (Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in the query'; this function doesn't do additional inference work to find out a table's schema.)" [parsed :- [:or m.types/error-result [:fn #(instance? Statement %)]] & {:as opts} :- [:maybe m.types/options-map]] ;; By default, we will preserve identifiers verbatim, to be agnostic of casing and quoting. ;; This may result in duplicate components, which are left to the caller to deduplicate. ;; In Metabase's case, this is done during the stage where the database metadata is queried. (try (if (map? parsed) parsed (->> (collect/query->components parsed (merge {:preserve-identifiers? true} opts)) (walk/postwalk (fn [x] (if (string? x) (unescape-keywords x (:non-reserved-words opts)) x))))) (catch AnalysisError e (->macaw-error e)))) | |
(defn- raw-components [xs] (into (empty xs) (keep :component) xs)) | |
Given a table object, return a map with the schema and table names. | (defn- table->identifier [^Table t] (if (.getSchemaName t) {:schema (.getSchemaName t) :table (.getName t)} {:table (.getName t)})) |
(defn- tables->identifiers [expr] {:tables (set (map table->identifier expr))}) | |
(mu/defn query->tables :- [:or m.types/error-result m.types/tables-result] "Given a parsed query (i.e., a [subclass of] `Statement`) return a set of all the table identifiers found within it." [sql :- :string & {:keys [mode] :as opts} :- [:maybe m.types/options-map]] (try (let [parsed (parsed-query sql opts)] (if (map? parsed) parsed (case mode :ast-walker-1 (-> (query->components parsed opts) :tables raw-components (->> (hash-map :tables))) :basic-select (-> (BasicTableExtractor/getTables parsed) tables->identifiers) :compound-select (-> (CompoundTableExtractor/getTables parsed) tables->identifiers)))) (catch AnalysisError e (->macaw-error e)))) | |
(mu/defn replace-names :- :string "Given an SQL query, apply the given table, column, and schema renames. Supported options: - case-insensitive: whether to relax the comparison - :upper - identifiers are implicitly case to uppercase, as per the SQL-92 standard. - :lower - identifiers are implicitly cast to lowercase, as per Postgres et al. - :agnostic - case is ignored when comparing identifiers in code to replacement \"from\" strings. - quotes-preserve-case: whether quoted identifiers should override the previous option." [sql :- :string renames :- :map & {:as opts} :- [:maybe m.types/options-map]] ;; We need to pre-sanitize the SQL before its analyzed so that the AST token positions match up correctly. ;; Currently, we use a more complex and expensive sanitization method, so that it's reversible. ;; If we decide that it's OK to normalize whitespace etc. during replacement, then we can use the same helper. (let [sql' (-> (str/replace sql #"(?m)^\n" " \n") (escape-keywords (:non-reserved-words opts))) opts' (select-keys opts [:case-insensitive :quotes-preserve-case? :allow-unused?]) renames' (walk/postwalk (fn [x] (if (string? x) (escape-keywords x (:non-reserved-words opts)) x)) renames) parsed (parsed-query sql' opts)] (-> (rewrite/replace-names sql' parsed renames' opts') (str/replace #"(?m)^ \n" "\n") (unescape-keywords (:non-reserved-words opts))))) | |
(ns macaw.rewrite (:require [macaw.collect :as collect] [macaw.util :as u] [macaw.walk :as mw]) (:import (net.sf.jsqlparser.parser ASTNodeAccess SimpleNode) (net.sf.jsqlparser.schema Column Table))) | |
(set! *warn-on-reflection* true) | |
(defn- index-of-nth [^String haystack ^String needle n] (assert (not (neg? n))) (if (zero? n) -1 (loop [n n idx 0] (let [next-id (.indexOf haystack needle idx)] (cond (= 1 n) next-id (neg? next-id) next-id :else (recur (dec n) (inc next-id))))))) | |
(defn- ->idx [^String sql line col] ;; The second `dec` on `line` is a workaround for what appears to be an off-by-one error in this JSQLParser version. (+ col (index-of-nth sql "\n" (dec (dec line))))) | |
Find the start and end index of the underlying tokens for a given AST node from a given SQL string. | (defn- node->idx-range [^SimpleNode node sql] (let [first-token (.jjtGetFirstToken node) last-token (.jjtGetLastToken node) first-idx (->idx sql (.-beginLine first-token) (.-beginColumn first-token)) last-idx (->idx sql (.-endLine last-token) (.-endColumn last-token))] [first-idx last-idx])) |
(defn- splice-replacements [^String sql replacements] (let [sb (StringBuilder.) append #(.append sb %)] (loop [start 0 [[[first-idx last-idx] value] & rst] replacements] (if (nil? last-idx) (when (< start (count sql)) (append (.substring sql start))) (do (append (.substring sql start first-idx)) (append value) (recur (inc ^long last-idx) rst)))) (str sb))) | |
Emit a SQL string for an updated AST, preserving the comments and whitespace from the original SQL. | (defn- update-query [updated-ast updated-nodes sql & {:as _opts}] (let [updated-node? (set (map first updated-nodes)) replacement (fn [->text visitable] (let [ast-node (.getASTNode ^ASTNodeAccess visitable) idx-range (node->idx-range ast-node sql) node-text (->text visitable)] [idx-range node-text])) replace-name (fn [->text] (fn [acc visitable _ctx] (cond-> acc (updated-node? visitable) (conj (replacement ->text visitable)))))] (splice-replacements sql (mw/fold-query updated-ast {:table (replace-name #(let [fqn (.getFullyQualifiedName ^Table %) t-alias (.getAlias ^Table %)] (if t-alias (str fqn " " (.getName t-alias)) fqn))) :column (replace-name #(.getFullyQualifiedName ^Column %))} [])))) |
(defn- rename-table [updated-nodes table-renames schema-renames known-tables opts ^Table t _ctx] (when-let [rename (u/find-relevant table-renames (get known-tables t) [:table :schema])] ;; Handle both raw string renames, as well as more precise element based ones. (vswap! updated-nodes conj [t rename]) (let [identifier (as-> (val rename) % (:table % %))] (.setName t identifier))) (let [raw-schema-name (.getSchemaName t) schema-name (collect/normalize-reference raw-schema-name opts)] (when-let [schema-rename (u/seek (comp (partial u/match-component schema-name) key) schema-renames)] (vswap! updated-nodes conj [raw-schema-name schema-rename]) (let [identifier (as-> (val schema-rename) % (:table % %))] (.setSchemaName t identifier))))) | |
(defn- rename-column [updated-nodes column-renames known-columns ^Column c _ctx] (when-let [rename (u/find-relevant column-renames (get known-columns c) [:column :table :schema])] ;; Handle both raw string renames, as well as more precise element based ones. (vswap! updated-nodes conj [c rename]) (let [identifier (as-> (val rename) % (:column % %))] (.setColumnName c identifier)))) | |
(defn- alert-unused! [updated-nodes renames] (let [known-rename? (set (map second updated-nodes))] (doseq [[k items] renames] (when-let [unknown (first (remove known-rename? items))] (throw (ex-info (str "Unknown rename: " unknown) {:type k :rename unknown})))))) | |
(defn- index-by-instances [xs] (into {} (for [x xs :let [c (:component x)] i (:instances c)] [i c]))) | |
Given a SQL query and its corresponding (untransformed) AST, apply the given table and column renames. | (defn replace-names [sql parsed-ast renames & {:as opts}] (let [{schema-renames :schemas table-renames :tables column-renames :columns} renames comps (collect/query->components parsed-ast (assoc opts :with-instance true)) columns (index-by-instances (:columns comps)) tables (index-by-instances (:tables-superset comps)) ;; execute rename updated-nodes (volatile! []) rename-table* (partial rename-table updated-nodes table-renames schema-renames tables opts) rename-column* (partial rename-column updated-nodes column-renames columns) res (-> parsed-ast (mw/walk-query {:table rename-table* :column-qualifier rename-table* :column rename-column*}) (update-query @updated-nodes sql opts))] (when-not (:allow-unused? opts) (alert-unused! @updated-nodes renames)) res)) |
(ns macaw.types) | |
The different analyzer strategies that Macaw supports. | (def modes [:ast-walker-1 :basic-select :compound-select]) |
The shape of the options accepted by our API | (def options-map [:map [:mode {:optional true} (into [:enum] modes)] [:non-reserved-words {:optional true} [:seqable :keyword]] [:allow-unused? {:optional true} :boolean] [:case-insensitive {:optional true} [:enum :upper :lower :agnostic]] [:quotes-preserve-case? {:optional true} :boolean]]) |
The different types of errors that Macaw can return. | (def error-types [:macaw.error/analysis-error :macaw.error/illegal-expression :macaw.error/invalid-query :macaw.error/unable-to-parse :macaw.error/unsupported-expression]) |
A map indicating that we were not able to parse the query. | (def error-result [:map [:error (into [:enum] error-types)]]) |
(def ^:private table-ident [:map [:schema {:optional true} :string] [:table :string]]) | |
(def ^:private column-ident [:map [:schema {:optional true} :string] [:table {:optional true} :string] [:column :string]]) | |
(defn- with-context [t] [:map [:component t] [:context :any]]) | |
A map holding all the components that we were able to parse from a query | (def components-result [:map {:closed true} [:tables [:set (with-context table-ident)]] [:columns [:set (with-context column-ident)]] [:source-columns [:set column-ident]] ;; TODO Unclear why we would want to wrap any of these. [:table-wildcards [:set (with-context table-ident)]] ;; This :maybe would be a problem, if anything actually used this value. [:tables-superset [:set (with-context [:maybe table-ident])]] ;; Unclear why we need a collection here [:has-wildcard? [:set (with-context :boolean)]] [:mutation-commands [:set (with-context :string)]]]) |
A map holding the tables that we were able to parse from a query | (def tables-result [:map [:tables [:set table-ident]]]) |
(ns macaw.util (:require [clojure.string :as str]) (:import (java.util.regex Pattern))) | |
Generalized https://ask.clojure.org/index.php/12319/can-group-by-be-generalized | (defn group-with [kf rf coll] (persistent! (reduce (fn [ret x] (let [k (kf x)] (assoc! ret k (rf (get ret k) x)))) (transient {}) coll))) |
Like (first (filter ... )), but doesn't realize chunks of the sequence. Returns the first item in | (defn seek [pred coll] (reduce (fn [acc x] (if (pred x) (reduced x) acc)) nil coll)) |
A hack around the fact that we don't (yet) track what columns are exposed by given sentinels. | (defn non-sentinel [s] (when s (nil? (str/index-of s "_sentinel_")))) |
Check whether the given literal matches the expected literal or pattern. | (defn match-component [expected actual] (when expected (if (instance? Pattern expected) (boolean (re-find expected actual)) (= expected actual)))) |
(defn- match-prefix [element ks-prefix] (let [expected (map element ks-prefix)] (fn [entry] (every? true? (map match-component expected (map (key entry) ks-prefix)))))) | |
Search the given map for the entry corresponding to [[element]], considering only the relevant keys. The relevant keys are obtained by ignoring any suffix of [[ks]] for which [[element]] has nil or missing values. We require that there is at least one relevant key to find a match. | (defn find-relevant [m element ks] (when element ;; Strip off keys from right-to-left where they are nil, and relax search to only consider these keys. ;; We need at least one non-generate key to remain for the search. ;; NOTE: we could optimize away calling `non-sentinel` twice in this function, but for now just keeping it simple. (when-let [ks-prefix (->> ks reverse (drop-while (comp not non-sentinel element)) reverse seq)] (seek (match-prefix element ks-prefix) m)))) |
(def ^:private nil-val? (comp nil? val)) | |
Remove any keys corresponding to nil values from the given map. | (defn strip-nils [m] (if (some nil-val? m) (with-meta (into {} (remove nil-val?) m) (meta m)) m)) |
(ns macaw.util.malli (:refer-clojure :exclude [defn defn-]) (:require [macaw.util.malli.defn :as mu.defn] [potemkin :as p])) | |
(p/import-vars [mu.defn defn defn-]) | |
(ns macaw.util.malli.defn (:refer-clojure :exclude [defn defn-]) (:require [clojure.core :as core] [clojure.string :as str] [macaw.util.malli.fn :as mu.fn] [malli.destructure])) | |
(set! *warn-on-reflection* true) | |
TODO -- this should generate type hints from the schemas and from the return type as well. | (core/defn- deparameterized-arglist [{:keys [args]}] (-> (malli.destructure/parse args) :arglist (with-meta (let [args-meta (meta args) tag (:tag args-meta) resolved-tag (when (symbol? tag) (let [resolved (ns-resolve *ns* tag)] (when (class? resolved) (symbol (.getName ^Class resolved)))))] (cond-> args-meta resolved-tag (assoc :tag resolved-tag)))))) |
(core/defn- deparameterized-arglists [{:keys [arities], :as _parsed}] (let [[arities-type arities-value] arities] (case arities-type :single (list (deparameterized-arglist arities-value)) :multiple (map deparameterized-arglist (:arities arities-value))))) | |
Generate a docstring with additional information about inputs and return type using a parsed fn tail (as parsed by [[mx/SchematizedParams]]). | (core/defn- annotated-docstring [{original-docstring :doc [arities-type arities-value] :arities :keys [return] :as _parsed}] (str/trim (str "Inputs: " (case arities-type :single (pr-str (:args arities-value)) :multiple (str "(" (str/join "\n " (map (comp pr-str :args) (:arities arities-value))) ")")) "\n Return: " (str/replace (:schema return :any) ; used to be a pprint "\n" (str "\n ")) (when (not-empty original-docstring) (str "\n\n " original-docstring))))) |
Implementation of [[metabase.util.malli/defn]] taken from Metabase. Like [[schema.core/defn]], but for Malli. See notes/justification in the main Metabase repo. | (defmacro defn [& [fn-name :as fn-tail]] (let [parsed (mu.fn/parse-fn-tail fn-tail) cosmetic-name (gensym (munge (str fn-name))) {attr-map :meta} parsed attr-map (merge {:arglists (list 'quote (deparameterized-arglists parsed)) :schema (mu.fn/fn-schema parsed {:target :target/metadata})} attr-map) docstring (annotated-docstring parsed) instrument? (mu.fn/instrument-ns? *ns*)] (if-not instrument? `(def ~(vary-meta fn-name merge attr-map) ~docstring ~(mu.fn/deparameterized-fn-form parsed cosmetic-name)) `(def ~(vary-meta fn-name merge attr-map) ~docstring ~(let [error-context {:fn-name (list 'quote fn-name)}] (mu.fn/instrumented-fn-form error-context parsed cosmetic-name)))))) |
Same as defn, but creates a private def. | (defmacro defn- [fn-name & fn-tail] `(defn ~(with-meta fn-name (assoc (meta fn-name) :private true)) ~@fn-tail)) |
(ns macaw.util.malli.fn (:refer-clojure :exclude [fn namespace]) (:require [clojure.core :as core] [macaw.config :as config] [macaw.util.malli.registry :as mr] [malli.core :as mc] [malli.destructure :as md] [malli.error :as me])) | |
(set! *warn-on-reflection* true) | |
Malli normally generates wacky default schemas when you use destructuring in an argslist; this never seems to work correctly, so just add default schemas manually to circumvent Malli's weird behavior. (add-default-schemas '[x {:keys [y]}]) ;; => [x {:keys [y]} :- [:maybe :map]] | (defn- add-default-schemas [args] (if (empty? args) args (loop [acc [], [x & [y z :as more]] args] (let [schema (when (= y :-) z) more (if schema (drop 2 more) more) schema (cond schema schema (and (or (map? x) (sequential? x)) (= (last acc) '&)) [:* :any] (map? x) [:maybe :map] (sequential? x) [:maybe [:sequential :any]]) acc (concat acc (if schema [x :- schema] [x]))] (if (seq more) (recur acc more) acc))))) |
Given a | (defn- arity-schema [{:keys [args], :as _arity} return-schema {:keys [target], :as _options}] (let [parsed (md/parse (add-default-schemas args)) varargs-info (get-in parsed [:parsed :rest :arg :arg]) varargs-type (cond (= (first varargs-info) :map) :varargs/map (seq varargs-info) :varargs/sequential) schema (case target :target/metadata (if (= varargs-type :varargs/map) (vec (concat (butlast (:schema parsed)) [[:* :any]])) (:schema parsed)) :target/instrumentation (:schema parsed))] [:=> (cond-> schema varargs-type (vary-meta assoc :varargs/type varargs-type)) return-schema])) |
This is exactly the same as [[malli.experimental/SchematizedParams]], but it preserves metadata from the arglists. | (def ^:private SchematizedParams (mc/schema [:schema {:registry {"Schema" any? "Separator" [:= :-] "Args" vector? ; [:vector :any] loses metadata, but vector? keeps it :shrug: "PrePost" [:map [:pre {:optional true} [:sequential any?]] [:post {:optional true} [:sequential any?]]] "Arity" [:catn [:args "Args"] [:prepost [:? "PrePost"]] [:body [:* :any]]] "Params" [:catn [:name symbol?] [:return [:? [:catn [:- "Separator"] [:schema "Schema"]]]] [:doc [:? string?]] [:meta [:? :map]] [:arities [:altn [:single "Arity"] [:multiple [:catn [:arities [:+ [:schema "Arity"]]] [:meta [:? :map]]]]]]]}} "Params"])) |
(def ^:private ^{:arglists '([fn-tail])} parse-SchematizedParams (mc/parser SchematizedParams)) | |
Parse a parameterized | (defn parse-fn-tail [fn-tail] (let [parsed (parse-SchematizedParams (if (symbol? (first fn-tail)) fn-tail (cons '&f fn-tail)))] (when (= parsed ::mc/invalid) (let [error (mc/explain SchematizedParams fn-tail) humanized (me/humanize error)] (throw (ex-info (format "Invalid function tail: %s" humanized) {:fn-tail fn-tail :error error :humanized humanized})))) parsed)) |
Implementation for [[fn]] and [[macaw.util.malli.defn/defn]]. Given an unparsed parametered fn tail, extract the
annotations and return a
| (defn fn-schema ([parsed] (fn-schema parsed {:target :target/instrumentation})) ([parsed options] (let [{:keys [return arities]} parsed return-schema (:schema return :any) [arities-type arities-value] arities] (case arities-type :single (arity-schema arities-value return-schema options) :multiple (into [:function] (for [arity (:arities arities-value)] (arity-schema arity return-schema options))))))) |
(defn- deparameterized-arity [{:keys [body args prepost], :as _arity}] (concat [(:arglist (md/parse args))] (when prepost [prepost]) body)) | |
Generate a deparameterized | (defn deparameterized-fn-tail [{[arities-type arities-value] :arities, :as _parsed}] (let [body (case arities-type :single (deparameterized-arity arities-value) :multiple (for [arity (:arities arities-value)] (deparameterized-arity arity)))] body)) |
Impl for [[macaw.util.malli.fn/fn]] and [[macaw.util.malli.defn/defn]]. Given a parsed (deparameterized-fn-form (parse-fn-tail '[:- :int [x :- :int] (inc x)])) ;; => (fn [x] (inc x)) | (defn deparameterized-fn-form [parsed & [fn-name]] `(core/fn ~@(when fn-name [fn-name]) ~@(deparameterized-fn-tail parsed))) |
Whether [[validate-input]] and [[validate-output]] should validate things or not. | (def ^:dynamic *enforce* true) |
(defn- validate [error-context schema value error-type] (when *enforce* (when-let [error (mr/explain schema value)] (let [humanized (me/humanize error {:wrap (core/fn humanize-include-value [{:keys [value message]}] (str message ", got: " (pr-str value)))}) details (merge {:type error-type :error error :humanized humanized :schema schema :value value} error-context)] (when-not (config/is-prod?) (throw (ex-info (case error-type ::invalid-input (format "Invalid input: %s" (pr-str humanized)) ::invalid-output (format "Invalid output: %s" (pr-str humanized))) details))))))) | |
Impl for [[macaw.util.malli.fn/fn]]; validates an input argument with | (defn validate-input [error-context schema value] (validate error-context schema value ::invalid-input)) |
Impl for [[macaw.util.malli.fn/fn]]; validates function output | (defn validate-output [error-context schema value] (validate error-context schema value ::invalid-output) value) |
(defn- varargs-type [input-schema] (-> input-schema meta :varargs/type)) | |
(defn- input-schema-arg-names [[_cat & args :as input-schema]] (let [the-varargs-type (varargs-type input-schema) normal-args (if the-varargs-type (butlast args) args)] (concat (for [n (range (count normal-args))] (symbol (str (char (+ (int \a) n))))) (case the-varargs-type :varargs/sequential ['more] :varargs/map ['kvs] nil)))) | |
(defn- input-schema->arglist [input-schema] (let [arg-names (input-schema-arg-names input-schema)] (vec (if-let [the-varargs-type (varargs-type input-schema)] (concat (butlast arg-names) ['& (case the-varargs-type :varargs/sequential (last arg-names) :varargs/map {:as (last arg-names)})]) arg-names)))) | |
(defn- input-schema->validation-forms [error-context [_cat & schemas :as input-schema]] (let [arg-names (input-schema-arg-names input-schema) schemas (if (= (varargs-type input-schema) :varargs/sequential) (concat (butlast schemas) [[:maybe (last schemas)]]) schemas)] (->> (map (core/fn [arg-name schema] ;; 1. Skip checks against `:any` schema, there is no situation where it would fail. ;; ;; 2. Skip checks against the default varargs schemas, there is no situation where [:maybe [:* :any]] is ;; going to fail. (when-not (= schema (condp = arg-name 'more [:maybe [:* :any]] 'kvs [:* :any] :any)) `(validate-input ~error-context ~schema ~arg-name))) arg-names schemas) (filter some?)))) | |
(defn- input-schema->application-form [input-schema] (let [arg-names (input-schema-arg-names input-schema)] (if (= (varargs-type input-schema) :varargs/sequential) (list* `apply '&f arg-names) (list* '&f arg-names)))) | |
If exception is thrown from the [[validate]] machinery, remove those stack trace elements so the top of the stack is the calling function. | (defn fixup-stacktrace [^Exception e] (if (#{::invalid-input ::invalid-output} (-> e ex-data :type)) (let [trace (.getStackTrace e) cleaned (when trace (into-array StackTraceElement (drop-while (comp #{(.getName (class validate)) (.getName (class validate-input)) (.getName (class validate-output))} #(.getClassName ^StackTraceElement %)) trace)))] (doto e (.setStackTrace cleaned))) e)) |
(defn- instrumented-arity [error-context [_=> input-schema output-schema]] (let [input-schema (if (= input-schema :cat) [:cat] input-schema) arglist (input-schema->arglist input-schema) input-validation-forms (input-schema->validation-forms error-context input-schema) result-form (input-schema->application-form input-schema) result-form (if (and output-schema (not= output-schema :any)) `(->> ~result-form (validate-output ~error-context ~output-schema)) result-form)] `(~arglist (try ~@input-validation-forms ~result-form (catch Exception ~'error (throw (fixup-stacktrace ~'error))))))) | |
(defn- instrumented-fn-tail [error-context [schema-type :as schema]] (case schema-type :=> [(instrumented-arity error-context schema)] :function (let [[_function & schemas] schema] (for [schema schemas] (instrumented-arity error-context schema))))) | |
Given a ([x :- :int y] (+ 1 2)) and parsed by [[parsed-fn-tail]], return an unevaluated instrumented [[fn]] form like (mc/-instrument {:schema [:=> [:cat :int :any] :any]} (fn [x y] (+ 1 2))) | (defn instrumented-fn-form [error-context parsed & [fn-name]] `(let [~'&f ~(deparameterized-fn-form parsed fn-name)] (core/fn ~@(instrumented-fn-tail error-context (fn-schema parsed))))) |
------------------------------ Skipping Namespace Enforcement in prod ------------------------------ | |
Returns true if mu.fn/fn and mu/defn in a namespace should be instrumented with malli schema validation. | (defn instrument-ns? [namespace] (or (true? (:instrument/always (meta namespace))) (not (config/is-prod?)))) |
Malli version of [[schema.core/fn]]. Unless it's in a skipped namespace during prod, a form like: (fn :- :int [x :- :int] (inc x)) compiles to something like (let [&f (fn [x] (inc x))] (fn [a] (validate-input {} :int a) (validate-output {} :int (&f a)))) The map arg here is additional error context; for something like [[macaw.util.malli/defn]], it will be something like {:fn-name 'metabase.lib.field/resolve-field-id} If compiled in a namespace in [[namespaces-toskip]], during Known issue: this version of (mu/fn my-fn ([x] (my-fn x 1)) ([x y :- :int] (+ x y))) If we were to include (let [&f (fn my-fn ([x] (my-fn x 1)) ([x y] (+ x y)))] (fn ([a] (&f a)) ([a b] (validate-input {} :int b) (&f a b)))) ;; skips the Since this is a big gotcha, we are currently not including the optional function name | (defmacro fn [& fn-tail] (let [parsed (parse-fn-tail fn-tail) instrument? (instrument-ns? *ns*)] (if-not instrument? (deparameterized-fn-form parsed) (let [error-context (if (symbol? (first fn-tail)) ;; We want the quoted symbol of first fn-tail: {:fn-name (list 'quote (first fn-tail))} {})] (instrumented-fn-form error-context parsed))))) |
(ns macaw.util.malli.registry (:refer-clojure :exclude [declare def type]) (:require [malli.core :as mc] [malli.experimental.time :as malli.time] [malli.registry] [malli.util :as mut])) | |
(defonce ^:private cache (atom {})) | |
Get a cached value for You generally shouldn't use this outside of this namespace unless you have a really good reason to do so! Make sure you used namespaced keys if you are using it elsewhere. | (defn cached [k schema value-thunk] (or (get (get @cache k) schema) ; get-in is terribly inefficient (let [v (value-thunk)] (swap! cache assoc-in [k schema] v) v))) |
Fetch a cached [[mc/validator]] for | (defn validator [schema] (cached :validator schema #(mc/validator schema))) |
[[mc/validate]], but uses a cached validator from [[validator]]. | (defn validate [schema value] ((validator schema) value)) |
Fetch a cached [[mc/explainer]] for | (defn explainer [schema] (letfn [(make-explainer [] #_{:clj-kondo/ignore [:discouraged-var]} (let [validator* (mc/validator schema) explainer* (mc/explainer schema)] ;; for valid values, it's significantly faster to just call the validator. Let's optimize for the 99.9% ;; of calls whose values are valid. (fn schema-explainer [value] (when-not (validator* value) (explainer* value)))))] (cached :explainer schema make-explainer))) |
[[mc/explain]], but uses a cached explainer from [[explainer]]. | (defn explain [schema value] ((explainer schema) value)) |
(defonce ^:private registry* (atom (merge (mc/default-schemas) (mut/schemas) (malli.time/schemas)))) | |
(defonce ^:private registry (malli.registry/mutable-registry registry*)) | |
Register a spec with our Malli spec registry. | (defn register! [schema definition] (swap! registry* assoc schema definition) (reset! cache {}) nil) |
Get the Malli schema for | (defn schema [type] (malli.registry/schema registry type)) |
Add a | (defn -with-doc [the-schema docstring] (cond (and (vector? the-schema) (map? (second the-schema))) (let [[tag opts & args] the-schema] (into [tag (assoc opts :doc/message docstring)] args)) (vector? the-schema) (let [[tag & args] the-schema] (into [tag {:doc/message docstring}] args)) :else [:schema {:doc/message docstring} the-schema])) |
Like [[clojure.spec.alpha/def]]; add a Malli schema to our registry. | (defmacro def ([type the-schema] `(register! ~type ~the-schema)) ([type docstring the-schema] `(macaw.util.malli.registry/def ~type (-with-doc ~the-schema ~docstring)))) |
For REPL/test usage: get the definition of a registered schema from the registry. | (defn resolve-schema [the-schema] (mc/deref-all (mc/schema the-schema))) |
(ns macaw.walk (:import (com.metabase.macaw AstWalker AstWalker$CallbackKey))) | |
(set! *warn-on-reflection* true) | |
keyword->key map for the AST-folding callbacks. | (def ->callback-key ;; TODO: Move this to a Malli schema to simplify the indirection {:alias AstWalker$CallbackKey/ALIAS :column AstWalker$CallbackKey/COLUMN :column-qualifier AstWalker$CallbackKey/COLUMN_QUALIFIER :mutation AstWalker$CallbackKey/MUTATION_COMMAND :pseudo-table AstWalker$CallbackKey/PSEUDO_TABLES :table AstWalker$CallbackKey/TABLE :table-wildcard AstWalker$CallbackKey/ALL_TABLE_COLUMNS :wildcard AstWalker$CallbackKey/ALL_COLUMNS}) |
Lift a side effecting callback so that it preserves the accumulator. | (defn- preserve [f] (fn [acc & args] (apply f args) acc)) |
work around ast walker repeatedly visiting the same expressions (bug ?!) | (defn- deduplicate-visits [f] (let [seen (volatile! #{})] (fn [& [acc visitable & _ :as args]] (if (contains? @seen visitable) acc (do (vswap! seen conj visitable) (apply f args)))))) |
(defn- update-keys-vals [m key-f val-f] (let [ret (persistent! (reduce-kv (fn [acc k v] (assoc! acc (key-f k) (val-f v))) (transient {}) m))] (with-meta ret (meta m)))) | |
Walk over the query's AST, using the callbacks for their side effects, for example to mutate the AST itself. | (defn walk-query [parsed-query callbacks] (let [callbacks (update-keys-vals callbacks ->callback-key (comp deduplicate-visits preserve))] (.walk (AstWalker. callbacks ::ignored) parsed-query))) |
Fold over the query's AST, using the callbacks to update the accumulator. | (defn fold-query [parsed-query callbacks init-val] (let [callbacks (update-keys-vals callbacks ->callback-key deduplicate-visits)] (.fold (AstWalker. callbacks init-val) parsed-query))) |
(ns macaw.acceptance-test (:require [clojure.java.io :as io] [clojure.set :as set] [clojure.string :as str] [clojure.test :refer :all] [macaw.core :as m] [macaw.core-test :as ct] [macaw.types]) (:import (java.io File))) | |
(set! *warn-on-reflection* true) | |
(defn- fixture-analysis [fixture] (some-> fixture (ct/fixture->filename "acceptance" ".analysis.edn") io/resource slurp read-string)) | |
(defn- fixture-renames [fixture] (some-> fixture (ct/fixture->filename "acceptance" ".renames.edn") io/resource slurp read-string)) | |
(defn- fixture-rewritten [fixture] (some-> fixture (ct/fixture->filename "acceptance" ".rewritten.sql") io/resource slurp str/trim)) | |
(defn- get-component [cs k] (case k :source-columns (get cs k) :columns-with-scope (ct/contexts->scopes (get cs :columns)) (ct/raw-components (get cs k)))) | |
(def ^:private test-modes (set macaw.types/modes)) | |
(def override-hierarchy (-> (make-hierarchy) (derive :basic-select :select-only) (derive :compound-select :select-only))) | |
(defn- lineage [h k] (when k (assert (<= (count (parents h k)) 1) "Multiple inheritance not supported for override hierarchy.") (cons k (lineage h (first (parents h k)))))) | |
(def global-overrides {}) | |
(def ns-overrides {:select-only {"mutation" :macaw.error/invalid-query "dynamic" :macaw.error/invalid-query} :basic-select {"compound" :macaw.error/unsupported-expression}}) | |
(def ^:private merged-fixtures-file "test/resources/acceptance/queries.sql") | |
TODO generically detect queries. | (def ^:private dynamic-fixtures-file "test/resources/acceptance/queries.dynamic.sql") |
(defn- read-merged [file] (->> (str/split (slurp file) #"-- FIXTURE: ") (keep (fn [named-query] (when-not (str/blank? named-query) (let [[nm qry] (.split ^String named-query "\n" 2)] [(keyword nm) (str/trim qry)])))) (into {}))) | |
The fixtures in merged fixtures file, mapped by their identifiers. | (defn- merged-fixtures [] (merge (read-merged merged-fixtures-file) (update-keys (read-merged dynamic-fixtures-file) #(keyword "dynamic" (name %))))) |
(defn- validate-analysis [correct override actual] (let [expected (or override correct)] (when override (testing "Override is still needed" (if (and (vector? correct) (not (keyword actual))) (is (not= correct (ct/sorted actual))) (is (not= correct actual))))) (if (and (vector? expected) (not (keyword actual))) (is (= expected (ct/sorted actual))) (when (not= expected actual) (is (= expected actual)))))) | |
(defn- when-keyword [x] (when (keyword? x) x)) | |
(defn- get-override* [expected-cs mode fixture ck] (or (get-in expected-cs [:overrides mode :error]) (get-in expected-cs [:overrides mode ck]) (when-keyword (get-in expected-cs [:overrides mode])) (get-in ns-overrides [mode (namespace fixture)]) (get global-overrides mode))) | |
(defn- get-override [expected-cs mode fixture ck] (or (some #(get-override* expected-cs % fixture ck) (lineage override-hierarchy mode)) (get-in expected-cs [:overrides :error]) (get-in expected-cs [:overrides ck]) (when-keyword (get expected-cs :overrides)))) | |
Test that we can parse a given fixture, and compare against expected analysis and rewrites, where they are defined. | (defn- test-fixture [fixture] (let [prefix (str "(fixture: " (subs (str fixture) 1) ")") merged (merged-fixtures) sql (or (ct/query-fixture fixture) (get merged fixture)) expected-cs (fixture-analysis fixture) renames (fixture-renames fixture) expected-rw (fixture-rewritten fixture) base-opts {:non-reserved-words [:final], :allow-unused? true} opts-mode (fn [mode] (assoc base-opts :mode mode))] (assert sql "Fixture exists") (doseq [m test-modes :let [opts (opts-mode m)]] (if (= m :ast-walker-1) ;; Legacy testing path for `components`, which only supports the original walker, and throws exceptions. (let [cs (testing (str prefix " analysis does not throw") (is (ct/components sql opts)))] (doseq [[ck cv] (dissoc expected-cs :overrides :error :skip)] (testing (str prefix " analysis is correct: " (name ck)) (let [actual-cv (:error cs (get-component cs ck)) override (get-override expected-cs m fixture ck)] (validate-analysis cv override actual-cv))))) ;; Testing path for newer modes. (let [correct (:error expected-cs (:tables expected-cs)) override (cond (str/includes? sql "-- BROKEN") :macaw.error/unable-to-parse (str/includes? sql "-- UNSUPPORTED") :macaw.error/unsupported-expression :else (get-override expected-cs m fixture :tables)) ;; For now, we only support (and test) :tables tables (testing (str prefix " table analysis does not throw for mode " m) (is (ct/tables sql opts)))] (if (and (nil? correct) (nil? override)) (testing "Must define expected tables, or explicitly skip analysis" (is (:skip expected-cs))) (testing (str prefix " table analysis is correct for mode " m) (validate-analysis correct override tables)))))) (when renames (let [broken? (:broken? renames) rewritten (testing (str prefix " rewriting does not throw") (is (str/trim (m/replace-names sql (dissoc renames :broken?) base-opts))))] (when expected-rw (testing (str prefix " rewritten SQL is correct") (if broken? (is (not= expected-rw rewritten)) (is (= expected-rw rewritten))))))))) |
Find all the fixture symbols for stand-alone sql files within our test resources. | (defn isolated-fixtures [] (->> (io/resource "acceptance") io/file file-seq (keep #(when (.isFile ^File %) (let [n (.getName ^File %)] (when (.endsWith n ".sql") (str/replace n #"\.sql$" ""))))) (remove #(.contains ^String % ".")) (remove #{"queries"}) (map ct/stem->fixture) (sort-by str))) |
(defn- all-fixtures [] (let [isolated (isolated-fixtures) merged (keys (merged-fixtures))] (assert (empty? (set/intersection (set isolated) (set merged))) "No fixtures should be in both the isolated and merged files") (sort-by str (distinct (concat isolated merged))))) | |
Find all the fixture files and for each of them run all the tests we can construct from the related files. | (defmacro create-fixture-tests! [] (let [fixtures (all-fixtures)] (cons 'do (for [f fixtures :let [test-name (symbol (str/replace (ct/fixture->filename f "-test") #"(?<!_)_(?!_)" "-"))]] `(deftest ^:parallel ~test-name (test-fixture ~f)))))) |
(create-fixture-tests!) | |
(comment ;; Unload all the tests, useful for flushing stale fixture tests (doseq [[sym ns-var] (ns-interns *ns*)] (when (:test (meta ns-var)) (ns-unmap *ns* sym))) (merged-fixtures) ;; Append all the isolated fixtures to the merged file. ;; For now, we keep the stress-testing fixtures separate, because OH LAWDY they HUGE. (spit merged-fixtures-file (str/join "\n\n" (for [fixture (isolated-fixtures)] (str "-- FIXTURE: " (when-let [nms (namespace fixture)] (str nms "/")) (name fixture) "\n" (str/trim (ct/query-fixture fixture)))))) (deftest ^:parallel single-test (test-fixture :compound/nested-cte-sneaky)) (test-fixture :compound/cte) (test-fixture :compound/cte-nonambiguous) (test-fixture :literal/with-table) (test-fixture :literal/without-table) (test-fixture :interpolation/crosstab) (test-fixture :broken/filter-where)) | |
(ns ^:parallel macaw.core-test (:require [clojure.java.io :as io] [clojure.string :as str] [clojure.test :refer [deftest is testing]] [clojure.walk :as walk] [macaw.core :as m] [macaw.test.util :refer [ws=]] [macaw.walk :as mw] [mb.hawk.assert-exprs]) (:import (clojure.lang ExceptionInfo) (net.sf.jsqlparser.schema Table))) | |
(set! *warn-on-reflection* true) | |
(defn- non-empty-and-truthy [xs] (if (seq xs) (every? true? xs) false)) | |
(defn components [sql & {:as opts}] (m/query->components (m/parsed-query sql opts) opts)) | |
(defn tables [sql & {:as opts}] (let [opts (update opts :mode #(or % :ast-walker-1)) result (m/query->tables sql opts)] (or (:error result) (:tables result)))) | |
(def raw-components #(let [xs (empty %)] (into xs (keep :component) %))) (def columns (comp raw-components :columns components)) (def source-columns (comp :source-columns components)) (def has-wildcard? (comp non-empty-and-truthy raw-components :has-wildcard? components)) (def mutations (comp raw-components :mutation-commands components)) (def table-wcs (comp raw-components :table-wildcards components)) | |
Strip the scope id from the context stacks, to get deterministic values for testing. | (defn- strip-context-ids [m] (walk/prewalk (fn [x] (if (:context x) (update x :context (partial mapv m/scope-label)) x)) m)) |
(defn scope->vec [s] [(m/scope-label s) (m/scope-id s)]) | |
Replace full context stack with a reference to the local scope, only. | (defn contexts->scopes [m] (walk/prewalk (fn [x] (if-let [context (:context x)] (-> x (dissoc :context) (assoc :scope (scope->vec (first context)))) x)) m)) |
(defn column-qualifiers [query] (mw/fold-query (m/parsed-query query) {:column-qualifier (fn [acc tbl _ctx] (conj acc (.getName ^Table tbl)))} #{})) | |
See [[macaw.core/parsed-query]] and https://github.com/JSQLParser/JSqlParser/issues/1988 for more details. | (def ^:private implicit-semicolon "select id from foo") |
(defn- ->windows [sql] (str/replace sql "\n" "\r\n")) | |
(deftest three-or-more-line-breaks-test (doseq [f [identity ->windows] :let [query (f implicit-semicolon)]] (testing (if (= ->windows f) "windows" "unix") (is (= (-> query (str/replace "id" "pk") (str/replace "foo" "bar")) (m/replace-names query {:columns {{:table "foo" :column "id"} "pk"} :tables {{:table "foo"} "bar"}})))))) | |
(deftest query->tables-test (testing "Simple queries" (is (= #{{:table "core_user"}} (tables "SELECT * FROM core_user;"))) (is (= #{{:table "core_user"}} (tables "SELECT id, email FROM core_user;")))) (testing "With a schema (Postgres)" ;; TODO: only run this against supported DBs (is (= #{{:table "core_user" :schema "the_schema_name"}} (tables "SELECT * FROM the_schema_name.core_user;"))) (is (= #{{:table "orders" :schema "public"} {:table "orders" :schema "private"}} (tables "SELECT a.x FROM public.orders a, private.orders")))) (testing "Sub-selects" (is (= #{{:table "core_user"}} (tables "SELECT * FROM (SELECT DISTINCT email FROM core_user) q;"))))) | |
(deftest tables-with-complex-aliases-issue-14-test (testing "With an alias that is also a table name" (is (= #{{:table "user"} {:table "user2_final"}} (tables "SELECT legacy_user.id AS old_id, user.id AS new_id FROM user AS legacy_user OUTER JOIN user2_final AS user ON legacy_user.email = user2_final.email;"))))) | |
(deftest column-qualifier-test (testing "column-qualifiers works with tables and aliases" (is (= #{"user" "legacy_user"} (column-qualifiers "SELECT user.id AS user_id, legacy_user.id AS old_id FROM user OUTER JOIN user as legacy_user ON user.email = user.electronic_mail_address JOIN unrelated_table on foo = user.unrelated_id;"))))) | |
(deftest query->columns-test (testing "Simple queries" (is (= #{{:column "foo"} {:column "bar"} {:column "id" :table "quux"} {:column "quux_id" :table "baz"}} (columns "SELECT foo, bar FROM baz INNER JOIN quux ON quux.id = baz.quux_id")))) (testing "'group by' columns present" (is (= #{{:column "id" :table "orders"} {:column "user_id" :table "orders"}} (columns "SELECT id FROM orders GROUP BY user_id")))) (testing "table alias present" (is (= #{{:column "id" :table "orders" :schema "public"}} (columns "SELECT o.id FROM public.orders o")))) (testing "schema is determined correctly" (is (= #{{:column "x" :table "orders" :schema "public"}} (columns "SELECT public.orders.x FROM public.orders, private.orders")))) (testing "quotes are retained" (is (= #{{:column "`x`" :table "`orders`" :schema "`public`"}} (columns "SELECT `public`.`orders`.`x` FROM `public`.`orders`, `private`.`orders`")))) (testing "quotes and case are not interpreted" (is (= #{{:column "x" :table "ORDERS" :schema "`public`"} {:column "X" :table "ORDERS" :schema "`public`"} {:column "`x`" :table "ORDERS" :schema "`public`"} {:column "`X`" :table "ORDERS" :schema "`public`"}} (columns "SELECT x, X, `x`, `X` FROM `public`.ORDERS"))))) | |
(def ^:private heavily-quoted-query "SELECT raw, \"foo\", \"dong\".\"bar\", `ding`.`dong`.`fee` FROM `ding`.dong") | |
(def ^:private heavily-quoted-query-rewritten "SELECT flaw, glue, long.lark, king.long.flee FROM king.long") | |
(def ^:private heavily-quoted-query-rewrites {:schemas {"ding" "king"} :tables {{:schema "ding" :table "dong"} "long"} :columns {{:schema "ding" :table "dong" :column "raw"} "flaw" {:schema "ding" :table "dong" :column "fee"} "flee" {:schema "ding" :table "dong" :column "bar"} "lark" {:schema "ding" :table "dong" :column "foo"} "glue"}}) | |
(defn normalized-components [sql] (m/query->components (m/parsed-query sql) {:preserve-identifiers? false})) | |
(def normalized-columns (comp raw-components :columns normalized-components)) (def normalized-tables (comp raw-components :tables normalized-components)) | |
(deftest quotes-test (is (= #{{:column "raw", :table "dong", :schema "ding"} {:column "foo", :table "dong", :schema "ding"} {:column "bar", :table "dong", :schema "ding"} {:column "fee", :table "dong", :schema "ding"}} (normalized-columns heavily-quoted-query))) (is (= #{{:table "dong", :schema "ding"}} (normalized-tables heavily-quoted-query))) (is (= heavily-quoted-query-rewritten (m/replace-names heavily-quoted-query heavily-quoted-query-rewrites)))) | |
(deftest case-sensitive-test (is (= "SELECT X.Y, X.y, x.Z, x.z FROM X LEFT JOIN x ON X.Y=x.Z" (m/replace-names "SELECT a.b, a.B, A.b, A.B FROM a LEFT JOIN A ON a.b=A.b" {:tables {{:table "a"} "X" {:table "A"} "x"} :columns {{:table "a" :column "b"} "Y" {:table "a" :column "B"} "y" {:table "A" :column "b"} "Z" {:table "A" :column "B"} "z"}})))) | |
(deftest case-insensitive-test ;; In the future, we might try to be smarter and preserve case. (is (= "SELECT cats.meow FROM cats" (m/replace-names "SELECT DOGS.BaRk FROM dOGS" {:tables {{:table "dogs"} "cats"} :columns {{:table "dogs" :column "bark"} "meow"}} {:case-insensitive :lower}))) (is (= "SELECT meow FROM private.cats" (m/replace-names "SELECT bark FROM PUBLIC.dogs" {:schemas {"public" "private"} :tables {{:schema "public" :table "dogs"} "cats"} :columns {{:schema "public" :table "dogs" :column "bark"} "meow"}} {:case-insensitive :lower}))) (is (= "SELECT id, meow FROM private.cats" (m/replace-names "SELECT id, bark FROM PUBLIC.dogs" {:schemas {"public" "private"} :tables {{:schema "public" :table "DOGS"} "cats"} :columns {{:schema "PUBLIC" :table "dogs" :column "bark"} "meow"}} {:case-insensitive :agnostic :allow-unused? true})))) | |
(def ^:private heavily-quoted-query-mixed-case "SELECT RAW, \"Foo\", \"doNg\".\"bAr\", `ding`.`doNg`.`feE` FROM `ding`.`doNg`") | |
(deftest case-and-quotes-test (testing "By default, quoted references are also case insensitive" (is (= heavily-quoted-query-rewritten (m/replace-names heavily-quoted-query-mixed-case heavily-quoted-query-rewrites :case-insensitive :lower)))) (testing "One can opt-into ignoring case only for unquoted references\n" (testing "None of the quoted identifiers with different case will be matched" (is (thrown-with-msg? ExceptionInfo #"Unknown rename: .* \"(dong)|(bar)|(foo)|(fee)\ (m/replace-names heavily-quoted-query-mixed-case heavily-quoted-query-rewrites :case-insensitive :agnostic :quotes-preserve-case? true)))) (testing "The query is unchanged when allowed to run partially" (is (= heavily-quoted-query-mixed-case (m/replace-names heavily-quoted-query-mixed-case heavily-quoted-query-rewrites {:case-insensitive :agnostic :quotes-preserve-case? true :allow-unused? true})))))) | |
(def ^:private ambiguous-case-replacements {:columns {{:schema "public" :table "DOGS" :column "BARK"} "MEOW" {:schema "public" :table "dogs" :column "bark"} "meow" {:schema "public" :table "dogs" :column "growl"} "purr" {:schema "public" :table "dogs" :column "GROWL"} "PuRr" {:schema "public" :table "DOGS" :column "GROWL"} "PURR"}}) | |
(deftest ambiguous-case-test (testing "Correctly handles flexibility around the case of the replacements" (doseq [[case-insensitive expected] {:lower "SELECT meow, PuRr FROM DOGS" :upper "SELECT MEOW, PURR FROM DOGS" ;; Not strictly deterministic, depends on map ordering. :agnostic "SELECT MEOW, PuRr FROM DOGS"}] (is (= expected (m/replace-names "SELECT bark, `GROWL` FROM DOGS" ambiguous-case-replacements {:case-insensitive case-insensitive :quotes-preserve-case? true :allow-unused? true})))))) | |
(deftest infer-test (testing "We can infer a column through a few hoops" (is (= #{{:column "amount" :table "orders"}} (columns "SELECT amount FROM (SELECT amount FROM orders)"))) (is (= #{{:column "amount" :alias "cost" :table "orders"} ;; We preserve this for now, which has its scope to differentiate it from the qualified element. ;; Importantly, we do not infer it as coming from the orders table, despite that being the only table. {:column "cost"}} (columns "SELECT cost FROM (SELECT amount AS cost FROM orders)"))) (testing "We do not expose phantom columns due to references to aliases" (is (= #{{:column "amount" :table "orders"}} (source-columns "SELECT cost FROM (SELECT amount AS cost FROM orders)")))))) | |
(deftest infer-from-schema-test (is (= #{{:schema "public" :table "towns"}} (tables "select public.towns.id from towns")))) | |
(deftest mutation-test (is (= #{"alter-sequence"} (mutations "ALTER SEQUENCE serial RESTART WITH 42"))) (is (= #{"alter-session"} (mutations "ALTER SESSION SET foo = 'bar'"))) (is (= #{"alter-system"} (mutations "ALTER SYSTEM RESET ALL"))) (is (= #{"alter-table"} (mutations "ALTER TABLE orders ADD COLUMN email text"))) (is (= #{"alter-view"} (mutations "ALTER VIEW foo AS SELECT bar;"))) (is (= #{"create-function"} ; Postgres syntax (mutations "CREATE FUNCTION multiply(integer, integer) RETURNS integer AS 'SELECT $1 * $2;' LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT;"))) (is (= #{"create-function"} ; Conventional syntax (mutations "CREATE FUNCTION multiply(a integer, b integer) RETURNS integer LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT RETURN a + b;"))) (is (= #{"create-index"} (mutations "CREATE INDEX idx_user_id ON orders(user_id);"))) (is (= #{"create-schema"} (mutations "CREATE SCHEMA perthshire"))) (is (= #{"create-sequence"} (mutations "CREATE SEQUENCE users_seq START WITH 42 INCREMENT BY 17"))) (is (= #{"create-synonym"} (mutations "CREATE SYNONYM folk FOR people"))) (is (= #{"create-table"} (mutations "CREATE TABLE poets (name text, id integer)"))) (is (= #{"create-view"} (mutations "CREATE VIEW folk AS SELECT * FROM people WHERE id > 10"))) (is (= #{"delete"} (mutations "DELETE FROM people"))) (is (= #{"drop"} (mutations "DROP TABLE people"))) (is (= #{"grant"} (mutations "GRANT SELECT, UPDATE, INSERT ON people TO myself"))) (is (= #{"insert"} (mutations "INSERT INTO people(name, source) VALUES ('Robert Fergusson', 'Twitter'), ('Robert Burns', 'Facebook')"))) (is (= #{"purge"} (mutations "PURGE TABLE people"))) (is (= #{"rename-table"} (mutations "RENAME TABLE people TO folk"))) (is (= #{"truncate"} (mutations "TRUNCATE TABLE people"))) (is (= #{"update"} (mutations "UPDATE people SET name = 'Robert Fergusson' WHERE id = 23")))) | |
(deftest complicated-mutations-test ;; https://github.com/metabase/macaw/issues/18 #_(is (= #{"delete" "insert"} (mutations "WITH outdated_orders AS ( DELETE FROM orders WHERE date <= '2018-01-01' RETURNING *) INSERT INTO order_log SELECT * from outdated_orders;"))) (is (= #{"insert"} (mutations "WITH outdated_orders AS ( SELECT * from orders) INSERT INTO order_log SELECT * from outdated_orders;")))) | |
(deftest alias-inclusion-test (testing "Aliases are not included" (is (= #{{:table "orders"} {:table "foo"}} (tables "SELECT id, o.id FROM orders o JOIN foo ON orders.id = foo.order_id"))))) | |
(deftest select-*-test (is (true? (has-wildcard? "SELECT * FROM orders"))) (is (true? (has-wildcard? "SELECT id, * FROM orders JOIN foo ON orders.id = foo.order_id")))) | |
(deftest table-wildcard-test-without-aliases (is (= #{{:table "orders"}} (table-wcs "SELECT orders.* FROM orders JOIN foo ON orders.id = foo.order_id"))) (is (= #{{:table "foo" :schema "public"}} (table-wcs "SELECT foo.* FROM orders JOIN public.foo f ON orders.id = foo.order_id")))) | |
(deftest table-star-test-with-aliases (is (= #{{:table "orders"}} (table-wcs "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id"))) (is (= #{{:table "foo"}} (table-wcs "SELECT f.* FROM orders o JOIN foo f ON orders.id = foo.order_id")))) | |
(deftest context-test (testing "Sub-select with outer wildcard" ;; TODO we should test the source and result columns too (is (=? {:columns #{{:component {:column "total" :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]} {:component {:column "id" :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]} {:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}}, :has-wildcard? #{{:component true, :context ["SELECT"]}}, :mutation-commands #{}, :tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :table-wildcards #{}} (strip-context-ids (components "SELECT * FROM (SELECT id, total FROM orders) WHERE total > 10"))))) (testing "Sub-select with inner wildcard" (is (=? {:columns #{{:component {:column "id" :table "orders"}, :context ["SELECT"]} {:component {:column "total" :table "orders"}, :context ["SELECT"]} {:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}}, :has-wildcard? #{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :mutation-commands #{}, :tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :table-wildcards #{}} (strip-context-ids (components "SELECT id, total FROM (SELECT * FROM orders) WHERE total > 10"))))) (testing "Sub-select with dual wildcards" (is (=? {:columns #{{:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}}, :has-wildcard? #{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]} {:component true, :context ["SELECT"]}}, :mutation-commands #{}, :tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :table-wildcards #{}} (strip-context-ids (components "SELECT * FROM (SELECT * FROM orders) WHERE total > 10"))))) (testing "Join; table wildcard" (is (=? {:columns #{{:component {:column "order_id" :table "foo"}, :context ["JOIN" "SELECT"]} {:component {:column "id" :table "orders"}, :context ["JOIN" "SELECT"]}}, :has-wildcard? #{}, :mutation-commands #{}, :tables #{{:component {:table "foo"}, :context ["FROM" "JOIN" "SELECT"]} {:component {:table "orders"}, :context ["FROM" "SELECT"]}}, :table-wildcards #{{:component {:table "orders"}, :context ["SELECT"]}}} (strip-context-ids (components "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id")))))) | |
(deftest replace-names-test (is (= "SELECT aa.xx, b.x, b.y FROM aa, b;" (m/replace-names "SELECT a.x, b.x, b.y FROM a, b;" {:tables {{:schema "public" :table "a"} "aa"} :columns {{:schema "public" :table "a" :column "x"} "xx"}}))) (testing "Handle fully qualified replacement targets" ;; Giving Macaw more context could make it easier to ;; In any case, this is trivial for Metabase to provide. (is (= "SELECT aa.xx, b.x, b.y FROM aa, b;" (m/replace-names "SELECT a.x, b.x, b.y FROM a, b;" {:tables {{:schema "public" :table "a"} "aa"} :columns {{:schema "public" :table "a" :column "x"} {:schema "public" :table "aa" :column "xx"}}})))) ;; To consider - we could avoid splitting up the renames into column and table portions in the client, as ;; qualified targets would allow us to infer such changes. Partial qualification could also work fine where there ;; is no ambiguity - even if this is just a nice convenience for testing. #_(is (= "SELECT aa.xx, b.x, b.y FROM aa, b;" (m/replace-names "SELECT a.x, b.x, b.y FROM a, b;" {:columns {{:schema "public" :table "a" :column "x"} {:table "aa" :column "xx"}}}))) (is (= "SELECT qwe FROM orders" (m/replace-names "SELECT id FROM orders" {:columns {{:schema "public" :table "orders" :column "id"} "qwe"}}))) (is (= "SELECT p.id, q.id FROM public.whatever p join private.orders q" (m/replace-names "SELECT p.id, q.id FROM public.orders p join private.orders q" {:tables {{:schema "public" :table "orders"} "whatever"}}))) (is (ws= "SELECT SUM(public.orders.total) AS s, MAX(orders.total) AS max, MIN(total) AS min FROM public.orders" (m/replace-names "SELECT SUM(public.orders.amount) AS s, MAX(orders.amount) AS max, MIN(amount) AS min FROM public.orders" {:columns {{:schema "public" :table "orders" :column "amount"} "total"}}))) (is (ws= "SELECT *, sturmunddrang , oink AS oink FROM /* /* lore */ floor_muser, user, /* more */ vigilant_user ;" (m/replace-names "SELECT *, boink , yoink AS oink FROM /* /* lore */ core_user, bore_user, /* more */ snore_user ;" {:tables {{:schema "public" :table "core_user"} "floor_muser" {:schema "public" :table "bore_user"} "user" {:schema "public" :table "snore_user"} "vigilant_user"} :columns {{:schema "public" :table "core_user" :column "boink"} "sturmunddrang" {:schema "public" :table "snore_user" :column "yoink"} "oink"}})))) | |
(deftest replace-schema-test ;; Somehow we broke renaming the `x` in the WHERE clause. #_(is (= "SELECT totally_private.purchases.xx FROM totally_private.purchases, private.orders WHERE xx = 1" (m/replace-names "SELECT public.orders.x FROM public.orders, private.orders WHERE x = 1" {:schemas {"public" "totally_private"} :tables {{:schema "public" :table "orders"} "purchases"} :columns {{:schema "public" :table "orders" :column "x"} "xx"}})))) | |
(deftest allow-unused-test (is (thrown-with-msg? Exception #"Unknown rename" (m/replace-names "SELECT 1" {:tables {{:schema "public" :table "a"} "aa"}}))) (is (= "SELECT 1" (m/replace-names "SELECT 1" {:tables {{:schema "public" :table "a"} "aa"}} {:allow-unused? true})))) | |
(deftest model-reference-test (is (= "SELECT subtotal FROM metabase_sentinel_table_154643 LIMIT 3" (m/replace-names "SELECT total FROM metabase_sentinel_table_154643 LIMIT 3" {:columns {{:table "orders" :column "total"} "subtotal"} :tables {{:table "orders"} "purchases"}} {:allow-unused? true})))) | |
(defn- name-seq [seq-type] (let [prefix (str seq-type "_")] (rest (iterate (fn [_] (str (gensym prefix))) nil)))) | |
(defn fixture->filename ([fixture suffix] (fixture->filename fixture nil suffix)) ([fixture path suffix] (as-> fixture % [(namespace %) (name %)] (remove nil? %) (str/join "__" %) (str/replace % "-" "_") (if path (str path "/" %) %) (str % suffix)))) | |
(defn stem->fixture [stem] (let [[x y] (map #(str/replace % "_" "-") (str/split stem #"__"))] (if y (keyword x y) (keyword x)))) | |
(def ^:private fixture-paths #{nil "acceptance"}) | |
(defn query-fixture ([fixture] (let [paths (map #(fixture->filename fixture % ".sql") fixture-paths)] (when-let [r (some io/resource paths)] (slurp r))))) | |
(defn- anonymize-query [query] (let [m (components query) ts (raw-components (:tables m)) cs (raw-components (:columns m)) ss (transduce (keep :schema) conj #{} (concat ts cs))] (m/replace-names query {:schemas (zipmap ss (name-seq "schema")) :tables (zipmap ts (name-seq "table")) :columns (zipmap cs (name-seq "column"))} ;; nothing should be unused... but we currently get some junk from analysis, sadly {:allow-unused? true}))) | |
Read a fixture, anonymize the identifiers, write it back out again. | (defn- anonymize-fixture [fixture] (let [filename (fixture->filename fixture ".sql")] (spit (str "test/resources/" filename) (anonymize-query (query-fixture fixture))))) |
(def ^:private alias-shadow-query "SELECT people.*, orders.a as foo FROM orders JOIN people ON people.foo = orders.foo_id") | |
(deftest alias-shadow-replace-test (testing "Aliases are not replaced, but real usages are" (is (ws= (str/replace alias-shadow-query "people.foo" "people.bar") (m/replace-names alias-shadow-query {:columns {{:table "people" :column "foo"} "bar"}} {:allow-unused? true}))))) | |
(def ^:private cte-query "WITH engineering_employees AS ( SELECT id, name, department, favorite_language FROM employees WHERE department = 'Engineering') SELECT id, name, favorite_language as fave_lang FROM engineering_employees WHERE favorite_language in ('mandarin clojure', 'middle javascript');") | |
(def ^:private sub-select-query "SELECT id, name, favorite_language as fave_lang FROM ( SELECT id, name, department, favorite_language FROM employees WHERE department = 'Engineering') as engineering_employees WHERE favorite_language in ('mandarin clojure', 'middle javascript');") | |
(deftest cte-propagate-test (testing "Transitive references are tracked to their source when replacing columns in queries with CTEs." (is (= (str/replace cte-query "favorite_language" "first_language") (m/replace-names cte-query {:columns {{:table "employees", :column "favorite_language"} "first_language"}}))))) | |
(deftest sub-select-propagate-test (testing "Transitive references are tracked to their source when replacing columns in queries with sub-selects." (is (= (str/replace sub-select-query "favorite_language" "first_language") (m/replace-names sub-select-query {:columns {{:table "employees", :column "favorite_language"} "first_language"}}))))) | |
A transformation to help write tests, where hawk would face limitations on predicates within sets. | (defn sorted [element-set] (sort-by (comp (juxt :schema :table :column :scope) #(:component % %)) element-set)) |
(deftest count-field-test (testing "COUNT(*) does not actually read any columns" (is (empty? (columns "SELECT COUNT(*) FROM users"))) (is (false? (has-wildcard? "SELECT COUNT(*) FROM users"))) (is (empty? (table-wcs "SELECT COUNT(*) FROM users")))) (testing "COUNT(1) does not actually read any columns" (is (empty? (columns "SELECT COUNT(1) FROM users"))) (is (false? (has-wildcard? "SELECT COUNT(1) FROM users"))) (is (empty? (table-wcs "SELECT COUNT(1) FROM users")))) (testing "We do care about explicitly referenced fields in a COUNT" (is (= #{{:table "users" :column "id"}} (source-columns "SELECT COUNT(id) FROM users")))) (testing "We do care about deeply referenced fields in a COUNT however" (is (= #{{:table "users" :column "id"}} (source-columns "SELECT COUNT(DISTINCT(id)) FROM users"))))) | |
(deftest reserved-word-test (testing "We can opt-out of reserving specific keywords" (is (= #{{:schema "serial" :table "limit" :column "final"}} (source-columns "SELECT limit.final FROM serial.limit" :non-reserved-words [:final :serial :limit])))) (testing "We can replace with and from non-reserved keywords" (is (= "SELECT y FROM final" (m/replace-names "SELECT final FROM x" {:tables {{:table "x"} "final"} :columns {{:table "x" :column "final"} "y"}} {:non-reserved-words [:final]}))))) | |
(deftest square-bracket-test (testing "We can opt into allowing square brackets to quote things" (is (=? {:tables #{{:schema "s" :table "t"}} :columns #{{:schema "s" :table "t" :column "f"}}} (update-vals (components "SELECT [f] FROM [s].[t]" {:features {:square-bracket-quotes true} :preserve-identifiers? false}) raw-components))))) | |
(comment (require 'user) ;; kondo, really (require '[clj-async-profiler.core :as prof]) (prof/serve-ui 8080) (defn- simple-benchmark [] (source-columns "SELECT x FROM t")) (defn- complex-benchmark [] (count (source-columns (query-fixture :snowflake)))) (user/time+ (simple-benchmark)) (prof/profile {:event :alloc} (dotimes [_ 1000] (simple-benchmark))) (user/time+ (complex-benchmark)) (prof/profile {:event :alloc} (dotimes [_ 100] (complex-benchmark))) (anonymize-query "SELECT x FROM a") (anonymize-fixture :snowflakelet) (require 'virgil) (require 'clojure.tools.namespace.repl) (virgil/watch-and-recompile ["java"] :post-hook clojure.tools.namespace.repl/refresh-all)) | |
(ns macaw.test.util (:require [clojure.string :as str] [clojure.test :refer :all] [clojure.walk :as walk])) | |
(defn- indentation [s] (count (re-find #"^\s*" s))) | |
(defn- trim-indent* [margin s] (if (< (count s) margin) (subs s margin))) | |
Given a multi-line string, remove the common margin from the remaining lines. Used so that strings with significant whitespace may be visually aligned. | (defn trim-indent [s] (let [lines (str/split-lines s) margin (->> (rest lines) (remove str/blank?) (transduce (map indentation) min Integer/MAX_VALUE))] (str/join "\n" (cons (first lines) (map (partial trim-indent* margin) (rest lines)))))) |
Trim the extra indentation from all string literals before evaluation a given equality form. | (defmacro ws= [& xs] `(= ~@(walk/postwalk #(cond-> % (string? %) trim-indent) xs))) |
(ns macaw.test.util-test (:require [clojure.string :as str] [clojure.test :refer :all] [macaw.test.util :refer [ws=]])) | |
(deftest ^:parallel ws=-test (testing "Code indentation is ignored" (is (ws= "ABC DEF XXX" (str/replace "ABC DEF GHI" "GHI" "XXX")))) (testing "Comparison is still whitespace sensitive" (is (not (ws= "A B" "A B"))) (is (not (ws= "A B C" "A B C"))))) | |
(ns macaw.util-test (:require [clojure.test :refer :all] [macaw.util :as u])) | |
(def ^:private haystack {{:a 3 :b 2 :c 1} 2 {:a 3 :b nil :c 1} 3 {:a 1 :b 2 :c 3} 1}) | |
(deftest ^:parallel relevant-find-test (testing "We ignore any suffix of degenerate keys" (doseq [x [{:a 1} {:a 1 :b nil} {:a 1 :b 2} {:a 1 :b 2 :c nil} {:a 1 :b 2 :c 3} {:a 1 :b 2 :c 3 :d 4}]] (is (= [{:a 1 :b 2 :c 3} 1] (u/find-relevant haystack x [:a :b :c]))))) (testing "We need at least one non-degenerate key" (is (nil? (u/find-relevant haystack {} [:a :b :c])))) (testing "We don't ignore non-suffix degenerate keys" (doseq [x [{:a nil :b 2} {:a 1 :b nil :c 3} {:a nil :b 2 :c 3}]] (is (nil? (u/find-relevant haystack x [:a :b :c])))))) | |
(ns macaw.util.malli-test (:require [clojure.test :refer :all] [macaw.util.malli :as mu])) | |
(mu/defn enhance :- :int [x :- :int, fail-deliberately? :- :boolean] (if fail-deliberately? :poppycock (int (+ x 43)))) | |
(deftest ^:parallel defn-good-in-good-out-test (is (= 50 (enhance 7 false)))) | |
(deftest ^:parallel defn-good-in-bad-out-test (is (thrown-with-msg? Exception #"Invalid output.*" (enhance 7 true)))) | |
(deftest ^:parallel defn-bad-in-good-out-test (is (thrown-with-msg? Exception #"Invalid input.*" (enhance 7.3 false)))) | |
(deftest ^:parallel defn-bad-in-bad-out-test (is (thrown-with-msg? Exception #"Invalid input.*" (enhance 7.3 true)))) | |