Macaw0.0.1-SNAPSHOTA Clojure wrapper for JSqlParser 🦜 | (this space intentionally left almost blank) |
(ns macaw.collect (:require [clojure.string :as str] [macaw.util :as u] [macaw.walk :as mw]) (:import (com.metabase.macaw AstWalker$Scope) (java.util.regex Pattern) (net.sf.jsqlparser.expression Alias) (net.sf.jsqlparser.schema Column Table) (net.sf.jsqlparser.statement Statement) (net.sf.jsqlparser.statement.select AllTableColumns SelectItem))) | |
(set! *warn-on-reflection* true) | |
(defn- conj-to
([key-name]
(conj-to key-name identity))
([key-name xf]
(fn item-conjer [results component context]
(update results key-name conj {:component (xf component)
:context context})))) | |
(defn- query->raw-components
[^Statement parsed-ast]
(mw/fold-query parsed-ast
{:alias (conj-to :aliases (fn [^SelectItem item]
{:alias (.getName (.getAlias item))
:expression (.getExpression item)}))
:column (conj-to :columns)
:column-qualifier (conj-to :qualifiers)
:mutation (conj-to :mutation-commands)
:pseudo-table (conj-to :pseudo-tables)
:table (conj-to :tables)
:table-wildcard (conj-to :table-wildcards)
:wildcard (conj-to :has-wildcard? (constantly true))}
{:aliases #{}
:columns #{}
:has-wildcard? #{}
:mutation-commands #{}
:pseudo-tables #{}
:tables #{}
:table-wildcards #{}})) | |
tables | |
(def ^:private quotes (map str "`\"[")) | |
(def ^:private closing {"[" "]"}) | |
(defn- quoted? [s]
(some (fn [q]
(and (str/starts-with? s q)
(str/ends-with? s (closing q q))))
quotes)) | |
(defn- strip-quotes [s] (subs s 1 (dec (count s)))) | |
(defn- setting->relax-case [{:keys [case-insensitive]}]
(when case-insensitive
(case case-insensitive
:lower str/lower-case
:upper str/upper-case
;; This will work for replace, but not for analyzing where we need a literal to accumulate
:agnostic (fn [s] (re-pattern (str "(?i)" (Pattern/quote s))))))) | |
Normalize a schema, table, column, etc. references so that we can match them regardless of syntactic differences. | (defn normalize-reference
[s {:keys [preserve-identifiers? quotes-preserve-case?] :as opts}]
(if preserve-identifiers?
s
(when s
(let [quoted (quoted? s)
relax-case (when-not (and quotes-preserve-case? quoted)
(setting->relax-case opts))]
(cond-> s
quoted strip-quotes
relax-case relax-case))))) |
(defn- find-table [{:keys [alias->table name->table keep-internal-tables?] :as opts} ^Table t]
(let [n (normalize-reference (.getName t) opts)
schema (normalize-reference (.getSchemaName t) opts)]
(or (get alias->table n)
(:component (last (u/find-relevant name->table {:table n :schema schema} [:table :schema])))
(when keep-internal-tables?
{:table n, :schema schema, :internal? true})))) | |
(defn- find-qualifier-table [opts ^Table q _ctx]
(when-let [table (find-table opts q)]
(cond-> table
(:with-instance opts) (assoc :instances [q])))) | |
(defn- make-table [{:keys [with-instance qualifier? alias->table] :as opts} ^Table t _ctx]
(if (and qualifier?
(get alias->table (.getName t)))
(get alias->table (.getName t))
(u/strip-nils
{:table (normalize-reference (.getName t) opts)
:schema (normalize-reference (.getSchemaName t) opts)
:instances (when with-instance [t])}))) | |
(defn- alias-mapping
[opts ^Table table ctx]
(when-let [^Alias table-alias (.getAlias table)]
[(.getName table-alias) (make-table opts table ctx)])) | |
JSQLParser can't tell whether the | (defn- resolve-table-name [opts ^AllTableColumns atc _ctx] (find-table opts (.getTable atc))) |
columns | |
(defn- maybe-column-table [alias? {:keys [name->table] :as opts} ^Column c]
(if-let [t (.getTable c)]
(find-table opts t)
;; if we see only a single table, we can safely say it's the table of that column
(when (and (= (count name->table) 1) (not (alias? (.getColumnName c))))
(:component (val (first name->table)))))) | |
(defn- scope-type [^AstWalker$Scope s] (keyword (.getType s))) | |
(defn- make-column [aliases opts ^Column c ctx]
(let [{:keys [schema table]} (maybe-column-table aliases opts c)]
(u/strip-nils
{:schema schema
:table table
:column (normalize-reference (.getColumnName c) opts)
:alias (when-let [s (first ctx)]
(when (= :alias (scope-type s))
(.getLabel ^AstWalker$Scope s)))
:instances (when (:with-instance opts) [c])}))) | |
get them together | |
(defn- only-query-context [ctx] (filter #(= (scope-type %) :query) ctx)) | |
(def ^:private strip-non-query-contexts (map #(update % :context only-query-context))) | |
(defn- update-components ([f] (map #(update % :component f (:context %)))) ([f components] (eduction (update-components f) components))) | |
Merge two nodes, keeping the union of their instances. | (defn- merge-with-instances
[a b]
(let [cs-a (-> a :component :instances)]
(cond-> (merge a b)
cs-a (update-in [:component :instances] into cs-a)))) |
(defn- literal? [{:keys [column]}]
;; numbers and strings are already handled by JSQLParser
(#{"true" "false"} (str/lower-case column))) | |
Remove any unqualified references that would resolve to an alias or qualified reference | (defn- remove-redundant-columns
[alias? column-set]
(let [{qualified true, unqualified false} (group-by (comp boolean :table) column-set)
;; Get all the bindings introduced by qualified columns
has-qualification? (into #{} (mapcat #(keep % qualified)) [:column :alias])]
(into qualified
(remove (comp (some-fn alias? has-qualification?)
:column))
unqualified))) |
If a column reference is qualified by a table that we don't know about, its probably
not a real field.
See the | (defn- remove-phantom-table-columns
[table-map columns]
(let [known-table-name? (into #{} (map (comp :table :component val)) table-map)]
(filter (fn [{s :schema, t :table}]
(or (nil? t) (some? s) (known-table-name? t)))
columns))) |
(defn- infer-table-schema [columns node]
(update node :component
#(let [{:keys [schema table] :as element} %]
(if schema
element
(if-let [schema' (->> columns
(filter (comp #{table} :table))
(some :schema))]
(assoc element :schema schema')
element))))) | |
See macaw.core/query->components doc. | (defn query->components
[^Statement parsed-ast & {:as opts}]
(let [{:keys [aliases
columns
qualifiers
has-wildcard?
mutation-commands
pseudo-tables
tables
table-wildcards]} (query->raw-components parsed-ast)
alias-map (into {} (map #(-> % :component ((partial alias-mapping opts) (:context %))) tables))
;; we're parsing qualifiers here for a single purpose - rewrite uses instances to find tables for renaming
table-map (->> (update-components (partial make-table opts) tables)
(u/group-with #(select-keys (:component %) [:schema :table])
merge-with-instances))
pseudo-table-names (into #{} (comp (map :component)
(map (fn [^Alias a] (.getName a))))
pseudo-tables)
table-map (into (empty table-map)
(remove (comp pseudo-table-names :table :component val))
table-map)
;; we need both aliases and tables for columns
opts (assoc opts
:alias->table alias-map
:name->table table-map)
qualifier-map (->> (update-components (partial find-qualifier-table opts) qualifiers)
(u/group-with #(select-keys (:component %) [:schema :table])
merge-with-instances))
alias? (into #{} (keep (comp :alias :component)) aliases)
all-columns (into #{}
(comp (update-components (partial make-column alias?
(assoc opts :keep-internal-tables? true)))
strip-non-query-contexts)
columns)
strip-alias (fn [c] (dissoc c :alias))
source-columns (->> (map :component all-columns)
(remove-redundant-columns alias?)
(remove literal?)
(remove-phantom-table-columns table-map)
(into #{}
(comp (remove (comp pseudo-table-names :table))
(remove :internal?)
(map strip-alias))))
table-map (update-vals table-map (partial infer-table-schema source-columns))]
{:columns all-columns
:source-columns source-columns
;; result-columns ... filter out the elements (and wildcards) in the top level scope only.
:has-wildcard? (into #{} strip-non-query-contexts has-wildcard?)
:mutation-commands (into #{} mutation-commands)
:tables (into #{} (comp (map val)
(remove (comp :internal? :component))
strip-non-query-contexts)
table-map)
:tables-superset (into #{}
(comp (map val) strip-non-query-contexts)
(merge-with merge-with-instances qualifier-map table-map))
:table-wildcards (into #{}
(comp strip-non-query-contexts
(update-components (partial resolve-table-name opts)))
table-wildcards)})) |
(ns macaw.config (:require [clojure.string :as str] [environ.core :as env])) | |
Default configuration; could be overridden by Java properties, environment variables, etc. | (def ^:private app-defaults
{:macaw-run-mode "prod"}) |
(defn- config-value
[k]
(let [env-val (get env/env k)]
(or (when-not (str/blank? env-val) env-val)
(get app-defaults k)))) | |
The mode (dev/test/prod) in which Macaw is being run. | (def run-mode (some-> :macaw-run-mode config-value keyword)) |
Is Macaw running in | (defn is-dev? [] (= run-mode :dev)) |
Is Macaw running in | (defn is-test? [] (= run-mode :test)) |
Is Macaw running in | (defn is-prod? [] (= run-mode :prod)) |
(ns macaw.core (:require [clojure.string :as str] [clojure.walk :as walk] [macaw.collect :as collect] [macaw.rewrite :as rewrite] [macaw.types :as m.types] [macaw.util.malli :as mu]) (:import (com.metabase.macaw AnalysisError AstWalker$Scope BasicTableExtractor CompoundTableExtractor) (java.util.function Consumer) (net.sf.jsqlparser JSQLParserException) (net.sf.jsqlparser.parser CCJSqlParser CCJSqlParserUtil) (net.sf.jsqlparser.parser.feature Feature) (net.sf.jsqlparser.schema Table) (net.sf.jsqlparser.statement Statement))) | |
(set! *warn-on-reflection* true) | |
(defn- escape-keywords ^String [sql keywords]
(reduce
(fn [sql k]
(str/replace sql (re-pattern (str "(?i)\\b(" (name k) ")\\b")) "$1____escaped____"))
sql
keywords)) | |
(defn- unescape-keywords [sql _keywords] (str/replace sql "____escaped____" )) | |
(def ^:private features
{:backslash-escape-char Feature/allowBackslashEscapeCharacter
:complex-parsing Feature/allowComplexParsing
:postgres-syntax Feature/allowPostgresSpecificSyntax
:square-bracket-quotes Feature/allowSquareBracketQuotation
:unsupported-statements Feature/allowUnsupportedStatements}) | |
(defn- ->Feature ^Feature [k] (get features k)) | |
(def ^:private default-timeout-seconds 5) | |
(defn- ->parser-fn ^Consumer [opts]
(reify Consumer
(accept [_this parser]
(let [^long timeout-ms (:timeout opts (* default-timeout-seconds 1000))]
(.withFeature ^CCJSqlParser parser Feature/timeOut timeout-ms))
(doseq [[f ^boolean v] (:features opts)]
(.withFeature ^CCJSqlParser parser (->Feature f) v))))) | |
Main entry point: takes a string query and returns a | (defn parsed-query
[^String query & {:as opts}]
(try
(-> query
;; Dialects like SQLite and Databricks treat consecutive blank lines as implicit semicolons.
;; JSQLParser, as a polyglot parser, always has this behavior, and there is no option to disable it.
;; This utility pre-processed the query to remove any such blank lines.
(CCJSqlParserUtil/sanitizeSingleSql)
(escape-keywords (:non-reserved-words opts))
(CCJSqlParserUtil/parse (->parser-fn opts)))
(catch JSQLParserException e
{:error :macaw.error/unable-to-parse
:context {:cause e}}))) |
A unique identifier for the given scope. | (defn scope-id [^AstWalker$Scope s] (.getId s)) |
The type of scope we're talking about e.g., a top-level SELECT. | (defn scope-label [^AstWalker$Scope s] (.getLabel s)) |
(defn- ->macaw-error [^AnalysisError analysis-error]
{:error (keyword "macaw.error" (-> (.-errorType analysis-error)
str/lower-case
(str/replace #"_" "-")))}) | |
(mu/defn query->components :- [:or m.types/error-result m.types/components-result]
"Given a parsed query (i.e., a [subclass of] `Statement`) return a map with the elements found within it.
(Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in
the query'; this function doesn't do additional inference work to find out a table's schema.)"
[parsed :- [:or m.types/error-result [:fn #(instance? Statement %)]]
& {:as opts} :- [:maybe m.types/options-map]]
;; By default, we will preserve identifiers verbatim, to be agnostic of casing and quoting.
;; This may result in duplicate components, which are left to the caller to deduplicate.
;; In Metabase's case, this is done during the stage where the database metadata is queried.
(try
(if (map? parsed)
parsed
(->> (collect/query->components parsed (merge {:preserve-identifiers? true} opts))
(walk/postwalk (fn [x]
(if (string? x)
(unescape-keywords x (:non-reserved-words opts))
x)))))
(catch AnalysisError e
(->macaw-error e)))) | |
(defn- raw-components [xs] (into (empty xs) (keep :component) xs)) | |
Given a table object, return a map with the schema and table names. | (defn- table->identifier
[^Table t]
(if (.getSchemaName t)
{:schema (.getSchemaName t)
:table (.getName t)}
{:table (.getName t)})) |
(defn- tables->identifiers [expr]
{:tables (set (map table->identifier expr))}) | |
(mu/defn query->tables :- [:or m.types/error-result m.types/tables-result]
"Given a parsed query (i.e., a [subclass of] `Statement`) return a set of all the table identifiers found within it."
[sql :- :string & {:keys [mode] :as opts} :- [:maybe m.types/options-map]]
(try
(let [parsed (parsed-query sql opts)]
(if (map? parsed)
parsed
(case mode
:ast-walker-1 (-> (query->components parsed opts) :tables raw-components (->> (hash-map :tables)))
:basic-select (-> (BasicTableExtractor/getTables parsed) tables->identifiers)
:compound-select (-> (CompoundTableExtractor/getTables parsed) tables->identifiers))))
(catch AnalysisError e
(->macaw-error e)))) | |
(mu/defn replace-names :- :string
"Given an SQL query, apply the given table, column, and schema renames.
Supported options:
- case-insensitive: whether to relax the comparison
- :upper - identifiers are implicitly case to uppercase, as per the SQL-92 standard.
- :lower - identifiers are implicitly cast to lowercase, as per Postgres et al.
- :agnostic - case is ignored when comparing identifiers in code to replacement \"from\" strings.
- quotes-preserve-case: whether quoted identifiers should override the previous option."
[sql :- :string
renames :- :map
& {:as opts} :- [:maybe m.types/options-map]]
;; We need to pre-sanitize the SQL before its analyzed so that the AST token positions match up correctly.
;; Currently, we use a more complex and expensive sanitization method, so that it's reversible.
;; If we decide that it's OK to normalize whitespace etc. during replacement, then we can use the same helper.
(let [sql' (-> (str/replace sql #"(?m)^\n" " \n")
(escape-keywords (:non-reserved-words opts)))
opts' (select-keys opts [:case-insensitive :quotes-preserve-case? :allow-unused?])
renames' (walk/postwalk (fn [x]
(if (string? x)
(escape-keywords x (:non-reserved-words opts))
x))
renames)
parsed (parsed-query sql' opts)]
(-> (rewrite/replace-names sql' parsed renames' opts')
(str/replace #"(?m)^ \n" "\n")
(unescape-keywords (:non-reserved-words opts))))) | |
(ns macaw.rewrite (:require [macaw.collect :as collect] [macaw.util :as u] [macaw.walk :as mw]) (:import (net.sf.jsqlparser.parser ASTNodeAccess SimpleNode) (net.sf.jsqlparser.schema Column Table))) | |
(set! *warn-on-reflection* true) | |
(defn- index-of-nth [^String haystack ^String needle n]
(assert (not (neg? n)))
(if (zero? n)
-1
(loop [n n
idx 0]
(let [next-id (.indexOf haystack needle idx)]
(cond
(= 1 n) next-id
(neg? next-id) next-id
:else (recur (dec n) (inc next-id))))))) | |
(defn- ->idx [^String sql line col] ;; The second `dec` on `line` is a workaround for what appears to be an off-by-one error in this JSQLParser version. (+ col (index-of-nth sql "\n" (dec (dec line))))) | |
Find the start and end index of the underlying tokens for a given AST node from a given SQL string. | (defn- node->idx-range
[^SimpleNode node sql]
(let [first-token (.jjtGetFirstToken node)
last-token (.jjtGetLastToken node)
first-idx (->idx sql
(.-beginLine first-token)
(.-beginColumn first-token))
last-idx (->idx sql
(.-endLine last-token)
(.-endColumn last-token))]
[first-idx last-idx])) |
(defn- splice-replacements [^String sql replacements]
(let [sb (StringBuilder.)
append #(.append sb %)]
(loop [start 0
[[[first-idx last-idx] value] & rst] replacements]
(if (nil? last-idx)
(when (< start (count sql))
(append (.substring sql start)))
(do (append (.substring sql start first-idx))
(append value)
(recur (inc ^long last-idx) rst))))
(str sb))) | |
Emit a SQL string for an updated AST, preserving the comments and whitespace from the original SQL. | (defn- update-query
[updated-ast updated-nodes sql & {:as _opts}]
(let [updated-node? (set (map first updated-nodes))
replacement (fn [->text visitable]
(let [ast-node (.getASTNode ^ASTNodeAccess visitable)
idx-range (node->idx-range ast-node sql)
node-text (->text visitable)]
[idx-range node-text]))
replace-name (fn [->text]
(fn [acc visitable _ctx]
(cond-> acc
(updated-node? visitable)
(conj (replacement ->text visitable)))))]
(splice-replacements
sql
(mw/fold-query
updated-ast
{:table (replace-name #(let [fqn (.getFullyQualifiedName ^Table %)
t-alias (.getAlias ^Table %)]
(if t-alias
(str fqn " " (.getName t-alias))
fqn)))
:column (replace-name #(.getFullyQualifiedName ^Column %))}
[])))) |
(defn- rename-table
[updated-nodes table-renames schema-renames known-tables opts ^Table t _ctx]
(when-let [rename (u/find-relevant table-renames (get known-tables t) [:table :schema])]
;; Handle both raw string renames, as well as more precise element based ones.
(vswap! updated-nodes conj [t rename])
(let [identifier (as-> (val rename) % (:table % %))]
(.setName t identifier)))
(let [raw-schema-name (.getSchemaName t)
schema-name (collect/normalize-reference raw-schema-name opts)]
(when-let [schema-rename (u/seek (comp (partial u/match-component schema-name) key) schema-renames)]
(vswap! updated-nodes conj [raw-schema-name schema-rename])
(let [identifier (as-> (val schema-rename) % (:table % %))]
(.setSchemaName t identifier))))) | |
(defn- rename-column
[updated-nodes column-renames known-columns ^Column c _ctx]
(when-let [rename (u/find-relevant column-renames (get known-columns c) [:column :table :schema])]
;; Handle both raw string renames, as well as more precise element based ones.
(vswap! updated-nodes conj [c rename])
(let [identifier (as-> (val rename) % (:column % %))]
(.setColumnName c identifier)))) | |
(defn- alert-unused! [updated-nodes renames]
(let [known-rename? (set (map second updated-nodes))]
(doseq [[k items] renames]
(when-let [unknown (first (remove known-rename? items))]
(throw (ex-info (str "Unknown rename: " unknown) {:type k
:rename unknown})))))) | |
(defn- index-by-instances [xs]
(into {} (for [x xs
:let [c (:component x)]
i (:instances c)]
[i c]))) | |
Given a SQL query and its corresponding (untransformed) AST, apply the given table and column renames. | (defn replace-names
[sql parsed-ast renames & {:as opts}]
(let [{schema-renames :schemas
table-renames :tables
column-renames :columns} renames
comps (collect/query->components parsed-ast (assoc opts :with-instance true))
columns (index-by-instances (:columns comps))
tables (index-by-instances (:tables-superset comps))
;; execute rename
updated-nodes (volatile! [])
rename-table* (partial rename-table updated-nodes table-renames schema-renames tables opts)
rename-column* (partial rename-column updated-nodes column-renames columns)
res (-> parsed-ast
(mw/walk-query
{:table rename-table*
:column-qualifier rename-table*
:column rename-column*})
(update-query @updated-nodes sql opts))]
(when-not (:allow-unused? opts)
(alert-unused! @updated-nodes renames))
res)) |
(ns macaw.types) | |
The different analyzer strategies that Macaw supports. | (def modes [:ast-walker-1 :basic-select :compound-select]) |
The shape of the options accepted by our API | (def options-map
[:map
[:mode {:optional true} (into [:enum] modes)]
[:non-reserved-words {:optional true} [:seqable :keyword]]
[:allow-unused? {:optional true} :boolean]
[:case-insensitive {:optional true} [:enum :upper :lower :agnostic]]
[:quotes-preserve-case? {:optional true} :boolean]]) |
The different types of errors that Macaw can return. | (def error-types [:macaw.error/analysis-error :macaw.error/illegal-expression :macaw.error/invalid-query :macaw.error/unable-to-parse :macaw.error/unsupported-expression]) |
A map indicating that we were not able to parse the query. | (def error-result [:map [:error (into [:enum] error-types)]]) |
(def ^:private table-ident
[:map
[:schema {:optional true} :string]
[:table :string]]) | |
(def ^:private column-ident
[:map
[:schema {:optional true} :string]
[:table {:optional true} :string]
[:column :string]]) | |
(defn- with-context [t] [:map [:component t] [:context :any]]) | |
A map holding all the components that we were able to parse from a query | (def components-result
[:map {:closed true}
[:tables [:set (with-context table-ident)]]
[:columns [:set (with-context column-ident)]]
[:source-columns [:set column-ident]]
;; TODO Unclear why we would want to wrap any of these.
[:table-wildcards [:set (with-context table-ident)]]
;; This :maybe would be a problem, if anything actually used this value.
[:tables-superset [:set (with-context [:maybe table-ident])]]
;; Unclear why we need a collection here
[:has-wildcard? [:set (with-context :boolean)]]
[:mutation-commands [:set (with-context :string)]]]) |
A map holding the tables that we were able to parse from a query | (def tables-result [:map [:tables [:set table-ident]]]) |
(ns macaw.util (:require [clojure.string :as str]) (:import (java.util.regex Pattern))) | |
Generalized https://ask.clojure.org/index.php/12319/can-group-by-be-generalized | (defn group-with
[kf rf coll]
(persistent!
(reduce
(fn [ret x]
(let [k (kf x)]
(assoc! ret k (rf (get ret k) x))))
(transient {})
coll))) |
Like (first (filter ... )), but doesn't realize chunks of the sequence. Returns the first item in | (defn seek [pred coll] (reduce (fn [acc x] (if (pred x) (reduced x) acc)) nil coll)) |
A hack around the fact that we don't (yet) track what columns are exposed by given sentinels. | (defn non-sentinel
[s]
(when s
(nil? (str/index-of s "_sentinel_")))) |
Check whether the given literal matches the expected literal or pattern. | (defn match-component
[expected actual]
(when expected
(if (instance? Pattern expected)
(boolean (re-find expected actual))
(= expected actual)))) |
(defn- match-prefix [element ks-prefix]
(let [expected (map element ks-prefix)]
(fn [entry]
(every? true? (map match-component expected (map (key entry) ks-prefix)))))) | |
Search the given map for the entry corresponding to [[element]], considering only the relevant keys. The relevant keys are obtained by ignoring any suffix of [[ks]] for which [[element]] has nil or missing values. We require that there is at least one relevant key to find a match. | (defn find-relevant
[m element ks]
(when element
;; Strip off keys from right-to-left where they are nil, and relax search to only consider these keys.
;; We need at least one non-generate key to remain for the search.
;; NOTE: we could optimize away calling `non-sentinel` twice in this function, but for now just keeping it simple.
(when-let [ks-prefix (->> ks reverse (drop-while (comp not non-sentinel element)) reverse seq)]
(seek (match-prefix element ks-prefix) m)))) |
(def ^:private nil-val? (comp nil? val)) | |
Remove any keys corresponding to nil values from the given map. | (defn strip-nils
[m]
(if (some nil-val? m)
(with-meta (into {} (remove nil-val?) m) (meta m))
m)) |
(ns macaw.util.malli (:refer-clojure :exclude [defn defn-]) (:require [macaw.util.malli.defn :as mu.defn] [potemkin :as p])) | |
(p/import-vars [mu.defn defn defn-]) | |
(ns macaw.util.malli.defn (:refer-clojure :exclude [defn defn-]) (:require [clojure.core :as core] [clojure.string :as str] [macaw.util.malli.fn :as mu.fn] [malli.destructure])) | |
(set! *warn-on-reflection* true) | |
TODO -- this should generate type hints from the schemas and from the return type as well. | (core/defn- deparameterized-arglist [{:keys [args]}]
(-> (malli.destructure/parse args)
:arglist
(with-meta (let [args-meta (meta args)
tag (:tag args-meta)
resolved-tag (when (symbol? tag)
(let [resolved (ns-resolve *ns* tag)]
(when (class? resolved)
(symbol (.getName ^Class resolved)))))]
(cond-> args-meta
resolved-tag (assoc :tag resolved-tag)))))) |
(core/defn- deparameterized-arglists [{:keys [arities], :as _parsed}]
(let [[arities-type arities-value] arities]
(case arities-type
:single (list (deparameterized-arglist arities-value))
:multiple (map deparameterized-arglist (:arities arities-value))))) | |
Generate a docstring with additional information about inputs and return type using a parsed fn tail (as parsed by [[mx/SchematizedParams]]). | (core/defn- annotated-docstring
[{original-docstring :doc
[arities-type arities-value] :arities
:keys [return]
:as _parsed}]
(str/trim
(str "Inputs: " (case arities-type
:single (pr-str (:args arities-value))
:multiple (str "("
(str/join "\n "
(map (comp pr-str :args)
(:arities arities-value)))
")"))
"\n Return: " (str/replace (:schema return :any) ; used to be a pprint
"\n"
(str "\n "))
(when (not-empty original-docstring)
(str "\n\n " original-docstring))))) |
Implementation of [[metabase.util.malli/defn]] taken from Metabase. Like [[schema.core/defn]], but for Malli. See notes/justification in the main Metabase repo. | (defmacro defn
[& [fn-name :as fn-tail]]
(let [parsed (mu.fn/parse-fn-tail fn-tail)
cosmetic-name (gensym (munge (str fn-name)))
{attr-map :meta} parsed
attr-map (merge
{:arglists (list 'quote (deparameterized-arglists parsed))
:schema (mu.fn/fn-schema parsed {:target :target/metadata})}
attr-map)
docstring (annotated-docstring parsed)
instrument? (mu.fn/instrument-ns? *ns*)]
(if-not instrument?
`(def ~(vary-meta fn-name merge attr-map)
~docstring
~(mu.fn/deparameterized-fn-form parsed cosmetic-name))
`(def ~(vary-meta fn-name merge attr-map)
~docstring
~(let [error-context {:fn-name (list 'quote fn-name)}]
(mu.fn/instrumented-fn-form error-context parsed cosmetic-name)))))) |
Same as defn, but creates a private def. | (defmacro defn-
[fn-name & fn-tail]
`(defn
~(with-meta fn-name (assoc (meta fn-name) :private true))
~@fn-tail)) |
(ns macaw.util.malli.fn (:refer-clojure :exclude [fn namespace]) (:require [clojure.core :as core] [macaw.config :as config] [macaw.util.malli.registry :as mr] [malli.core :as mc] [malli.destructure :as md] [malli.error :as me])) | |
(set! *warn-on-reflection* true) | |
Malli normally generates wacky default schemas when you use destructuring in an argslist; this never seems to work correctly, so just add default schemas manually to circumvent Malli's weird behavior. (add-default-schemas '[x {:keys [y]}]) ;; => [x {:keys [y]} :- [:maybe :map]] | (defn- add-default-schemas
[args]
(if (empty? args)
args
(loop [acc [], [x & [y z :as more]] args]
(let [schema (when (= y :-) z)
more (if schema
(drop 2 more)
more)
schema (cond
schema
schema
(and (or (map? x)
(sequential? x))
(= (last acc) '&))
[:* :any]
(map? x)
[:maybe :map]
(sequential? x)
[:maybe [:sequential :any]])
acc (concat acc (if schema
[x :- schema]
[x]))]
(if (seq more)
(recur acc more)
acc))))) |
Given a | (defn- arity-schema
[{:keys [args], :as _arity} return-schema {:keys [target], :as _options}]
(let [parsed (md/parse (add-default-schemas args))
varargs-info (get-in parsed [:parsed :rest :arg :arg])
varargs-type (cond
(= (first varargs-info) :map) :varargs/map
(seq varargs-info) :varargs/sequential)
schema (case target
:target/metadata (if (= varargs-type :varargs/map)
(vec (concat (butlast (:schema parsed)) [[:* :any]]))
(:schema parsed))
:target/instrumentation (:schema parsed))]
[:=>
(cond-> schema
varargs-type (vary-meta assoc :varargs/type varargs-type))
return-schema])) |
This is exactly the same as [[malli.experimental/SchematizedParams]], but it preserves metadata from the arglists. | (def ^:private SchematizedParams
(mc/schema
[:schema
{:registry {"Schema" any?
"Separator" [:= :-]
"Args" vector? ; [:vector :any] loses metadata, but vector? keeps it :shrug:
"PrePost" [:map
[:pre {:optional true} [:sequential any?]]
[:post {:optional true} [:sequential any?]]]
"Arity" [:catn
[:args "Args"]
[:prepost [:? "PrePost"]]
[:body [:* :any]]]
"Params" [:catn
[:name symbol?]
[:return [:? [:catn
[:- "Separator"]
[:schema "Schema"]]]]
[:doc [:? string?]]
[:meta [:? :map]]
[:arities [:altn
[:single "Arity"]
[:multiple [:catn
[:arities [:+ [:schema "Arity"]]]
[:meta [:? :map]]]]]]]}}
"Params"])) |
(def ^:private ^{:arglists '([fn-tail])} parse-SchematizedParams
(mc/parser SchematizedParams)) | |
Parse a parameterized | (defn parse-fn-tail
[fn-tail]
(let [parsed (parse-SchematizedParams (if (symbol? (first fn-tail))
fn-tail
(cons '&f fn-tail)))]
(when (= parsed ::mc/invalid)
(let [error (mc/explain SchematizedParams fn-tail)
humanized (me/humanize error)]
(throw (ex-info (format "Invalid function tail: %s" humanized)
{:fn-tail fn-tail
:error error
:humanized humanized}))))
parsed)) |
Implementation for [[fn]] and [[macaw.util.malli.defn/defn]]. Given an unparsed parametered fn tail, extract the
annotations and return a
| (defn fn-schema
([parsed]
(fn-schema parsed {:target :target/instrumentation}))
([parsed options]
(let [{:keys [return arities]} parsed
return-schema (:schema return :any)
[arities-type arities-value] arities]
(case arities-type
:single (arity-schema arities-value return-schema options)
:multiple (into [:function]
(for [arity (:arities arities-value)]
(arity-schema arity return-schema options))))))) |
(defn- deparameterized-arity [{:keys [body args prepost], :as _arity}]
(concat
[(:arglist (md/parse args))]
(when prepost
[prepost])
body)) | |
Generate a deparameterized | (defn deparameterized-fn-tail
[{[arities-type arities-value] :arities, :as _parsed}]
(let [body (case arities-type
:single (deparameterized-arity arities-value)
:multiple (for [arity (:arities arities-value)]
(deparameterized-arity arity)))]
body)) |
Impl for [[macaw.util.malli.fn/fn]] and [[macaw.util.malli.defn/defn]]. Given a parsed (deparameterized-fn-form (parse-fn-tail '[:- :int [x :- :int] (inc x)])) ;; => (fn [x] (inc x)) | (defn deparameterized-fn-form [parsed & [fn-name]] `(core/fn ~@(when fn-name [fn-name]) ~@(deparameterized-fn-tail parsed))) |
Whether [[validate-input]] and [[validate-output]] should validate things or not. | (def ^:dynamic *enforce* true) |
(defn- validate [error-context schema value error-type]
(when *enforce*
(when-let [error (mr/explain schema value)]
(let [humanized (me/humanize error {:wrap (core/fn humanize-include-value
[{:keys [value message]}]
(str message ", got: " (pr-str value)))})
details (merge
{:type error-type
:error error
:humanized humanized
:schema schema
:value value}
error-context)]
(when-not (config/is-prod?)
(throw (ex-info (case error-type
::invalid-input (format "Invalid input: %s" (pr-str humanized))
::invalid-output (format "Invalid output: %s" (pr-str humanized)))
details))))))) | |
Impl for [[macaw.util.malli.fn/fn]]; validates an input argument with | (defn validate-input [error-context schema value] (validate error-context schema value ::invalid-input)) |
Impl for [[macaw.util.malli.fn/fn]]; validates function output | (defn validate-output [error-context schema value] (validate error-context schema value ::invalid-output) value) |
(defn- varargs-type [input-schema] (-> input-schema meta :varargs/type)) | |
(defn- input-schema-arg-names [[_cat & args :as input-schema]]
(let [the-varargs-type (varargs-type input-schema)
normal-args (if the-varargs-type
(butlast args)
args)]
(concat
(for [n (range (count normal-args))]
(symbol (str (char (+ (int \a) n)))))
(case the-varargs-type
:varargs/sequential ['more]
:varargs/map ['kvs]
nil)))) | |
(defn- input-schema->arglist [input-schema]
(let [arg-names (input-schema-arg-names input-schema)]
(vec (if-let [the-varargs-type (varargs-type input-schema)]
(concat (butlast arg-names) ['& (case the-varargs-type
:varargs/sequential (last arg-names)
:varargs/map {:as (last arg-names)})])
arg-names)))) | |
(defn- input-schema->validation-forms [error-context [_cat & schemas :as input-schema]]
(let [arg-names (input-schema-arg-names input-schema)
schemas (if (= (varargs-type input-schema) :varargs/sequential)
(concat (butlast schemas) [[:maybe (last schemas)]])
schemas)]
(->> (map (core/fn [arg-name schema]
;; 1. Skip checks against `:any` schema, there is no situation where it would fail.
;;
;; 2. Skip checks against the default varargs schemas, there is no situation where [:maybe [:* :any]] is
;; going to fail.
(when-not (= schema (condp = arg-name
'more [:maybe [:* :any]]
'kvs [:* :any]
:any))
`(validate-input ~error-context ~schema ~arg-name)))
arg-names
schemas)
(filter some?)))) | |
(defn- input-schema->application-form [input-schema]
(let [arg-names (input-schema-arg-names input-schema)]
(if (= (varargs-type input-schema) :varargs/sequential)
(list* `apply '&f arg-names)
(list* '&f arg-names)))) | |
If exception is thrown from the [[validate]] machinery, remove those stack trace elements so the top of the stack is the calling function. | (defn fixup-stacktrace
[^Exception e]
(if (#{::invalid-input ::invalid-output} (-> e ex-data :type))
(let [trace (.getStackTrace e)
cleaned (when trace
(into-array StackTraceElement
(drop-while (comp #{(.getName (class validate))
(.getName (class validate-input))
(.getName (class validate-output))}
#(.getClassName ^StackTraceElement %))
trace)))]
(doto e
(.setStackTrace cleaned)))
e)) |
(defn- instrumented-arity [error-context [_=> input-schema output-schema]]
(let [input-schema (if (= input-schema :cat)
[:cat]
input-schema)
arglist (input-schema->arglist input-schema)
input-validation-forms (input-schema->validation-forms error-context input-schema)
result-form (input-schema->application-form input-schema)
result-form (if (and output-schema
(not= output-schema :any))
`(->> ~result-form
(validate-output ~error-context ~output-schema))
result-form)]
`(~arglist
(try
~@input-validation-forms
~result-form
(catch Exception ~'error
(throw (fixup-stacktrace ~'error))))))) | |
(defn- instrumented-fn-tail [error-context [schema-type :as schema]]
(case schema-type
:=>
[(instrumented-arity error-context schema)]
:function
(let [[_function & schemas] schema]
(for [schema schemas]
(instrumented-arity error-context schema))))) | |
Given a ([x :- :int y] (+ 1 2)) and parsed by [[parsed-fn-tail]], return an unevaluated instrumented [[fn]] form like (mc/-instrument {:schema [:=> [:cat :int :any] :any]} (fn [x y] (+ 1 2))) | (defn instrumented-fn-form
[error-context parsed & [fn-name]]
`(let [~'&f ~(deparameterized-fn-form parsed fn-name)]
(core/fn ~@(instrumented-fn-tail error-context (fn-schema parsed))))) |
------------------------------ Skipping Namespace Enforcement in prod ------------------------------ | |
Returns true if mu.fn/fn and mu/defn in a namespace should be instrumented with malli schema validation. | (defn instrument-ns?
[namespace]
(or (true? (:instrument/always (meta namespace)))
(not (config/is-prod?)))) |
Malli version of [[schema.core/fn]]. Unless it's in a skipped namespace during prod, a form like: (fn :- :int [x :- :int] (inc x)) compiles to something like (let [&f (fn [x] (inc x))] (fn [a] (validate-input {} :int a) (validate-output {} :int (&f a)))) The map arg here is additional error context; for something like [[macaw.util.malli/defn]], it will be something like {:fn-name 'metabase.lib.field/resolve-field-id} If compiled in a namespace in [[namespaces-toskip]], during Known issue: this version of (mu/fn my-fn ([x] (my-fn x 1)) ([x y :- :int] (+ x y))) If we were to include (let [&f (fn my-fn ([x] (my-fn x 1)) ([x y] (+ x y)))] (fn ([a] (&f a)) ([a b] (validate-input {} :int b) (&f a b)))) ;; skips the Since this is a big gotcha, we are currently not including the optional function name | (defmacro fn
[& fn-tail]
(let [parsed (parse-fn-tail fn-tail)
instrument? (instrument-ns? *ns*)]
(if-not instrument?
(deparameterized-fn-form parsed)
(let [error-context (if (symbol? (first fn-tail))
;; We want the quoted symbol of first fn-tail:
{:fn-name (list 'quote (first fn-tail))} {})]
(instrumented-fn-form error-context parsed))))) |
(ns macaw.util.malli.registry (:refer-clojure :exclude [declare def type]) (:require [malli.core :as mc] [malli.experimental.time :as malli.time] [malli.registry] [malli.util :as mut])) | |
(defonce ^:private cache (atom {})) | |
Get a cached value for You generally shouldn't use this outside of this namespace unless you have a really good reason to do so! Make sure you used namespaced keys if you are using it elsewhere. | (defn cached
[k schema value-thunk]
(or (get (get @cache k) schema) ; get-in is terribly inefficient
(let [v (value-thunk)]
(swap! cache assoc-in [k schema] v)
v))) |
Fetch a cached [[mc/validator]] for | (defn validator [schema] (cached :validator schema #(mc/validator schema))) |
[[mc/validate]], but uses a cached validator from [[validator]]. | (defn validate [schema value] ((validator schema) value)) |
Fetch a cached [[mc/explainer]] for | (defn explainer
[schema]
(letfn [(make-explainer []
#_{:clj-kondo/ignore [:discouraged-var]}
(let [validator* (mc/validator schema)
explainer* (mc/explainer schema)]
;; for valid values, it's significantly faster to just call the validator. Let's optimize for the 99.9%
;; of calls whose values are valid.
(fn schema-explainer [value]
(when-not (validator* value)
(explainer* value)))))]
(cached :explainer schema make-explainer))) |
[[mc/explain]], but uses a cached explainer from [[explainer]]. | (defn explain [schema value] ((explainer schema) value)) |
(defonce ^:private registry*
(atom (merge (mc/default-schemas)
(mut/schemas)
(malli.time/schemas)))) | |
(defonce ^:private registry (malli.registry/mutable-registry registry*)) | |
Register a spec with our Malli spec registry. | (defn register!
[schema definition]
(swap! registry* assoc schema definition)
(reset! cache {})
nil) |
Get the Malli schema for | (defn schema [type] (malli.registry/schema registry type)) |
Add a | (defn -with-doc
[the-schema docstring]
(cond
(and (vector? the-schema)
(map? (second the-schema)))
(let [[tag opts & args] the-schema]
(into [tag (assoc opts :doc/message docstring)] args))
(vector? the-schema)
(let [[tag & args] the-schema]
(into [tag {:doc/message docstring}] args))
:else
[:schema {:doc/message docstring} the-schema])) |
Like [[clojure.spec.alpha/def]]; add a Malli schema to our registry. | (defmacro def
([type the-schema]
`(register! ~type ~the-schema))
([type docstring the-schema]
`(macaw.util.malli.registry/def ~type
(-with-doc ~the-schema ~docstring)))) |
For REPL/test usage: get the definition of a registered schema from the registry. | (defn resolve-schema [the-schema] (mc/deref-all (mc/schema the-schema))) |
(ns macaw.walk (:import (com.metabase.macaw AstWalker AstWalker$CallbackKey))) | |
(set! *warn-on-reflection* true) | |
keyword->key map for the AST-folding callbacks. | (def ->callback-key
;; TODO: Move this to a Malli schema to simplify the indirection
{:alias AstWalker$CallbackKey/ALIAS
:column AstWalker$CallbackKey/COLUMN
:column-qualifier AstWalker$CallbackKey/COLUMN_QUALIFIER
:mutation AstWalker$CallbackKey/MUTATION_COMMAND
:pseudo-table AstWalker$CallbackKey/PSEUDO_TABLES
:table AstWalker$CallbackKey/TABLE
:table-wildcard AstWalker$CallbackKey/ALL_TABLE_COLUMNS
:wildcard AstWalker$CallbackKey/ALL_COLUMNS}) |
Lift a side effecting callback so that it preserves the accumulator. | (defn- preserve
[f]
(fn [acc & args]
(apply f args)
acc)) |
work around ast walker repeatedly visiting the same expressions (bug ?!) | (defn- deduplicate-visits [f]
(let [seen (volatile! #{})]
(fn [& [acc visitable & _ :as args]]
(if (contains? @seen visitable)
acc
(do (vswap! seen conj visitable)
(apply f args)))))) |
(defn- update-keys-vals [m key-f val-f]
(let [ret (persistent!
(reduce-kv (fn [acc k v]
(assoc! acc (key-f k) (val-f v)))
(transient {})
m))]
(with-meta ret (meta m)))) | |
Walk over the query's AST, using the callbacks for their side effects, for example to mutate the AST itself. | (defn walk-query
[parsed-query callbacks]
(let [callbacks (update-keys-vals callbacks ->callback-key (comp deduplicate-visits preserve))]
(.walk (AstWalker. callbacks ::ignored) parsed-query))) |
Fold over the query's AST, using the callbacks to update the accumulator. | (defn fold-query
[parsed-query callbacks init-val]
(let [callbacks (update-keys-vals callbacks ->callback-key deduplicate-visits)]
(.fold (AstWalker. callbacks init-val) parsed-query))) |
(ns macaw.acceptance-test (:require [clojure.java.io :as io] [clojure.set :as set] [clojure.string :as str] [clojure.test :refer :all] [macaw.core :as m] [macaw.core-test :as ct] [macaw.types]) (:import (java.io File))) | |
(set! *warn-on-reflection* true) | |
(defn- fixture-analysis [fixture] (some-> fixture (ct/fixture->filename "acceptance" ".analysis.edn") io/resource slurp read-string)) | |
(defn- fixture-renames [fixture] (some-> fixture (ct/fixture->filename "acceptance" ".renames.edn") io/resource slurp read-string)) | |
(defn- fixture-rewritten [fixture] (some-> fixture (ct/fixture->filename "acceptance" ".rewritten.sql") io/resource slurp str/trim)) | |
(defn- get-component [cs k]
(case k
:source-columns (get cs k)
:columns-with-scope (ct/contexts->scopes (get cs :columns))
(ct/raw-components (get cs k)))) | |
(def ^:private test-modes (set macaw.types/modes)) | |
(def override-hierarchy
(-> (make-hierarchy)
(derive :basic-select :select-only)
(derive :compound-select :select-only))) | |
(defn- lineage [h k]
(when k
(assert (<= (count (parents h k)) 1) "Multiple inheritance not supported for override hierarchy.")
(cons k (lineage h (first (parents h k)))))) | |
(def global-overrides
{}) | |
(def ns-overrides
{:select-only {"mutation" :macaw.error/invalid-query
"dynamic" :macaw.error/invalid-query}
:basic-select {"compound" :macaw.error/unsupported-expression}}) | |
(def ^:private merged-fixtures-file "test/resources/acceptance/queries.sql") | |
TODO generically detect queries. | (def ^:private dynamic-fixtures-file "test/resources/acceptance/queries.dynamic.sql") |
(defn- read-merged [file]
(->> (str/split (slurp file) #"-- FIXTURE: ")
(keep (fn [named-query]
(when-not (str/blank? named-query)
(let [[nm qry] (.split ^String named-query "\n" 2)]
[(keyword nm) (str/trim qry)]))))
(into {}))) | |
The fixtures in merged fixtures file, mapped by their identifiers. | (defn- merged-fixtures
[]
(merge (read-merged merged-fixtures-file)
(update-keys
(read-merged dynamic-fixtures-file)
#(keyword "dynamic" (name %))))) |
(defn- validate-analysis [correct override actual]
(let [expected (or override correct)]
(when override
(testing "Override is still needed"
(if (and (vector? correct) (not (keyword actual)))
(is (not= correct (ct/sorted actual)))
(is (not= correct actual)))))
(if (and (vector? expected) (not (keyword actual)))
(is (= expected (ct/sorted actual)))
(when (not= expected actual)
(is (= expected actual)))))) | |
(defn- when-keyword [x]
(when (keyword? x)
x)) | |
(defn- get-override* [expected-cs mode fixture ck]
(or (get-in expected-cs [:overrides mode :error])
(get-in expected-cs [:overrides mode ck])
(when-keyword (get-in expected-cs [:overrides mode]))
(get-in ns-overrides [mode (namespace fixture)])
(get global-overrides mode))) | |
(defn- get-override [expected-cs mode fixture ck]
(or
(some #(get-override* expected-cs % fixture ck)
(lineage override-hierarchy mode))
(get-in expected-cs [:overrides :error])
(get-in expected-cs [:overrides ck])
(when-keyword (get expected-cs :overrides)))) | |
Test that we can parse a given fixture, and compare against expected analysis and rewrites, where they are defined. | (defn- test-fixture
[fixture]
(let [prefix (str "(fixture: " (subs (str fixture) 1) ")")
merged (merged-fixtures)
sql (or (ct/query-fixture fixture) (get merged fixture))
expected-cs (fixture-analysis fixture)
renames (fixture-renames fixture)
expected-rw (fixture-rewritten fixture)
base-opts {:non-reserved-words [:final], :allow-unused? true}
opts-mode (fn [mode] (assoc base-opts :mode mode))]
(assert sql "Fixture exists")
(doseq [m test-modes
:let [opts (opts-mode m)]]
(if (= m :ast-walker-1)
;; Legacy testing path for `components`, which only supports the original walker, and throws exceptions.
(let [cs (testing (str prefix " analysis does not throw")
(is (ct/components sql opts)))]
(doseq [[ck cv] (dissoc expected-cs :overrides :error :skip)]
(testing (str prefix " analysis is correct: " (name ck))
(let [actual-cv (:error cs (get-component cs ck))
override (get-override expected-cs m fixture ck)]
(validate-analysis cv override actual-cv)))))
;; Testing path for newer modes.
(let [correct (:error expected-cs (:tables expected-cs))
override (cond
(str/includes? sql "-- BROKEN")
:macaw.error/unable-to-parse
(str/includes? sql "-- UNSUPPORTED")
:macaw.error/unsupported-expression
:else
(get-override expected-cs m fixture :tables))
;; For now, we only support (and test) :tables
tables (testing (str prefix " table analysis does not throw for mode " m)
(is (ct/tables sql opts)))]
(if (and (nil? correct) (nil? override))
(testing "Must define expected tables, or explicitly skip analysis"
(is (:skip expected-cs)))
(testing (str prefix " table analysis is correct for mode " m)
(validate-analysis correct override tables))))))
(when renames
(let [broken? (:broken? renames)
rewritten (testing (str prefix " rewriting does not throw")
(is (str/trim (m/replace-names sql (dissoc renames :broken?) base-opts))))]
(when expected-rw
(testing (str prefix " rewritten SQL is correct")
(if broken?
(is (not= expected-rw rewritten))
(is (= expected-rw rewritten))))))))) |
Find all the fixture symbols for stand-alone sql files within our test resources. | (defn isolated-fixtures
[]
(->> (io/resource "acceptance")
io/file
file-seq
(keep #(when (.isFile ^File %)
(let [n (.getName ^File %)]
(when (.endsWith n ".sql")
(str/replace n #"\.sql$" "")))))
(remove #(.contains ^String % "."))
(remove #{"queries"})
(map ct/stem->fixture)
(sort-by str))) |
(defn- all-fixtures []
(let [isolated (isolated-fixtures)
merged (keys (merged-fixtures))]
(assert (empty? (set/intersection (set isolated) (set merged)))
"No fixtures should be in both the isolated and merged files")
(sort-by str (distinct (concat isolated merged))))) | |
Find all the fixture files and for each of them run all the tests we can construct from the related files. | (defmacro create-fixture-tests!
[]
(let [fixtures (all-fixtures)]
(cons 'do
(for [f fixtures
:let [test-name (symbol (str/replace (ct/fixture->filename f "-test") #"(?<!_)_(?!_)" "-"))]]
`(deftest ^:parallel ~test-name
(test-fixture ~f)))))) |
(create-fixture-tests!) | |
(comment
;; Unload all the tests, useful for flushing stale fixture tests
(doseq [[sym ns-var] (ns-interns *ns*)]
(when (:test (meta ns-var))
(ns-unmap *ns* sym)))
(merged-fixtures)
;; Append all the isolated fixtures to the merged file.
;; For now, we keep the stress-testing fixtures separate, because OH LAWDY they HUGE.
(spit merged-fixtures-file
(str/join "\n\n"
(for [fixture (isolated-fixtures)]
(str "-- FIXTURE: "
(when-let [nms (namespace fixture)]
(str nms "/"))
(name fixture) "\n"
(str/trim
(ct/query-fixture fixture))))))
(deftest ^:parallel single-test
(test-fixture :compound/nested-cte-sneaky))
(test-fixture :compound/cte)
(test-fixture :compound/cte-nonambiguous)
(test-fixture :literal/with-table)
(test-fixture :literal/without-table)
(test-fixture :interpolation/crosstab)
(test-fixture :broken/filter-where)) | |
(ns ^:parallel macaw.core-test (:require [clojure.java.io :as io] [clojure.string :as str] [clojure.test :refer [deftest is testing]] [clojure.walk :as walk] [macaw.core :as m] [macaw.test.util :refer [ws=]] [macaw.walk :as mw] [mb.hawk.assert-exprs]) (:import (clojure.lang ExceptionInfo) (net.sf.jsqlparser.schema Table))) | |
(set! *warn-on-reflection* true) | |
(defn- non-empty-and-truthy [xs]
(if (seq xs)
(every? true? xs)
false)) | |
(defn components [sql & {:as opts}]
(m/query->components (m/parsed-query sql opts) opts)) | |
(defn tables [sql & {:as opts}]
(let [opts (update opts :mode #(or % :ast-walker-1))
result (m/query->tables sql opts)]
(or (:error result)
(:tables result)))) | |
(def raw-components #(let [xs (empty %)] (into xs (keep :component) %))) (def columns (comp raw-components :columns components)) (def source-columns (comp :source-columns components)) (def has-wildcard? (comp non-empty-and-truthy raw-components :has-wildcard? components)) (def mutations (comp raw-components :mutation-commands components)) (def table-wcs (comp raw-components :table-wildcards components)) | |
Strip the scope id from the context stacks, to get deterministic values for testing. | (defn- strip-context-ids
[m]
(walk/prewalk
(fn [x]
(if (:context x)
(update x :context (partial mapv m/scope-label))
x))
m)) |
(defn scope->vec [s] [(m/scope-label s) (m/scope-id s)]) | |
Replace full context stack with a reference to the local scope, only. | (defn contexts->scopes
[m]
(walk/prewalk
(fn [x]
(if-let [context (:context x)]
(-> x (dissoc :context) (assoc :scope (scope->vec (first context))))
x))
m)) |
(defn column-qualifiers
[query]
(mw/fold-query (m/parsed-query query)
{:column-qualifier (fn [acc tbl _ctx] (conj acc (.getName ^Table tbl)))}
#{})) | |
See [[macaw.core/parsed-query]] and https://github.com/JSQLParser/JSqlParser/issues/1988 for more details. | (def ^:private implicit-semicolon "select id from foo") |
(defn- ->windows [sql] (str/replace sql "\n" "\r\n")) | |
(deftest three-or-more-line-breaks-test
(doseq [f [identity ->windows]
:let [query (f implicit-semicolon)]]
(testing (if (= ->windows f) "windows" "unix")
(is (= (-> query (str/replace "id" "pk") (str/replace "foo" "bar"))
(m/replace-names query
{:columns {{:table "foo" :column "id"} "pk"}
:tables {{:table "foo"} "bar"}})))))) | |
(deftest query->tables-test
(testing "Simple queries"
(is (= #{{:table "core_user"}}
(tables "SELECT * FROM core_user;")))
(is (= #{{:table "core_user"}}
(tables "SELECT id, email FROM core_user;"))))
(testing "With a schema (Postgres)" ;; TODO: only run this against supported DBs
(is (= #{{:table "core_user" :schema "the_schema_name"}}
(tables "SELECT * FROM the_schema_name.core_user;")))
(is (= #{{:table "orders" :schema "public"}
{:table "orders" :schema "private"}}
(tables "SELECT a.x FROM public.orders a, private.orders"))))
(testing "Sub-selects"
(is (= #{{:table "core_user"}}
(tables "SELECT * FROM (SELECT DISTINCT email FROM core_user) q;"))))) | |
(deftest tables-with-complex-aliases-issue-14-test
(testing "With an alias that is also a table name"
(is (= #{{:table "user"}
{:table "user2_final"}}
(tables
"SELECT legacy_user.id AS old_id,
user.id AS new_id
FROM user AS legacy_user
OUTER JOIN user2_final AS user
ON legacy_user.email = user2_final.email;"))))) | |
(deftest column-qualifier-test
(testing "column-qualifiers works with tables and aliases"
(is (= #{"user" "legacy_user"}
(column-qualifiers "SELECT
user.id AS user_id,
legacy_user.id AS old_id
FROM user
OUTER JOIN user as legacy_user
ON user.email = user.electronic_mail_address
JOIN unrelated_table on foo = user.unrelated_id;"))))) | |
(deftest query->columns-test
(testing "Simple queries"
(is (= #{{:column "foo"}
{:column "bar"}
{:column "id" :table "quux"}
{:column "quux_id" :table "baz"}}
(columns "SELECT foo, bar FROM baz INNER JOIN quux ON quux.id = baz.quux_id"))))
(testing "'group by' columns present"
(is (= #{{:column "id" :table "orders"}
{:column "user_id" :table "orders"}}
(columns "SELECT id FROM orders GROUP BY user_id"))))
(testing "table alias present"
(is (= #{{:column "id" :table "orders" :schema "public"}}
(columns "SELECT o.id FROM public.orders o"))))
(testing "schema is determined correctly"
(is (= #{{:column "x" :table "orders" :schema "public"}}
(columns "SELECT public.orders.x FROM public.orders, private.orders"))))
(testing "quotes are retained"
(is (= #{{:column "`x`" :table "`orders`" :schema "`public`"}}
(columns "SELECT `public`.`orders`.`x` FROM `public`.`orders`, `private`.`orders`"))))
(testing "quotes and case are not interpreted"
(is (= #{{:column "x" :table "ORDERS" :schema "`public`"}
{:column "X" :table "ORDERS" :schema "`public`"}
{:column "`x`" :table "ORDERS" :schema "`public`"}
{:column "`X`" :table "ORDERS" :schema "`public`"}}
(columns "SELECT x, X, `x`, `X` FROM `public`.ORDERS"))))) | |
(def ^:private heavily-quoted-query "SELECT raw, \"foo\", \"dong\".\"bar\", `ding`.`dong`.`fee` FROM `ding`.dong") | |
(def ^:private heavily-quoted-query-rewritten "SELECT flaw, glue, long.lark, king.long.flee FROM king.long") | |
(def ^:private heavily-quoted-query-rewrites
{:schemas {"ding" "king"}
:tables {{:schema "ding" :table "dong"} "long"}
:columns {{:schema "ding" :table "dong" :column "raw"} "flaw"
{:schema "ding" :table "dong" :column "fee"} "flee"
{:schema "ding" :table "dong" :column "bar"} "lark"
{:schema "ding" :table "dong" :column "foo"} "glue"}}) | |
(defn normalized-components [sql]
(m/query->components (m/parsed-query sql) {:preserve-identifiers? false})) | |
(def normalized-columns (comp raw-components :columns normalized-components)) (def normalized-tables (comp raw-components :tables normalized-components)) | |
(deftest quotes-test
(is (= #{{:column "raw", :table "dong", :schema "ding"}
{:column "foo", :table "dong", :schema "ding"}
{:column "bar", :table "dong", :schema "ding"}
{:column "fee", :table "dong", :schema "ding"}}
(normalized-columns heavily-quoted-query)))
(is (= #{{:table "dong", :schema "ding"}}
(normalized-tables heavily-quoted-query)))
(is (= heavily-quoted-query-rewritten
(m/replace-names heavily-quoted-query heavily-quoted-query-rewrites)))) | |
(deftest case-sensitive-test
(is (= "SELECT X.Y, X.y, x.Z, x.z FROM X LEFT JOIN x ON X.Y=x.Z"
(m/replace-names "SELECT a.b, a.B, A.b, A.B FROM a LEFT JOIN A ON a.b=A.b"
{:tables {{:table "a"} "X"
{:table "A"} "x"}
:columns {{:table "a" :column "b"} "Y"
{:table "a" :column "B"} "y"
{:table "A" :column "b"} "Z"
{:table "A" :column "B"} "z"}})))) | |
(deftest case-insensitive-test
;; In the future, we might try to be smarter and preserve case.
(is (= "SELECT cats.meow FROM cats"
(m/replace-names "SELECT DOGS.BaRk FROM dOGS"
{:tables {{:table "dogs"} "cats"}
:columns {{:table "dogs" :column "bark"} "meow"}}
{:case-insensitive :lower})))
(is (= "SELECT meow FROM private.cats"
(m/replace-names "SELECT bark FROM PUBLIC.dogs"
{:schemas {"public" "private"}
:tables {{:schema "public" :table "dogs"} "cats"}
:columns {{:schema "public" :table "dogs" :column "bark"} "meow"}}
{:case-insensitive :lower})))
(is (= "SELECT id, meow FROM private.cats"
(m/replace-names "SELECT id, bark FROM PUBLIC.dogs"
{:schemas {"public" "private"}
:tables {{:schema "public" :table "DOGS"} "cats"}
:columns {{:schema "PUBLIC" :table "dogs" :column "bark"} "meow"}}
{:case-insensitive :agnostic
:allow-unused? true})))) | |
(def ^:private heavily-quoted-query-mixed-case "SELECT RAW, \"Foo\", \"doNg\".\"bAr\", `ding`.`doNg`.`feE` FROM `ding`.`doNg`") | |
(deftest case-and-quotes-test
(testing "By default, quoted references are also case insensitive"
(is (= heavily-quoted-query-rewritten
(m/replace-names heavily-quoted-query-mixed-case
heavily-quoted-query-rewrites
:case-insensitive :lower))))
(testing "One can opt-into ignoring case only for unquoted references\n"
(testing "None of the quoted identifiers with different case will be matched"
(is (thrown-with-msg? ExceptionInfo
#"Unknown rename: .* \"(dong)|(bar)|(foo)|(fee)\
(m/replace-names heavily-quoted-query-mixed-case
heavily-quoted-query-rewrites
:case-insensitive :agnostic
:quotes-preserve-case? true))))
(testing "The query is unchanged when allowed to run partially"
(is (= heavily-quoted-query-mixed-case
(m/replace-names heavily-quoted-query-mixed-case
heavily-quoted-query-rewrites
{:case-insensitive :agnostic
:quotes-preserve-case? true
:allow-unused? true})))))) | |
(def ^:private ambiguous-case-replacements
{:columns {{:schema "public" :table "DOGS" :column "BARK"} "MEOW"
{:schema "public" :table "dogs" :column "bark"} "meow"
{:schema "public" :table "dogs" :column "growl"} "purr"
{:schema "public" :table "dogs" :column "GROWL"} "PuRr"
{:schema "public" :table "DOGS" :column "GROWL"} "PURR"}}) | |
(deftest ambiguous-case-test
(testing "Correctly handles flexibility around the case of the replacements"
(doseq [[case-insensitive expected] {:lower "SELECT meow, PuRr FROM DOGS"
:upper "SELECT MEOW, PURR FROM DOGS"
;; Not strictly deterministic, depends on map ordering.
:agnostic "SELECT MEOW, PuRr FROM DOGS"}]
(is (= expected
(m/replace-names "SELECT bark, `GROWL` FROM DOGS"
ambiguous-case-replacements
{:case-insensitive case-insensitive
:quotes-preserve-case? true
:allow-unused? true})))))) | |
(deftest infer-test
(testing "We can infer a column through a few hoops"
(is (= #{{:column "amount" :table "orders"}}
(columns "SELECT amount FROM (SELECT amount FROM orders)")))
(is (= #{{:column "amount" :alias "cost" :table "orders"}
;; We preserve this for now, which has its scope to differentiate it from the qualified element.
;; Importantly, we do not infer it as coming from the orders table, despite that being the only table.
{:column "cost"}}
(columns "SELECT cost FROM (SELECT amount AS cost FROM orders)")))
(testing "We do not expose phantom columns due to references to aliases"
(is (= #{{:column "amount" :table "orders"}}
(source-columns "SELECT cost FROM (SELECT amount AS cost FROM orders)")))))) | |
(deftest infer-from-schema-test
(is (= #{{:schema "public" :table "towns"}}
(tables "select public.towns.id from towns")))) | |
(deftest mutation-test
(is (= #{"alter-sequence"}
(mutations "ALTER SEQUENCE serial RESTART WITH 42")))
(is (= #{"alter-session"}
(mutations "ALTER SESSION SET foo = 'bar'")))
(is (= #{"alter-system"}
(mutations "ALTER SYSTEM RESET ALL")))
(is (= #{"alter-table"}
(mutations "ALTER TABLE orders ADD COLUMN email text")))
(is (= #{"alter-view"}
(mutations "ALTER VIEW foo AS SELECT bar;")))
(is (= #{"create-function"} ; Postgres syntax
(mutations "CREATE FUNCTION multiply(integer, integer) RETURNS integer AS 'SELECT $1 * $2;' LANGUAGE SQL
IMMUTABLE RETURNS NULL ON NULL INPUT;")))
(is (= #{"create-function"} ; Conventional syntax
(mutations "CREATE FUNCTION multiply(a integer, b integer) RETURNS integer LANGUAGE SQL IMMUTABLE RETURNS
NULL ON NULL INPUT RETURN a + b;")))
(is (= #{"create-index"}
(mutations "CREATE INDEX idx_user_id ON orders(user_id);")))
(is (= #{"create-schema"}
(mutations "CREATE SCHEMA perthshire")))
(is (= #{"create-sequence"}
(mutations "CREATE SEQUENCE users_seq START WITH 42 INCREMENT BY 17")))
(is (= #{"create-synonym"}
(mutations "CREATE SYNONYM folk FOR people")))
(is (= #{"create-table"}
(mutations "CREATE TABLE poets (name text, id integer)")))
(is (= #{"create-view"}
(mutations "CREATE VIEW folk AS SELECT * FROM people WHERE id > 10")))
(is (= #{"delete"}
(mutations "DELETE FROM people")))
(is (= #{"drop"}
(mutations "DROP TABLE people")))
(is (= #{"grant"}
(mutations "GRANT SELECT, UPDATE, INSERT ON people TO myself")))
(is (= #{"insert"}
(mutations "INSERT INTO people(name, source) VALUES ('Robert Fergusson', 'Twitter'), ('Robert Burns',
'Facebook')")))
(is (= #{"purge"}
(mutations "PURGE TABLE people")))
(is (= #{"rename-table"}
(mutations "RENAME TABLE people TO folk")))
(is (= #{"truncate"}
(mutations "TRUNCATE TABLE people")))
(is (= #{"update"}
(mutations "UPDATE people SET name = 'Robert Fergusson' WHERE id = 23")))) | |
(deftest complicated-mutations-test
;; https://github.com/metabase/macaw/issues/18
#_(is (= #{"delete" "insert"}
(mutations "WITH outdated_orders AS (
DELETE FROM orders
WHERE
date <= '2018-01-01'
RETURNING *)
INSERT INTO order_log
SELECT * from outdated_orders;")))
(is (= #{"insert"}
(mutations "WITH outdated_orders AS (
SELECT * from orders)
INSERT INTO order_log
SELECT * from outdated_orders;")))) | |
(deftest alias-inclusion-test
(testing "Aliases are not included"
(is (= #{{:table "orders"} {:table "foo"}}
(tables "SELECT id, o.id FROM orders o JOIN foo ON orders.id = foo.order_id"))))) | |
(deftest select-*-test (is (true? (has-wildcard? "SELECT * FROM orders"))) (is (true? (has-wildcard? "SELECT id, * FROM orders JOIN foo ON orders.id = foo.order_id")))) | |
(deftest table-wildcard-test-without-aliases
(is (= #{{:table "orders"}}
(table-wcs "SELECT orders.* FROM orders JOIN foo ON orders.id = foo.order_id")))
(is (= #{{:table "foo" :schema "public"}}
(table-wcs "SELECT foo.* FROM orders JOIN public.foo f ON orders.id = foo.order_id")))) | |
(deftest table-star-test-with-aliases
(is (= #{{:table "orders"}}
(table-wcs "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id")))
(is (= #{{:table "foo"}}
(table-wcs "SELECT f.* FROM orders o JOIN foo f ON orders.id = foo.order_id")))) | |
(deftest context-test
(testing "Sub-select with outer wildcard"
;; TODO we should test the source and result columns too
(is (=? {:columns
#{{:component {:column "total" :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}
{:component {:column "id" :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}
{:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}},
:has-wildcard? #{{:component true, :context ["SELECT"]}},
:mutation-commands #{},
:tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
:table-wildcards #{}}
(strip-context-ids (components "SELECT * FROM (SELECT id, total FROM orders) WHERE total > 10")))))
(testing "Sub-select with inner wildcard"
(is (=? {:columns
#{{:component {:column "id" :table "orders"}, :context ["SELECT"]}
{:component {:column "total" :table "orders"}, :context ["SELECT"]}
{:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}},
:has-wildcard? #{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
:mutation-commands #{},
:tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
:table-wildcards #{}}
(strip-context-ids (components "SELECT id, total FROM (SELECT * FROM orders) WHERE total > 10")))))
(testing "Sub-select with dual wildcards"
(is (=? {:columns #{{:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}},
:has-wildcard?
#{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}
{:component true, :context ["SELECT"]}},
:mutation-commands #{},
:tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}},
:table-wildcards #{}}
(strip-context-ids (components "SELECT * FROM (SELECT * FROM orders) WHERE total > 10")))))
(testing "Join; table wildcard"
(is (=? {:columns #{{:component {:column "order_id" :table "foo"}, :context ["JOIN" "SELECT"]}
{:component {:column "id" :table "orders"}, :context ["JOIN" "SELECT"]}},
:has-wildcard? #{},
:mutation-commands #{},
:tables #{{:component {:table "foo"}, :context ["FROM" "JOIN" "SELECT"]}
{:component {:table "orders"}, :context ["FROM" "SELECT"]}},
:table-wildcards #{{:component {:table "orders"}, :context ["SELECT"]}}}
(strip-context-ids (components "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id")))))) | |
(deftest replace-names-test
(is (= "SELECT aa.xx, b.x, b.y FROM aa, b;"
(m/replace-names "SELECT a.x, b.x, b.y FROM a, b;"
{:tables {{:schema "public" :table "a"} "aa"}
:columns {{:schema "public" :table "a" :column "x"} "xx"}})))
(testing "Handle fully qualified replacement targets"
;; Giving Macaw more context could make it easier to
;; In any case, this is trivial for Metabase to provide.
(is (= "SELECT aa.xx, b.x, b.y FROM aa, b;"
(m/replace-names "SELECT a.x, b.x, b.y FROM a, b;"
{:tables {{:schema "public" :table "a"} "aa"}
:columns {{:schema "public" :table "a" :column "x"}
{:schema "public" :table "aa" :column "xx"}}}))))
;; To consider - we could avoid splitting up the renames into column and table portions in the client, as
;; qualified targets would allow us to infer such changes. Partial qualification could also work fine where there
;; is no ambiguity - even if this is just a nice convenience for testing.
#_(is (= "SELECT aa.xx, b.x, b.y FROM aa, b;"
(m/replace-names "SELECT a.x, b.x, b.y FROM a, b;"
{:columns {{:schema "public" :table "a" :column "x"}
{:table "aa" :column "xx"}}})))
(is (= "SELECT qwe FROM orders"
(m/replace-names "SELECT id FROM orders"
{:columns {{:schema "public" :table "orders" :column "id"} "qwe"}})))
(is (= "SELECT p.id, q.id FROM public.whatever p join private.orders q"
(m/replace-names "SELECT p.id, q.id FROM public.orders p join private.orders q"
{:tables {{:schema "public" :table "orders"} "whatever"}})))
(is (ws= "SELECT SUM(public.orders.total) AS s,
MAX(orders.total) AS max,
MIN(total) AS min
FROM public.orders"
(m/replace-names
"SELECT SUM(public.orders.amount) AS s,
MAX(orders.amount) AS max,
MIN(amount) AS min
FROM public.orders"
{:columns {{:schema "public" :table "orders" :column "amount"} "total"}})))
(is (ws= "SELECT *, sturmunddrang
, oink AS oink
FROM /* /* lore */
floor_muser,
user, /* more */ vigilant_user ;"
(m/replace-names
"SELECT *, boink
, yoink AS oink
FROM /* /* lore */
core_user,
bore_user, /* more */ snore_user ;"
{:tables {{:schema "public" :table "core_user"} "floor_muser"
{:schema "public" :table "bore_user"} "user"
{:schema "public" :table "snore_user"} "vigilant_user"}
:columns {{:schema "public" :table "core_user" :column "boink"} "sturmunddrang"
{:schema "public" :table "snore_user" :column "yoink"} "oink"}})))) | |
(deftest replace-schema-test
;; Somehow we broke renaming the `x` in the WHERE clause.
#_(is (= "SELECT totally_private.purchases.xx FROM totally_private.purchases, private.orders WHERE xx = 1"
(m/replace-names "SELECT public.orders.x FROM public.orders, private.orders WHERE x = 1"
{:schemas {"public" "totally_private"}
:tables {{:schema "public" :table "orders"} "purchases"}
:columns {{:schema "public" :table "orders" :column "x"} "xx"}})))) | |
(deftest allow-unused-test
(is (thrown-with-msg?
Exception #"Unknown rename"
(m/replace-names "SELECT 1" {:tables {{:schema "public" :table "a"} "aa"}})))
(is (= "SELECT 1"
(m/replace-names "SELECT 1" {:tables {{:schema "public" :table "a"} "aa"}}
{:allow-unused? true})))) | |
(deftest model-reference-test
(is (= "SELECT subtotal FROM metabase_sentinel_table_154643 LIMIT 3"
(m/replace-names "SELECT total FROM metabase_sentinel_table_154643 LIMIT 3"
{:columns {{:table "orders" :column "total"} "subtotal"}
:tables {{:table "orders"} "purchases"}}
{:allow-unused? true})))) | |
(defn- name-seq [seq-type]
(let [prefix (str seq-type "_")]
(rest (iterate (fn [_] (str (gensym prefix))) nil)))) | |
(defn fixture->filename
([fixture suffix]
(fixture->filename fixture nil suffix))
([fixture path suffix]
(as-> fixture %
[(namespace %) (name %)]
(remove nil? %)
(str/join "__" %)
(str/replace % "-" "_")
(if path (str path "/" %) %)
(str % suffix)))) | |
(defn stem->fixture [stem]
(let [[x y] (map #(str/replace % "_" "-") (str/split stem #"__"))]
(if y
(keyword x y)
(keyword x)))) | |
(def ^:private fixture-paths
#{nil "acceptance"}) | |
(defn query-fixture
([fixture]
(let [paths (map #(fixture->filename fixture % ".sql") fixture-paths)]
(when-let [r (some io/resource paths)]
(slurp r))))) | |
(defn- anonymize-query [query]
(let [m (components query)
ts (raw-components (:tables m))
cs (raw-components (:columns m))
ss (transduce (keep :schema) conj #{} (concat ts cs))]
(m/replace-names query
{:schemas (zipmap ss (name-seq "schema"))
:tables (zipmap ts (name-seq "table"))
:columns (zipmap cs (name-seq "column"))}
;; nothing should be unused... but we currently get some junk from analysis, sadly
{:allow-unused? true}))) | |
Read a fixture, anonymize the identifiers, write it back out again. | (defn- anonymize-fixture
[fixture]
(let [filename (fixture->filename fixture ".sql")]
(spit (str "test/resources/" filename)
(anonymize-query (query-fixture fixture))))) |
(def ^:private alias-shadow-query "SELECT people.*, orders.a as foo FROM orders JOIN people ON people.foo = orders.foo_id") | |
(deftest alias-shadow-replace-test
(testing "Aliases are not replaced, but real usages are"
(is (ws= (str/replace alias-shadow-query "people.foo" "people.bar")
(m/replace-names alias-shadow-query
{:columns {{:table "people" :column "foo"} "bar"}}
{:allow-unused? true}))))) | |
(def ^:private cte-query
"WITH engineering_employees AS (
SELECT id, name, department, favorite_language
FROM employees
WHERE department = 'Engineering')
SELECT id, name, favorite_language as fave_lang
FROM engineering_employees
WHERE favorite_language in ('mandarin clojure', 'middle javascript');") | |
(def ^:private sub-select-query
"SELECT id, name, favorite_language as fave_lang
FROM (
SELECT id, name, department, favorite_language
FROM employees
WHERE department = 'Engineering') as engineering_employees
WHERE favorite_language in ('mandarin clojure', 'middle javascript');") | |
(deftest cte-propagate-test
(testing "Transitive references are tracked to their source when replacing columns in queries with CTEs."
(is (= (str/replace cte-query "favorite_language" "first_language")
(m/replace-names cte-query
{:columns {{:table "employees", :column "favorite_language"} "first_language"}}))))) | |
(deftest sub-select-propagate-test
(testing "Transitive references are tracked to their source when replacing columns in queries with sub-selects."
(is (= (str/replace sub-select-query "favorite_language" "first_language")
(m/replace-names sub-select-query
{:columns {{:table "employees", :column "favorite_language"} "first_language"}}))))) | |
A transformation to help write tests, where hawk would face limitations on predicates within sets. | (defn sorted [element-set] (sort-by (comp (juxt :schema :table :column :scope) #(:component % %)) element-set)) |
(deftest count-field-test
(testing "COUNT(*) does not actually read any columns"
(is (empty? (columns "SELECT COUNT(*) FROM users")))
(is (false? (has-wildcard? "SELECT COUNT(*) FROM users")))
(is (empty? (table-wcs "SELECT COUNT(*) FROM users"))))
(testing "COUNT(1) does not actually read any columns"
(is (empty? (columns "SELECT COUNT(1) FROM users")))
(is (false? (has-wildcard? "SELECT COUNT(1) FROM users")))
(is (empty? (table-wcs "SELECT COUNT(1) FROM users"))))
(testing "We do care about explicitly referenced fields in a COUNT"
(is (= #{{:table "users" :column "id"}}
(source-columns "SELECT COUNT(id) FROM users"))))
(testing "We do care about deeply referenced fields in a COUNT however"
(is (= #{{:table "users" :column "id"}}
(source-columns "SELECT COUNT(DISTINCT(id)) FROM users"))))) | |
(deftest reserved-word-test
(testing "We can opt-out of reserving specific keywords"
(is (= #{{:schema "serial" :table "limit" :column "final"}}
(source-columns "SELECT limit.final FROM serial.limit" :non-reserved-words [:final :serial :limit]))))
(testing "We can replace with and from non-reserved keywords"
(is (= "SELECT y FROM final"
(m/replace-names "SELECT final FROM x"
{:tables {{:table "x"} "final"}
:columns {{:table "x" :column "final"} "y"}}
{:non-reserved-words [:final]}))))) | |
(deftest square-bracket-test
(testing "We can opt into allowing square brackets to quote things"
(is (=? {:tables #{{:schema "s" :table "t"}}
:columns #{{:schema "s" :table "t" :column "f"}}}
(update-vals
(components "SELECT [f] FROM [s].[t]"
{:features {:square-bracket-quotes true}
:preserve-identifiers? false})
raw-components))))) | |
(comment
(require 'user) ;; kondo, really
(require '[clj-async-profiler.core :as prof])
(prof/serve-ui 8080)
(defn- simple-benchmark []
(source-columns "SELECT x FROM t"))
(defn- complex-benchmark []
(count
(source-columns
(query-fixture :snowflake))))
(user/time+ (simple-benchmark))
(prof/profile {:event :alloc}
(dotimes [_ 1000] (simple-benchmark)))
(user/time+ (complex-benchmark))
(prof/profile {:event :alloc}
(dotimes [_ 100] (complex-benchmark)))
(anonymize-query "SELECT x FROM a")
(anonymize-fixture :snowflakelet)
(require 'virgil)
(require 'clojure.tools.namespace.repl)
(virgil/watch-and-recompile ["java"] :post-hook clojure.tools.namespace.repl/refresh-all)) | |
(ns macaw.test.util (:require [clojure.string :as str] [clojure.test :refer :all] [clojure.walk :as walk])) | |
(defn- indentation [s] (count (re-find #"^\s*" s))) | |
(defn- trim-indent* [margin s]
(if (< (count s) margin)
(subs s margin))) | |
Given a multi-line string, remove the common margin from the remaining lines. Used so that strings with significant whitespace may be visually aligned. | (defn trim-indent
[s]
(let [lines (str/split-lines s)
margin (->> (rest lines)
(remove str/blank?)
(transduce (map indentation) min Integer/MAX_VALUE))]
(str/join "\n" (cons (first lines) (map (partial trim-indent* margin) (rest lines)))))) |
Trim the extra indentation from all string literals before evaluation a given equality form. | (defmacro ws= [& xs] `(= ~@(walk/postwalk #(cond-> % (string? %) trim-indent) xs))) |
(ns macaw.test.util-test (:require [clojure.string :as str] [clojure.test :refer :all] [macaw.test.util :refer [ws=]])) | |
(deftest ^:parallel ws=-test
(testing "Code indentation is ignored"
(is (ws= "ABC
DEF
XXX"
(str/replace "ABC
DEF
GHI"
"GHI" "XXX"))))
(testing "Comparison is still whitespace sensitive"
(is (not (ws= "A B" "A B")))
(is (not (ws= "A
B
C"
"A
B
C"))))) | |
(ns macaw.util-test (:require [clojure.test :refer :all] [macaw.util :as u])) | |
(def ^:private haystack
{{:a 3 :b 2 :c 1} 2
{:a 3 :b nil :c 1} 3
{:a 1 :b 2 :c 3} 1}) | |
(deftest ^:parallel relevant-find-test
(testing "We ignore any suffix of degenerate keys"
(doseq [x [{:a 1}
{:a 1 :b nil}
{:a 1 :b 2}
{:a 1 :b 2 :c nil}
{:a 1 :b 2 :c 3}
{:a 1 :b 2 :c 3 :d 4}]]
(is (= [{:a 1 :b 2 :c 3} 1]
(u/find-relevant haystack x [:a :b :c])))))
(testing "We need at least one non-degenerate key"
(is (nil? (u/find-relevant haystack {} [:a :b :c]))))
(testing "We don't ignore non-suffix degenerate keys"
(doseq [x [{:a nil :b 2}
{:a 1 :b nil :c 3}
{:a nil :b 2 :c 3}]]
(is (nil?
(u/find-relevant haystack x [:a :b :c])))))) | |
(ns macaw.util.malli-test (:require [clojure.test :refer :all] [macaw.util.malli :as mu])) | |
(mu/defn enhance :- :int
[x :- :int, fail-deliberately? :- :boolean]
(if fail-deliberately?
:poppycock
(int (+ x 43)))) | |
(deftest ^:parallel defn-good-in-good-out-test (is (= 50 (enhance 7 false)))) | |
(deftest ^:parallel defn-good-in-bad-out-test
(is (thrown-with-msg? Exception
#"Invalid output.*"
(enhance 7 true)))) | |
(deftest ^:parallel defn-bad-in-good-out-test
(is (thrown-with-msg? Exception
#"Invalid input.*"
(enhance 7.3 false)))) | |
(deftest ^:parallel defn-bad-in-bad-out-test
(is (thrown-with-msg? Exception
#"Invalid input.*"
(enhance 7.3 true)))) | |