Macaw0.0.1-SNAPSHOTA Clojure wrapper for JSqlParser 🦜 | (this space intentionally left almost blank) |
(ns macaw.core (:require [macaw.rewrite :as rewrite] [macaw.util :as u] [macaw.walk :as mw]) (:import (com.metabase.macaw AstWalker$QueueItem) (net.sf.jsqlparser.expression Alias) (net.sf.jsqlparser.parser CCJSqlParserUtil) (net.sf.jsqlparser.schema Column Table) (net.sf.jsqlparser.statement Statement) (net.sf.jsqlparser.statement.select AllTableColumns))) | |
(set! *warn-on-reflection* true) | |
(defn- conj-to ([key-name] (conj-to key-name identity)) ([key-name xf] (fn item-conjer [results component context] (update results key-name conj {:component (xf component) :context (mapv (fn [^AstWalker$QueueItem x] [(keyword (.getKey x)) (.getValue x)]) context)})))) | |
(defn- query->raw-components [^Statement parsed-query] (mw/fold-query parsed-query {:column (conj-to :columns) :mutation (conj-to :mutation-commands) :wildcard (conj-to :has-wildcard? (constantly true)) :table (conj-to :tables) :table-wildcard (conj-to :table-wildcards)} {:columns #{} :has-wildcard? #{} :mutation-commands #{} :tables #{} :table-wildcards #{}})) | |
tables | |
(defn- make-table [^Table t _ctx] (merge {:table (.getName t)} (when-let [s (.getSchemaName t)] {:schema s}))) | |
(defn- alias-mapping [^Table table ctx] (when-let [^Alias table-alias (.getAlias table)] [(.getName table-alias) (make-table table ctx)])) | |
JSQLParser can't tell whether the | (defn- resolve-table-name [{:keys [alias->table name->table]} ^AllTableColumns atc _ctx] (let [table-name (-> atc .getTable .getName)] (or (alias->table table-name) (name->table table-name)))) |
columns | |
(defn- maybe-column-alias [[maybe-alias :as _ctx]] (when (= (first maybe-alias) :alias) {:alias (second maybe-alias)})) | |
(defn- maybe-column-table [{:keys [alias->table name->table]} ^Column c] (if-let [t (.getTable c)] (or (get alias->table (.getName t)) (:component (get name->table (.getName t)))) ;; if we see only a single table, we can safely say it's the table of that column (when (= (count name->table) 1) (:component (val (first name->table)))))) | |
(defn- make-column [data ^Column c ctx] (merge {:column (.getColumnName c)} (maybe-column-alias ctx) (maybe-column-table data c))) | |
get them together | |
(defn- only-query-context [ctx] (into [] (comp (filter #(= (first %) :query)) (map second)) ctx)) | |
(defn- update-components [f components] (map #(-> % (update :component f (:context %)) (update :context only-query-context)) components)) | |
Given a parsed query (i.e., a [subclass of] (Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in the query'; this function doesn't do additional inference work to find out a table's schema.) | (defn query->components [^Statement parsed-query] (let [{:keys [columns has-wildcard? mutation-commands tables table-wildcards]} (query->raw-components parsed-query) alias-map (into {} (map #(-> % :component (alias-mapping (:context %))) tables)) table-map (->> (update-components make-table tables) (u/group-with #(-> % :component :table) (fn [a b] (if (:schema a) a b)))) data {:alias->table alias-map :name->table table-map}] {:columns (into #{} (update-components (partial make-column data) columns)) :has-wildcard? (into #{} (update-components (fn [x & _args] x) has-wildcard?)) :mutation-commands (into #{} mutation-commands) :tables (into #{} (vals table-map)) :table-wildcards (into #{} (update-components (partial resolve-table-name data) table-wildcards))})) |
Main entry point: takes a string query and returns a | (defn parsed-query [^String query] (CCJSqlParserUtil/parse query)) |
Given a SQL query, apply the given table, column, and schema renames. | (defn replace-names [sql renames] (rewrite/replace-names sql (parsed-query sql) renames)) |
(ns macaw.rewrite (:require [macaw.walk :as mw]) (:import (net.sf.jsqlparser.parser ASTNodeAccess SimpleNode) (net.sf.jsqlparser.schema Column Table))) | |
(set! *warn-on-reflection* true) | |
(defn- index-of-nth [^String haystack ^String needle n] (assert (not (neg? n))) (if (zero? n) -1 (loop [n n idx 0] (let [next-id (.indexOf haystack needle idx)] (cond (= 1 n) next-id (neg? next-id) next-id :else (recur (dec n) (inc next-id))))))) | |
(defn- ->idx [^String sql line col] (+ col (index-of-nth sql "\n" (dec line)))) | |
Find the start and end index of the underlying tokens for a given AST node from a given SQL string. | (defn- node->idx-range [^SimpleNode node sql] (let [first-token (.jjtGetFirstToken node) last-token (.jjtGetLastToken node) first-idx (->idx sql (.-beginLine first-token) (.-beginColumn first-token)) last-idx (->idx sql (.-endLine last-token) (.-endColumn last-token))] [first-idx last-idx])) |
(defn- splice-replacements [^String sql replacements] (let [sb (StringBuilder.) append #(.append sb %)] (loop [start 0 [[[first-idx last-idx] value] & rst] replacements] (if (nil? last-idx) (when (< start (count sql)) (append (.substring sql start))) (do (append (.substring sql start first-idx)) (append value) (recur (inc ^long last-idx) rst)))) (str sb))) | |
Emit a SQL string for an updated AST, preserving the comments and whitespace from the original SQL. | (defn- update-query [updated-ast updated-node? sql] (let [replacement (fn [->text visitable] (let [ast-node (.getASTNode ^ASTNodeAccess visitable) idx-range (node->idx-range ast-node sql) node-text (->text visitable)] [idx-range node-text])) replace-name (fn [->text] (fn [acc visitable _ctx] (cond-> acc (updated-node? visitable) (conj (replacement ->text visitable)))))] (splice-replacements sql (mw/fold-query updated-ast {:table (replace-name #(.getFullyQualifiedName ^Table %)) :column (replace-name #(.getFullyQualifiedName ^Column %))} [])))) |
(defn- rename-table [updated-nodes table-renames schema-renames ^Table table _ctx] (when-let [name' (get table-renames (.getName table))] (vswap! updated-nodes conj table) (.setName table name')) (when-let [new-schema-name (get schema-renames (.getSchemaName table))] (.setSchemaName table new-schema-name))) | |
(defn- rename-column [updated-nodes column-renames ^Column column _ctx] (when-let [name' (get column-renames (.getColumnName column))] (vswap! updated-nodes conj column) (.setColumnName column name'))) | |
Given a SQL query and its corresponding (untransformed) AST, apply the given table and column renames. | (defn replace-names [sql parsed-ast {schema-renames :schemas table-renames :tables column-renames :columns}] (let [updated-nodes (volatile! #{})] (-> parsed-ast (mw/walk-query {:table (partial rename-table updated-nodes table-renames schema-renames) :column-qualifier (partial rename-table updated-nodes table-renames schema-renames) :column (partial rename-column updated-nodes column-renames)}) (update-query @updated-nodes sql)))) |
(ns macaw.util) | |
Generalized https://ask.clojure.org/index.php/12319/can-group-by-be-generalized | (defn group-with [kf rf coll] (persistent! (reduce (fn [ret x] (let [k (kf x)] (assoc! ret k (rf (get ret k) x)))) (transient {}) coll))) |
(ns macaw.walk (:import (com.metabase.macaw AstWalker AstWalker$CallbackKey))) | |
(set! *warn-on-reflection* true) | |
keyword->key map for the AST-folding callbacks. | (def ->callback-key ;; TODO: Move this to a Malli schema to simplify the indirection {:column AstWalker$CallbackKey/COLUMN :column-qualifier AstWalker$CallbackKey/COLUMN_QUALIFIER :mutation AstWalker$CallbackKey/MUTATION_COMMAND :table AstWalker$CallbackKey/TABLE :table-wildcard AstWalker$CallbackKey/ALL_TABLE_COLUMNS :wildcard AstWalker$CallbackKey/ALL_COLUMNS}) |
Lift a side effecting callback so that it preserves the accumulator. | (defn- preserve [f] (fn [acc & args] (apply f args) acc)) |
work around ast walker repeatedly visiting the same expressions (bug ?!) | (defn- deduplicate-visits [f] (let [seen (volatile! #{})] (fn [& [acc visitable & _ :as args]] (if (contains? @seen visitable) acc (do (vswap! seen conj visitable) (apply f args)))))) |
(defn- update-keys-vals [m key-f val-f] (into {} (map (fn [[k v]] [(key-f k) (val-f v)])) m)) | |
Walk over the query's AST, using the callbacks for their side-effects, for example to mutate the AST itself. | (defn walk-query [parsed-query callbacks] (let [callbacks (update-keys-vals callbacks ->callback-key (comp deduplicate-visits preserve))] (.walk (AstWalker. callbacks ::ignored) parsed-query))) |
Fold over the query's AST, using the callbacks to update the accumulator. | (defn fold-query [parsed-query callbacks init-val] (let [callbacks (update-keys-vals callbacks ->callback-key deduplicate-visits)] (.fold (AstWalker. callbacks init-val) parsed-query))) |
(ns ^:parallel macaw.core-test (:require [clojure.test :refer [deftest testing is]] [macaw.core :as m] [macaw.walk :as mw]) (:import (net.sf.jsqlparser.schema Table))) | |
(set! *warn-on-reflection* true) | |
(defn- and* [x y] (and x y)) | |
(def components (comp m/query->components m/parsed-query)) (def raw-components (comp (partial into #{}) (partial map :component))) (def columns (comp raw-components :columns components)) (def has-wildcard? (comp (partial reduce and*) raw-components :has-wildcard? components)) (def mutations (comp raw-components :mutation-commands components)) (def tables (comp raw-components :tables components)) (def table-wcs (comp raw-components :table-wildcards components)) | |
(defn column-qualifiers [query] (mw/fold-query (m/parsed-query query) {:column-qualifier (fn [acc tbl _ctx] (conj acc (.getName ^Table tbl)))} #{})) | |
(deftest query->tables-test (testing "Simple queries" (is (= #{{:table "core_user"}} (tables "SELECT * FROM core_user;"))) (is (= #{{:table "core_user"}} (tables "SELECT id, email FROM core_user;")))) (testing "With a schema (Postgres)" ;; TODO: only run this against supported DBs (is (= #{{:table "core_user" :schema "the_schema_name"}} (tables "SELECT * FROM the_schema_name.core_user;")))) (testing "Sub-selects" (is (= #{{:table "core_user"}} (tables "SELECT * FROM (SELECT DISTINCT email FROM core_user) q;"))))) | |
(deftest tables-with-complex-aliases-issue-14-test (testing "With an alias that is also a table name" (is (= #{{:table "user"} {:table "user2_final"}} (tables "SELECT legacy_user.id AS old_id, user.id AS new_id FROM user AS legacy_user OUTER JOIN user2_final AS user ON legacy_user.email = user2_final.email;"))))) | |
(deftest column-qualifier-test (testing "column-qualifiers works with tables and aliases" (is (= #{"user" "legacy_user"} (column-qualifiers "SELECT user.id AS user_id, legacy_user.id AS old_id FROM user OUTER JOIN user as legacy_user ON user.email = user.electronic_mail_address JOIN unrelated_table on foo = user.unrelated_id;"))))) | |
(deftest query->columns-test (testing "Simple queries" (is (= #{{:column "foo"} {:column "bar"} {:column "id" :table "quux"} {:column "quux_id" :table "baz"}} (columns "SELECT foo, bar FROM baz INNER JOIN quux ON quux.id = baz.quux_id")))) (testing "'group by' columns present" (is (= #{{:column "id" :table "orders"} {:column "user_id" :table "orders"}} (columns "SELECT id FROM orders GROUP BY user_id")))) (testing "table alias present" (is (= #{{:column "id" :table "orders" :schema "public"}} (columns "SELECT o.id FROM public.orders o"))))) | |
(deftest infer-test (testing "We can first column through a few hoops" (is (= #{{:column "amount" :table "orders"}} (columns "SELECT amount FROM (SELECT amount FROM orders)"))) (is (= #{{:column "amount" :alias "cost" :table "orders"} ;; FIXME: we need to figure out that `cost` is an alias from subquery {:column "cost", :table "orders"}} (columns "SELECT cost FROM (SELECT amount AS cost FROM orders)"))))) | |
(deftest mutation-test (is (= #{"alter-sequence"} (mutations "ALTER SEQUENCE serial RESTART WITH 42"))) (is (= #{"alter-session"} (mutations "ALTER SESSION SET foo = 'bar'"))) (is (= #{"alter-system"} (mutations "ALTER SYSTEM RESET ALL"))) (is (= #{"alter-table"} (mutations "ALTER TABLE orders ADD COLUMN email text"))) (is (= #{"alter-view"} (mutations "ALTER VIEW foo AS SELECT bar;"))) (is (= #{"create-function"} ; Postgres syntax (mutations "CREATE FUNCTION multiply(integer, integer) RETURNS integer AS 'SELECT $1 * $2;' LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT;"))) (is (= #{"create-function"} ; Conventional syntax (mutations "CREATE FUNCTION multiply(a integer, b integer) RETURNS integer LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT RETURN a + b;"))) (is (= #{"create-index"} (mutations "CREATE INDEX idx_user_id ON orders(user_id);"))) (is (= #{"create-schema"} (mutations "CREATE SCHEMA perthshire"))) (is (= #{"create-sequence"} (mutations "CREATE SEQUENCE users_seq START WITH 42 INCREMENT BY 17"))) (is (= #{"create-synonym"} (mutations "CREATE SYNONYM folk FOR people"))) (is (= #{"create-table"} (mutations "CREATE TABLE poets (name text, id integer)"))) (is (= #{"create-view"} (mutations "CREATE VIEW folk AS SELECT * FROM people WHERE id > 10"))) (is (= #{"delete"} (mutations "DELETE FROM people"))) (is (= #{"drop"} (mutations "DROP TABLE people"))) (is (= #{"grant"} (mutations "GRANT SELECT, UPDATE, INSERT ON people TO myself"))) (is (= #{"insert"} (mutations "INSERT INTO people(name, source) VALUES ('Robert Fergusson', 'Twitter'), ('Robert Burns', 'Facebook')"))) (is (= #{"purge"} (mutations "PURGE TABLE people"))) (is (= #{"rename-table"} (mutations "RENAME TABLE people TO folk"))) (is (= #{"truncate"} (mutations "TRUNCATE TABLE people"))) (is (= #{"update"} (mutations "UPDATE people SET name = 'Robert Fergusson' WHERE id = 23")))) | |
(deftest complicated-mutations-test ;; https://github.com/metabase/macaw/issues/18 #_ (is (= #{"delete" "insert"} (mutations "WITH outdated_orders AS ( DELETE FROM orders WHERE date <= '2018-01-01' RETURNING *) INSERT INTO order_log SELECT * from outdated_orders;"))) (is (= #{ "insert"} (mutations "WITH outdated_orders AS ( SELECT * from orders) INSERT INTO order_log SELECT * from outdated_orders;")))) | |
(deftest alias-inclusion-test (testing "Aliases are not included" (is (= #{{:table "orders"} {:table "foo"}} (tables "SELECT id, o.id FROM orders o JOIN foo ON orders.id = foo.order_id"))))) | |
(deftest select-*-test (is (true? (has-wildcard? "SELECT * FROM orders"))) (is (true? (has-wildcard? "SELECT id, * FROM orders JOIN foo ON orders.id = foo.order_id")))) | |
(deftest table-wildcard-test-without-aliases (is (= #{{:component {:table "orders"} :context ["FROM" "SELECT"]}} (table-wcs "SELECT orders.* FROM orders JOIN foo ON orders.id = foo.order_id"))) (is (= #{{:component {:table "foo" :schema "public"} :context ["FROM" "JOIN" "SELECT"]}} (table-wcs "SELECT foo.* FROM orders JOIN public.foo f ON orders.id = foo.order_id")))) | |
(deftest table-star-test-with-aliases (is (= #{{:table "orders"}} (table-wcs "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id"))) (is (= #{{:table "foo"}} (table-wcs "SELECT f.* FROM orders o JOIN foo f ON orders.id = foo.order_id")))) | |
(deftest context-test (testing "Sub-select with outer wildcard" (is (= {:columns #{{:component {:column "total" :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]} {:component {:column "id" :table "orders"}, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]} {:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}}, :has-wildcard? #{{:component true, :context ["SELECT"]}}, :mutation-commands #{}, :tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :table-wildcards #{}} (components "SELECT * FROM (SELECT id, total FROM orders) WHERE total > 10")))) (testing "Sub-select with inner wildcard" (is (= {:columns #{{:component {:column "id" :table "orders"}, :context ["SELECT"]} {:component {:column "total" :table "orders"}, :context ["SELECT"]} {:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}}, :has-wildcard? #{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :mutation-commands #{}, :tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :table-wildcards #{}} (components "SELECT id, total FROM (SELECT * FROM orders) WHERE total > 10")))) (testing "Sub-select with dual wildcards" (is (= {:columns #{{:component {:column "total" :table "orders"}, :context ["WHERE" "JOIN" "FROM" "SELECT"]}}, :has-wildcard? #{{:component true, :context ["SELECT" "SUB_SELECT" "FROM" "SELECT"]} {:component true, :context ["SELECT"]}}, :mutation-commands #{}, :tables #{{:component {:table "orders"}, :context ["FROM" "SELECT" "SUB_SELECT" "FROM" "SELECT"]}}, :table-wildcards #{}} (components "SELECT * FROM (SELECT * FROM orders) WHERE total > 10")))) (testing "Join; table wildcard" (is (= {:columns #{{:component {:column "order_id" :table "foo"}, :context ["JOIN" "SELECT"]} {:component {:column "id" :table "orders"}, :context ["JOIN" "SELECT"]}}, :has-wildcard? #{}, :mutation-commands #{}, :tables #{{:component {:table "foo"}, :context ["FROM" "JOIN" "SELECT"]} {:component {:table "orders"}, :context ["FROM" "SELECT"]}}, :table-wildcards #{{:component {:table "orders"}, :context ["SELECT"]}}} (components "SELECT o.* FROM orders o JOIN foo ON orders.id = foo.order_id"))))) | |
(defn test-replacement [before replacements after] (is (= after (m/replace-names before replacements)))) | |
(deftest replace-names-test (test-replacement "SELECT a.x, b.y FROM a, b;" {:tables {"a" "aa"} :columns {"x" "xx"}} "SELECT aa.xx, b.y FROM aa, b;") (test-replacement "SELECT *, boink , yoink AS oink FROM /* /* lore */ core_user, bore_user, /* more */ snore_user ;" {:tables {"core_user" "floor_muser" "bore_user" "user" "snore_user" "vigilant_user" "cruft" "tuft"} :columns {"boink" "sturmunddrang" "yoink" "oink" "hoi" "polloi"}} "SELECT *, sturmunddrang , oink AS oink FROM /* /* lore */ floor_muser, user, /* more */ vigilant_user ;")) | |
(deftest replace-schema-test (test-replacement "SELECT public.orders.x FROM public.orders" {:schemas {"public" "totally_private"} :tables {"orders" "purchases"} :columns {"x" "xx"}} "SELECT totally_private.purchases.xx FROM totally_private.purchases")) | |