(ns metabase.models.params.chain-filter.dedupe-joins (:require [clojure.set :as set] [medley.core :as m])) | |
(def ^:private source-node (comp :table :lhs)) | |
(def ^:private target-node (comp :table :rhs)) | |
(def ^:private edge-nodes (juxt source-node target-node)) | |
(defn- weight [terminal-ids edge] (let [[s e] (edge-nodes edge)] (+ (if (terminal-ids s) 0 1) (if (terminal-ids e) 0 1)))) | |
Return a map from the nodes in | (defn- node-degrees [nodes edges] (reduce (fn [degrees edge] (reduce #(update %1 %2 (fnil inc 0)) degrees (filter nodes (edge-nodes edge)))) {} edges)) |
Return a subset of | (defn- make-tree [source-id edges terminal-ids] (loop [s #{source-id}, edges edges, tree-edges []] (if (set/subset? terminal-ids s) tree-edges (if-let [out-edges (seq (filter (comp s source-node) edges))] (let [edge (apply min-key #(weight terminal-ids %) out-edges) s' (conj s (target-node edge))] (recur s' (remove (comp s' target-node) edges) (conj tree-edges edge))) tree-edges)))) |
Return the transitive closure of nodes reachable from | (defn- forced-nodes [node-fn start-nodes node->edges] (loop [[node & nodes] start-nodes, result start-nodes] (if (nil? node) result (let [[edge & more] (node->edges node)] (if (or (nil? edge) (seq more)) (recur nodes result) (let [next-node (node-fn edge)] (recur (cons next-node nodes) (conj result next-node)))))))) |
Remove unnecessary joins from a collection of
Note that this function implements a simple greedy algorithm, replacing the previous optimal, but exponential implementation. | (defn dedupe-joins [source-id in-joins keep-ids] (let [;; get rid of parallel edges, for our purposes they are equivalent edges (m/distinct-by edge-nodes in-joins) out-edges (group-by source-node edges) in-edges (group-by target-node edges) ;; Collect the IDs that must be included either way to prefer them ;; during the construction of the tree. transitive-keep-ids (forced-nodes source-node keep-ids in-edges) transitive-source-ids (forced-nodes target-node #{source-id} out-edges) terminal-ids (set/union transitive-source-ids transitive-keep-ids) tree (make-tree source-id edges terminal-ids) ;; The tree might contain nodes we don't need: any non-terminal node with degree less than 2 is redundant. intermediate-ids (into #{} (comp (mapcat edge-nodes) (remove terminal-ids)) edges) degrees (node-degrees intermediate-ids tree) redundant-nodes (into #{} (keep (fn [[n d]] (when (< d 2) n))) degrees)] ;; Return the tree with edges incident to redundant nodes removed. (into [] (remove #(some redundant-nodes (edge-nodes %))) tree))) |