(ns metabase.search.spec (:require [clojure.set :as set] [clojure.string :as str] [clojure.walk :as walk] [malli.core :as mc] [malli.error :as me] [metabase.config :as config] [metabase.search.config :as search.config] [metabase.util :as u] [toucan2.core :as t2] [toucan2.tools.transformed :as t2.transformed])) | |
(def ^:private SearchModel
  "Malli enum schema listing the permitted search-model names."
  [:enum
   "dashboard"
   "table"
   "dataset"
   "segment"
   "collection"
   "database"
   "action"
   "indexed-entity"
   "metric"
   "card"])
(def ^:private AttrValue
  "Key must be present, to show it's been explicitly considered.

  Schema for the value given to an attribute: a boolean, a keyword, a vector, or a map."
  [:union
   :boolean
   :keyword
   vector?
   :map])
(def attr-types
  "The abstract types of each attribute."
  {:archived            :boolean
   :collection-id       :pk
   :created-at          :timestamp
   :creator-id          :pk
   :dashboardcard-count :int
   :database-id         :pk
   :id                  :pk
   :last-edited-at      :timestamp
   :last-editor-id      :pk
   :last-viewed-at      :timestamp
   :name                :text
   ;; present in the map, but deliberately given no abstract type
   :native-query        nil
   :official-collection :boolean
   :pinned              :boolean
   :updated-at          :timestamp
   :verified            :boolean
   :view-count          :int})
(def ^:private explicit-attrs
  "These attributes must be explicitly defined, omitting them could be a source of bugs."
  [:archived
   :collection-id])
(def ^:private optional-attrs
  "These attributes may be omitted (for now) in the interest of brevity in the definitions."
  (let [;; identifiers and rankers
        identifiers-and-rankers [:id                  ; in addition to being a filter, this is a key property
                                 :name
                                 :official-collection
                                 :dashboardcard-count
                                 :last-viewed-at
                                 :pinned
                                 :verified            ; in addition to being a filter, this is also a ranker
                                 :view-count
                                 :updated-at]
        ;; every configured filter key that is not already an explicit attribute
        filter-keys             (keys (apply dissoc search.config/filters explicit-attrs))]
    (vec (distinct (into identifiers-and-rankers filter-keys)))))
(def ^:private default-attrs
  "Attribute entries merged underneath the `:attrs` of every spec by `define-spec`."
  {:id   true
   :name true})
(def ^:private attr-keys
  "Keys of a search-model that correspond to concrete columns in the index"
  ;; explicit attributes first, followed by the optional ones
  (vec (concat explicit-attrs optional-attrs)))
;; Load-time sanity check: `attr-types` must describe exactly the attributes listed in `attr-keys`.
;; The message is passed to `assert` so that a failure explains itself, matching the other
;; load-time assertion in this namespace.
(assert (= (set (keys attr-types)) (set attr-keys))
        "Make sure to keep attr-types up to date")
(def attr-columns
  "Columns of an ingestion query that correspond to concrete columns in the index"
  ;; kebab-case attribute key -> snake_case column keyword
  (mapv (fn [attr-key]
          (-> attr-key name u/->snake_case_en keyword))
        attr-keys))
;; Load-time sanity check: no attribute may appear in both `explicit-attrs` and `optional-attrs`.
(assert (not-any? (set explicit-attrs) optional-attrs) "Attribute must only be mentioned in one list")
(def ^:private Attrs
  "Closed map schema for a spec's `:attrs`: every explicit attribute is a required key and every
  optional attribute is an optional key, each with an `AttrValue` value."
  (let [required-entries (map (fn [k] [k AttrValue]) explicit-attrs)
        optional-entries (map (fn [k] [k {:optional true} AttrValue]) optional-attrs)]
    (-> [:map {:closed true}]
        (into required-entries)
        (into optional-entries))))
(def ^:private NonAttrKey
  "Schema for a key that must not collide with any of the `attr-columns`.
  In dev (`config/is-dev?`) this is relaxed to a plain `:keyword`."
  ;; This is rather slow, not great for REPL development.
  (if-not config/is-dev?
    [:and :keyword [:not (into [:enum] attr-columns)]]
    :keyword))
(def ^:private JoinMap
  "We use our own schema instead of raw HoneySQL, so that we can invert it to calculate the update hooks.

  Maps a keyword to a `[keyword vector]` tuple; `search-model-hooks` destructures each entry as
  `[table-alias [model join-condition]]`."
  [:map-of
   :keyword
   [:tuple :keyword vector?]])
(def ^:private Specification
  "Closed map schema that a search-model specification is validated against in `validate-spec!`."
  [:map {:closed true}
   [:name         SearchModel]
   [:visibility   [:enum :all :app-user]]
   [:model        :keyword]
   [:attrs        Attrs]
   [:search-terms [:sequential {:min 1} :keyword]]
   [:render-terms [:map-of NonAttrKey AttrValue]]
   [:where        {:optional true} vector?]
   [:bookmark     {:optional true} vector?]
   [:joins        {:optional true} JoinMap]])
(defn- qualify-column*
  "Return `column` unchanged when its name already contains a `.`, otherwise return the keyword
  `<table>.<column>`."
  [table column]
  (let [column-name (name column)]
    (if-not (str/includes? column-name ".")
      (keyword (str (name table) "." column-name))
      column)))
(defn- qualify-column
  "Given a select-item, qualify the (potentially nested) column reference if it is naked.

  - a naked keyword becomes `[qualified original]`; an already-qualified keyword is returned as-is
  - a vector whose first element is a keyword has that first element qualified in place
  - anything else is returned untouched"
  [table select-item]
  (cond
    (keyword? select-item)
    (let [qualified (qualify-column* table select-item)]
      (if (not= select-item qualified)
        [qualified select-item]
        select-item))

    (and (vector? select-item)
         (keyword? (first select-item)))
    (update select-item 0 (fn [column] (qualify-column* table column)))

    :else
    select-item))
(defn- has-table?
  "Is the un-namespaced keyword `kw` qualified by `table` (i.e. does its name start with `<table>.`)?
  When `table` is nil, checks instead that `kw` carries no qualification at all.
  Namespaced keywords never match."
  [table kw]
  (let [kw-name (name kw)]
    (cond
      (namespace kw) false
      table          (str/starts-with? kw-name (str (name table) "."))
      :else          (not (str/includes? kw-name ".")))))
(defn- get-table
  "Return the part of `kw`'s name before the first `.` as a keyword, or nil when splitting the
  name on `.` yields a single part."
  [kw]
  (let [[qualifier & remainder] (str/split (name kw) #"\.")]
    (when (seq remainder)
      (keyword qualifier))))
(defn- remove-table
  "Drop the leading `<table>.` from the name of `kw`. Returns `kw` unchanged when `table` is falsy or
  `kw` is namespaced. Does not check that `kw` actually starts with `table`; it simply drops
  `(count table) + 1` characters."
  [table kw]
  (if (or (not table) (namespace kw))
    kw
    (let [prefix-length (inc (count (name table)))]
      (keyword (subs (name kw) prefix-length)))))
(defn- find-fields-kw
  "Turn a keyword reference into a one-element list of `[table-alias field]`, using `:this` as the
  alias when the keyword is unqualified. Returns nil for keywords starting with `%` and for
  `:else`, `:integer` and `:float`."
  [kw]
  (let [sql-function? (str/starts-with? (name kw) "%") ; Filter out SQL functions
        reserved?     (contains? #{:else :integer :float} kw)]
    (when-not (or sql-function? reserved?)
      (let [table (get-table kw)]
        (list [(or table :this) (remove-table table kw)])))))
(defn- find-fields-expr
  "Collect the `[table-alias field]` references inside an expression. A keyword is a reference
  itself; for a vector the first element is skipped and the rest are searched recursively.
  Anything else yields nil."
  [expr]
  (if (keyword? expr)
    (find-fields-kw expr)
    (when (vector? expr)
      (mapcat find-fields-expr (rest expr)))))
(defn- find-fields-attr
  "Collect the `[table-alias field]` references for one `[attr-key attr-value]` entry.
  A falsy value references nothing, `true` references the column on `:this` named after the key in
  snake case, and any other value is searched as an expression."
  [[k v]]
  (cond
    (not v)   nil
    (true? v) [[:this (keyword (u/->snake_case_en (name k)))]]
    :else     (find-fields-expr v)))
(defn- find-fields-select-item
  "Collect the `[table-alias field]` references for one select-item: a keyword is looked up
  directly, and for a vector only its first element is searched. Anything else yields nil."
  [x]
  (if (keyword? x)
    (find-fields-kw x)
    (when (vector? x)
      (find-fields-expr (first x)))))
(defn- find-fields-top
  "Collect the `[table-alias field]` references from a top-level spec value: a map is treated as
  attribute entries and a sequential collection as select-items.
  Throws an `ex-info` for any other kind of value."
  [x]
  (when-not (or (map? x) (sequential? x))
    (throw (ex-info "Unexpected format for fields" {:x x})))
  (if (map? x)
    (mapcat find-fields-attr x)
    (mapcat find-fields-select-item x)))
(defn- find-fields
  "Search within a definition for all the fields referenced on the given table alias.

  Returns a map from table alias (`:this` for the spec's own model) to the set of referenced fields."
  [spec]
  (let [;; Remove the keys with special meanings (should probably switch this to an allowlist rather)
        special-keys  [:name :visibility :native-query :where :joins :bookmark :model]
        definitions   (vals (apply dissoc spec special-keys))
        ;; `:where` is a bare expression rather than a map or list of select-items, so it is
        ;; searched separately
        where-fields  (find-fields-expr (:where spec))
        references    (concat (mapcat find-fields-top definitions) where-fields)]
    (u/group-by first second conj #{} references)))
(defn- replace-qualification
  "Rewrite every keyword in `expr` that is qualified by the table alias `from` so that it is
  qualified by `to` instead, recursing into nested sequential collections.

  Only the leading `<from>.` qualifier is swapped; any later occurrence of that text inside the
  keyword's name is left alone. Element order is preserved for every sequential collection:
  vectors stay vectors (metadata retained) and other sequentials come back as lists.
  Non-keyword, non-sequential values are returned unchanged."
  [expr from to]
  (cond
    (and (keyword? expr) (has-table? from expr))
    ;; `has-table?` guarantees the name starts with "<from>.", so keep everything from the "." on
    (keyword (str (name to) (subs (name expr) (count (name from)))))

    (vector? expr)
    (with-meta (mapv #(replace-qualification % from to) expr)
               (meta expr))

    (sequential? expr)
    ;; `(into (empty a-list) ...)` would conj onto the front and reverse the elements
    (apply list (map #(replace-qualification % from to) expr))

    :else
    expr))
(defn- insert-values
  "Walk `expr` and replace every keyword qualified by `table` with the value found in `m` under
  the unqualified key (nil when `m` has no such key). Everything else is left as-is."
  [expr table m]
  (letfn [(substitute [x]
            (if-not (and (keyword? x) (has-table? table x))
              x
              (get m (remove-table table x))))]
    (walk/postwalk substitute expr)))
(defn- search-model-hooks
  "Generate a map indicating which search-models to update based on which fields are modified for a given model.

  Returns a map from model to a set of hook maps, each with the keys `:search-model`, `:fields`
  (the set of fields referenced on that model, possibly nil) and `:where` (the condition relating
  the updated row, qualified as `:updated`, to the search-model's own row).

  Hooks are combined with `set/union`, so when the same model occurs more than once (the spec's
  own model also appearing in `:joins`, or joined under several aliases) every hook is kept
  rather than the last one winning."
  [spec]
  (let [s      (:name spec)
        ;; computed once and shared between the base model and all of its joins
        fields (find-fields spec)
        hook   (fn [model table-fields where]
                 {model #{{:search-model s
                           :fields       table-fields
                           :where        where}}})]
    (apply merge-with set/union
           (hook (:model spec) (:this fields) [:= :updated.id :this.id])
           (for [[table-alias [model join-condition]] (:joins spec)]
             (hook model
                   (fields table-alias)
                   (replace-qualification join-condition table-alias :updated))))))
(defn- merge-hooks
  "Combine the search index hooks corresponding to different search models.

  Takes a collection of maps whose values are sets; sets found under the same key are unioned."
  [hooks]
  (apply merge-with set/union {} hooks))
(defn qualify-columns
  "Given a list of select-item, qualify all naked column references to refer to the given table.

  Falsy items, and vector items whose first element is nil, are dropped. Returns a lazy sequence."
  [table select-item]
  (letfn [(usable? [column]
            (and column
                 (or (not (vector? column))
                     (some? (first column)))))]
    (->> select-item
         (filter usable?)
         (map (fn [column] (qualify-column table column))))))
(defmulti spec
  "Register a metabase model as a search-model.
  Once we're tidying up the fulltext search project, we can inline a detailed explanation.
  For now, see its schema, and the existing definitions that use it."
  ;; dispatches on the search-model itself
  (fn dispatch-on-search-model [search-model]
    search-model))
(defn specifications
  "A mapping from each search-model to its specification."
  []
  (into {}
        (map (fn [[search-model spec-fn]]
               ;; each registered method is called with its own dispatch value
               [search-model (spec-fn search-model)]))
        (methods spec)))
(defn validate-spec!
  "Check whether a given specification is valid

  Throws an `ex-info` (carrying the malli explanation as its data) when `spec` does not conform
  to `Specification`, and asserts that every table alias referenced by the spec, other than
  `:this`, has an entry in `:joins`."
  [spec]
  (when-let [info (mc/explain Specification spec)]
    (let [message (str "Invalid search specification for " (:name spec) ": " (me/humanize info))]
      (throw (ex-info message info))))
  (doseq [table (remove #{:this} (keys (find-fields spec)))]
    (assert (contains? (:joins spec) table) (str "Reference to table without a join: " table))))
(defmacro define-spec
  "Define a spec for a search model.

  The given `spec` has `:name` set to `search-model`, `:visibility` defaulted to `:all`, and
  `default-attrs` merged underneath its `:attrs`. The result is validated with `validate-spec!`,
  its `:model` is derived from `:hook/search-index`, and it is registered as the `spec` method
  for `search-model`."
  [search-model spec]
  `(let [spec# (-> ~spec
                   (assoc :name ~search-model)
                   (update :visibility (fn [visibility#] (or visibility# :all)))
                   (update :attrs (fn [attrs#] (merge ~default-attrs attrs#))))]
     (validate-spec! spec#)
     (derive (:model spec#) :hook/search-index)
     (defmethod spec ~search-model [~'_] spec#)))
(defn model-hooks
  "Return an inverted map of data dependencies to search models, used for updating them based on underlying models.

  TODO we should memoize this for production (based on spec values)"
  []
  (->> (methods spec)
       (map (fn [[search-model spec-fn]]
              (search-model-hooks (spec-fn search-model))))
       merge-hooks))
(defn- instance->db-values
  "Given a transformed toucan map, get back a mapping to the raw db values that we can use in a query.

  Each value is passed through the in-transform registered for its key on the instance's model,
  when there is one; other values are copied as-is."
  [instance]
  (let [;; reaches into a private var of toucan2's transformed tools
        xforms     (#'t2.transformed/in-transforms (t2/model instance))
        ->db-value (fn [k v]
                     (if-let [f (get xforms k)]
                       (f v)
                       v))]
    (reduce-kv (fn [acc k v]
                 (assoc acc k (->db-value k v)))
               {}
               instance)))
(defn search-models-to-update
  "Given an updated or created instance, return a description of which search-models to (re)index.

  Returns a set of `[search-model where-clause]` pairs, where references to `:updated.<column>` in
  the hook's condition have been replaced by the instance's raw db values. A hook applies when
  `always?` is truthy, or when one of its fields is among the instance's changed keys (or among
  all of its keys when `t2/changes` returns nothing)."
  [instance & [always?]]
  (let [;; only realised if at least one hook applies
        raw-values (delay (instance->db-values instance))
        applies?   (fn [fields]
                     (or always?
                         (and fields
                              (some fields (keys (or (t2/changes instance) instance))))))
        hooks      (get (model-hooks) (t2/model instance))]
    (into #{}
          (keep (fn [{:keys [search-model fields where]}]
                  (when (applies? fields)
                    [search-model (insert-values where :updated @raw-values)])))
          hooks)))
(comment
  ;; REPL helper: detach everything currently derived from :hook/search-index ...
  (run! (fn [d] (underive d :hook/search-index))
        (descendants :hook/search-index))
  ;; ... then derive it again for every model that currently has hooks.
  (run! (fn [d] (derive d :hook/search-index))
        (keys (model-hooks))))