(ns metabase.search.spec (:require [clojure.set :as set] [clojure.string :as str] [clojure.walk :as walk] [malli.error :as me] [metabase.api.common :as api] [metabase.config :as config] [metabase.search.config :as search.config] [metabase.util :as u] [metabase.util.malli.registry :as mr] [toucan2.core :as t2] [toucan2.tools.transformed :as t2.transformed])) | |
(def search-models
  "Set of search model string names."
  #{"dashboard"
    "table"
    "dataset"
    "segment"
    "collection"
    "database"
    "action"
    "indexed-entity"
    "metric"
    "card"})
(def ^:private search-model->toucan-model
  "Map from each name in `search-models` to the `:db-model` entry that `api/model->db-model` holds for that name."
  (into {}
        (for [model-name search-models]
          [model-name (:db-model (api/model->db-model model-name))])))
(def ^:private SearchModel
  "Enum schema that accepts exactly the names in `search-models`."
  (vec (cons :enum search-models)))
(def ^:private AttrValue
  "Key must be present, to show it's been explicitly considered.

  Schema for a single attribute (or render-term) value in a spec: a boolean, a keyword, a vector, or a map.
  As read by `find-fields-attr`: a falsy value contributes no fields, `true` refers to the snake_case column named
  after the attribute itself, and any other value is scanned as an expression for column references."
  [:union :boolean :keyword vector? :map])
(def attr-types
  "The abstract types of each attribute.

  The key set must stay identical to `attr-keys`; a top-level assertion in this namespace checks that."
  {:archived            :boolean
   :collection-id       :pk
   :created-at          :timestamp
   :creator-id          :pk
   :dashboard-id        :int
   :dashboardcard-count :int
   :database-id         :pk
   :id                  :text
   :last-edited-at      :timestamp
   :last-editor-id      :pk
   :last-viewed-at      :timestamp
   :name                :text
   ;; NOTE(review): no abstract type is given for this attribute — confirm that `nil` is intentional.
   :native-query        nil
   :official-collection :boolean
   :pinned              :boolean
   :updated-at          :timestamp
   :verified            :boolean
   :view-count          :int})
(def ^:private explicit-attrs
  "These attributes must be explicitly defined, omitting them could be a source of bugs.

  They become the required (non-optional) keys of the `Attrs` schema."
  [:archived :collection-id])
(def ^:private optional-attrs
  "These attributes may be omitted (for now) in the interest of brevity in the definitions.

  Built from a fixed list of identifiers and rankers followed by every key of `search.config/filters` that is not
  already in `explicit-attrs`, with duplicates removed."
  (let [;; identifiers and rankers
        identifiers-and-rankers [:id ;; in addition to being a filter, this is a key property
                                 :name
                                 :official-collection
                                 :dashboard-id
                                 :dashboardcard-count
                                 :last-viewed-at
                                 :pinned
                                 :verified ;; in addition to being a filter, this is also a ranker
                                 :view-count
                                 :updated-at]
        ;; filter attributes that are not already required explicitly
        filter-attrs            (keys (apply dissoc search.config/filters explicit-attrs))]
    (vec (distinct (into identifiers-and-rankers filter-attrs)))))
(def ^:private default-attrs
  "Attribute values that `define-spec` merges underneath the `:attrs` of every spec; entries given by the spec
  itself take precedence."
  {:id   true
   :name true})
(def ^:private attr-keys
  "Keys of a search-model that correspond to concrete columns in the index.

  The explicit attributes come first, followed by the optional ones."
  (vec (concat explicit-attrs optional-attrs)))
;; Make sure to keep attr-types up to date.
;; The message names the offending keys, so a failure at load time says what needs fixing.
(let [typed   (set (keys attr-types))
      indexed (set attr-keys)]
  (assert (= typed indexed)
          (str "attr-types is out of sync with attr-keys. Attributes without a type: "
               (pr-str (set/difference indexed typed))
               "; types without an attribute: "
               (pr-str (set/difference typed indexed)))))
(def attr-columns
  "Columns of an ingestion query that correspond to concrete columns in the index.

  One keyword per entry of `attr-keys`, with the name run through `u/->snake_case_en`."
  (into []
        (map (fn [attr-key]
               (keyword (u/->snake_case_en (name attr-key)))))
        attr-keys))
;; Load-time sanity check: no attribute may appear in both `explicit-attrs` and `optional-attrs`.
(assert (not-any? (set explicit-attrs) optional-attrs) "Attribute must only be mentioned in one list")
(def ^:private Attrs
  "Closed map schema for the `:attrs` of a spec: every key in `explicit-attrs` is required, every key in
  `optional-attrs` is optional, and each value must satisfy `AttrValue`."
  (let [required-entries (map (fn [k] [k AttrValue]) explicit-attrs)
        optional-entries (map (fn [k] [k {:optional true} AttrValue]) optional-attrs)]
    (-> [:map {:closed true}]
        (into required-entries)
        (into optional-entries))))
(def ^:private NonAttrKey
  "Schema for keys that must not collide with an index column: a keyword that is not one of `attr-columns`.
  When `config/is-dev?` is set, this is relaxed to a plain keyword."
  ;; The strict form is rather slow, not great for REPL development.
  (if-not config/is-dev?
    [:and :keyword [:not (vec (cons :enum attr-columns))]]
    :keyword))
(def ^:private JoinMap
  "We use our own schema instead of raw HoneySQL, so that we can invert it to calculate the update hooks.

  Maps a table alias to a `[model join-condition]` tuple, which is how `search-model-hooks` destructures it."
  [:map-of :keyword [:tuple :keyword vector?]])
(def ^:private Specification
  "Closed map schema that `validate-spec!` checks every search specification against."
  [:map {:closed true}
   [:name SearchModel]
   [:visibility [:enum :all :app-user]]
   [:model :keyword]
   [:attrs Attrs]
   [:search-terms [:sequential {:min 1} :keyword]]
   [:render-terms [:map-of NonAttrKey AttrValue]]
   [:where {:optional true} vector?]
   [:bookmark {:optional true} vector?]
   [:joins {:optional true} JoinMap]])
(defn- qualify-column*
  "Prefix `column` with `table` (as `:table.column`), unless its name already contains a dot, in which case it is
  returned unchanged."
  [table column]
  (let [column-name (name column)]
    (if-not (str/includes? column-name ".")
      (keyword (str (name table) "." column-name))
      column)))
(defn- qualify-column
  "Given a select-item, qualify the (potentially nested) column reference if it is naked.

  A naked keyword becomes `[qualified original]`, an already qualified keyword is returned as is, a vector whose
  first element is a keyword gets that element qualified, and anything else passes through untouched."
  [table select-item]
  (if (keyword? select-item)
    (let [qualified (qualify-column* table select-item)]
      (if (not= select-item qualified)
        [qualified select-item]
        select-item))
    (if (and (vector? select-item) (keyword? (first select-item)))
      (update select-item 0 #(qualify-column* table %))
      select-item)))
(defn- has-table?
  "Is the un-namespaced keyword `kw` qualified with `table` (its name starts with `table.`)?
  With a nil `table`, checks instead that `kw` carries no qualification at all. Namespaced keywords never match."
  [table kw]
  (cond
    (namespace kw) false
    table          (str/starts-with? (name kw) (str (name table) "."))
    :else          (not (str/includes? (name kw) "."))))
(defn- get-table
  "Return the part of `kw`'s name before the first dot as a keyword, or nil when the name has no qualifier."
  [kw]
  (let [[table & more] (str/split (name kw) #"\.")]
    (when (seq more)
      (keyword table))))
(defn- remove-table
  "Drop the leading `table.` qualifier from `kw`'s name. Returns `kw` unchanged when `table` is falsy or `kw` is
  namespaced. Does not check that `kw` actually starts with `table.`; callers are expected to know that."
  [table kw]
  (cond
    (not table)    kw
    (namespace kw) kw
    :else          (-> (name kw)
                       (subs (inc (count (name table))))
                       keyword)))
(defn- add-table
  "Qualify `kw` as `:table.kw`. Returns `kw` unchanged when `table` is falsy or `kw` is namespaced."
  [table kw]
  (cond
    (not table)    kw
    (namespace kw) kw
    :else          (keyword (str (name table) "." (name kw)))))
(defn- find-fields-kw
  "Turn a keyword column reference into a one-element list of `[table-alias column]`, using `:this` when the
  keyword has no qualifier. Returns nil for SQL functions (names starting with `%`) and for the keywords `:else`,
  `:integer` and `:float`."
  [kw]
  (let [sql-function? (str/starts-with? (name kw) "%")
        reserved?     (contains? #{:else :integer :float} kw)]
    (when-not (or sql-function? reserved?)
      (let [table (get-table kw)]
        (list [(or table :this) (remove-table table kw)])))))
(defn- find-fields-expr
  "Collect the `[table-alias column]` pairs referenced by an expression. A keyword is treated as a column
  reference; a vector is searched recursively, skipping its first element; anything else yields nil."
  [expr]
  (if (keyword? expr)
    (find-fields-kw expr)
    (when (vector? expr)
      (mapcat find-fields-expr (drop 1 expr)))))
(defn- find-fields-attr
  "Collect the `[table-alias column]` pairs behind a single `[attribute value]` entry: nothing for a falsy value,
  the snake_case column of the same name on `:this` for `true`, otherwise whatever the value references when read
  as an expression."
  [[k v]]
  (cond
    (not v)   nil
    (true? v) [[:this (-> k name u/->snake_case_en keyword)]]
    :else     (find-fields-expr v)))
(defn- find-fields-select-item
  "Collect the `[table-alias column]` pairs referenced by one select-item: the keyword itself, or for a vector the
  expression in its first position. Anything else yields nil."
  [x]
  (if (keyword? x)
    (find-fields-kw x)
    (when (vector? x)
      (find-fields-expr (first x)))))
(defn- find-fields-top
  "Collect the `[table-alias column]` pairs referenced by one top-level section of a spec: a map is read as
  attribute entries, a sequential collection as select-items. Throws for any other shape."
  [x]
  (let [finder (cond
                 (map? x)        find-fields-attr
                 (sequential? x) find-fields-select-item)]
    (if finder
      (mapcat finder x)
      (throw (ex-info "Unexpected format for fields" {:x x})))))
(defn- find-fields
  "Search within a definition for all the fields referenced, grouped by table alias (`:this` for the spec's own
  table). Callers look the result up by alias."
  [spec]
  (let [;; Remove the keys with special meanings (should probably switch this to an allowlist rather)
        field-sections (vals (dissoc spec :name :visibility :native-query :where :joins :bookmark :model))
        section-refs   (mapcat find-fields-top field-sections)
        ;; `:where` is an expression rather than a field listing, so it is scanned separately
        where-refs     (find-fields-expr (:where spec))]
    (u/group-by first second conj #{} (concat section-refs where-refs))))
(defn- replace-qualification
  "Rewrite every column reference in `expr` that is qualified with the table alias `from` so that it is qualified
  with `to` instead. Keywords that do not belong to `from`, and non-keyword leaves, are returned unchanged.

  Only the leading qualifier is swapped, and element order is preserved for every sequential input: vectors stay
  vectors (keeping their metadata), other sequential collections come back as a seq in the original order."
  [expr from to]
  (let [swap #(replace-qualification % from to)]
    (cond
      (and (keyword? expr) (has-table? from expr))
      ;; `has-table?` guarantees the name starts with "<from>.", so cut exactly that prefix rather than doing a
      ;; global string replace that could also hit a later occurrence.
      (keyword (str (name to) (subs (name expr) (count (name from)))))

      (vector? expr)
      (into (empty expr) (map swap) expr)

      ;; `into` onto an empty list would prepend and so reverse the elements.
      (sequential? expr)
      (doall (map swap expr))

      :else
      expr)))
(defn- insert-values
  "Walk `expr` and replace every keyword qualified with `table` by the value stored in `m` under the unqualified
  column keyword (nil when `m` has no such key). Everything else is left as is."
  [expr table m]
  (letfn [(substitute [x]
            (if-not (and (keyword? x) (has-table? table x))
              x
              (get m (remove-table table x))))]
    (walk/postwalk substitute expr)))
(defn- construct-source-where
  "Build the condition that matches an `:updated` row against `:this`, based on the `:id` attribute of a spec:
  a keyword compares that column, a boolean compares `id`, and a vector compares every keyword after its first
  element, combined with `:and`. Throws for any other form."
  [id-fields]
  (letfn [(same-in-both [field]
            [:= (add-table :updated field) (add-table :this field)])]
    (cond
      (keyword? id-fields)
      (same-in-both id-fields)

      (boolean? id-fields)
      [:= :updated.id :this.id]

      ;; Vector is probably something like `[:concat :field1 "sep" :field2]`; maybe we should switch to more
      ;; restrictive notation in `:attrs`?
      (vector? id-fields)
      (into [:and]
            (comp (filter keyword?) (map same-in-both))
            ;; first one is going to be a function
            (next id-fields))

      :else
      (throw (ex-info "Unknown :id form" {:id id-fields})))))
(defn- search-model-hooks
  "Generate a map indicating which search-models to update based on which fields are modified for a given model.

  Returns a map from model to a set of hook maps with `:search-model`, `:fields` and `:where`. There is one hook
  for the spec's own `:model`, and one per entry in `:joins`. When several entries refer to the same model (two
  joins on one model, or a join on the spec's own model), their hooks are unioned rather than overwriting each
  other."
  [spec]
  (let [s          (:name spec)
        ;; computed once and shared by the own-model hook and every join hook
        fields     (find-fields spec)
        own-hook   [(:model spec) #{{:search-model s
                                     :fields       (:this fields)
                                     :where        (construct-source-where (-> spec :attrs :id))}}]
        join-hooks (for [[table-alias [model join-condition]] (:joins spec)]
                     [model #{{:search-model s
                               :fields       (fields table-alias)
                               ;; the joined row is the one that changed, so it plays the role of `:updated`
                               :where        (replace-qualification join-condition table-alias :updated)}}])]
    (reduce (fn [acc [model hooks]]
              (merge-with set/union acc {model hooks}))
            {}
            (cons own-hook join-hooks))))
(defn- merge-hooks
  "Combine the search index hooks corresponding to different search models.

  Takes a collection of maps whose values are sets, and unions the sets of any key that occurs more than once."
  [hooks]
  (apply merge-with set/union {} hooks))
(defn qualify-columns
  "Given a list of select-item, qualify all naked column references to refer to the given table.

  Falsy items, and vector items whose first element is nil, are dropped from the result."
  [table select-item]
  (let [usable? (fn [column]
                  (and column
                       (not (and (vector? column)
                                 (nil? (first column))))))]
    (->> select-item
         (filter usable?)
         (map #(qualify-column table %)))))
(defmulti spec*
  "Impl for [[spec]]. Dispatches on the search-model itself; methods are installed by `define-spec`."
  {:arglists '([search-model])}
  identity)
(defn spec
  "Return the specification registered for `search-model` (see `define-spec`), after resolving the Toucan model
  that backs it.
  For now, see the `Specification` schema, and the existing definitions that use it."
  [search-model]
  ;; make sure the model namespace is loaded.
  (-> search-model
      search-model->toucan-model
      t2/resolve-model)
  (spec* search-model))
(defn specifications
  "A mapping from each search-model to its specification."
  []
  ;; `spec` already makes sure the model namespace is loaded before looking the specification up, so there is no
  ;; need to resolve the model a second time here.
  (into {}
        (map (fn [search-model]
               [search-model (spec search-model)]))
        (keys search-model->toucan-model)))
(defn validate-spec!
  "Check whether a given specification is valid.

  Throws an `ex-info` carrying the explanation when `spec` does not match the `Specification` schema. Then asserts
  that every table alias referenced by the spec, other than `:this`, has an entry in `:joins`. Returns nil."
  [spec]
  (let [info (mr/explain Specification spec)]
    (when info
      (throw (ex-info (str "Invalid search specification for " (:name spec) ": " (me/humanize info)) info))))
  (let [referenced-tables (keys (find-fields spec))]
    (doseq [table (remove #{:this} referenced-tables)]
      (assert (contains? (:joins spec) table) (str "Reference to table without a join: " table)))))
(defmacro define-spec
  "Define a spec for a search model.

  Expands to code that takes the `spec` map, sets its `:name` to `search-model`, defaults `:visibility` to `:all`,
  merges `default-attrs` underneath its `:attrs`, validates the result with `validate-spec!`, derives the spec's
  `:model` from `:hook/search-index`, and installs the result as the `spec*` method for `search-model`."
  [search-model spec]
  `(let [spec# (-> ~spec
                   (assoc :name ~search-model)
                   (update :visibility #(or % :all))
                   (update :attrs #(merge ~default-attrs %)))]
     (validate-spec! spec#)
     (derive (:model spec#) :hook/search-index)
     (defmethod spec* ~search-model [~'_] spec#)))
(defn model-hooks
  "Return an inverted map of data dependencies to search models, used for updating them based on underlying models.

  The map is rebuilt from all specifications on every call.
  TODO we should memoize this for production (based on spec values)"
  []
  (let [specs (vals (specifications))]
    (merge-hooks (map search-model-hooks specs))))
(defn- instance->db-values
  "Given a transformed toucan map, get back a mapping to the raw db values that we can use in a query.

  Each value is passed through the model's in-transform for its key when there is one, and kept as is otherwise."
  [instance]
  (let [in-xforms  (try
                     (#'t2.transformed/in-transforms (t2/model instance))
                     ;; this happens for :model/ModelIndexValue, which has no transforms
                     (catch Exception _
                       nil))
        ->db-value (fn [k v]
                     ((or (get in-xforms k) identity) v))]
    (reduce-kv (fn [acc k v]
                 (assoc acc k (->db-value k v)))
               {}
               instance)))
(defn search-models-to-update
  "Given an updated or created instance, return a description of which search-models to (re)index.

  The result is a set of `[search-model where]` pairs, where `where` is the hook's condition with every
  `:updated.*` reference replaced by the instance's raw db value. A hook is included when `always?` is truthy, or
  when one of its fields is among the instance's changed keys (all of its keys when there are no recorded changes)."
  [instance & [always?]]
  (let [raw-values    (delay (instance->db-values instance))
        touched-keys  (delay (keys (or (t2/changes instance) instance)))
        needs-update? (fn [fields]
                        (or always?
                            (and fields (some fields @touched-keys))))]
    (into #{}
          (comp (filter (fn [{:keys [fields]}]
                          (needs-update? fields)))
                (map (fn [{:keys [search-model where]}]
                       [search-model (insert-values where :updated @raw-values)])))
          (get (model-hooks) (t2/model instance)))))
;; REPL scratchpad; nothing in here is evaluated when the namespace loads.
(comment
  ;; rebuild the :hook/search-index hierarchy from the current hooks
  (doseq [d (descendants :hook/search-index)]
    (underive d :hook/search-index))
  (doseq [d (keys (model-hooks))]
    (derive d :hook/search-index))

  (search-models-to-update (t2/select-one :model/Card))

  ;; `methods` needs the multimethod itself, which is `spec*`; `spec` is the plain function wrapping it.
  (methods spec*)

  (model-hooks)

  (let [where (-> (:model/ModelIndexValue (model-hooks)) first :where)]
    (insert-values where :updated {:model_index_id 1 :model_pk 5})))