(ns metabase.search.spec
  (:require
   [clojure.set :as set]
   [clojure.string :as str]
   [clojure.walk :as walk]
   [malli.error :as me]
   [metabase.api.common :as api]
   [metabase.config :as config]
   [metabase.search.config :as search.config]
   [metabase.util :as u]
   [metabase.util.malli.registry :as mr]
   [toucan2.core :as t2]
   [toucan2.tools.transformed :as t2.transformed]))

Set of search model string names.

(def search-models
  "The set of search-model names, as strings."
  (hash-set "action"
            "card"
            "collection"
            "dashboard"
            "database"
            "dataset"
            "indexed-entity"
            "metric"
            "segment"
            "table"))
(def ^:private search-model->toucan-model
  "Map from each search-model name (a string from `search-models`) to the `:db-model` entry that
  `api/model->db-model` holds for that name."
  (into {}
        (map (fn [search-model]
               [search-model (-> search-model api/model->db-model :db-model)]))
        ;; The collection fed through the transducer; without it the `into` form is incomplete.
        search-models))
(def ^:private SearchModel
  "`:enum` schema whose allowed values are exactly the names in `search-models`."
  (vec (cons :enum search-models)))

Key must be present, to show it's been explicitly considered.

  • false: not present [note: consider making this nil instead, since it implies writing NULL to the column]
  • true: given by a column with the same name (snake case) [note: consider removing this sugar, just repeat the column]
  • keyword: given by the corresponding column
  • vector: calculated by the given expression
  • map: a sub-select
(def ^:private AttrValue
  "Schema for the value given to an attribute key in a specification."
  [:union
   :boolean   ; false => attribute not present; true => column with the same (snake case) name
   :keyword   ; the named column
   vector?    ; an expression to calculate
   :map])     ; a sub-select

The abstract types of each attribute.

(def attr-types
  "The abstract type of each attribute, keyed by attribute name."
  (merge
   (zipmap [:archived :official-collection :pinned :verified]
           (repeat :boolean))
   (zipmap [:collection-id :creator-id :database-id :last-editor-id]
           (repeat :pk))
   (zipmap [:created-at :last-edited-at :last-viewed-at :updated-at]
           (repeat :timestamp))
   (zipmap [:dashboard-id :dashboardcard-count :view-count]
           (repeat :int))
   (zipmap [:id :name]
           (repeat :text))
   ;; The key is present, but no abstract type is assigned to it.
   {:native-query nil}))

These attributes must be explicitly defined, omitting them could be a source of bugs.

;; Attributes that every specification must mention explicitly; they become the required entries of
;; the `Attrs` schema and are removed from the filter keys when `optional-attrs` is computed.
;; NOTE(review): the value of this def is not visible at this point in the file and the form is
;; left unclosed -- TODO restore the attribute list; it cannot be inferred from the surrounding code.
(def ^:private explicit-attrs

These attributes may be omitted (for now) in the interest of brevity in the definitions.

;; Starts from the keys of `search.config/filters` minus `explicit-attrs`, then threads them (last
;; position) into further steps together with the identifier/ranker attributes listed below.
;; NOTE(review): this form is truncated -- the wrapper around the vector, the rest of its entries
;; and the closing steps are not visible here. TODO restore before relying on this definition.
(def ^:private optional-attrs
  (->> (keys (apply dissoc search.config/filters explicit-attrs))
       ;; identifiers and rankers
        [:id                                                ;;  in addition to being a filter, this is a key property
         :verified                                          ;;  in addition to being a filter, this is also a ranker
(def ^:private default-attrs
  "Attribute values that `define-spec` merges underneath the ones a specification provides."
  (zipmap [:id :name] (repeat true)))

Keys of a search-model that correspond to concrete columns in the index

(def ^:private attr-keys
  "Keys of a search-model that correspond to concrete columns in the index: the explicit
  attributes with the optional ones added to them."
  (reduce conj explicit-attrs optional-attrs))

Make sure to keep attr-types up to date

;; Load-time guard: `attr-types` must describe exactly the attributes in `attr-keys`.
;; The message makes a failure self-explanatory, matching the other load-time assert in this file.
(assert (= (set (keys attr-types)) (set attr-keys))
        "Make sure to keep attr-types up to date")

Columns of an ingestion query that correspond to concrete columns in the index

(def attr-columns
  "Columns of an ingestion query that correspond to concrete columns in the index: each
  attribute key converted to a snake-case keyword."
  (mapv (fn [attr-key]
          (-> attr-key name u/->snake_case_en keyword))
        attr-keys))
;; Load-time guard: an attribute is either explicit or optional, never both.
(assert (every? (complement (set explicit-attrs)) optional-attrs)
        "Attribute must only be mentioned in one list")
(def ^:private Attrs
  "Closed map schema for `:attrs`: every explicit attribute is a required key, every optional
  attribute an optional key, and all values must satisfy `AttrValue`."
  (let [required-entries (map (fn [attr] [attr AttrValue]) explicit-attrs)
        optional-entries (map (fn [attr] [attr {:optional true} AttrValue]) optional-attrs)]
    (-> [:map {:closed true}]
        (into required-entries)
        (into optional-entries))))
(def ^:private NonAttrKey
  "Schema for a keyword that is not one of `attr-columns`.

  The strict form is only used outside of dev; in dev any keyword is accepted, because the strict
  check is slow. The `if` previously had a single branch, which applied the slow schema in dev and
  left this schema `nil` everywhere else."
  ;; This is rather slow, not great for REPL development.
  (if config/is-dev?
    :keyword
    [:and :keyword [:not (into [:enum] attr-columns)]]))

We use our own schema instead of raw HoneySQL, so that we can invert it to calculate the update hooks.

(def ^:private JoinMap
  "Schema for `:joins`: a map from a keyword to a `[keyword vector]` tuple.
  `search-model-hooks` reads each entry as table alias -> [model join-condition]."
  (let [model+condition [:tuple :keyword vector?]]
    [:map-of :keyword model+condition]))
(def ^:private Specification
  "Closed map schema that a search-model specification is validated against."
  (let [required [[:name SearchModel]
                  [:visibility [:enum :all :app-user]]
                  [:model :keyword]
                  [:attrs Attrs]
                  [:search-terms [:sequential {:min 1} :keyword]]
                  [:render-terms [:map-of NonAttrKey AttrValue]]]
        ;; These keys may be left out of a specification altogether.
        optional (for [[k schema] [[:where vector?]
                                   [:bookmark vector?]
                                   [:joins JoinMap]]]
                   [k {:optional true} schema])]
    (-> [:map {:closed true}]
        (into required)
        (into optional))))
(defn- qualify-column*
  "Return `column` as a keyword qualified with `table` (`:table.column`).

  A column whose name already contains a `.` is treated as qualified and returned unchanged.
  Previously that branch was missing, so qualified columns were prefixed a second time and naked
  columns produced `nil`."
  [table column]
  (if (str/includes? (name column) ".")
    column
    (keyword (str (name table) "." (name column)))))

Given a select-item, qualify the (potentially nested) column reference if it is naked.

(defn- qualify-column
  "Given a select-item, qualify the (potentially nested) column reference if it is naked.

  - keyword: returned unchanged when it is already qualified, otherwise returned as
    `[qualified original]` (presumably a HoneySQL `[expr alias]` pair, so the selected column keeps
    its original name -- confirm against the query builder).
  - vector whose first element is a keyword: that first element is qualified in place.
  - anything else: returned unchanged."
  [table select-item]
  (cond
    (keyword? select-item)
    (let [qualified (qualify-column* table select-item)]
      (if (= select-item qualified)
        select-item
        [qualified select-item]))

    (and (vector? select-item) (keyword? (first select-item)))
    (assoc select-item 0 (qualify-column* table (first select-item)))

    :else
    select-item))
(defn- has-table?
  "True when the un-namespaced keyword `kw` is qualified with `table` (its name starts with
  `table.`). With a nil `table`, true when `kw` carries no qualification at all (no `.` in its
  name). Namespaced keywords are never considered a match."
  [table kw]
  (cond
    (namespace kw) false
    table          (str/starts-with? (name kw) (str (name table) "."))
    :else          (not (str/includes? (name kw) "."))))
(defn- get-table
  "Return the part of `kw`'s name before the first `.` as a keyword, or nil when the name splits
  into a single part."
  [kw]
  (let [[head & more] (str/split (name kw) #"\.")]
    (when (seq more)
      (keyword head))))
(defn- remove-table
  "Strip the `table.` prefix from the un-namespaced keyword `kw`.

  `kw` is returned unchanged when `table` is nil or `kw` is namespaced; `find-fields-kw` relies on
  this when it passes the nil table of a naked column. The prefix is removed by length, so callers
  must only pass a `kw` that really is qualified with `table`."
  [table kw]
  (if (and table (not (namespace kw)))
    (keyword (subs (name kw) (inc (count (name table)))))
    kw))
(defn- add-table
  "Prefix the un-namespaced keyword `kw` with `table.`.

  `kw` is returned unchanged when `table` is nil or `kw` is namespaced."
  [table kw]
  (if (and table (not (namespace kw)))
    (keyword (str (name table) "." (name kw)))
    kw))
(defn- find-fields-kw
  "Return a one-element list holding `[table field]` for the column keyword `kw`, using `:this`
  as the table when `kw` is not qualified. Returns nil for keywords that are skipped."
  [kw]
  (let [;; Filter out SQL functions
        sql-function? (str/starts-with? (name kw) "%")
        ;; These keywords are skipped as well (presumably expression syntax rather than columns).
        ignored?      (contains? #{:else :integer :float} kw)]
    (when-not (or sql-function? ignored?)
      (let [table (get-table kw)]
        (list [(or table :this) (remove-table table kw)])))))
(defn- find-fields-expr
  "Collect the `[table field]` pairs referenced anywhere inside `expr`.

  A keyword is looked up directly; for a vector the first element is skipped (it is the operator
  position) and the remaining elements are searched recursively. Anything else yields nil."
  [expr]
  (cond
    (keyword? expr)
    (find-fields-kw expr)

    (vector? expr)
    (mapcat find-fields-expr (rest expr))))
(defn- find-fields-attr
  "Collect the `[table field]` pairs behind a single `[attribute value]` entry.

  `true` means the column on `:this` named after the attribute (snake case); any other truthy
  value is searched as an expression; a falsey value contributes nothing."
  [[k v]]
  (cond
    (true? v) [[:this (keyword (u/->snake_case_en (name k)))]]
    v         (find-fields-expr v)))
(defn- find-fields-select-item
  "Collect the `[table field]` pairs referenced by one select-item.

  A keyword is looked up directly; for a vector only its first element is searched. Anything else
  yields nil."
  [x]
  (cond
    (keyword? x)
    (find-fields-kw x)

    (vector? x)
    (find-fields-expr (first x))))
(defn- find-fields-top
  "Collect the `[table field]` pairs referenced by one top-level value of a specification.

  A map is treated as attribute entries and a sequential value as a list of select-items.
  Throws `ex-info` for any other shape."
  [x]
  (cond
    (map? x)
    (mapcat find-fields-attr x)

    (sequential? x)
    (mapcat find-fields-select-item x)

    :else
    (throw (ex-info "Unexpected format for fields" {:x x}))))

Search within a definition for all the fields referenced on the given table alias.

(defn- find-fields
  "Search within a definition for all the fields referenced on each table alias.

  The `[table field]` pairs come from the spec's remaining top-level values plus its `:where`
  clause, and are grouped by table into sets (`:this` stands for the spec's own table).
  NOTE(review): assumes `u/group-by` takes key-fn, value-fn, reducing-fn, init, coll -- confirm."
  [spec]
  (u/group-by first second conj #{}
              (concat
               (mapcat
                find-fields-top
                ;; Remove the keys with special meanings (should probably switch this to an allowlist rather)
                (vals (dissoc spec :name :visibility :native-query :where :joins :bookmark :model)))
               (find-fields-expr (:where spec)))))
(defn- replace-qualification
  "Rewrite `expr` so that keywords qualified with table `from` are qualified with `to` instead.

  Sequential expressions are rebuilt element by element into an empty collection of the same
  type; every other value (strings, numbers, other keywords, ...) is returned unchanged, so that
  nested clauses keep their operators and literals."
  [expr from to]
  (cond
    (and (keyword? expr) (has-table? from expr))
    (keyword (str/replace (name expr) (str (name from) ".") (str (name to) ".")))

    (sequential? expr)
    ;; NOTE: `into` onto an empty list would reverse the elements; expressions are expected to be vectors.
    (into (empty expr) (map #(replace-qualification % from to) expr))

    :else
    expr))
(defn- insert-values
  "Walk `expr` and replace every keyword qualified with `table` by the value found in `m` under
  the unqualified column name. Everything else is left as it is.

  A column missing from `m` is replaced by nil."
  [expr table m]
  (walk/postwalk
   (fn [x]
     (if (and (keyword? x) (has-table? table x))
       (get m (remove-table table x))
       x))
   expr))
(defn- construct-source-where
  "Build the where clause that matches rows of `:this` against the `:updated` row, from the `:id`
  attribute of a specification.

  - keyword: equality on that column
  - boolean: equality on `id`
  - vector: an `:and` of equalities, one per keyword after the first element
  - anything else: throws `ex-info`"
  [id-fields]
  (cond
    (keyword? id-fields) [:= (add-table :updated id-fields) (add-table :this id-fields)]
    (boolean? id-fields) [:= :updated.id :this.id]
    ;; Vector is probably something like `[:concat :field1 "sep" :field2]`; maybe we should switch to more restrictive
    ;; notation in `:attrs`?
    (vector? id-fields)  (into [:and]
                               (for [field (next id-fields) ;; first one is going to be a function
                                     :when (keyword? field)]
                                 [:= (add-table :updated field) (add-table :this field)]))
    :else                (throw (ex-info "Unknown :id form" {:id id-fields}))))

Generate a map indicating which search-models to update based on which fields are modified for a given model.

(defn- search-model-hooks
  "Generate a map indicating which search-models to update based on which fields are modified for a given model.

  The result maps a model to a one-element set of `{:search-model, :fields, :where}`: one entry
  for the spec's own `:model`, and one per entry in `:joins`.
  NOTE(review): entries are poured into a plain map, so a joined model that repeats an earlier key
  replaces that entry rather than being combined with it."
  [spec]
  (let [s      (:name spec)
        ;; Computed once and shared by the own-table entry and every join entry.
        fields (find-fields spec)]
    (into {}
          (cons
           [(:model spec) #{{:search-model s
                             :fields       (:this fields)
                             :where        (construct-source-where (-> spec :attrs :id))}}]
           (for [[table-alias [model join-condition]] (:joins spec)]
             (let [table-fields (fields table-alias)]
               [model #{{:search-model s
                         :fields       table-fields
                         :where        (replace-qualification join-condition table-alias :updated)}}]))))))

Combine the search index hooks corresponding to different search models.

(defn- merge-hooks
  "Combine the search index hooks corresponding to different search models.

  `hooks` is a collection of maps whose values are sets; values under the same key are unioned.
  Returns `{}` for an empty collection."
  [hooks]
  (reduce (partial merge-with set/union) {} hooks))

Given a list of select-item, qualify all naked column references to refer to the given table.

(defn qualify-columns
  "Given a list of select-item, qualify all naked column references to refer to the given table.
  Falsey items, and vectors whose first element is nil, are dropped."
  [table select-item]
  (->> select-item
       (remove (fn [column]
                 (or (not column)
                     (and (vector? column) (nil? (first column))))))
       (map (fn [column] (qualify-column table column)))))

Impl for [[spec]].

(defmulti spec*
  "Impl for [[spec]]. Dispatches on the search-model itself, which is the value `define-spec`
  registers its method under."
  {:arglists '([search-model])}
  identity)

Register a metabase model as a search-model. Once we're tying up the fulltext search project, we can inline a detailed explanation. For now, see its schema, and the existing definitions that use it.

(defn spec
  "Return the specification registered for `search-model` (via `spec*`), after resolving the
  corresponding toucan model."
  [search-model]
  ;; make sure the model namespace is loaded.
  (t2/resolve-model (search-model->toucan-model search-model))
  (spec* search-model))

A mapping from each search-model to its specification.

(defn specifications
  "A mapping from each search-model to its specification, built from every entry of
  `search-model->toucan-model`."
  []
  (into {}
        (map (fn [[search-model toucan-model]]
               ;; make sure the model namespace is loaded.
               (t2/resolve-model toucan-model)
               [search-model (spec search-model)]))
        search-model->toucan-model))

Check whether a given specification is valid

(defn validate-spec!
  "Check whether a given specification is valid.

  Throws `ex-info` (with the explanation as data) when `spec` does not match `Specification`, and
  fails an assertion when the spec references a table other than `:this` that has no entry in
  `:joins`. Returns nil otherwise."
  [spec]
  (when-let [info (mr/explain Specification spec)]
    (throw (ex-info (str "Invalid search specification for " (:name spec) ": " (me/humanize info)) info)))
  (doseq [table (keys (find-fields spec))
          :when (not= :this table)]
    (assert (contains? (:joins spec) table) (str "Reference to table without a join: " table))))

Define a spec for a search model.

(defmacro define-spec
  "Define a spec for a search model.

  Expands to code that fills in `:name` with `search-model`, defaults `:visibility` to `:all`,
  merges the given `:attrs` over `default-attrs`, validates the result with `validate-spec!`,
  derives the spec's `:model` from `:hook/search-index`, and registers the spec as the `spec*`
  method for `search-model`."
  [search-model spec]
  ;; Order matters: the spec is validated before anything is derived or registered, so an invalid
  ;; spec throws without touching the hierarchy or the multimethod.
  `(let [spec# (-> ~spec
                   (assoc :name ~search-model)
                   (update :visibility #(or % :all))
                   (update :attrs #(merge ~default-attrs %)))]
     (validate-spec! spec#)
     (derive (:model spec#) :hook/search-index)
     (defmethod spec* ~search-model [~'_] spec#)))

Return an inverted map of data dependencies to search models, used for updating them based on underlying models.

TODO we should memoize this for production (based on spec values)

(defn model-hooks
  "Return an inverted map of data dependencies to search models, used for updating them based on
  underlying models: model -> set of `{:search-model, :fields, :where}`.

  Recomputed from every specification on each call."
  []
  (->> (specifications)
       ;; `specifications` is keyed by search-model; only the specs themselves are needed here.
       vals
       (map search-model-hooks)
       merge-hooks))

Given a transformed toucan map, get back a mapping to the raw db values that we can use in a query.

(defn- instance->db-values
  "Given a transformed toucan map, get back a mapping to the raw db values that we can use in a query.

  Each value is passed through the model's `in` transform for its key when there is one, and kept
  as-is otherwise. When the transforms cannot be looked up, every value is kept as-is.
  NOTE(review): the result is built up from an empty map -- confirm that was the original seed."
  [instance]
  (let [xforms (try
                 (#'t2.transformed/in-transforms (t2/model instance))
                 (catch Exception _     ; this happens for :model/ModelIndexValue, which has no transforms
                   nil))]
    (reduce-kv
     (fn [m k v]
       (assoc m k (if-let [f (get xforms k)] (f v) v)))
     {}
     instance)))

Given an updated or created instance, return a description of which search-models to (re)index.

(defn search-models-to-update
  "Given an updated or created instance, return a description of which search-models to (re)index.

  Returns a set of `[search-model where]` pairs, where `where` has the `:updated` columns replaced
  by the instance's raw db values. A hook is included when `always?` is truthy, or when one of its
  `:fields` is among the instance's changed keys (all of the instance's keys when there are no
  recorded changes)."
  [instance & [always?]]
  ;; The raw values are only needed when at least one hook applies, so compute them lazily.
  (let [raw-values (delay (instance->db-values instance))]
    (into #{}
          ;; `keep` drops the nil produced for hooks that do not apply.
          (keep
           (fn [{:keys [search-model fields where]}]
             (when (or always? (and fields (some fields (keys (or (t2/changes instance) instance)))))
               [search-model (insert-values where :updated @raw-values)])))
          (get (model-hooks) (t2/model instance)))))
  ;; REPL scratchpad. Wrapped in `comment` so that none of this runs when the namespace loads.
  (comment
    ;; Rebuild the :hook/search-index hierarchy from the current specifications.
    (doseq [d (descendants :hook/search-index)]
      (underive d :hook/search-index))
    (doseq [d (keys (model-hooks))]
      (derive d :hook/search-index))

    (search-models-to-update (t2/select-one :model/Card))
    ;; `methods` needs the multimethod, which is `spec*`; `spec` is a plain function.
    (methods spec*)

    (let [where (-> (:model/ModelIndexValue (model-hooks)) first :where)]
      (insert-values where :updated {:model_index_id 1 :model_pk 5})))