(ns metabase.search.impl
  (:require
   [clojure.string :as str]
   [metabase.config :as config]
   [metabase.legacy-mbql.normalize :as mbql.normalize]
   [metabase.lib.core :as lib]
   [metabase.models.collection :as collection]
   [metabase.models.collection.root :as collection.root]
   [metabase.models.data-permissions :as data-perms]
   [metabase.models.database :as database]
   [metabase.models.interface :as mi]
   [metabase.permissions.util :as perms.u]
   [metabase.public-settings :as public-settings]
   [metabase.public-settings.premium-features :as premium-features]
   [metabase.search.config
    :as search.config
    :refer [SearchableModel SearchContext]]
   [metabase.search.engine :as search.engine]
   [metabase.search.filter :as search.filter]
   [metabase.search.in-place.filter :as search.in-place.filter]
   [metabase.search.in-place.scoring :as scoring]
   [metabase.util :as u]
   [metabase.util.i18n :refer [tru deferred-tru]]
   [metabase.util.json :as json]
   [metabase.util.log :as log]
   [metabase.util.malli :as mu]
   [metabase.util.malli.registry :as mr]
   [metabase.util.malli.schema :as ms]
   [toucan2.core :as t2]
   [toucan2.instance :as t2.instance]
   [toucan2.realize :as t2.realize]))
;; Ask the compiler to warn about reflective Java interop calls while compiling the rest of this file.
(set! *warn-on-reflection* true)
(defmulti ^:private check-permissions-for-model
  "Decide whether `search-result` may be shown to the user described by `search-ctx`. Implementations return a
  truthy value to keep the result. Dispatches on the result's `:model` value converted to a keyword."
  {:arglists '([search-ctx search-result])}
  (fn [_search-ctx search-result]
    (-> search-result :model keyword)))

;; TODO FIXME -- search currently still requires [[metabase.api.common/*current-user-permissions-set*]] to be bound
;; (since [[mi/can-write?]] and [[mi/can-read?]] depend on it), even though the search context already requires
;; `:current-user-perms` to be passed in. We should fix things so search works independently of API-specific dynamic
;; variables. This might require updating `can-read?` and `can-write?` to take explicit perms sets instead of relying
;; on dynamic variables.

;; Evaluates `body` with `metabase.api.common/*current-user-permissions-set*` bound (via `with-bindings`) to an atom
;; wrapping `current-user-perms`.
;; The var is looked up with `requiring-resolve` when the expanded code runs, so this namespace does not list
;; `metabase.api.common` in its `:require` -- presumably to avoid a static dependency on the API layer (TODO confirm).
(defmacro ^:private ensure-current-user-perms-set-is-bound
  {:style/indent 0}
  [current-user-perms & body]
  `(with-bindings {(requiring-resolve 'metabase.api.common/*current-user-permissions-set*) (atom ~current-user-perms)}
     ~@body))
(defn- can-write?
  "Returns the result of [[mi/can-write?]] for `instance`, evaluated with the `:current-user-perms` of `search-ctx`
  bound as the current user's permissions set."
  [search-ctx instance]
  (let [perms (:current-user-perms search-ctx)]
    (ensure-current-user-perms-set-is-bound perms
      (mi/can-write? instance))))
(defn- can-read?
  "Returns the result of [[mi/can-read?]] for `instance`, evaluated with the `:current-user-perms` of `search-ctx`
  bound as the current user's permissions set."
  [search-ctx instance]
  (let [perms (:current-user-perms search-ctx)]
    (ensure-current-user-perms-set-is-bound perms
      (mi/can-read? instance))))
(defmethod check-permissions-for-model :default
  [search-ctx instance]
  ;; Outside of archived searches there is nothing left to check here: whatever can be filtered in the query
  ;; (i.e., everything in a collection) has already been filtered out. Archived searches additionally require
  ;; write access to the instance.
  (or (not (:archived? search-ctx))
      (can-write? search-ctx instance)))
(defmethod check-permissions-for-model :table
  [{user-id :current-user-id} {table-id :id}]
  ;; Tables without collection permissions were already excluded by the query itself; what remains is to check
  ;; the user's data permissions for this specific table.
  (let [db-id     (database/table-id->database-id table-id)
        has-perm? (fn [perm-type perm-value]
                    (data-perms/user-has-permission-for-table? user-id perm-type perm-value db-id table-id))]
    (and (has-perm? :perms/view-data :unrestricted)
         (has-perm? :perms/create-queries :query-builder))))
(defmethod check-permissions-for-model :indexed-entity
  [{user-id :current-user-id} {db-id :database_id}]
  ;; Requires full database-level permissions: `:query-builder-and-native` for creating queries and
  ;; `:unrestricted` for viewing data.
  (letfn [(db-permission [perm-type]
            (data-perms/full-db-permission-for-user user-id perm-type db-id))]
    (and (= :query-builder-and-native (db-permission :perms/create-queries))
         (= :unrestricted (db-permission :perms/view-data)))))
(defmethod check-permissions-for-model :metric
  [search-ctx instance]
  ;; Archived searches require write access; every other search only requires read access.
  (let [permitted? (if (:archived? search-ctx) can-write? can-read?)]
    (permitted? search-ctx instance)))
(defmethod check-permissions-for-model :segment
  [search-ctx instance]
  ;; Archived searches require write access; every other search only requires read access.
  (let [permitted? (if (:archived? search-ctx) can-write? can-read?)]
    (permitted? search-ctx instance)))
(defmethod check-permissions-for-model :database
  [search-ctx instance]
  ;; Archived searches require write access; every other search only requires read access.
  (let [permitted? (if (:archived? search-ctx) can-write? can-read?)]
    (permitted? search-ctx instance)))

;; Hydrate common-name for last_edited_by and created_by for each result.

(defn- hydrate-user-metadata
  "Adds `:creator_common_name` and `:last_editor_common_name` to every result, looked up from the `:common_name`
  of the users referenced by `:creator_id` and `:last_editor_id`. All users are fetched in one query, and no query
  is run when no result references a user. Returns a vector in the same order as `results`."
  [results]
  (let [user-ids        (into #{}
                              (comp (mapcat (juxt :last_editor_id :creator_id))
                                    (remove nil?))
                              results)
        id->common-name (if (seq user-ids)
                          (t2/select-pk->fn :common_name [:model/User :id :first_name :last_name :email] :id [:in user-ids])
                          {})
        with-names      (fn [result]
                          (assoc result
                                 :creator_common_name (get id->common-name (:creator_id result))
                                 :last_editor_common_name (get id->common-name (:last_editor_id result))))]
    (mapv with-names results)))

;; Adds `:collection_effective_ancestors` to datasets in the search results.

(defn add-dataset-collection-hierarchy
  "Adds `:collection_effective_ancestors` to every search result whose `:model` is \"dataset\"; other results pass
  through untouched. The ancestors are hydrated from the root collection when the result has no `:collection_id`,
  and otherwise from the result's `:collection_location`. Returns a lazy sequence in the original order."
  [search-results]
  (letfn [(effective-ancestors [{:keys [collection_id collection_location]}]
            (let [collection-like (if (nil? collection_id)
                                    collection/root-collection
                                    {:location collection_location})
                  hydrated        (t2/hydrate collection-like :effective_ancestors)]
              ;; two pieces for backwards compatibility:
              ;; - remove the root collection
              ;; - remove the `personal_owner_id`
              (->> (:effective_ancestors hydrated)
                   (remove collection.root/is-root-collection?)
                   (map #(dissoc % :personal_owner_id)))))
          (annotate [result]
            (if (= (:model result) "dataset")
              (assoc result :collection_effective_ancestors (effective-ancestors result))
              result))]
    (map annotate search-results)))

;; Batch-hydrates `:effective_location` and `:effective_parent` on collection search results. Keeps search results
;; in order.

(defn- add-collection-effective-location
  "Hydrates `:effective_parent` on all collection results in a single batch and swaps each collection result for
  its hydrated version. Non-collection results get `:effective_location nil`. The order of `search-results` is
  preserved."
  [search-results]
  (let [collection?  #(mi/instance-of? :model/Collection %)
        hydrated     (t2/hydrate (filter collection? search-results) :effective_parent)
        id->hydrated (into {} (map (juxt :id identity)) hydrated)]
    (map (fn [result]
           (if (collection? result)
             (id->hydrated (:id result))
             (assoc result :effective_location nil)))
         search-results)))

;; All the result components that are displayed by the frontend by default.

;; Set of result column keys; `serialize` compares the matched columns against this set when deciding whether to
;; compute a `:context` for a result.
(def ^:private ^:const displayed-columns
  #{:name :display_name :collection_name :description})

;; Massage the raw result from the DB and match data into something more useful for the client.

;; TODO OMG the mix of kebab-case and snake_case here is going to make me throw up. We should use all kebab-case in
;; Clojure land and then convert the stuff that actually gets sent over the wire in the REST API to snake_case in the
;; API endpoint itself, not in the search impl.

;; Turns one scored search result into the map returned to the client:
;; - `:name` becomes `display_name` when one is present and either no column-level matches were recorded or
;;   `:display_name` is among the matched columns;
;; - `:context` is produced by invoking the first `:match-context-thunk` found in `relevant-scores`, subject to the
;;   column check noted inline below;
;; - `:collection` is rebuilt as a nested map (or replaced by the trash collection for directly-archived
;;   non-collection results);
;; - `:scores` is `all-scores` with the thunks stripped;
;; - `:dataset_query` is JSON-decoded and normalized;
;; - the flat intermediate keys that were folded into the above are removed.
(defn serialize
  [{:as result :keys [all-scores relevant-scores name display_name collection_id collection_name
                      collection_authority_level collection_type collection_effective_ancestors effective_parent
                      archived_directly model]}]
  (let [matching-columns    (into #{} (keep :column relevant-scores))
        match-context-thunk (some :match-context-thunk relevant-scores)
        ;; thunks are functions, so they are stripped from the scores before the scores are returned
        remove-thunks       (partial mapv #(dissoc % :match-context-thunk))
        use-display-name?   (and display_name
                                 ;; This collection will be empty unless we used in-place matching.
                                 ;; For now, for simplicity and performance reasons, we are not bothering to check
                                 ;; *where* the matches in the tsvector came from.
                                 (or (empty? matching-columns)
                                     (contains? matching-columns :display_name)))]
    (-> result
        (assoc
         :name           (if use-display-name? display_name name)
         ;; NOTE(review): `(remove matching-columns displayed-columns)` yields the displayed columns that did NOT
         ;; match, so as written the thunk only runs when every displayed column is among the matched columns --
         ;; confirm this is the intended condition.
         :context        (when (and match-context-thunk
                                    (empty?
                                     (remove matching-columns displayed-columns)))
                           (match-context-thunk))
         ;; directly-archived items (other than collections themselves) are reported as living in the trash
         :collection     (if (and archived_directly (not= "collection" model))
                           (select-keys (collection/trash-collection)
                                        [:id :name :authority_level :type])
                           (merge {:id              collection_id
                                   :name            collection_name
                                   :authority_level collection_authority_level
                                   :type            collection_type}
                                  ;; for non-root collections, override :collection with the values for its effective parent
                                  effective_parent
                                  (when collection_effective_ancestors
                                    {:effective_ancestors collection_effective_ancestors})))
         :scores          (remove-thunks all-scores))
        ;; a decoded query with a "type" key goes through the legacy MBQL normalizer; anything else goes through
        ;; `lib/normalize`, with an empty result collapsed to nil. A missing/nil query stays nil.
        (update :dataset_query (fn [dataset-query]
                                 (when-let [query (some-> dataset-query json/decode)]
                                   (if (get query "type")
                                     (mbql.normalize/normalize query)
                                     (not-empty (lib/normalize query))))))
        (dissoc
         :all-scores
         :relevant-scores
         :collection_effective_ancestors
         :collection_id
         :collection_location
         :collection_name
         :collection_type
         :archived_directly
         :display_name
         :effective_parent))))

;; Coerce a bit returned by some MySQL/MariaDB versions in some situations to a Boolean.

(defn- bit->boolean
  "Converts a numeric `v` to a boolean (`false` for zero, `true` for anything else). Non-numeric values, including
  `nil` and actual booleans, are returned unchanged."
  [v]
  (if-not (number? v)
    v
    (not (zero? v))))

;; In the absence of an explicit engine argument in a request, which engine should be used?

(defn default-engine
  "Returns the engine keyword to use when a request does not name one. In tests this is always
  `:search.engine/in-place`. Otherwise it is derived from the `search-engine` setting (asserting that the engine
  is supported), falling back to `:search.engine/in-place` when the setting has no value."
  []
  ;; TODO The API tests have not yet been ported to reflect the new search's results, so the setting is not even
  ;; consulted when running tests.
  (let [configured (when-not config/is-test?
                     (public-settings/search-engine))]
    (if-not configured
      :search.engine/in-place
      (u/prog1 (keyword "search.engine" (name configured))
        (assert (search.engine/supported-engine? <>))))))
(defn- parse-engine
  "Resolves the engine name in `value` to a `:search.engine/...` keyword. Blank, unknown and unsupported values
  all fall back to [[default-engine]]; the latter two also log a warning."
  [value]
  (let [engine   (when-not (str/blank? value)
                   (keyword "search.engine" value))
        ;; the warning branches rely on the logging call's return value being falsy, exactly like the fallback below
        resolved (cond
                   (nil? engine)
                   nil

                   (not (search.engine/known-engine? engine))
                   (log/warnf "Search-engine is unknown: %s" value)

                   (not (search.engine/supported-engine? engine))
                   (log/warnf "Search-engine is not supported: %s" value)

                   :else
                   engine)]
    (or resolved (default-engine))))

;; This forwarding is here for tests; we should clean those up.

(defn- apply-default-engine
  "Returns `search-ctx` with its `:search-engine` replaced by [[default-engine]]. Throws when the context is
  already using the default engine, since there is then nothing left to fall back to."
  [search-ctx]
  (let [requested (:search-engine search-ctx)
        fallback  (default-engine)]
    (if (= fallback requested)
      (throw (ex-info "Missing implementation for default search-engine" {:search-engine requested}))
      (do
        (log/debugf "Missing implementation for %s so instead using %s" requested fallback)
        (assoc search-ctx :search-engine fallback)))))
(defmethod search.engine/results :default
  [search-ctx]
  ;; No implementation for the requested engine: retry with the default engine.
  (-> search-ctx
      apply-default-engine
      search.engine/results))
(defmethod search.engine/model-set :default
  [search-ctx]
  ;; No implementation for the requested engine: retry with the default engine.
  (-> search-ctx
      apply-default-engine
      search.engine/model-set))
;; Schema for the argument map accepted by `search-context`.
;; The map is closed, so keys not listed here are rejected. `:search-string`, `:models`, `:current-user-id`,
;; `:is-superuser?` and `:current-user-perms` must always be present (the first two may be nil); every other key
;; is optional.
(mr/def ::search-context.input
  [:map {:closed true}
   [:search-string                                        [:maybe ms/NonBlankString]]
   [:context                             {:optional true} [:maybe :keyword]]
   [:models                                               [:maybe [:set SearchableModel]]]
   [:current-user-id                                      pos-int?]
   [:is-impersonated-user?               {:optional true} :boolean]
   [:is-sandboxed-user?                  {:optional true} :boolean]
   [:is-superuser?                                        :boolean]
   [:current-user-perms                                   [:set perms.u/PathSchema]]
   [:archived                            {:optional true} [:maybe :boolean]]
   [:created-at                          {:optional true} [:maybe ms/NonBlankString]]
   [:created-by                          {:optional true} [:maybe [:set ms/PositiveInt]]]
   [:filter-items-in-personal-collection {:optional true} [:maybe [:enum "all" "only" "only-mine" "exclude" "exclude-others"]]]
   [:last-edited-at                      {:optional true} [:maybe ms/NonBlankString]]
   [:last-edited-by                      {:optional true} [:maybe [:set ms/PositiveInt]]]
   [:limit                               {:optional true} [:maybe ms/Int]]
   [:offset                              {:optional true} [:maybe ms/Int]]
   [:table-db-id                         {:optional true} [:maybe ms/PositiveInt]]
   ;; the engine arrives as a plain string; `search-context` converts it to a `:search.engine/...` keyword
   [:search-engine                       {:optional true} [:maybe string?]]
   [:search-native-query                 {:optional true} [:maybe true?]]
   [:model-ancestors?                    {:optional true} [:maybe boolean?]]
   [:verified                            {:optional true} [:maybe true?]]
   [:ids                                 {:optional true} [:maybe [:set ms/PositiveInt]]]
   [:calculate-available-models?         {:optional true} [:maybe :boolean]]
   [:include-dashboard-questions?        {:optional true} [:maybe boolean?]]])
(mu/defn search-context :- SearchContext
  "Create a new search context that you can pass to other functions like [[search]].

  Notable transformations of the input map:

  - an empty/nil `:models` means \"all models\"; the final `:models` is further narrowed to the models applicable
    to the filters in the context;
  - `:search-engine` (a string) becomes a `:search.engine/...` keyword, falling back to the default engine;
  - `:archived` becomes `:archived?`, `:limit` becomes `:limit-int` and `:offset` becomes `:offset-int`;
  - `:archived` and `:filter-items-in-personal-collection` fall back to the per-engine/per-context filter defaults.

  Throws if `:verified` is supplied without the required premium features, and throws an `ex-info` with
  `:status-code 400` if `:ids` is supplied while more than one model is selected."
  [{:keys [archived
           context
           calculate-available-models?
           created-at
           created-by
           current-user-id
           current-user-perms
           filter-items-in-personal-collection
           ids
           is-impersonated-user?
           is-sandboxed-user?
           include-dashboard-questions?
           is-superuser?
           last-edited-at
           last-edited-by
           limit
           model-ancestors?
           models
           offset
           search-engine
           search-native-query
           search-string
           table-db-id
           verified]} :- ::search-context.input]
  ;; for prod where Malli is disabled
  {:pre [(pos-int? current-user-id) (set? current-user-perms)]}
  (when (some? verified)
    (premium-features/assert-has-any-features
     [:content-verification :official-collections]
     (deferred-tru "Content Management or Official Collections")))
  (let [models (if (seq models) models search.config/all-models)
        engine (parse-engine search-engine)
        fvalue (fn [filter-key] (search.config/filter-default engine context filter-key))
        ;; Keys in the base map are always present; the `cond->` clauses below only add optional filters that were
        ;; actually supplied.
        ;; NOTE(review): because of the `or`, an explicit `:archived false` is indistinguishable from an omitted
        ;; value and will pick up the filter default -- confirm that is intended.
        ctx    (cond-> {:archived?                           (boolean (or archived (fvalue :archived)))
                        :context                             (or context :unknown)
                        :calculate-available-models?         (boolean calculate-available-models?)
                        :current-user-id                     current-user-id
                        :current-user-perms                  current-user-perms
                        ;; a supplied value wins over the default, so no separate conditional `assoc` is needed
                        :filter-items-in-personal-collection (or filter-items-in-personal-collection
                                                                 (fvalue :personal-collection-id))
                        :is-impersonated-user?               is-impersonated-user?
                        :is-sandboxed-user?                  is-sandboxed-user?
                        :is-superuser?                       is-superuser?
                        :models                              models
                        :model-ancestors?                    (boolean model-ancestors?)
                        :search-engine                       engine
                        :search-string                       search-string}
                 (some? created-at)                          (assoc :created-at created-at)
                 (seq created-by)                            (assoc :created-by created-by)
                 (some? last-edited-at)                      (assoc :last-edited-at last-edited-at)
                 (seq last-edited-by)                        (assoc :last-edited-by last-edited-by)
                 (some? table-db-id)                         (assoc :table-db-id table-db-id)
                 (some? limit)                               (assoc :limit-int limit)
                 (some? offset)                              (assoc :offset-int offset)
                 (some? search-native-query)                 (assoc :search-native-query search-native-query)
                 (some? verified)                            (assoc :verified verified)
                 (some? include-dashboard-questions?)        (assoc :include-dashboard-questions? include-dashboard-questions?)
                 (seq ids)                                   (assoc :ids ids))]
    ;; ids are only meaningful relative to a single model
    (when (and (seq ids)
               (not= (count models) 1))
      (throw (ex-info (tru "Filtering by ids work only when you ask for a single model") {:status-code 400})))
    ;; TODO this is rather hidden, perhaps better to do it further down the stack
    (assoc ctx :models
           ;; We are not working to keep the legacy engine logic in sync with the new modular approach.
           (if (= :search.engine/in-place engine)
             (search.in-place.filter/search-context->applicable-models ctx)
             (search.filter/search-context->applicable-models ctx)))))
(defn- to-toucan-instance
  "Wraps `row` in a Toucan instance of the `:db-model` that `search.config/model-to-db-model` associates with the
  row's `:model`."
  [row]
  (let [db-model (:db-model (search.config/model-to-db-model (:model row)))]
    (t2.instance/instance db-model row)))
(defn- map-collection
  "Prepares a collection search result: directly-archived collections get the trash path as their `:location`,
  `:type` is copied from `:collection_type`, and the result is passed through
  `collection/maybe-localize-trash-name`."
  [collection]
  (let [relocated (if (:archived_directly collection)
                    (assoc collection :location (collection/trash-path))
                    collection)]
    (-> relocated
        (assoc :type (:collection_type collection))
        collection/maybe-localize-trash-name)))
(defn- normalize-result
  "Realizes a raw search row, wraps it in the matching Toucan instance and applies the cheap normalizations that
  must happen before permission filtering and scoring."
  [result]
  (let [instance (to-toucan-instance (t2.realize/realize result))
        ;; MySQL returns booleans as `1` or `0` so convert those to boolean as needed
        coerced  (reduce (fn [row k] (update row k bit->boolean))
                         instance
                         [:bookmark :archived :archived_directly])]
    ;; Collections require some transformation before being scored and returned by search.
    (if (t2/instance-of? :model/Collection instance)
      (map-collection coerced)
      coerced)))
(defn- add-can-write
  "Adds `:can_write` to `row` when it is a Dashboard or a Card; any other row is returned unchanged."
  [search-ctx row]
  (let [dashboard-or-card? (or (mi/instance-of? :model/Dashboard row)
                               (mi/instance-of? :model/Card row))]
    (cond-> row
      dashboard-or-card? (assoc :can_write (can-write? search-ctx row)))))

;; Additional normalization that is done after we've filtered by permissions, as it's more expensive.

(defn- normalize-result-more
  "Second normalization pass: JSON-decodes `:pk_ref` and adds `:can_write` where applicable (see
  [[add-can-write]])."
  [search-ctx result]
  (let [decoded (update result :pk_ref json/decode)]
    (add-can-write search-ctx decoded)))
(defn- search-results
  "Builds the response map for a search: the requested page of `total-results` under `:data` (collections in the
  page additionally get `:can_write`), the paging/engine/model parameters from `search-ctx`, and the overall
  `:total`. When the context asks for it, `:available_models` is computed with `model-set-fn`."
  [search-ctx model-set-fn total-results]
  (let [{:keys [limit-int offset-int]} search-ctx
        with-collection-perms          (fn [item]
                                         (if (mi/instance-of? :model/Collection item)
                                           (assoc item :can_write (can-write? search-ctx item))
                                           item))
        ;; We get to do this slicing and dicing with the result data because
        ;; the pagination of search is for UI improvement, not for performance.
        ;; We intend for the cardinality of the search results to be below the default max before this slicing occurs
        page                           (cond->> total-results
                                         (some? offset-int) (drop offset-int)
                                         (some? limit-int)  (take limit-int)
                                         :always            (map with-collection-perms))
        response                       {:data        page
                                        :limit       limit-int
                                        :models      (:models search-ctx)
                                        :offset      offset-int
                                        :table_db_id (:table-db-id search-ctx)
                                        :engine      (:search-engine search-ctx)
                                        :total       (count total-results)}]
    (if (:calculate-available-models? search-ctx)
      (assoc response :available_models (model-set-fn search-ctx))
      response)))
(defn- hydrate-dashboards
  "Hydrates `:dashboard` (with its `:moderation_status`) on `results`, then trims each hydrated dashboard down to
  `:id`, `:name` and `:moderation_status`. Results without a dashboard keep `:dashboard nil`."
  [results]
  (let [trim (fn [dashboard]
               (when dashboard
                 (select-keys dashboard [:id :name :moderation_status])))]
    (map #(update % :dashboard trim)
         (t2/hydrate results [:dashboard :moderation_status]))))

;; Builds a search query that includes all the searchable entities, and runs it.

(mu/defn search
  "Runs the search described by `search-ctx` and returns the response map built by [[search-results]].

  Raw engine results are capped, normalized, filtered by permissions, normalized further and scored in a single
  transducer pass; the top results are then hydrated and serialized."
  [search-ctx :- search.config/SearchContext]
  (let [scoring-ctx   (select-keys search-ctx [:search-engine :search-string :search-native-query])
        raw-results   (search.engine/results search-ctx)
        xf            (comp
                       (take search.config/*db-max-results*)
                       (map normalize-result)
                       (filter #(check-permissions-for-model search-ctx %))
                       (map #(normalize-result-more search-ctx %))
                       (keep #(search.engine/score scoring-ctx %)))
        ;; dataset ancestors are only added when the context asks for them
        add-ancestors (if (:model-ancestors? search-ctx)
                        add-dataset-collection-hierarchy
                        identity)
        total-results (->> (scoring/top-results raw-results search.config/max-filtered-results xf)
                           hydrate-dashboards
                           hydrate-user-metadata
                           add-ancestors
                           add-collection-effective-location
                           (map serialize))]
    (search-results search-ctx search.engine/model-set total-results)))