Metabase -- Marginalia

metabase.search.appdb.specialization.postgres toc
	(ns metabase.search.appdb.specialization.postgres (:require [clojure.string :as str] [metabase.search.appdb.specialization.api :as specialization] [metabase.util :as u] [toucan2.core :as t2]))
	(def ^:private tsv-language "english")
	(defmethod specialization/table-schema :postgres [base-schema] (into [[:id :bigint [:primary-key] [:raw "GENERATED BY DEFAULT AS IDENTITY"]] [:search_vector :tsvector :not-null] [:with_native_query_vector :tsvector :not-null]] base-schema))
TODO I strongly suspect that there are more indexes that would help performance, we should examine EXPLAIN. Another idea to try, is using a tsvector for all the non-range filter fields.	(defmethod specialization/post-create-statements :postgres [prefix table-name] (mapv (fn [template] (format template prefix table-name)) ["CREATE UNIQUE INDEX IF NOT EXISTS %s_identity_idx ON %s (model, model_id)" "CREATE INDEX IF NOT EXISTS %s_tsvector_idx ON %s USING gin (search_vector)" "CREATE INDEX IF NOT EXISTS %s_native_tsvector_idx ON %s USING gin (with_native_query_vector)" ;; Spam all the indexes for now, let's see if they get used on Stats / Ephemeral envs. "CREATE INDEX IF NOT EXISTS %s_model_archived_idx ON %s (model, archived)" "CREATE INDEX IF NOT EXISTS %s_archived_idx ON %s (archived)"]))
	(defmethod specialization/batch-upsert! :postgres [table entries] (when (seq entries) (t2/query ;; The cost of dynamically calculating these keys should be small compared to the IO cost, so unoptimized. (let [update-keys (vec (disj (set (keys (first entries))) :id :model :model_id)) excluded-kw (fn [column] (keyword (str "excluded." (name column))))] {:insert-into table :values entries :on-conflict [:model :model_id] :do-update-set (zipmap update-keys (map excluded-kw update-keys))}))))
	(defn- quote* [s] (str "'" (str/replace s "'" "''") "'"))
	(defn- process-phrase [word-or-phrase] ;; a phrase is quoted even if the closing quotation mark has not been typed yet (cond ;; trailing quotation mark (= word-or-phrase "\) nil ;; quoted phrases must be matched sequentially (str/starts-with? word-or-phrase "\) (as-> word-or-phrase <> ;; remove the quote mark(s) (str/replace <> #"^\"\|\"$" ) (str/trim <>) (str/split <> #"\s+") (map quote* <>) (str/join " <-> " <>)) ;; negation (re-find #"^-\w" word-or-phrase) (str "!" (quote* (subs word-or-phrase 1))) ;; just a regular word :else (quote* word-or-phrase)))
Break up the words in the search input, preserving quoted and partially quoted segments.	(defn- split-preserving-quotes [s] (re-seq #"\"[^\"]*(?:\"\|$)\|[^\s\"]+\|\s+" (u/lower-case-en s)))
	(defn- process-clause [words-and-phrases] (->> words-and-phrases (remove #{"and"}) (map process-phrase) (remove str/blank?) (str/join " & ")))
Add wildcards at the end of the final word, so that we match ts completions.	(defn- complete-last-word [expression] (str/replace expression #"(\S+)(?=\s$)" "$1:"))
Given the user input, construct a query in the Postgres tsvector query language.	(defn- to-tsquery-expr [input] (str (when input (let [trimmed (str/trim input) complete? (not (str/ends-with? trimmed "\"")) ;; TODO also only complete if the :context is appropriate maybe-complete (if complete? complete-last-word identity)] (->> (str/replace trimmed "\\" "\\\\") split-preserving-quotes (remove str/blank?) (partition-by #{"or"}) (remove #(= (first %) "or")) (map process-clause) (str/join " \| ") maybe-complete)))))
	(defmethod specialization/base-query :postgres [active-table search-term search-ctx select-items] {:select select-items :from [[active-table :search_index]] ;; Using a join allows us to share the query expression between our SELECT and WHERE clauses. :join [[[:raw "to_tsquery('" tsv-language "', " [:lift (to-tsquery-expr search-term)] ")"] :query] [:= 1 1]] :where (if (str/blank? search-term) [:= [:inline 1] [:inline 1]] [:raw (str (if (:search-native-query search-ctx) "with_native_query_vector" "search_vector") " @@ query")])})
	(defn- weighted-tsvector [weight text] [:setweight [:to_tsvector [:inline tsv-language] [:cast text :text]] [:inline weight]])
	(defmethod specialization/extra-entry-fields :postgres [entity] {:search_vector [:\|\| (weighted-tsvector "A" (:name entity)) (weighted-tsvector "B" (:searchable_text entity ""))] :with_native_query_vector [:\|\| (weighted-tsvector "A" (:name entity)) (weighted-tsvector "B" (str/join " " (keep entity [:searchable_text :native_query])))]})
See https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-RANKING 0 (the default) ignores the document length 1 divides the rank by 1 + the logarithm of the document length 2 divides the rank by the document length 4 divides the rank by the mean harmonic distance between extents (this is implemented only by tsrankcd) 8 divides the rank by the number of unique words in document 16 divides the rank by 1 + the logarithm of the number of unique words in document 32 divides the rank by itself + 1	(def ^:private ts-rank-normalization 0)
	(defmethod specialization/text-score :postgres [] [:ts_rank :search_vector :query [:inline ts-rank-normalization]])
	(defmethod specialization/view-count-percentile-query :postgres [index-table p-value] (let [expr [:raw "percentile_cont(" [:lift p-value] ") WITHIN GROUP (ORDER BY view_count)"]] {:select [:search_index.model [expr :vcp]] :from [[index-table :search_index]] :group-by [:search_index.model] :having [:is-not expr nil]}))

metabase.search.appdb.specialization.postgres