(ns metabase.search.in-place.util (:require [clojure.core.memoize :as memoize] [clojure.string :as str] [metabase.util :as u] [metabase.util.malli :as mu])) | |
(defn wildcard-match
  "Returns a string pattern to match a wildcard search term: `s` wrapped in a `%` on each side."
  [s]
  (str "%" s "%"))
(mu/defn normalize :- :string
  "Normalize a `query`: trim surrounding whitespace, then lower-case it with `u/lower-case-en`."
  [query :- :string]
  (-> query
      str/trim
      u/lower-case-en))
(mu/defn tokenize :- [:sequential :string]
  "Split a search `query` on runs of whitespace and return its non-empty tokens."
  [query :- :string]
  (->> (str/split query #"\s+")
       ;; leading whitespace makes `str/split` yield an empty first element; drop it
       (remove empty?)))
(def largest-common-subseq-length
  "Given two lists (and an equality test), return the length of the longest overlapping subsequence, i.e. the longest
  run of consecutive elements that appears in both `xs` and `ys`.

    (largest-common-subseq-length = [1 2 3 :this :part :will :not :be :relevant]
                                    [:not :counted 1 2 3 :also :not :counted])
    ;; => 3

  Arities:

    [eq xs ys]        entry point; starts the running match length at 0
    [eq xs ys tally]  internal; `tally` is the length of the run matched so far

  Calls go through the memoized var, so recursive sub-problems are cached as well."
  (memoize/fifo
   (fn
     ([eq xs ys]
      (largest-common-subseq-length eq xs ys 0))
     ([eq xs ys tally]
      ;; `empty?` rather than `(zero? (count ...))`: no need to walk a whole seq just to learn whether it has elements
      (if (or (empty? xs)
              (empty? ys))
        tally
        (max
         ;; extend the current run if the heads match, otherwise the run ends here
         (if (eq (first xs)
                 (first ys))
           (largest-common-subseq-length eq (rest xs) (rest ys) (inc tally))
           tally)
         ;; or start a fresh run after skipping the head of either list
         (largest-common-subseq-length eq xs (rest ys) 0)
         (largest-common-subseq-length eq (rest xs) ys 0)))))
   ;; Uses O(n*m) space (the lengths of the two lists) with k≤2, so napkin math suggests this gives us caching for at
   ;; least a 31*31 search (or 50*20, etc) which sounds like more than enough. Memory is cheap and the items are
   ;; small, so we may as well skew high.
   ;; As a precaution, the scorer that uses this limits the number of tokens (see the `take` call below)
   :fifo/threshold 2000))