(ns metabase.search.in-place.util (:require [clojure.core.memoize :as memoize] [clojure.string :as str] [metabase.util :as u] [metabase.util.malli :as mu])) | |
(defn wildcard-match
  "Return the search term `s` with a `%` on each side, for use as a wildcard match pattern."
  [s]
  ;; `str` renders the character literals as \"%\" and a nil `s` as the empty string.
  (str \% s \%))
(mu/defn normalize :- :string
  "Normalize a search `query`: strip leading and trailing whitespace, then lower-case the result with
  `u/lower-case-en`."
  [query :- :string]
  (-> query
      str/trim
      u/lower-case-en))
(mu/defn tokenize :- [:sequential :string]
  "Split a search `query` on runs of whitespace and return its non-empty tokens."
  [query :- :string]
  ;; `str/split` yields an empty first element when `query` starts with whitespace; drop such empties.
  (->> (str/split query #"\s+")
       (remove empty?)))
(def largest-common-subseq-length
  "Given an equality test `eq` and two lists `xs` and `ys`, return the length of the longest run of consecutive
  elements that the two lists have in common.

    (largest-common-subseq-length = [1 2 3 :this :part :will :not :be :relevant]
                                    [:not :counted 1 2 3 :also :not :counted])
    ;; => 3

  The 4-arity also accepts `tally`, the length of the run matched so far; the 3-arity starts it at 0.
  Results are memoized with a FIFO cache."
  (memoize/fifo
   (fn
     ([eq xs ys]
      (largest-common-subseq-length eq xs ys 0))
     ([eq xs ys tally]
      (if (and (pos? (count xs))
               (pos? (count ys)))
        ;; Recurse through the var (not the anonymous fn) so every sub-problem goes through the cache.
        (let [run-through-heads (if (eq (first xs) (first ys))
                                  ;; heads match: extend the current run
                                  (largest-common-subseq-length eq (rest xs) (rest ys) (inc tally))
                                  ;; heads differ: the current run ends here
                                  tally)
              ;; restart the search with a fresh run after dropping the head of either list
              without-ys-head   (largest-common-subseq-length eq xs (rest ys) 0)
              without-xs-head   (largest-common-subseq-length eq (rest xs) ys 0)]
          (max run-through-heads without-ys-head without-xs-head))
        ;; one of the lists is exhausted: whatever has been matched so far is the answer
        tally)))
   ;; Uses O(n*m) space (the lengths of the two lists) with k≤2, so napkin math suggests this gives us caching
   ;; for at least a 31*31 search (or 50*20, etc), which sounds like more than enough. Memory is cheap and the
   ;; items are small, so we may as well skew high.
   ;; As a precaution, the scorer that uses this is expected to limit the number of tokens it passes in
   ;; (via a `take` call; that call is not part of this definition).
   :fifo/threshold 2000))