Middlware that optimizes equality filter clauses against bucketed temporal fields. See docstring for
| (ns metabase.query-processor.middleware.optimize-temporal-filters (:require [clojure.walk :as walk] [metabase.legacy-mbql.schema :as mbql.s] [metabase.legacy-mbql.util :as mbql.u] [metabase.lib.metadata :as lib.metadata] [metabase.lib.schema.common :as lib.schema.common] [metabase.lib.util.match :as lib.util.match] [metabase.query-processor.store :as qp.store] [metabase.util :as u] [metabase.util.date-2 :as u.date] [metabase.util.log :as log] [metabase.util.malli :as mu] [metabase.util.malli.registry :as mr])) |
(def ^:private optimizable-units
#{:second :minute :hour :day :week :month :quarter :year}) | |
TODO -- we can use [[metabase.lib/temporal-bucket]] for this once we convert this middleware to MLv2 | (defmulti ^:private temporal-unit
{:arglists '([expression])}
mbql.u/dispatch-by-clause-name-or-class) |
(defmethod temporal-unit :default [_expr] nil) | |
(defmethod temporal-unit :field [[_field _id-or-name opts]] (:temporal-unit opts)) | |
(defmethod temporal-unit :expression [[_field _id-or-name opts]] (:temporal-unit opts)) | |
(defmethod temporal-unit :relative-datetime [[_relative-datetime _n unit]] unit) | |
(defmethod temporal-unit :absolute-datetime [[_absolute-datetime _t unit]] unit) | |
(defn- temporal-ref? [x]
(and (mbql.u/is-clause? #{:field :expression} x)
(or (temporal-unit x)
(let [[_field _id-or-name opts] x]
(when-let [expr-type ((some-fn :effective-type :base-type) opts)]
(isa? expr-type :type/Temporal)))))) | |
(defn- optimizable-expr? [expr]
(lib.util.match/match-one expr
#{:field :expression}
(and (temporal-ref? &match)
(let [unit (or (temporal-unit &match) :default)]
(or (= unit :default)
(contains? optimizable-units unit)))))) | |
(defmulti ^:private can-optimize-filter?
{:arglists '([mbql-clause])}
mbql.u/dispatch-by-clause-name-or-class) | |
Can | (defn- optimizable-temporal-value?
[temporal-value]
(lib.util.match/match-one temporal-value
[:relative-datetime (_ :guard #{0 :current})]
true
[(_ :guard #{:absolute-datetime :relative-datetime}) _ _opts]
(let [unit (or (temporal-unit &match) :default)]
(or (= unit :default)
(contains? optimizable-units unit))))) |
Do datetime | (defn- field-and-temporal-value-have-compatible-units?
[field temporal-value]
(lib.util.match/match-one temporal-value
[:relative-datetime (_ :guard #{0 :current})]
true
[(_ :guard #{:absolute-datetime :relative-datetime}) _ opts]
(let [field-unit (or (temporal-unit field) :default)
value-unit (or (temporal-unit &match) :default)]
(cond
(= field-unit :default) (contains? optimizable-units value-unit)
(= value-unit :default) (contains? optimizable-units field-unit)
:else (= field-unit value-unit))))) |
TODO -- once we convert this middleware to MLv2 we can use [[metabase.lib.metadata.calculation/type-of]] | (defn- field-or-expression-effective-type [field-or-expression]
(lib.util.match/match-one field-or-expression
[(_tag :guard #{:field :expression}) _ (opts :guard :effective-type)]
(:effective-type opts)
[:field (id :guard pos-int?) _opts]
(when-let [field (lib.metadata/field (qp.store/metadata-provider) id)]
(:effective-type field))
[(_tag :guard #{:field :expression}) _ (opts :guard :base-type)]
(:base-type opts))) |
(defmethod can-optimize-filter? :default
[filter-clause]
(lib.util.match/match-one filter-clause
[_tag
(field :guard optimizable-expr?)
(temporal-value :guard optimizable-temporal-value?)]
(field-and-temporal-value-have-compatible-units? field temporal-value))) | |
(defn- not-default-bucket-clause
[clause]
(and (vector? clause)
(not= :default (get-in clause [2 :temporal-unit])))) | |
TODO: I believe we do not generate __filter clauses that have default temporal bucket on column arg which should be
optimized__. Unfortunately I'm not certain about that. If I was, the following | |
(defmethod can-optimize-filter? :>=
[filter-clause]
(lib.util.match/match-one
filter-clause
[_tag
;; Don't optimize >= with column that has default temporal bucket
(field :guard (every-pred not-default-bucket-clause optimizable-expr?))
(temporal-value :guard optimizable-temporal-value?)]
(field-and-temporal-value-have-compatible-units? field temporal-value))) | |
(defmethod can-optimize-filter? :<
[filter-clause]
(lib.util.match/match-one
filter-clause
[_tag
;; Don't optimize < with column that has default temporal bucket
(field :guard (every-pred not-default-bucket-clause optimizable-expr?))
(temporal-value :guard optimizable-temporal-value?)]
(field-and-temporal-value-have-compatible-units? field temporal-value))) | |
(defmethod can-optimize-filter? :between
[filter-clause]
(lib.util.match/match-one filter-clause
[:between
[(_offset :guard #{:+ :-})
(field :guard (every-pred (comp #{:field :expression} first) optimizable-expr?))
[:interval _ _]]
(temporal-value-1 :guard optimizable-temporal-value?)
(temporal-value-2 :guard optimizable-temporal-value?)]
(and (field-and-temporal-value-have-compatible-units? field temporal-value-1)
(field-and-temporal-value-have-compatible-units? field temporal-value-2))
[:between
(field :guard (every-pred (comp #{:field :expression} first) optimizable-expr?))
(temporal-value-1 :guard optimizable-temporal-value?)
(temporal-value-2 :guard optimizable-temporal-value?)]
(and (field-and-temporal-value-have-compatible-units? field temporal-value-1)
(field-and-temporal-value-have-compatible-units? field temporal-value-2)))) | |
(mr/def ::temporal (lib.schema.common/instance-of-class java.time.temporal.Temporal)) | |
(mu/defn- temporal-literal-lower-bound :- ::temporal [unit :- (into [:enum] u.date/add-units) t :- ::temporal] (:start (u.date/range t unit))) | |
(mu/defn- temporal-literal-upper-bound :- ::temporal [unit :- (into [:enum] u.date/add-units) t :- ::temporal] (:end (u.date/range t unit))) | |
(defn- change-temporal-unit-to-default [field]
(lib.util.match/replace field
[(_ :guard #{:field :expression}) _ (_ :guard (comp optimizable-units :temporal-unit))]
(mbql.u/update-field-options &match assoc :temporal-unit :default)
[:absolute-datetime t _unit]
[:absolute-datetime t :default])) | |
Get a clause representing the lower bound that should be used when converting a | (defmulti ^:private temporal-value-lower-bound
{:arglists '([temporal-value-clause temporal-unit])}
mbql.u/dispatch-by-clause-name-or-class) |
Get a clause representing the upper bound that should be used when converting a | (defmulti ^:private temporal-value-upper-bound
{:arglists '([temporal-value-clause temporal-unit])}
mbql.u/dispatch-by-clause-name-or-class) |
(defmethod temporal-value-lower-bound :default [_temporal-value-clause _temporal-unit] nil) | |
(defmethod temporal-value-upper-bound :default [_temporal-value-clause _temporal-unit] nil) | |
(mu/defn- target-unit-for-new-bound :- [:maybe (into [:enum] u.date/add-units)]
[value-unit :- [:maybe :keyword]
field-unit :- [:maybe :keyword]]
(or (when (and value-unit
(not= value-unit :default))
value-unit)
(when (and field-unit
(not= field-unit :default))
field-unit))) | |
(mu/defmethod temporal-value-lower-bound :absolute-datetime :- mbql.s/absolute-datetime
[[_ t unit] temporal-unit]
(let [target-unit (target-unit-for-new-bound unit temporal-unit)]
[:absolute-datetime (temporal-literal-lower-bound target-unit t) :default])) | |
(mu/defmethod temporal-value-upper-bound :absolute-datetime :- mbql.s/absolute-datetime
[[_ t unit] temporal-unit]
(let [target-unit (target-unit-for-new-bound unit temporal-unit)]
[:absolute-datetime (temporal-literal-upper-bound target-unit t) :default])) | |
(mu/defmethod temporal-value-lower-bound :relative-datetime :- [:maybe mbql.s/relative-datetime]
[[_ n unit] temporal-unit]
(when-not (= temporal-unit :default)
(let [target-unit (target-unit-for-new-bound unit temporal-unit)]
[:relative-datetime (if (= n :current) 0 n) target-unit]))) | |
(mu/defmethod temporal-value-upper-bound :relative-datetime :- [:maybe mbql.s/relative-datetime]
[[_ n unit] temporal-unit]
(when-not (= temporal-unit :default)
(let [target-unit (target-unit-for-new-bound unit temporal-unit)]
[:relative-datetime (inc (if (= n :current) 0 n)) target-unit]))) | |
(defn- date-field-with-day-bucketing? [x]
(and (isa? (field-or-expression-effective-type x) :type/Date)
(= (temporal-unit x) :day))) | |
Optimize a filter clause against a temporal-bucketed | (defmulti ^:private optimize-filter
{:arglists '([clause])}
mbql.u/dispatch-by-clause-name-or-class) |
(defmethod optimize-filter :=
[[_tag field temporal-value]]
(if (date-field-with-day-bucketing? field)
[:= (change-temporal-unit-to-default field) (change-temporal-unit-to-default temporal-value)]
(let [temporal-unit (lib.util.match/match-one field
[(_ :guard #{:field :expression}) _ (opts :guard :temporal-unit)]
(:temporal-unit opts))]
(when (field-and-temporal-value-have-compatible-units? field temporal-value)
(when-let [lower-bound (temporal-value-lower-bound temporal-value temporal-unit)]
(when-let [upper-bound (temporal-value-upper-bound temporal-value temporal-unit)]
(let [field' (change-temporal-unit-to-default field)]
[:and
[:>= field' lower-bound]
[:< field' upper-bound]]))))))) | |
(defmethod optimize-filter :!=
[[_tag field temporal-value :as filter-clause]]
(if (date-field-with-day-bucketing? field)
[:!= (change-temporal-unit-to-default field) (change-temporal-unit-to-default temporal-value)]
(when-let [optimized ((get-method optimize-filter :=) filter-clause)]
(mbql.u/negate-filter-clause optimized)))) | |
(defn- optimize-comparison-filter
[optimize-temporal-value-fn [tag field temporal-value] new-filter-type]
(if (date-field-with-day-bucketing? field)
[tag (change-temporal-unit-to-default field) (change-temporal-unit-to-default temporal-value)]
(when-let [new-bound (optimize-temporal-value-fn temporal-value (temporal-unit field))]
[new-filter-type
(change-temporal-unit-to-default field)
new-bound]))) | |
(defmethod optimize-filter :< [filter-clause] (optimize-comparison-filter temporal-value-lower-bound filter-clause :<)) | |
(defmethod optimize-filter :<= [filter-clause] (optimize-comparison-filter temporal-value-upper-bound filter-clause :<)) | |
(defmethod optimize-filter :> [filter-clause] (optimize-comparison-filter temporal-value-upper-bound filter-clause :>=)) | |
(defmethod optimize-filter :>= [filter-clause] (optimize-comparison-filter temporal-value-lower-bound filter-clause :>=)) | |
(defmethod optimize-filter :between
[[_tag field lower-bound upper-bound]]
(if (date-field-with-day-bucketing? field)
[:between
(change-temporal-unit-to-default field)
(change-temporal-unit-to-default lower-bound)
(change-temporal-unit-to-default upper-bound)]
(when-let [new-lower-bound (temporal-value-lower-bound lower-bound (temporal-unit field))]
(when-let [new-upper-bound (temporal-value-upper-bound upper-bound (temporal-unit field))]
(let [field' (change-temporal-unit-to-default field)]
[:and
[:>= field' new-lower-bound]
[:< field' new-upper-bound]]))))) | |
(defn- optimize-temporal-filters* [query]
(lib.util.match/replace query
(_ :guard (partial mbql.u/is-clause? (set (keys (methods optimize-filter)))))
(or (when (can-optimize-filter? &match)
(u/prog1 (optimize-filter &match)
(if <>
(when-not (= &match <>)
(log/tracef "Optimized filter %s to %s" (pr-str &match) (pr-str <>)))
;; if for some reason `optimize-filter` doesn't return an optimized filter clause, log and error and use
;; the original. `can-optimize-filter?` shouldn't have said we could optimize this filter in the first
;; place
(log/error "Error optimizing temporal filter clause" (pr-str &match)))))
&match))) | |
Middlware that optimizes equality ( [:= [:field 1 {:temporal-unit :month}] [:absolute-datetime #t "2019-09-01" :month]] -> [:and [:>= [:field 1 {:temporal-unit :default}] [:absolute-datetime #t "2019-09-01" :month]] [:< [:field 1 {:temporal-unit :default}] [:absolute-datetime #t "2019-10-01" :month]]] The equivalent SQL, before and after, looks like: -- before SELECT ... WHERE datetrunc('month', myfield) = date_trunc('month', timestamp '2019-09-01 00:00:00') -- after SELECT ... WHERE myfield >= timestamp '2019-09-01 00:00:00' AND myfield < timestamp '2019-10-01 00:00:00' The idea here is that by avoiding casts/extraction/truncation operations, databases will be able to make better use of indexes on these columns. This namespace expects to run after the | (defn optimize-temporal-filters
[{query-type :type, :as query}]
(if (not= query-type :query)
query
;; walk query, looking for inner-query forms that have a `:filter` key
(walk/postwalk
(fn [form]
(if-not (and (map? form) (seq (:filter form)))
form
;; optimize the filters in this inner-query form.
(let [optimized (optimize-temporal-filters* form)]
;; if we did some optimizations, we should flatten/deduplicate the filter clauses afterwards.
(cond-> optimized
(not= optimized form) (update :filter mbql.u/combine-filter-clauses)))))
query))) |