(ns metabase.lib.binning.util (:require [clojure.math :as math] [metabase.lib.metadata :as lib.metadata] [metabase.lib.schema.binning :as lib.schema.binning] [metabase.lib.schema.metadata :as lib.schema.metadata] [metabase.lib.types.isa :as lib.types.isa] [metabase.util :as u] [metabase.util.malli :as mu])) | |
(mu/defn- calculate-bin-width :- ::lib.schema.binning/bin-width
  "Width of a single bin when the interval [`min-value`, `max-value`] is split into `num-bins` equal parts. The
  quotient is passed through `u/round-to-decimals` with 5 decimals."
  [min-value :- number?
   max-value :- number?
   num-bins  :- ::lib.schema.binning/num-bins]
  (let [span (- max-value min-value)]
    (->> (/ span num-bins)
         (u/round-to-decimals 5))))
(mu/defn- calculate-num-bins :- ::lib.schema.binning/num-bins
  "How many bins of width `bin-width` it takes to cover the interval [`min-value`, `max-value`]. The quotient is
  rounded up, and the result is never less than 1."
  [min-value :- number?
   max-value :- number?
   bin-width :- ::lib.schema.binning/bin-width]
  (let [span      (- max-value min-value)
        bin-count (-> (/ span bin-width)
                      math/ceil
                      long)]
    ;; a zero-length (or inverted) interval still gets a single bin
    (max bin-count 1)))
(def ^:private ResolvedStrategy
  "Schema for a resolved binning strategy: a pair of the strategy keyword (`:bin-width` or `:num-bins`) and a map
  holding both the `:bin-width` and the `:num-bins` to use."
  [:tuple
   [:enum :bin-width :num-bins]
   [:map
    [:bin-width ::lib.schema.binning/bin-width]
    [:num-bins  ::lib.schema.binning/num-bins]]])
(mu/defn- resolve-default-strategy :- ResolvedStrategy
  "Pick the concrete strategy and options to use when the `:default` strategy was requested.

  Columns for which `lib.types.isa/coordinate?` is truthy are binned by width, using the `:breakout-bin-width`
  setting; every other column is binned by count, using the `:breakout-bins-num` setting. In both cases the
  complementary option is derived from [`min-value`, `max-value`]."
  [metadata-providerable :- ::lib.schema.metadata/metadata-providerable
   column                :- ::lib.schema.metadata/column
   min-value             :- number?
   max-value             :- number?]
  (if-not (lib.types.isa/coordinate? column)
    ;; non-coordinate column: fixed number of bins, derived width
    (let [default-bin-count (lib.metadata/setting metadata-providerable :breakout-bins-num)]
      [:num-bins
       {:num-bins  default-bin-count
        :bin-width (calculate-bin-width min-value max-value default-bin-count)}])
    ;; coordinate column: fixed width, derived number of bins
    (let [default-width (lib.metadata/setting metadata-providerable :breakout-bin-width)]
      [:bin-width
       {:bin-width default-width
        :num-bins  (calculate-num-bins min-value max-value default-width)}])))
;;; ------------------------------------- Humanized binning with nicer-breakout --------------------------------------
(defn- ceil-to
  "Round `x` up to a multiple of `precision`: divide by `precision`, take the ceiling, and scale back."
  [precision x]
  (let [steps (math/ceil (/ x precision))]
    (* steps precision)))
(defn- floor-to
  "Round `x` down to a multiple of `precision`: divide by `precision`, take the floor, and scale back."
  [precision x]
  (let [steps (math/floor (/ x precision))]
    (* steps precision)))
(def ^:private pleasing-numbers
  "Ascending multipliers tried, in order, when looking for a bin width; see [[nicer-bin-width]]."
  [1 1.25 2 2.5 3 5 7.5 10])
(mu/defn nicer-bin-width :- ::lib.schema.binning/bin-width
  "Choose a 'pleasing' bin width for `:num-bins` binning over [`min-value`, `max-value`] (typically taken from a
  column's fingerprint), e.g. 20 instead of 15.01.

  The exact width from [[calculate-bin-width]] is the lower bound. Candidates are the entries of
  `pleasing-numbers`, each multiplied by 10 raised to `u/order-of-magnitude` of that exact width; the first
  candidate that is at least the exact width is returned."
  [min-value :- number?
   max-value :- number?
   num-bins  :- ::lib.schema.binning/num-bins]
  (let [exact-width (calculate-bin-width min-value max-value num-bins)
        magnitude   (math/pow 10 (u/order-of-magnitude exact-width))]
    (->> pleasing-numbers
         (map #(* % magnitude))
         (filter #(>= % exact-width))
         first)))
(mu/defn- nicer-bounds :- [:tuple number? number?]
  "Widen [`min-value`, `max-value`] so both ends fall on multiples of `bin-width`: the lower bound is floored with
  [[floor-to]] and the upper bound is ceiled with [[ceil-to]]. Returns `[lower upper]`."
  [min-value :- number?
   max-value :- number?
   bin-width :- ::lib.schema.binning/bin-width]
  (let [lower (floor-to bin-width min-value)
        upper (ceil-to bin-width max-value)]
    [lower upper]))
;; Upper limit on how many applications of `f` [[fixed-point]] will compare before giving up.
(def ^:private ^:const max-steps 10)

(defn- fixed-point
  "Return a function that, given `x`, applies `f` repeatedly until a value equals its own image under `f`, and
  returns that value. At most `max-steps` consecutive pairs are compared; if none of them is equal the returned
  function yields nil."
  [f]
  (fn [x]
    (loop [current   x
           remaining max-steps]
      (when (pos? remaining)
        (let [successor (f current)]
          (if (= current successor)
            current
            (recur successor (dec remaining))))))))
(mu/defn- nicer-breakout* :- :map
  "One humanizing pass over binning options: stretch the interval so it starts and ends on a \"nice\" number and,
  for the `:num-bins` strategy, replace the step (bin width) with a \"nice\" one.

  For `:num-bins` the supplied `num-bins` is kept and the width comes from [[nicer-bin-width]]. For any other
  strategy the supplied `bin-width` is kept and the number of bins is recomputed over the widened interval."
  [strategy :- ::lib.schema.binning/strategy
   {:keys [min-value max-value bin-width num-bins]}
   :- [:map
       [:min-value number?]
       [:max-value number?]
       [:bin-width {:optional true} ::lib.schema.binning/bin-width]
       [:num-bins  {:optional true} ::lib.schema.binning/num-bins]]]
  (let [fixed-count? (= strategy :num-bins)
        width        (if fixed-count?
                       (nicer-bin-width min-value max-value num-bins)
                       bin-width)
        ;; bounds snapped outwards to multiples of the chosen width
        [lower upper] (nicer-bounds min-value max-value width)
        bin-count    (if fixed-count?
                       num-bins
                       (calculate-num-bins lower upper width))]
    {:min-value lower
     :max-value upper
     :num-bins  bin-count
     :bin-width width}))
(mu/defn nicer-breakout :- [:maybe :map]
  "Humanize the binning options `opts` for `strategy` by applying [[nicer-breakout*]] repeatedly until the result
  stops changing. Returns the stabilized options map, or nil when [[fixed-point]] does not find a stable value
  within its step limit.

  For how this is used, refer to [[metabase.query-processor.middleware.binning/update-binned-field]]."
  [strategy :- ::lib.schema.binning/strategy
   opts     :- :map]
  (let [humanize-once (fn [current-opts]
                        (nicer-breakout* strategy current-opts))
        humanize      (fixed-point humanize-once)]
    (humanize opts)))
(mu/defn resolve-options :- ResolvedStrategy
  "Resolve any binning `strategy` to a `[strategy options]` pair in which `options` carries both `:bin-width` and
  `:num-bins`, computed over [`min-value`, `max-value`].

  - `:default` defers to [[resolve-default-strategy]], which picks the strategy from `column` and settings.
  - `:bin-width` treats `strategy-param` as the width and derives the number of bins.
  - `:num-bins` treats `strategy-param` as the number of bins and derives the width."
  [metadata-providerable :- ::lib.schema.metadata/metadata-providerable
   strategy              :- ::lib.schema.binning/strategy
   strategy-param        :- [:maybe number?]
   column                :- ::lib.schema.metadata/column
   min-value             :- number?
   max-value             :- number?]
  (case strategy
    :default
    (resolve-default-strategy metadata-providerable column min-value max-value)

    :bin-width
    (let [derived-count (calculate-num-bins min-value max-value strategy-param)]
      [:bin-width {:bin-width strategy-param
                   :num-bins  derived-count}])

    :num-bins
    (let [derived-width (calculate-bin-width min-value max-value strategy-param)]
      [:num-bins {:num-bins  strategy-param
                  :bin-width derived-width}])))