Related entities recommendations. | (ns metabase.xrays.related (:require [clojure.set :as set] [medley.core :as m] [metabase.api.common :as api] [metabase.legacy-mbql.normalize :as mbql.normalize] [metabase.legacy-mbql.util :as mbql.u] [metabase.models.interface :as mi] [metabase.query-processor.util :as qp.util] [metabase.util.malli.registry :as mr] [toucan2.core :as t2])) |
(def ^:private ^Long max-best-matches 3)
(def ^:private ^Long max-serendipity-matches 2)
(def ^:private ^Long max-matches (+ max-best-matches
max-serendipity-matches)) | |
(def ^:private ContextBearingForm
[:cat
[:and
[:or :string :keyword]
[:fn
{:error/message "field, metric, or segment"}
(comp #{:field :metric :segment}
qp.util/normalize-token)]]
[:* :any]]) | |
(defn- strip-idents [clause]
(cond-> clause
(mbql.u/is-clause? :field clause) (update 2 dissoc :ident))) | |
(defn- collect-context-bearing-forms
[form]
(let [form (mbql.normalize/normalize-fragment [:query :filter] form)]
(into #{}
(comp (filter (mr/validator ContextBearingForm))
(map #(update % 0 qp.util/normalize-token))
(map strip-idents))
(tree-seq sequential? identity form)))) | |
Return the relevant parts of a given entity's definition. Relevant parts are those that carry semantic meaning, and especially context-bearing forms. | (defmulti definition
{:arglists '([instance])}
mi/model) |
(defmethod definition :model/LegacyMetric [metric] (-> metric :definition ((juxt :aggregation :filter)))) | |
(defmethod definition :model/Card
[card]
(-> card
:dataset_query
:query
((juxt :breakout :aggregation :expressions :fields)))) | |
(defmethod definition :model/Segment [segment] (-> segment :definition :filter)) | |
(defmethod definition :model/Field [field] [[:field-id (:id field)]]) | |
How similar are entities | (defn- similarity
[a b]
(let [context-a (-> a definition collect-context-bearing-forms)
context-b (-> b definition collect-context-bearing-forms)
overlap (set/intersection context-a context-b)
min-overlap (min (count context-a) (count context-b))]
(/ (count overlap)
(max min-overlap 1)))) |
(defn- rank-by-similarity
[reference entities]
(->> entities
(remove #{reference})
(map #(assoc % :similarity (similarity reference %)))
(sort-by :similarity >))) | |
Create an interesting mix of matches. The idea is to have a balanced mix between close (best) matches and more diverse matches to cover a wider field of intents. | (defn- interesting-mix
[matches]
(let [[best rest] (split-at max-best-matches matches)]
(concat best (->> rest shuffle (take max-serendipity-matches))))) |
(def ^:private ^{:arglists '([instances])} filter-visible
(partial filter (fn [{:keys [archived visibility_type active] :as instance}]
(and (some? instance)
(or (nil? visibility_type)
(= (qp.util/normalize-token visibility_type) :normal))
(not archived)
(not= active false)
(mi/can-read? instance))))) | |
(defn- metrics-for-table
[table]
(filter-visible (t2/select :model/Card
:table_id (:id table)
:type :metric
:archived false))) | |
(defn- legacy-metrics-for-table
[table]
(filter-visible (t2/select :model/LegacyMetric
:table_id (:id table)
:archived false))) | |
(defn- segments-for-table
[table]
(filter-visible (t2/select :model/Segment
:table_id (:id table)
:archived false))) | |
(defn- linking-to
[table]
(->> (t2/select-fn-set :fk_target_field_id :model/Field
:table_id (:id table)
:fk_target_field_id [:not= nil]
:active true)
(map (comp (partial t2/select-one :model/Table :id)
:table_id
(partial t2/select-one :model/Field :id)))
distinct
filter-visible
(take max-matches))) | |
(defn- linked-from
[table]
(if-let [fields (not-empty (t2/select-fn-set :id :model/Field
:table_id (:id table)
:active true))]
(->> (t2/select-fn-set :table_id :model/Field
:fk_target_field_id [:in fields]
:active true)
(map (partial t2/select-one :model/Table :id))
filter-visible
(take max-matches))
[])) | |
(defn- cards-sharing-dashboard
[card]
(if-let [dashboards (not-empty (t2/select-fn-set :dashboard_id :model/DashboardCard
:card_id (:id card)))]
(->> (t2/select-fn-set :card_id :model/DashboardCard
:dashboard_id [:in dashboards]
:card_id [:not= (:id card)])
(map (partial t2/select-one :model/Card :id))
filter-visible
(take max-matches))
[])) | |
(defn- similar-questions
[card]
(->> (t2/select :model/Card
:table_id (:table_id card)
:type [:in [:model :question]]
:archived false)
filter-visible
(rank-by-similarity card)
(filter (comp pos? :similarity)))) | |
(defn- similar-metrics
[card]
(->> (t2/select :model/Card
:table_id (:table_id card)
:type :metric
:archived false)
filter-visible
(rank-by-similarity card)
(filter (comp pos? :similarity)))) | |
(defn- recently-modified-dashboards
[]
(when-let [dashboard-ids (not-empty (t2/select-fn-set :model_id :model/Revision
:model "Dashboard"
:user_id api/*current-user-id*
{:order-by [[:timestamp :desc]]}))]
(->> (t2/select :model/Dashboard :id [:in dashboard-ids])
filter-visible
(take max-serendipity-matches)))) | |
(defn- recommended-dashboards
[cards]
(let [recent (recently-modified-dashboards)
card-id->dashboard-cards (->> (apply t2/select [:model/DashboardCard :card_id :dashboard_id]
(cond-> []
(seq cards)
(concat [:card_id [:in (map :id cards)]])
(seq recent)
(concat [:dashboard_id [:not-in (map :id recent)]])))
(group-by :card_id))
dashboard-ids (->> (map :id cards)
(mapcat card-id->dashboard-cards)
(map :dashboard_id)
distinct)
best (when (seq dashboard-ids)
(->> (t2/select :model/Dashboard :id [:in dashboard-ids])
filter-visible
(take max-best-matches)))]
(concat best recent))) | |
(defn- recommended-collections
[cards]
(->> cards
(m/distinct-by :collection_id)
interesting-mix
(keep (comp (partial t2/select-one :model/Collection :id) :collection_id))
filter-visible)) | |
Return related entities. | (defmulti related
{:arglists '([entity])}
mi/model) |
(defmethod related :model/Card
[card]
(let [table (t2/select-one :model/Table :id (:table_id card))
similar-questions (similar-questions card)
similar-metrics (similar-metrics card)]
{:table table
:metrics (interesting-mix similar-metrics)
:segments (->> table
segments-for-table
(rank-by-similarity card)
interesting-mix)
:dashboard-mates (cards-sharing-dashboard card)
:dashboards (recommended-dashboards similar-questions)
:collections (recommended-collections similar-questions)})) | |
(defmethod related :model/Query [query] (related (mi/instance :model/Card query))) | |
(defmethod related :model/LegacyMetric
[metric]
(let [table (t2/select-one :model/Table :id (:table_id metric))]
{:table table
:metrics (->> table
legacy-metrics-for-table
(rank-by-similarity metric)
interesting-mix)
:segments (->> table
segments-for-table
(rank-by-similarity metric)
interesting-mix)})) | |
(defmethod related :model/Segment
[segment]
(let [table (t2/select-one :model/Table :id (:table_id segment))]
{:table table
:metrics (metrics-for-table table)
:segments (->> table
segments-for-table
(rank-by-similarity segment)
interesting-mix)
:linked-from (linked-from table)})) | |
(defmethod related :model/Table
[table]
(let [linking-to (linking-to table)
linked-from (linked-from table)]
{:segments (segments-for-table table)
:metrics (metrics-for-table table)
:linking-to linking-to
:linked-from linked-from
:tables (->> (t2/select :model/Table
:db_id (:db_id table)
:schema (:schema table)
:id [:not= (:id table)]
:visibility_type nil
:active true)
(remove (set (concat linking-to linked-from)))
filter-visible
interesting-mix)})) | |
(defmethod related :model/Field
[field]
(let [table (t2/select-one :model/Table :id (:table_id field))]
{:table table
:segments (->> table
segments-for-table
(rank-by-similarity field)
interesting-mix)
:metrics (->> table
metrics-for-table
(rank-by-similarity field)
(filter (comp pos? :similarity))
interesting-mix)
:fields (->> (t2/select :model/Field
:table_id (:id table)
:id [:not= (:id field)]
:visibility_type "normal"
:active true)
filter-visible
interesting-mix)})) | |
(defmethod related :model/Dashboard
[dashboard]
(let [cards (map (partial t2/select-one :model/Card :id) (t2/select-fn-set :card_id :model/DashboardCard
:dashboard_id (:id dashboard)))]
{:cards (->> cards
(mapcat similar-questions)
(remove (set cards))
distinct
filter-visible
interesting-mix)})) | |