Functions which summarize the usage of an instance | (ns metabase.analytics.stats
  (:require
   [clj-http.client :as http]
   [clojure.java.io :as io]
   [clojure.string :as str]
   [clojure.walk :as walk]
   [environ.core :as env]
   [java-time.api :as t]
   [medley.core :as m]
   [metabase.analytics.settings :as analytics.settings]
   [metabase.analytics.snowplow :as snowplow]
   [metabase.config :as config]
   [metabase.db :as db]
   [metabase.db.query :as mdb.query]
   [metabase.driver :as driver]
   [metabase.eid-translation :as eid-translation]
   [metabase.integrations.slack :as slack]
   [metabase.models.humanization :as humanization]
   [metabase.models.interface :as mi]
   [metabase.models.setting :as setting]
   [metabase.premium-features.core :as premium-features :refer [defenterprise]]
   [metabase.public-settings :as public-settings]
   [metabase.util :as u]
   [metabase.util.honey-sql-2 :as h2x]
   [metabase.util.json :as json]
   [metabase.util.log :as log]
   [metabase.util.malli :as mu]
   [toucan2.core :as t2])) |
(set! *warn-on-reflection* true) | |
Merge a sequence of count maps by summing their values. Truthy non-numeric values count as 1; falsy values count as 0. | (defn- merge-count-maps
[ms]
(reduce (partial merge-with +)
{}
(for [m ms]
(m/map-vals #(cond
(number? %) %
% 1
:else 0)
m)))) |
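A quick sketch (hypothetical inputs) of how merge-count-maps sums numeric values and counts truthy flags as 1: | (comment
  (merge-count-maps [{:total 1 :active true}
                     {:total 1 :active false}
                     {:total 1 :active true}])
  ;; -> {:total 3, :active 2}
  ) |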
(def ^:private ^String metabase-usage-url "https://xuq0fbkk0j.execute-api.us-east-1.amazonaws.com/prod") | |
Return the small-granularity bin label for x. Assumes non-negative inputs. | (defn- bin-small-number
[x]
(cond
(= 0 x) "0"
(<= 1 x 5) "1-5"
(<= 6 x 10) "6-10"
(<= 11 x 25) "11-25"
(> x 25) "25+")) |
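Illustrative bin labels for a few made-up inputs: | (comment
  (map bin-small-number [0 3 8 17 99])
  ;; -> ("0" "1-5" "6-10" "11-25" "25+")
  ) |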
Return the medium-granularity bin label for x. Assumes non-negative inputs. | (defn- bin-medium-number
[x]
(cond
(= 0 x) "0"
(<= 1 x 5) "1-5"
(<= 6 x 10) "6-10"
(<= 11 x 25) "11-25"
(<= 26 x 50) "26-50"
(<= 51 x 100) "51-100"
(<= 101 x 250) "101-250"
(> x 250) "250+")) |
Go through a sequence of maps and count how often each distinct value of a given key occurs. | (defn- value-frequencies [many-maps k] (frequencies (map k many-maps))) |
Bin some frequencies using a passed-in binning-fn.

    ;; Generate a histogram for the values of :a across a sequence of maps
    ;; (or, if you already have the counts)
    (histogram bin-micro-number [3 1 1])
    ;; -> {"3+" 1, "1" 2} | (defn- histogram
  ([binning-fn counts] (frequencies (map binning-fn counts)))
  ([binning-fn many-maps k] (histogram binning-fn (vals (value-frequencies many-maps k))))) |
Return a histogram for medium numbers. | (def ^:private medium-histogram (partial histogram bin-medium-number)) |
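A sketch of both ways the histogram helpers get used, with made-up data: | (comment
  ;; from pre-computed counts
  (medium-histogram [3 30 300])
  ;; -> {"1-5" 1, "26-50" 1, "250+" 1}

  ;; from maps, binning how many times each :dashboard_id occurs
  (medium-histogram [{:dashboard_id 1} {:dashboard_id 1} {:dashboard_id 2}] :dashboard_id)
  ;; -> {"1-5" 2}
  ) |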
Figure out what we're running under | (defn environment-type
[]
(cond
(config/config-str :rds-hostname) :elastic-beanstalk
(config/config-str :database-url) :heroku ;; Putting this last as 'database-url' seems least specific
:else :unknown)) |
(def ^:private ui-colors #{:brand :filter :summarize}) | |
Returns true if the 'User Interface Colors' have been customized | (defn- appearance-ui-colors-changed? [] (boolean (seq (select-keys (public-settings/application-colors) ui-colors)))) |
Returns true if the 'Chart Colors' have been customized | (defn- appearance-chart-colors-changed? [] (boolean (seq (apply dissoc (public-settings/application-colors) ui-colors)))) |
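A minimal sketch of how the ui-colors set splits a hypothetical application-colors map into UI vs. chart customizations (assumes the setting holds only overridden colors, its default being an empty map): | (comment
  (let [colors {:brand "#509EE3" :accent1 "#88BF4D"}]
    {:ui-changed?    (boolean (seq (select-keys colors ui-colors)))
     :chart-changed? (boolean (seq (apply dissoc colors ui-colors)))})
  ;; -> {:ui-changed? true, :chart-changed? true}
  ) |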
Figure out global info about this instance | (defn- instance-settings
[]
{:version (config/mb-version-info :tag)
:running_on (environment-type)
:startup_time_millis (int (public-settings/startup-time-millis))
:application_database (config/config-str :mb-db-type)
:check_for_updates (public-settings/check-for-updates)
:report_timezone (driver/report-timezone)
;; We deprecated advanced humanization but have this here anyways
:friendly_names (= (humanization/humanization-strategy) "advanced")
:email_configured (setting/get :email-configured?)
:slack_configured (slack/slack-configured?)
:sso_configured (setting/get :google-auth-enabled)
:instance_started (snowplow/instance-creation)
:has_sample_data (t2/exists? :model/Database, :is_sample true)
:enable_embedding #_{:clj-kondo/ignore [:deprecated-var]} (setting/get :enable-embedding)
:enable_embedding_sdk (setting/get :enable-embedding-sdk)
:enable_embedding_interactive (setting/get :enable-embedding-interactive)
:enable_embedding_static (setting/get :enable-embedding-static)
:embedding_app_origin_set (boolean
#_{:clj-kondo/ignore [:deprecated-var]}
(setting/get :embedding-app-origin))
:embedding_app_origin_sdk_set (boolean (let [sdk-origins (setting/get :embedding-app-origins-sdk)]
(and sdk-origins (not= "localhost:*" sdk-origins))))
:embedding_app_origin_interactive_set (setting/get :embedding-app-origins-interactive)
:appearance_site_name (not= (public-settings/site-name) "Metabase")
:appearance_help_link (public-settings/help-link)
:appearance_logo (not= (public-settings/application-logo-url) "app/assets/img/logo.svg")
:appearance_favicon (not= (public-settings/application-favicon-url) "app/assets/img/favicon.ico")
:appearance_loading_message (not= (public-settings/loading-message) :doing-science)
:appearance_metabot_greeting (not (public-settings/show-metabot))
:appearance_login_page_illustration (public-settings/login-page-illustration)
:appearance_landing_page_illustration (public-settings/landing-page-illustration)
:appearance_no_data_illustration (public-settings/no-data-illustration)
:appearance_no_object_illustration (public-settings/no-object-illustration)
:appearance_ui_colors (appearance-ui-colors-changed?)
:appearance_chart_colors (appearance-chart-colors-changed?)
:appearance_show_mb_links (not (public-settings/show-metabase-links))}) |
Get metrics based on user records. TODO: get activity in terms of created questions, pulses and dashboards | (defn- user-metrics
[]
{:users (merge-count-maps (for [user (t2/select [:model/User :is_active :is_superuser :last_login :sso_source]
:type :personal)]
{:total 1
:active (:is_active user)
:admin (:is_superuser user)
:logged_in (:last_login user)
:sso (= :google (:sso_source user))}))}) |
Get metrics based on groups. TODO: characterize by # w/ SQL access, # of users, no self-serve data access | (defn- group-metrics
[]
{:groups (t2/count :model/PermissionsGroup)}) |
(defn- card-has-params? [card] (boolean (get-in card [:dataset_query :native :template-tags]))) | |
Get metrics based on questions. TODO: characterize by # of executions and avg latency | (defn- question-metrics
[]
(let [cards (t2/select [:model/Card :query_type :public_uuid :enable_embedding :embedding_params :dataset_query
:dashboard_id :entity_id :created_at :collection_id :name]
{:where (mi/exclude-internal-content-hsql :model/Card)})]
{:questions (merge-count-maps (for [card cards]
(let [native? (= (keyword (:query_type card)) :native)
dq? (some? (:dashboard_id card))]
{:total 1
:native native?
:gui (not native?)
:is_dashboard_question dq?
:with_params (card-has-params? card)})))
:public (merge-count-maps (for [card cards
:when (:public_uuid card)]
{:total 1
:with_params (card-has-params? card)}))
:embedded (merge-count-maps (for [card cards
:when (:enable_embedding card)]
(let [embedding-params-vals (set (vals (:embedding_params card)))]
{:total 1
:with_params (card-has-params? card)
:with_enabled_params (contains? embedding-params-vals "enabled")
:with_locked_params (contains? embedding-params-vals "locked")
:with_disabled_params (contains? embedding-params-vals "disabled")})))})) |
Get metrics based on dashboards. TODO: characterize by # of revisions, and whether created by an admin | (defn- dashboard-metrics
[]
(let [dashboards (t2/select [:model/Dashboard :creator_id :public_uuid :parameters :enable_embedding :embedding_params]
{:where (mi/exclude-internal-content-hsql :model/Dashboard)})
dashcards (t2/query {:select :dc.*
:from [[(t2/table-name :model/DashboardCard) :dc]]
:join [[(t2/table-name :model/Dashboard) :d] [:= :d.id :dc.dashboard_id]]
:where (mi/exclude-internal-content-hsql :model/Dashboard :table-alias :d)})]
{:dashboards (count dashboards)
:with_params (count (filter (comp seq :parameters) dashboards))
:num_dashs_per_user (medium-histogram dashboards :creator_id)
:num_cards_per_dash (medium-histogram dashcards :dashboard_id)
:num_dashs_per_card (medium-histogram dashcards :card_id)
:public (merge-count-maps (for [dash dashboards
:when (:public_uuid dash)]
{:total 1
:with_params (seq (:parameters dash))}))
:embedded (merge-count-maps (for [dash dashboards
:when (:enable_embedding dash)]
(let [embedding-params-vals (set (vals (:embedding_params dash)))]
{:total 1
:with_params (seq (:parameters dash))
:with_enabled_params (contains? embedding-params-vals "enabled")
:with_locked_params (contains? embedding-params-vals "locked")
:with_disabled_params (contains? embedding-params-vals "disabled")})))})) |
Fetch the frequencies of a given column with a plain SQL COUNT(*) ... GROUP BY query.

    (db-frequencies Database :engine)
    ;; -> {"h2" 2, "postgres" 1, ...}

    ;; include WHERE conditions or other arbitrary HoneySQL via the optional third argument

    ;; Generate a histogram:
    (micro-histogram (vals (db-frequencies Database :engine)))
    ;; -> {"2" 1, "1" 1, ...}

    ;; Include WHERE conditions here too by passing them as the optional third argument. | (defn- db-frequencies
  [model column & [additional-honeysql]]
(into {} (for [{:keys [k count]} (t2/select [model [column :k] [:%count.* :count]]
(merge {:group-by [column]}
additional-honeysql))]
[k count]))) |
Return the number of Notifications that satisfy where-conditions and that have at least one PulseCard with include_csv or include_xls set.

    ;; Pulses only (filter out Alerts)
    (num-notifications-with-xls-or-csv-cards [:= :alert_condition nil]) | (defn- num-notifications-with-xls-or-csv-cards
[& where-conditions]
(-> (mdb.query/query {:select [[[::h2x/distinct-count :pulse.id] :count]]
:from [:pulse]
:left-join [:pulse_card [:= :pulse.id :pulse_card.pulse_id]]
:where (into
[:and
[:or
[:= :pulse_card.include_csv true]
[:= :pulse_card.include_xls true]]]
where-conditions)})
first
:count)) |
Get metrics based on pulses. TODO: characterize by non-user account emails, # of emails | (defn- pulse-metrics
[]
(let [pulse-conditions {:left-join [:pulse [:= :pulse.id :pulse_id]], :where [:= :pulse.alert_condition nil]}]
{:pulses (t2/count :model/Pulse :alert_condition nil)
;; "Table Cards" are Cards that include a Table you can download
:with_table_cards (num-notifications-with-xls-or-csv-cards [:= :alert_condition nil])
:pulse_types (db-frequencies :model/PulseChannel :channel_type pulse-conditions)
:pulse_schedules (db-frequencies :model/PulseChannel :schedule_type pulse-conditions)
:num_pulses_per_user (medium-histogram (vals (db-frequencies :model/Pulse :creator_id (dissoc pulse-conditions :left-join))))
:num_pulses_per_card (medium-histogram (vals (db-frequencies :model/PulseCard :card_id pulse-conditions)))
:num_cards_per_pulses (medium-histogram (vals (db-frequencies :model/PulseCard :pulse_id pulse-conditions)))})) |
(defn- alert-metrics []
(let [alert-conditions {:left-join [:pulse [:= :pulse.id :pulse_id]], :where [:not= (mdb.query/qualify :model/Pulse :alert_condition) nil]}]
{:alerts (t2/count :model/Pulse :alert_condition [:not= nil])
:with_table_cards (num-notifications-with-xls-or-csv-cards [:not= :alert_condition nil])
:first_time_only (t2/count :model/Pulse :alert_condition [:not= nil], :alert_first_only true)
:above_goal (t2/count :model/Pulse :alert_condition [:not= nil], :alert_above_goal true)
:alert_types (db-frequencies :model/PulseChannel :channel_type alert-conditions)
:num_alerts_per_user (medium-histogram (vals (db-frequencies :model/Pulse :creator_id (dissoc alert-conditions :left-join))))
:num_alerts_per_card (medium-histogram (vals (db-frequencies :model/PulseCard :card_id alert-conditions)))
:num_cards_per_alerts (medium-histogram (vals (db-frequencies :model/PulseCard :pulse_id alert-conditions)))})) | |
Get metrics on Collection usage. | (defn- collection-metrics
[]
(let [collections (t2/select :model/Collection {:where (mi/exclude-internal-content-hsql :model/Collection)})
cards (t2/select [:model/Card :collection_id] {:where (mi/exclude-internal-content-hsql :model/Card)})]
{:collections (count collections)
:cards_in_collections (count (filter :collection_id cards))
:cards_not_in_collections (count (remove :collection_id cards))
:num_cards_per_collection (medium-histogram cards :collection_id)})) |
Metadata Metrics | |
Get metrics based on Databases. | (defn- database-metrics
[]
(let [databases (t2/select [:model/Database :is_full_sync :engine :dbms_version]
{:where (mi/exclude-internal-content-hsql :model/Database)})]
{:databases (merge-count-maps (for [{is-full-sync? :is_full_sync} databases]
{:total 1
:analyzed is-full-sync?}))
:dbms_versions (frequencies (map (fn [db]
(-> db
:dbms_version
(assoc :engine (:engine db))
json/encode))
databases))})) |
Get metrics based on Tables. | (defn- table-metrics
[]
(let [tables (t2/query {:select [:t.db_id :t.schema]
:from [[(t2/table-name :model/Table) :t]]
:join [[(t2/table-name :model/Database) :d] [:= :d.id :t.db_id]]
:where (mi/exclude-internal-content-hsql :model/Database :table-alias :d)})]
{:tables (count tables)
:num_per_database (medium-histogram tables :db_id)
:num_per_schema (medium-histogram tables :schema)})) |
Get metrics based on Fields. | (defn- field-metrics
[]
(let [fields (t2/query {:select [:f.table_id]
:from [[(t2/table-name :model/Field) :f]]
:join [[(t2/table-name :model/Table) :t] [:= :t.id :f.table_id]
[(t2/table-name :model/Database) :d] [:= :d.id :t.db_id]]
:where (mi/exclude-internal-content-hsql :model/Database :table-alias :d)})]
{:fields (count fields)
:num_per_table (medium-histogram fields :table_id)})) |
Get metrics based on Segments. | (defn- segment-metrics
[]
{:segments (t2/count :model/Segment)}) |
Get metrics based on Metrics. | (defn- metric-metrics
[]
{:metrics (t2/count :model/LegacyMetric)}) |
Execution Metrics | |
(defn- execution-metrics-sql []
;; Postgres automatically adjusts for daylight saving time when performing time calculations on TIMESTAMP WITH TIME
;; ZONE. This can cause discrepancies when subtracting 30 days if the calculation crosses a DST boundary (e.g., in the
;; Pacific/Auckland timezone). To avoid this, we ensure all date computations are done in UTC on Postgres to prevent
;; any time shifts due to DST. See PR #48204
(let [thirty-days-ago (case (db/db-type)
:postgres "CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '30 days'"
:h2 "DATEADD('DAY', -30, CURRENT_TIMESTAMP)"
:mysql "CURRENT_TIMESTAMP - INTERVAL 30 DAY")
started-at (case (db/db-type)
:postgres "started_at AT TIME ZONE 'UTC'"
:h2 "started_at"
:mysql "started_at")
timestamp-where (str started-at " > " thirty-days-ago)]
(str/join
"\n"
["WITH user_executions AS ("
" SELECT executor_id, COUNT(*) AS num_executions"
" FROM query_execution"
" WHERE " timestamp-where
" GROUP BY executor_id"
"),"
"query_stats_1 AS ("
" SELECT"
" COUNT(*) AS executions,"
" SUM(CASE WHEN error IS NULL OR length(error) = 0 THEN 1 ELSE 0 END) AS by_status__completed,"
" SUM(CASE WHEN error IS NOT NULL OR length(error) > 0 THEN 1 ELSE 0 END) AS by_status__failed,"
" COALESCE(SUM(CASE WHEN running_time = 0 THEN 1 ELSE 0 END), 0) AS num_by_latency__0,"
" COALESCE(SUM(CASE WHEN running_time > 0 AND running_time < 1000 THEN 1 ELSE 0 END), 0) AS num_by_latency__lt_1,"
" COALESCE(SUM(CASE WHEN running_time >= 1000 AND running_time < 10000 THEN 1 ELSE 0 END), 0) AS num_by_latency__1_10,"
" COALESCE(SUM(CASE WHEN running_time >= 10000 AND running_time < 50000 THEN 1 ELSE 0 END), 0) AS num_by_latency__11_50,"
" COALESCE(SUM(CASE WHEN running_time >= 50000 AND running_time < 250000 THEN 1 ELSE 0 END), 0) AS num_by_latency__51_250,"
" COALESCE(SUM(CASE WHEN running_time >= 250000 AND running_time < 1000000 THEN 1 ELSE 0 END), 0) AS num_by_latency__251_1000,"
" COALESCE(SUM(CASE WHEN running_time >= 1000000 AND running_time < 10000000 THEN 1 ELSE 0 END), 0) AS num_by_latency__1001_10000,"
" COALESCE(SUM(CASE WHEN running_time >= 10000000 THEN 1 ELSE 0 END), 0) AS num_by_latency__10000_plus"
" FROM query_execution"
" WHERE " timestamp-where
"),"
"query_stats_2 AS ("
" SELECT"
" COALESCE(SUM(CASE WHEN num_executions = 0 THEN 1 ELSE 0 END), 0) AS num_per_user__0,"
" COALESCE(SUM(CASE WHEN num_executions > 0 AND num_executions < 1 THEN 1 ELSE 0 END), 0) AS num_per_user__lt_1,"
" COALESCE(SUM(CASE WHEN num_executions >= 1 AND num_executions < 10 THEN 1 ELSE 0 END), 0) AS num_per_user__1_10,"
" COALESCE(SUM(CASE WHEN num_executions >= 10 AND num_executions < 50 THEN 1 ELSE 0 END), 0) AS num_per_user__11_50,"
" COALESCE(SUM(CASE WHEN num_executions >= 50 AND num_executions < 250 THEN 1 ELSE 0 END), 0) AS num_per_user__51_250,"
" COALESCE(SUM(CASE WHEN num_executions >= 250 AND num_executions < 1000 THEN 1 ELSE 0 END), 0) AS num_per_user__251_1000,"
" COALESCE(SUM(CASE WHEN num_executions >= 1000 AND num_executions < 10000 THEN 1 ELSE 0 END), 0) AS num_per_user__1001_10000,"
" COALESCE(SUM(CASE WHEN num_executions >= 10000 THEN 1 ELSE 0 END), 0) AS num_per_user__10000_plus"
" FROM user_executions"
")"
"SELECT q1.*, q2.* FROM query_stats_1 q1, query_stats_2 q2;"]))) | |
Get metrics based on QueryExecutions. | (defn- execution-metrics
[]
(let [maybe-rename-bin (fn [x]
({"lt_1" "< 1"
"1_10" "1-10"
"11_50" "11-50"
"51_250" "51-250"
"251_1000" "251-1000"
"1001_10000" "1001-10000"
"10000_plus" "10000+"} x x))
raw-results (-> (first (t2/query (execution-metrics-sql)))
;; cast numbers to int because some DBs output bigdecimals
(update-vals #(some-> % int)))]
(reduce (fn [acc [k v]]
(let [[prefix bin] (str/split (name k) #"__")]
(if bin
(cond-> acc
(and (some? v) (pos? v))
(update (keyword prefix) #(assoc % (maybe-rename-bin bin) v)))
(assoc acc (keyword prefix) v))))
{:executions 0
:by_status {}
:num_per_user {}
:num_by_latency {}}
raw-results))) |
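A sketch of how execution-metrics folds the prefix__bin column names from execution-metrics-sql into nested maps (hypothetical counts): | (comment
  ;; a raw result row such as
  ;;   {:executions 42, :by_status__completed 40, :by_status__failed 2,
  ;;    :num_by_latency__lt_1 39, :num_per_user__1_10 5, :num_per_user__0 0}
  ;; is reduced by splitting each key on "__", renaming the bin suffix, and
  ;; dropping zero-valued bins, yielding
  ;;   {:executions     42
  ;;    :by_status      {"completed" 40, "failed" 2}
  ;;    :num_by_latency {"< 1" 39}
  ;;    :num_per_user   {"1-10" 5}}
  ) |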
Cache Metrics | |
Metrics based on use of the QueryCache. | (defn- cache-metrics
[]
(let [{:keys [length count]} (t2/select-one [:model/QueryCache [[:avg [:length :results]] :length] [:%count.* :count]])]
{:average_entry_size (int (or length 0))
:num_queries_cached (bin-small-number count)
;; this value gets used in the snowplow ping 'metrics' section.
:num_queries_cached_unbinned count})) |
System Metrics | |
(defn- bytes->megabytes [b] (Math/round (double (/ b 1024 1024)))) | |
(def ^:private system-property-names ["java.version" "java.vm.specification.version" "java.runtime.name" "user.timezone" "user.language" "user.country" "file.encoding" "os.name" "os.version"]) | |
Metadata about the environment Metabase is running in | (defn- system-metrics
[]
(let [runtime (Runtime/getRuntime)]
(merge
{:max_memory (bytes->megabytes (.maxMemory runtime))
:processors (.availableProcessors runtime)}
(zipmap (map #(keyword (str/replace % \. \_)) system-property-names)
(map #(System/getProperty %) system-property-names))))) |
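A small sketch of how the Java system-property names become map keys (dots become underscores): | (comment
  (keyword (str/replace "java.vm.specification.version" \. \_))
  ;; -> :java_vm_specification_version
  ) |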
Combined Stats & Logic for sending them in | |
Generate a map of the usage stats for this instance | (defn legacy-anonymous-usage-stats
[]
(merge (instance-settings)
{:uuid (public-settings/site-uuid)
:timestamp (t/offset-date-time)
:stats {:cache (cache-metrics)
:collection (collection-metrics)
:dashboard (dashboard-metrics)
:database (database-metrics)
:execution (execution-metrics)
:field (field-metrics)
:group (group-metrics)
:metric (metric-metrics)
:pulse (pulse-metrics)
:alert (alert-metrics)
:question (question-metrics)
:segment (segment-metrics)
:system (system-metrics)
:table (table-metrics)
:user (user-metrics)}})) |
Send stats to Metabase tracking server. | (defn- ^:deprecated send-stats-deprecated!
[stats]
(try
(http/post metabase-usage-url {:form-params stats, :content-type :json, :throw-entire-message? true})
(catch Throwable e
(log/error e "Sending usage stats FAILED")))) |
Is the current Metabase process running in a Docker container? (Best-effort check based on the presence of /.dockerenv or a docker entry in /proc/self/cgroup.) | (defn- in-docker?
[]
(boolean
(or (.exists (io/file "/.dockerenv"))
(when (.exists (io/file "/proc/self/cgroup"))
(try
(some #(re-find #"docker" %)
(line-seq (io/reader "/proc/self/cgroup")))
(catch java.io.IOException _
false)))))) |
(defn- deployment-model
[]
(cond
(premium-features/is-hosted?) "cloud"
(in-docker?) "docker"
:else "jar")) | |
(def ^:private activation-days 3) | |
Returns a Boolean indicating whether the number of non-internal users created within activation-days of instance creation is greater than or equal to num-users. | (defn- sufficient-users?
[num-users]
(let [users-in-activation-period
(t2/count :model/User {:where [:and
[:<=
:date_joined
(t/plus (t/offset-date-time (setting/get :instance-creation))
(t/days activation-days))]
(mi/exclude-internal-content-hsql :model/User)]
:limit (inc num-users)})]
(>= users-in-activation-period num-users))) |
Returns a Boolean indicating whether the number of queries recorded over non-sample content is greater than or equal to num-queries. | (defn- sufficient-queries?
[num-queries]
(let [sample-db-id (t2/select-one-pk :model/Database :is_sample true)
;; QueryExecution can be large, so let's avoid counting everything
queries (t2/select-fn-set :id :model/QueryExecution
{:where [:or
[:not= :database_id sample-db-id]
[:= :database_id nil]]
:limit (inc num-queries)})]
(>= (count queries) num-queries))) |
If the current plan is Pro or Starter, returns a Boolean indicating whether the instance should be considered to have completed activation signals. Returns nil for non-Pro or Starter plans. | (defn- completed-activation-signals?
[]
(let [plan (premium-features/plan-alias)
pro? (when plan (str/starts-with? plan "pro"))
starter? (when plan (str/starts-with? plan "starter"))]
(cond
pro?
(or (sufficient-users? 4) (sufficient-queries? 201))
starter?
(or (sufficient-users? 2) (sufficient-queries? 101))
:else
nil))) |
Convert a map to a vector of key-value maps with keys 'key' and 'value' for each key-value pair in the map. | (defn m->kv-vec
[m]
(mapv (fn [[k v]] {"key" (name k) "value" v}) m)) |
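Hypothetical example of the key/value shape this produces for Snowplow: | (comment
  (m->kv-vec {:metabase_plan "oss" :startup_time_millis 1234})
  ;; -> [{"key" "metabase_plan", "value" "oss"}
  ;;     {"key" "startup_time_millis", "value" 1234}]
  ) |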
(defn- snowplow-instance-attributes
[stats]
(let [system-stats (-> stats :stats :system)
instance-attributes
(merge
(dissoc system-stats :user_language)
{:metabase_plan (premium-features/plan-alias)
:metabase_version (-> stats :version)
:language (-> system-stats :user_language)
:report_timezone (-> stats :report_timezone)
:deployment_model (deployment-model)
:startup_time_millis (-> stats :startup_time_millis)
:has_activation_signals_completed (completed-activation-signals?)})]
(m->kv-vec instance-attributes))) | |
(mu/defn- get-translation-count
  :- [:map [:ok :int] [:not-found :int] [:invalid-format :int] [:total :int]]
  "Get the entity-id translation counter, plus a :total across all statuses. This is meant to be called during the daily stats collection process; the counter is cleared separately by [[clear-translation-count!]]."
[]
(let [counter (setting/get-value-of-type :json :entity-id-translation-counter)]
(merge counter {:total (apply + (vals counter))}))) | |
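The counter shape this returns, sketched with made-up counts: | (comment
  (get-translation-count)
  ;; -> {:ok 12, :not-found 2, :invalid-format 1, :total 15}
  ) |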
We want to reset the eid translation count on every stat ping, so we do it here. | (mu/defn- clear-translation-count!
[]
(u/prog1 eid-translation/default-counter
(setting/set-value-of-type! :json :entity-id-translation-counter <>))) |
(defn- categorize-query-execution [{:keys [context embedding_client executor_id]}]
(cond
(= "embedding-sdk-react" embedding_client) "sdk_embed"
(and (= "embedding-iframe" embedding_client) (some? executor_id)) "interactive_embed"
(and (= "embedding-iframe" embedding_client) (nil? executor_id)) "static_embed"
(some-> context name (str/starts-with? "public-")) "public_link"
:else "internal")) | |
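How a few hypothetical QueryExecution rows would be categorized: | (comment
  (map categorize-query-execution
       [{:embedding_client "embedding-sdk-react"}
        {:embedding_client "embedding-iframe" :executor_id 1}
        {:embedding_client "embedding-iframe" :executor_id nil}
        {:context :public-question}
        {:context :ad-hoc}])
  ;; -> ("sdk_embed" "interactive_embed" "static_embed" "public_link" "internal")
  ) |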
(defn- ->one-day-ago [] (t/minus (t/offset-date-time) (t/days 1))) | |
(defn- ->snowplow-grouped-metric-info []
(let [qe (t2/select [:model/QueryExecution :embedding_client :context :executor_id :started_at])
one-day-ago (->one-day-ago)
;; reuse the query data:
qe-24h (filter (fn [{started-at :started_at}] (t/after? started-at one-day-ago)) qe)]
{:query-executions (merge
{"sdk_embed" 0 "interactive_embed" 0 "static_embed" 0 "public_link" 0 "internal" 0}
(-> (group-by categorize-query-execution qe)
(update-vals count)))
:query-executions-24h (merge
{"sdk_embed" 0 "interactive_embed" 0 "static_embed" 0 "public_link" 0 "internal" 0}
(-> (group-by categorize-query-execution qe-24h)
(update-vals count)))
:eid-translations-24h (get-translation-count)})) | |
Snowplow data will not work if you pass in keywords, but this lets us use keywords everywhere else. | (defn- deep-string-keywords [data] (walk/postwalk (fn [x] (if (keyword? x) (-> x u/->snake_case_en name) x)) data)) |
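A sketch of the keyword-to-snake_case-string conversion on nested data (illustrative input): | (comment
  (deep-string-keywords {:name :query-executions-by-source
                         :tags ["embedding"]})
  ;; -> {"name" "query_executions_by_source", "tags" ["embedding"]}
  ) |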
(mu/defn- snowplow-grouped-metrics
:- [:sequential
[:map
["name" :string]
["values" [:sequential [:map ["group" :string] ["value" :int]]]]
["tags" [:sequential :string]]]]
[{:keys [eid-translations-24h
query-executions
query-executions-24h]
:as _snowplow-grouped-metric-info}]
(deep-string-keywords
[{:name :query_executions_by_source
:values (mapv (fn [qe-group]
{:group qe-group :value (get query-executions qe-group)})
["interactive_embed" "internal" "public_link" "sdk_embed" "static_embed"])
:tags ["embedding"]}
{:name :query_executions_by_source_24h
:values (mapv (fn [qe-group] {:group qe-group :value (get query-executions-24h qe-group)})
["interactive_embed" "internal" "public_link" "sdk_embed" "static_embed"])
:tags ["embedding"]}
{:name :entity_id_translations_last_24h
:values (mapv (fn [[k v]] {:group k :value v}) eid-translations-24h)
:tags ["embedding"]}])) | |
Collects Snowplow metrics data that is not in the legacy stats format. The entity-id translation count is read here and cleared after the ping by [[clear-translation-count!]]. | (defn- ->snowplow-metric-info
[]
(let [one-day-ago (->one-day-ago)
total-translation-count (:total (get-translation-count))]
{:models (t2/count :model/Card :type :model :archived false)
:new_embedded_dashboards (t2/count :model/Dashboard
:enable_embedding true
:archived false
:created_at [:>= one-day-ago])
:new_users_last_24h (t2/count :model/User
:is_active true
:date_joined [:>= one-day-ago])
:pivot_tables (t2/count :model/Card :display :pivot :archived false)
:query_executions_last_24h (t2/count :model/QueryExecution :started_at [:>= one-day-ago])
:entity_id_translations_last_24h total-translation-count
:scim_users_last_24h (t2/count :model/User :sso_source :scim
:is_active true
:date_joined [:>= one-day-ago])})) |
(mu/defn- snowplow-metrics
[stats metric-info :- [:map
[:models :int]
[:new_embedded_dashboards :int]
[:new_users_last_24h :int]
[:pivot_tables :int]
[:query_executions_last_24h :int]
[:entity_id_translations_last_24h :int]]]
(mapv
(fn [[k v tags]]
(assert (every? string? tags) "Tags must be strings in snowplow metrics.")
(assert (some? v) "Cannot have a nil value in snowplow metrics.")
{"name" (name k) "value" v "tags" (-> tags sort vec)})
[[:above_goal_alerts (get-in stats [:stats :alert :above_goal] 0) #{"alerts"}]
[:alerts (get-in stats [:stats :alert :alerts] 0) #{"alerts"}]
[:all_time_query_executions (get-in stats [:stats :execution :executions] 0) #{"query_executions"}]
[:analyzed_databases (get-in stats [:stats :database :databases :analyzed] 0) #{}]
[:cache_average_entry_size (get-in stats [:stats :cache :average_entry_size] 0) #{"cache"}]
[:cache_num_queries_cached (get-in stats [:stats :cache :num_queries_cached_unbinned] 0) #{"cache"}]
[:cards_in_collections (get-in stats [:stats :collection :cards_in_collections] 0) #{"collections"}]
[:cards_not_in_collections (get-in stats [:stats :collection :cards_not_in_collections] 0) #{"collections"}]
[:collections (get-in stats [:stats :collection :collections] 0) #{"collections"}]
[:connected_databases (get-in stats [:stats :database :databases :total] 0) #{"databases"}]
[:dashboards_with_params (get-in stats [:stats :dashboard :with_params] 0) #{"dashboards"}]
[:embedded_dashboards (get-in stats [:stats :dashboard :embedded :total] 0) #{"dashboards" "embedding"}]
[:embedded_questions (get-in stats [:stats :question :embedded :total] 0) #{"questions" "embedding"}]
[:entity_id_translations_last_24h (:entity_id_translations_last_24h metric-info 0) #{"embedding"}]
[:first_time_only_alerts (get-in stats [:stats :alert :first_time_only] 0) #{"alerts"}]
[:metabase_fields (get-in stats [:stats :field :fields] 0) #{"fields"}]
[:metrics (get-in stats [:stats :metric :metrics] 0) #{"metrics"}]
[:models (:models metric-info 0) #{}]
[:native_questions (get-in stats [:stats :question :questions :native] 0) #{"questions"}]
[:new_embedded_dashboards (:new_embedded_dashboards metric-info 0) #{}]
[:new_users_last_24h (:new_users_last_24h metric-info 0) #{"users"}]
[:permission_groups (get-in stats [:stats :group :groups] 0) #{"permissions"}]
[:pivot_tables (:pivot_tables metric-info 0) #{}]
[:public_dashboards (get-in stats [:stats :dashboard :public :total] 0) #{"dashboards"}]
[:public_dashboards_with_params (get-in stats [:stats :dashboard :public :with_params] 0) #{"dashboards"}]
[:public_questions (get-in stats [:stats :question :public :total] 0) #{"questions"}]
[:public_questions_with_params (get-in stats [:stats :question :public :with_params] 0) #{"questions"}]
[:query_builder_questions (get-in stats [:stats :question :questions :total] 0) #{"questions"}]
[:query_executions_last_24h (:query_executions_last_24h metric-info 0) #{"query_executions"}]
[:questions (get-in stats [:stats :question :questions :total] 0) #{"questions"}]
[:questions_with_params (get-in stats [:stats :question :questions :with_params] 0) #{"questions"}]
[:segments (get-in stats [:stats :segment :segments] 0) #{"segments"}]
[:tables (get-in stats [:stats :table :tables] 0) #{"tables"}]
[:users (get-in stats [:stats :user :users :total] 0) #{"users"}]])) | |
Are any whitelabeling settings set to values other than their default? | (defn- whitelabeling-in-use?
[]
(let [whitelabel-settings (filter
(fn [setting] (= (:feature setting) :whitelabel))
(vals @setting/registered-settings))]
(boolean
(some
(fn [setting]
(not= ((:getter setting))
(:default setting)))
whitelabel-settings)))) |
Map from driver engines to the first version ([major minor]) which introduced support for CSV uploads | (def csv-upload-version-availability
{:postgres [47 0]
:mysql [47 0]
:redshift [49 6]
:clickhouse [50 0]}) |
Is CSV upload currently available to be used on this instance? | (defn- csv-upload-available?
[]
(boolean
(let [major-version (config/current-major-version)
minor-version (config/current-minor-version)
engines (t2/select-fn-set :engine :model/Database
{:where [:in :engine (map name (keys csv-upload-version-availability))]})]
(when (and major-version minor-version)
(some
(fn [engine]
(when-let [[required-major required-minor] (csv-upload-version-availability engine)]
(and (>= major-version required-major)
(>= minor-version required-minor))))
engines))))) |
(defn- ee-snowplow-features-data'
[]
(let [features [:sso-jwt :sso-saml :scim :sandboxes :email-allow-list]]
(map
(fn [feature]
{:name feature
:available false
:enabled false})
features))) | |
OSS values to use for features which require calling EE code to check whether they are available/enabled. | (defenterprise ee-snowplow-features-data metabase-enterprise.stats [] (ee-snowplow-features-data')) |
(defn- snowplow-features-data
[]
[{:name :email
:available true
:enabled (setting/get :email-configured?)}
{:name :slack
:available true
:enabled (slack/slack-configured?)}
{:name :sso-google
:available true
:enabled (setting/get :google-auth-configured)}
{:name :sso-ldap
:available true
:enabled (public-settings/ldap-enabled?)}
{:name :sample-data
:available true
:enabled (t2/exists? :model/Database, :is_sample true)}
{:name :interactive-embedding
:available (premium-features/hide-embed-branding?)
:enabled (and
(setting/get :enable-embedding-interactive)
(boolean (setting/get :embedding-app-origins-interactive))
(public-settings/sso-enabled?))}
{:name :static-embedding
:available true
:enabled (and
(setting/get :enable-embedding-static)
(or
(t2/exists? :model/Dashboard :enable_embedding true)
(t2/exists? :model/Card :enable_embedding true)))}
{:name :public-sharing
:available true
:enabled (and
(setting/get :enable-public-sharing)
(or
(t2/exists? :model/Dashboard :public_uuid [:not= nil])
(t2/exists? :model/Card :public_uuid [:not= nil])))}
{:name :whitelabel
:available (premium-features/enable-whitelabeling?)
:enabled (whitelabeling-in-use?)}
{:name :csv-upload
:available (csv-upload-available?)
:enabled (t2/exists? :model/Database :uploads_enabled true)}
{:name :mb-analytics
:available (premium-features/enable-audit-app?)
:enabled (premium-features/enable-audit-app?)}
{:name :advanced-permissions
:available (premium-features/enable-advanced-permissions?)
:enabled (premium-features/enable-advanced-permissions?)}
{:name :serialization
:available (premium-features/enable-serialization?)
:enabled (premium-features/enable-serialization?)}
{:name :official-collections
:available (premium-features/enable-official-collections?)
:enabled (t2/exists? :model/Collection :authority_level "official")}
{:name :cache-granular-controls
:available (premium-features/enable-cache-granular-controls?)
:enabled (t2/exists? :model/CacheConfig)}
{:name :attached-dwh
:available (premium-features/has-attached-dwh?)
:enabled (premium-features/has-attached-dwh?)}
{:name :database-auth-providers
:available (premium-features/enable-database-auth-providers?)
:enabled (premium-features/enable-database-auth-providers?)}
{:name :config-text-file
:available (premium-features/enable-config-text-file?)
:enabled (some? (get env/env :mb-config-file-path))}
{:name :content-verification
:available (premium-features/enable-content-verification?)
:enabled (t2/exists? :model/ModerationReview)}
{:name :dashboard-subscription-filters
:available (premium-features/enable-content-verification?)
:enabled (t2/exists? :model/Pulse {:where [:not= :parameters "[]"]})}
{:name :disable-password-login
:available (premium-features/can-disable-password-login?)
:enabled (not (public-settings/enable-password-login))}
{:name :email-restrict-recipients
:available (premium-features/enable-email-restrict-recipients?)
:enabled (not= (setting/get-value-of-type :keyword :user-visibility) :all)}
{:name :upload-management
:available (premium-features/enable-upload-management?)
:enabled (t2/exists? :model/Table :is_upload true)}
{:name :snippet-collections
:available (premium-features/enable-snippet-collections?)
:enabled (t2/exists? :model/Collection :namespace "snippets")}
{:name :cache-preemptive
:available (premium-features/enable-preemptive-caching?)
:enabled (t2/exists? :model/CacheConfig :refresh_automatically true)}]) | |
(defn- snowplow-features
[]
(let [features (concat (snowplow-features-data) (ee-snowplow-features-data))]
(mapv
;; Convert keys and feature names to strings to match expected Snowplow schema
(fn [feature]
(-> (update feature :name name)
(update :name u/->snake_case_en)
;; Ensure that unavailable features are not reported as enabled
(update :enabled (fn [enabled?] (if-not (:available feature) false enabled?)))
(walk/stringify-keys)))
features))) | |
Send stats to Metabase's snowplow collector. Transforms stats into the format required by the Snowplow schema. | (defn- snowplow-anonymous-usage-stats
[stats]
(let [instance-attributes (snowplow-instance-attributes stats)
metrics (snowplow-metrics stats (->snowplow-metric-info))
grouped-metrics (snowplow-grouped-metrics (->snowplow-grouped-metric-info))
features (snowplow-features)]
;; grouped_metrics and settings are required in the json schema, but their data will be included in the next Milestone:
{"analytics_uuid" (analytics.settings/analytics-uuid)
"features" features
"grouped_metrics" grouped-metrics
"instance_attributes" instance-attributes
"metrics" metrics
"settings" []})) |
Generate stats for this instance as data | (defn- generate-instance-stats!
[]
(let [stats (legacy-anonymous-usage-stats)]
{:stats (-> stats
;; `:num_queries_cached_unbinned` is added to [[legacy-anonymous-usage-stats]]'s return value to make
;; computing [[snowplow-anonymous-usage-stats]] more efficient. It shouldn't be sent by
;; [[send-stats-deprecated!]].
(update-in [:stats :cache] dissoc :num_queries_cached_unbinned))
:snowplow-stats (snowplow-anonymous-usage-stats stats)})) |
(defn- stats-post-cleanup [] (clear-translation-count!)) | |
Collect usage stats and phone them home | (defn phone-home-stats!
[]
(when (public-settings/anon-tracking-enabled)
(let [start-time-ms (System/currentTimeMillis)
{:keys [stats snowplow-stats]} (generate-instance-stats!)
end-time-ms (System/currentTimeMillis)
elapsed-secs (quot (- end-time-ms start-time-ms) 1000)
snowplow-data (-> snowplow-stats
(assoc "metadata" [{"key" "stats_export_time_seconds"
"value" elapsed-secs}])
deep-string-keywords)]
(assert (= #{"analytics_uuid" "features" "grouped_metrics" "instance_attributes" "metadata" "metrics" "settings"}
(set (keys snowplow-data)))
(str "Missing required keys in snowplow-data. got:" (sort (keys snowplow-data))))
#_{:clj-kondo/ignore [:deprecated-var]}
(send-stats-deprecated! stats)
(snowplow/track-event! :snowplow/instance_stats snowplow-data)
(stats-post-cleanup)))) |