Functions which summarize the usage of an instance | (ns metabase.analytics.stats (:require [clj-http.client :as http] [clojure.java.io :as io] [clojure.string :as str] [clojure.walk :as walk] [environ.core :as env] [java-time.api :as t] [medley.core :as m] [metabase.analytics.snowplow :as snowplow] [metabase.config :as config] [metabase.db :as db] [metabase.db.query :as mdb.query] [metabase.driver :as driver] [metabase.eid-translation :as eid-translation] [metabase.email :as email] [metabase.embed.settings :as embed.settings] [metabase.integrations.google :as google] [metabase.integrations.slack :as slack] [metabase.models :refer [Card Collection Dashboard DashboardCard Database Field LegacyMetric PermissionsGroup Pulse PulseCard PulseChannel QueryCache Segment Table User]] [metabase.models.humanization :as humanization] [metabase.models.interface :as mi] [metabase.models.setting :as setting] [metabase.public-settings :as public-settings] [metabase.public-settings.premium-features :as premium-features :refer [defenterprise]] [metabase.util :as u] [metabase.util.honey-sql-2 :as h2x] [metabase.util.json :as json] [metabase.util.log :as log] [metabase.util.malli :as mu] [toucan2.core :as t2])) |
;; Ask the compiler to warn whenever Java interop in this namespace has to fall back to reflection.
(set! *warn-on-reflection* true)
(defn- merge-count-maps
  "Sum a sequence of maps key-by-key. Numeric values are added as-is; any other truthy value counts as 1, and
  `nil`/`false` count as 0."
  [ms]
  (letfn [(->count [v]
            (if (number? v)
              v
              (if v 1 0)))]
    (apply merge-with + {} (map #(update-vals % ->count) ms))))
;; Endpoint that `send-stats-deprecated!` POSTs the usage-stats payload to.
(def ^:private ^String metabase-usage-url "https://xuq0fbkk0j.execute-api.us-east-1.amazonaws.com/prod")
(defn- bin-small-number
  "Return the label of the small-number bin that `x` falls into. Assumes non-negative inputs; a value that matches
  no bin yields nil."
  [x]
  (some (fn [[in-bin? label]]
          (when (in-bin? x)
            label))
        [[#(= 0 %)     "0"]
         [#(<= 1 % 5)   "1-5"]
         [#(<= 6 % 10)  "6-10"]
         [#(<= 11 % 25) "11-25"]
         [#(> % 25)     "25+"]]))
(defn- bin-medium-number
  "Return the label of the medium-number bin that `x` falls into. Assumes non-negative inputs; a value that matches
  no bin yields nil."
  [x]
  (some (fn [[in-bin? label]]
          (when (in-bin? x)
            label))
        [[#(= 0 %)        "0"]
         [#(<= 1 % 5)      "1-5"]
         [#(<= 6 % 10)     "6-10"]
         [#(<= 11 % 25)    "11-25"]
         [#(<= 26 % 50)    "26-50"]
         [#(<= 51 % 100)   "51-100"]
         [#(<= 101 % 250)  "101-250"]
         [#(> % 250)       "250+"]]))
(defn- value-frequencies
  "Look up `k` in every map of `many-maps` and count how often each resulting value occurs."
  [many-maps k]
  (->> many-maps
       (map k)
       frequencies))
(defn- histogram
  "Bin values with `binning-fn` and count how many land in each bin.

  With two args, bins every number in `counts`. With three args, first counts how often each distinct value of key
  `k` occurs across `many-maps`, then bins those occurrence counts.

    ;; you already have the counts
    (histogram bin-small-number [3 1 1])
    ;; -> {\"1-5\" 3}"
  ([binning-fn counts]
   (->> counts
        (map binning-fn)
        frequencies))
  ([binning-fn many-maps k]
   (let [occurrences (->> many-maps (map k) frequencies)]
     (histogram binning-fn (vals occurrences)))))
(def ^:private medium-histogram
  "Return a histogram for medium numbers: `histogram` with `bin-medium-number` pre-applied as the binning function."
  (partial histogram bin-medium-number))
(defn environment-type
  "Guess the hosting environment from configuration: `:elastic-beanstalk` when `:rds-hostname` is set, `:heroku` when
  `:database-url` is set, otherwise `:unknown`."
  []
  (if (config/config-str :rds-hostname)
    :elastic-beanstalk
    ;; 'database-url' is checked last because it seems the least specific signal
    (if (config/config-str :database-url)
      :heroku
      :unknown)))
;; Keys of the application-colors map that count as 'User Interface Colors'; every other key is treated as a
;; chart color by `appearance-chart-colors-changed?`.
(def ^:private ui-colors #{:brand :filter :summarize})
(defn appearance-ui-colors-changed?
  "Returns true if the 'User Interface Colors' have been customized, i.e. the application-colors setting contains
  any of the `ui-colors` keys."
  []
  (-> (public-settings/application-colors)
      (select-keys ui-colors)
      seq
      boolean))
(defn appearance-chart-colors-changed?
  "Returns true if the 'Chart Colors' have been customized, i.e. the application-colors setting contains any key
  other than the `ui-colors` keys."
  []
  (let [colors       (public-settings/application-colors)
        chart-colors (apply dissoc colors ui-colors)]
    (boolean (seq chart-colors))))
(defn- instance-settings
  "Figure out global info about this instance. Returns a flat map of settings-derived values; keys ending in `_set`
  are booleans that say whether a setting has a value, never the value itself."
  []
  {:version                              (config/mb-version-info :tag)
   :running_on                           (environment-type)
   :startup_time_millis                  (int (public-settings/startup-time-millis))
   :application_database                 (config/config-str :mb-db-type)
   :check_for_updates                    (public-settings/check-for-updates)
   :report_timezone                      (driver/report-timezone)
   ;; We deprecated advanced humanization but have this here anyways
   :friendly_names                       (= (humanization/humanization-strategy) "advanced")
   :email_configured                     (email/email-configured?)
   :slack_configured                     (slack/slack-configured?)
   :sso_configured                       (google/google-auth-enabled)
   :instance_started                     (snowplow/instance-creation)
   :has_sample_data                      (t2/exists? Database, :is_sample true)
   :enable_embedding                     #_{:clj-kondo/ignore [:deprecated-var]}
   (embed.settings/enable-embedding)
   :enable_embedding_sdk                 (embed.settings/enable-embedding-sdk)
   :enable_embedding_interactive         (embed.settings/enable-embedding-interactive)
   :enable_embedding_static              (embed.settings/enable-embedding-static)
   :embedding_app_origin_set             (boolean
                                          #_{:clj-kondo/ignore [:deprecated-var]}
                                          (embed.settings/embedding-app-origin))
   :embedding_app_origin_sdk_set         (boolean (let [sdk-origins (embed.settings/embedding-app-origins-sdk)]
                                                    (and sdk-origins (not= "localhost:*" sdk-origins))))
   ;; Report only WHETHER an origin is configured, like the two `_set` keys above. Sending the raw setting value
   ;; would put the configured origin string itself into the usage-stats payload.
   :embedding_app_origin_interactive_set (boolean (embed.settings/embedding-app-origins-interactive))
   :appearance_site_name                 (not= (public-settings/site-name) "Metabase")
   :appearance_help_link                 (public-settings/help-link)
   :appearance_logo                      (not= (public-settings/application-logo-url) "app/assets/img/logo.svg")
   :appearance_favicon                   (not= (public-settings/application-favicon-url) "app/assets/img/favicon.ico")
   :appearance_loading_message           (not= (public-settings/loading-message) :doing-science)
   :appearance_metabot_greeting          (not (public-settings/show-metabot))
   :appearance_login_page_illustration   (public-settings/login-page-illustration)
   :appearance_landing_page_illustration (public-settings/landing-page-illustration)
   :appearance_no_data_illustration      (public-settings/no-data-illustration)
   :appearance_no_object_illustration    (public-settings/no-object-illustration)
   :appearance_ui_colors                 (appearance-ui-colors-changed?)
   :appearance_chart_colors              (appearance-chart-colors-changed?)
   :appearance_show_mb_links             (not (public-settings/show-metabase-links))})
(defn- user-metrics
  "Get metrics based on user records: totals of personal-type users that are active, admins, have logged in at
  least once, or use Google SSO.
  TODO: get activity in terms of created questions, pulses and dashboards"
  []
  (let [users       (t2/select [User :is_active :is_superuser :last_login :sso_source] :type :personal)
        user->tally (fn [{:keys [is_active is_superuser last_login sso_source]}]
                      {:total     1
                       :active    is_active
                       :admin     is_superuser
                       :logged_in last_login
                       :sso       (= :google sso_source)})]
    {:users (merge-count-maps (map user->tally users))}))
(defn- group-metrics
  "Get metrics based on groups: currently just the number of permissions groups.
  TODO characterize by # w/ sql access, # of users, no self-serve data access"
  []
  (let [num-groups (t2/count PermissionsGroup)]
    {:groups num-groups}))
(defn- card-has-params?
  "True when `card`'s native query declares at least one template tag.

  An empty `:template-tags` map is treated as 'no params': an empty map is truthy in Clojure, so a plain `boolean`
  check would count every native card carrying `:template-tags {}` as parameterized."
  [card]
  (boolean (seq (get-in card [:dataset_query :native :template-tags]))))
(defn- question-metrics
  "Get metrics based on questions (Cards, excluding internal content): overall tallies plus separate tallies for
  publicly shared and embedded cards.
  TODO characterize by # executions and avg latency"
  []
  (let [cards          (t2/select [:model/Card :query_type :public_uuid :enable_embedding :embedding_params
                                   :dataset_query :dashboard_id]
                                  {:where (mi/exclude-internal-content-hsql :model/Card)})
        question-tally (fn [{:keys [query_type dashboard_id] :as card}]
                         (let [native? (= (keyword query_type) :native)]
                           {:total                 1
                            :native                native?
                            :gui                   (not native?)
                            :is_dashboard_question (some? dashboard_id)
                            :with_params           (card-has-params? card)}))
        public-tally   (fn [card]
                         {:total       1
                          :with_params (card-has-params? card)})
        embedded-tally (fn [card]
                         (let [param-states (set (vals (:embedding_params card)))]
                           {:total                1
                            :with_params          (card-has-params? card)
                            :with_enabled_params  (contains? param-states "enabled")
                            :with_locked_params   (contains? param-states "locked")
                            :with_disabled_params (contains? param-states "disabled")}))]
    {:questions (merge-count-maps (map question-tally cards))
     :public    (merge-count-maps (map public-tally (filter :public_uuid cards)))
     :embedded  (merge-count-maps (map embedded-tally (filter :enable_embedding cards)))}))
(defn- dashboard-metrics
  "Get metrics based on dashboards (excluding internal content): totals, per-user/per-dashboard/per-card histograms,
  and tallies for publicly shared and embedded dashboards.
  TODO characterize by # of revisions, and created by an admin"
  []
  (let [dashboards (t2/select [:model/Dashboard :creator_id :public_uuid :parameters :enable_embedding :embedding_params]
                              {:where (mi/exclude-internal-content-hsql :model/Dashboard)})
        ;; dashcards are joined to their dashboard so the same internal-content filter can be applied via alias `d`
        dashcards  (t2/query {:select :dc.*
                              :from [[(t2/table-name DashboardCard) :dc]]
                              :join [[(t2/table-name Dashboard) :d] [:= :d.id :dc.dashboard_id]]
                              :where (mi/exclude-internal-content-hsql :model/Dashboard :table-alias :d)})]
    {:dashboards         (count dashboards)
     :with_params        (count (filter (comp seq :parameters) dashboards))
     :num_dashs_per_user (medium-histogram dashboards :creator_id)
     :num_cards_per_dash (medium-histogram dashcards :dashboard_id)
     :num_dashs_per_card (medium-histogram dashcards :card_id)
     ;; `merge-count-maps` turns the truthy/nil result of `seq` into a 1/0 count
     :public             (merge-count-maps (for [dash dashboards :when (:public_uuid dash)]
                                             {:total 1
                                              :with_params (seq (:parameters dash))}))
     :embedded           (merge-count-maps (for [dash dashboards :when (:enable_embedding dash)]
                                             (let [embedding-params-vals (set (vals (:embedding_params dash)))]
                                               {:total 1
                                                :with_params (seq (:parameters dash))
                                                :with_enabled_params (contains? embedding-params-vals "enabled")
                                                :with_locked_params (contains? embedding-params-vals "locked")
                                                :with_disabled_params (contains? embedding-params-vals "disabled")})))}))
(defn- db-frequencies
  "Fetch the frequencies of a given `column` of `model` with a normal SQL `SELECT COUNT(*) ... GROUP BY` query.
  Returns a map of column value -> row count.

    (db-frequencies Database :engine)
    ;; -> {\"h2\" 2, \"postgres\" 1, ...}

  An optional HoneySQL map is merged into the query, e.g. to add a `:where` or a `:left-join`."
  [model column & [additional-honeysql]]
  (let [query (merge {:group-by [column]} additional-honeysql)
        rows  (t2/select [model [column :k] [:%count.* :count]] query)]
    (into {} (map (juxt :k :count)) rows)))
(defn- num-notifications-with-xls-or-csv-cards
  "Return the number of distinct `pulse` rows that have at least one `pulse_card` with `include_csv` or
  `include_xls` set and that also satisfy every HoneySQL clause in `where-conditions`.

    ;; Pulses only (filter out Alerts)
    (num-notifications-with-xls-or-csv-cards [:= :alert_condition nil])"
  [& where-conditions]
  (-> (mdb.query/query {:select    [[[::h2x/distinct-count :pulse.id] :count]]
                        :from      [:pulse]
                        :left-join [:pulse_card [:= :pulse.id :pulse_card.pulse_id]]
                        ;; caller-supplied conditions are ANDed onto the csv/xls filter
                        :where     (into
                                    [:and
                                     [:or
                                      [:= :pulse_card.include_csv true]
                                      [:= :pulse_card.include_xls true]]]
                                    where-conditions)})
      first
      :count))
(defn- pulse-metrics
  "Get metrics based on pulses, i.e. `Pulse` rows whose `alert_condition` is nil.
  TODO: characterize by non-user account emails, # emails"
  []
  ;; HoneySQL fragment that joins a child table to `pulse` and keeps only rows belonging to non-alert pulses
  (let [pulse-conditions {:left-join [:pulse [:= :pulse.id :pulse_id]], :where [:= :pulse.alert_condition nil]}]
    {:pulses               (t2/count Pulse :alert_condition nil)
     ;; "Table Cards" are Cards that include a Table you can download
     :with_table_cards     (num-notifications-with-xls-or-csv-cards [:= :alert_condition nil])
     :pulse_types          (db-frequencies PulseChannel :channel_type pulse-conditions)
     :pulse_schedules      (db-frequencies PulseChannel :schedule_type pulse-conditions)
     ;; the query is already against Pulse here, so the join to `pulse` is dropped
     :num_pulses_per_user  (medium-histogram (vals (db-frequencies Pulse :creator_id (dissoc pulse-conditions :left-join))))
     :num_pulses_per_card  (medium-histogram (vals (db-frequencies PulseCard :card_id pulse-conditions)))
     :num_cards_per_pulses (medium-histogram (vals (db-frequencies PulseCard :pulse_id pulse-conditions)))}))
(defn- alert-metrics
  "Get metrics based on alerts, i.e. `Pulse` rows whose `alert_condition` is not nil."
  []
  ;; HoneySQL fragment that joins a child table to `pulse` and keeps only rows belonging to alerts
  (let [alert-conditions {:left-join [:pulse [:= :pulse.id :pulse_id]], :where [:not= (mdb.query/qualify Pulse :alert_condition) nil]}]
    {:alerts               (t2/count Pulse :alert_condition [:not= nil])
     :with_table_cards     (num-notifications-with-xls-or-csv-cards [:not= :alert_condition nil])
     :first_time_only      (t2/count Pulse :alert_condition [:not= nil], :alert_first_only true)
     :above_goal           (t2/count Pulse :alert_condition [:not= nil], :alert_above_goal true)
     :alert_types          (db-frequencies PulseChannel :channel_type alert-conditions)
     ;; the query is already against Pulse here, so the join to `pulse` is dropped
     :num_alerts_per_user  (medium-histogram (vals (db-frequencies Pulse :creator_id (dissoc alert-conditions :left-join))))
     :num_alerts_per_card  (medium-histogram (vals (db-frequencies PulseCard :card_id alert-conditions)))
     :num_cards_per_alerts (medium-histogram (vals (db-frequencies PulseCard :pulse_id alert-conditions)))}))
(defn- collection-metrics
  "Get metrics on Collection usage: the number of collections, how many cards are or are not in a collection, and
  a histogram of cards per collection. Internal content is excluded."
  []
  (let [collections   (t2/select Collection {:where (mi/exclude-internal-content-hsql :model/Collection)})
        cards         (t2/select [Card :collection_id] {:where (mi/exclude-internal-content-hsql :model/Card)})
        in-collection (count (filter :collection_id cards))]
    {:collections              (count collections)
     :cards_in_collections     in-collection
     :cards_not_in_collections (- (count cards) in-collection)
     :num_cards_per_collection (medium-histogram cards :collection_id)}))
(defn- database-metrics
  "Get metrics based on Databases (excluding internal content): total and fully-synced counts, plus how often each
  JSON-encoded `dbms_version` (with the engine added) occurs."
  []
  (let [databases      (t2/select [:model/Database :is_full_sync :engine :dbms_version]
                                  {:where (mi/exclude-internal-content-hsql :model/Database)})
        db->tally      (fn [db]
                         {:total 1, :analyzed (:is_full_sync db)})
        version-labels (for [{:keys [dbms_version engine]} databases]
                         (json/encode (assoc dbms_version :engine engine)))]
    {:databases     (merge-count-maps (map db->tally databases))
     :dbms_versions (frequencies version-labels)}))
(defn- table-metrics
  "Get metrics based on Tables: the total count plus histograms of tables per database and per schema. Tables are
  joined to their Database so the internal-content filter can be applied to it."
  []
  (let [tables (t2/query {:select [:t.db_id :t.schema]
                          :from   [[(t2/table-name :model/Table) :t]]
                          :join   [[(t2/table-name :model/Database) :d] [:= :d.id :t.db_id]]
                          :where  (mi/exclude-internal-content-hsql :model/Database :table-alias :d)})]
    {:tables           (count tables)
     :num_per_database (medium-histogram tables :db_id)
     :num_per_schema   (medium-histogram tables :schema)}))
(defn- field-metrics
  "Get metrics based on Fields: the total count plus a histogram of fields per table. Fields are joined through
  Table to Database so the internal-content filter can be applied to the Database."
  []
  (let [fields (t2/query {:select [:f.table_id]
                          :from   [[(t2/table-name Field) :f]]
                          :join   [[(t2/table-name Table) :t] [:= :t.id :f.table_id]
                                   [(t2/table-name Database) :d] [:= :d.id :t.db_id]]
                          :where  (mi/exclude-internal-content-hsql :model/Database :table-alias :d)})]
    {:fields        (count fields)
     :num_per_table (medium-histogram fields :table_id)}))
(defn- segment-metrics
  "Get metrics based on Segments: currently just how many exist."
  []
  (let [num-segments (t2/count Segment)]
    {:segments num-segments}))
(defn- metric-metrics
  "Get metrics based on (legacy) Metrics: currently just how many `LegacyMetric` rows exist."
  []
  (let [num-metrics (t2/count LegacyMetric)]
    {:metrics num-metrics}))
Execution Metrics | |
(defn- execution-metrics-sql
  "Build the SQL string that summarizes `query_execution` rows started in the last 30 days for the current
  application database type (`:postgres`, `:h2` or `:mysql`; any other type makes `case` throw).

  The query returns a single row. Column names use `<group>__<bin>` so `execution-metrics` can split them back
  into nested maps: `executions`, `by_status__*`, `num_by_latency__*` and `num_per_user__*`."
  []
  ;; Postgres automatically adjusts for daylight saving time when performing time calculations on TIMESTAMP WITH TIME
  ;; ZONE. This can cause discrepancies when subtracting 30 days if the calculation crosses a DST boundary (e.g., in the
  ;; Pacific/Auckland timezone). To avoid this, we ensure all date computations are done in UTC on Postgres to prevent
  ;; any time shifts due to DST. See PR #48204
  (let [thirty-days-ago (case (db/db-type)
                          :postgres "CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - INTERVAL '30 days'"
                          :h2       "DATEADD('DAY', -30, CURRENT_TIMESTAMP)"
                          :mysql    "CURRENT_TIMESTAMP - INTERVAL 30 DAY")
        started-at      (case (db/db-type)
                          :postgres "started_at AT TIME ZONE 'UTC'"
                          :h2       "started_at"
                          :mysql    "started_at")
        timestamp-where (str started-at " > " thirty-days-ago)]
    (str/join
     "\n"
     ["WITH user_executions AS ("
      " SELECT executor_id, COUNT(*) AS num_executions"
      " FROM query_execution"
      " WHERE " timestamp-where
      " GROUP BY executor_id"
      "),"
      "query_stats_1 AS ("
      " SELECT"
      " COUNT(*) AS executions,"
      " SUM(CASE WHEN error IS NULL OR length(error) = 0 THEN 1 ELSE 0 END) AS by_status__completed,"
      ;; 'failed' must be the exact complement of 'completed'. With OR here, a row whose error is the empty string
      ;; would be counted under both statuses.
      " SUM(CASE WHEN error IS NOT NULL AND length(error) > 0 THEN 1 ELSE 0 END) AS by_status__failed,"
      " COALESCE(SUM(CASE WHEN running_time = 0 THEN 1 ELSE 0 END), 0) AS num_by_latency__0,"
      " COALESCE(SUM(CASE WHEN running_time > 0 AND running_time < 1000 THEN 1 ELSE 0 END), 0) AS num_by_latency__lt_1,"
      " COALESCE(SUM(CASE WHEN running_time >= 1000 AND running_time < 10000 THEN 1 ELSE 0 END), 0) AS num_by_latency__1_10,"
      " COALESCE(SUM(CASE WHEN running_time >= 10000 AND running_time < 50000 THEN 1 ELSE 0 END), 0) AS num_by_latency__11_50,"
      " COALESCE(SUM(CASE WHEN running_time >= 50000 AND running_time < 250000 THEN 1 ELSE 0 END), 0) AS num_by_latency__51_250,"
      " COALESCE(SUM(CASE WHEN running_time >= 250000 AND running_time < 1000000 THEN 1 ELSE 0 END), 0) AS num_by_latency__251_1000,"
      " COALESCE(SUM(CASE WHEN running_time >= 1000000 AND running_time < 10000000 THEN 1 ELSE 0 END), 0) AS num_by_latency__1001_10000,"
      " COALESCE(SUM(CASE WHEN running_time >= 10000000 THEN 1 ELSE 0 END), 0) AS num_by_latency__10000_plus"
      " FROM query_execution"
      " WHERE " timestamp-where
      "),"
      "query_stats_2 AS ("
      " SELECT"
      ;; NOTE(review): the per-user bins are half-open ([1,10), [10,50), ...) while their labels read 1-10, 11-50,
      ;; ...; a user with exactly 10 executions lands in "11-50". Left unchanged here; confirm the intended bounds.
      " COALESCE(SUM(CASE WHEN num_executions = 0 THEN 1 ELSE 0 END), 0) AS num_per_user__0,"
      " COALESCE(SUM(CASE WHEN num_executions > 0 AND num_executions < 1 THEN 1 ELSE 0 END), 0) AS num_per_user__lt_1,"
      " COALESCE(SUM(CASE WHEN num_executions >= 1 AND num_executions < 10 THEN 1 ELSE 0 END), 0) AS num_per_user__1_10,"
      " COALESCE(SUM(CASE WHEN num_executions >= 10 AND num_executions < 50 THEN 1 ELSE 0 END), 0) AS num_per_user__11_50,"
      " COALESCE(SUM(CASE WHEN num_executions >= 50 AND num_executions < 250 THEN 1 ELSE 0 END), 0) AS num_per_user__51_250,"
      " COALESCE(SUM(CASE WHEN num_executions >= 250 AND num_executions < 1000 THEN 1 ELSE 0 END), 0) AS num_per_user__251_1000,"
      " COALESCE(SUM(CASE WHEN num_executions >= 1000 AND num_executions < 10000 THEN 1 ELSE 0 END), 0) AS num_per_user__1001_10000,"
      " COALESCE(SUM(CASE WHEN num_executions >= 10000 THEN 1 ELSE 0 END), 0) AS num_per_user__10000_plus"
      " FROM user_executions"
      ")"
      "SELECT q1.*, q2.* FROM query_stats_1 q1, query_stats_2 q2;"])))
(defn- execution-metrics
  "Get metrics based on QueryExecutions. Runs the SQL from `execution-metrics-sql` and reshapes its single result
  row: a column named `group__bin` becomes entry `bin` (relabelled for display) under `:group`, kept only when its
  count is positive; a column without `__` is stored directly under its own key."
  []
  (let [bin-labels {"lt_1"       "< 1"
                    "1_10"       "1-10"
                    "11_50"      "11-50"
                    "51_250"     "51-250"
                    "251_1000"   "251-1000"
                    "1001_10000" "1001-10000"
                    "10000_plus" "10000+"}
        row        (first (t2/query (execution-metrics-sql)))
        ;; cast numbers to int because some DBs output bigdecimals
        counts     (update-vals row (fn [v] (when (some? v) (int v))))]
    (reduce-kv
     (fn [acc col n]
       (let [[prefix bin] (str/split (name col) #"__")
             group        (keyword prefix)]
         (cond
           (nil? bin)               (assoc acc group n)
           (and (some? n) (pos? n)) (assoc-in acc [group (get bin-labels bin bin)] n)
           :else                    acc)))
     {:executions     0
      :by_status      {}
      :num_per_user   {}
      :num_by_latency {}}
     counts)))
Cache Metrics | |
(defn- cache-metrics
  "Metrics based on use of the QueryCache: the average length of cached `results` and the number of cached
  entries, both binned and raw."
  []
  (let [row        (t2/select-one [QueryCache [[:avg [:length :results]] :length] [:%count.* :count]])
        avg-length (:length row)
        num-cached (:count row)]
    {:average_entry_size          (int (or avg-length 0))
     :num_queries_cached          (bin-small-number num-cached)
     ;; this value gets used in the snowplow ping 'metrics' section.
     :num_queries_cached_unbinned num-cached}))
System Metrics | |
(defn- bytes->megabytes
  "Convert a byte count `b` to megabytes (1024 * 1024 bytes), rounded to the nearest whole number."
  [b]
  (-> b
      (/ 1024 1024)
      double
      Math/round))
;; JVM system properties that `system-metrics` reads and reports (with `.` replaced by `_` in the key).
(def ^:private system-property-names ["java.version" "java.vm.specification.version" "java.runtime.name" "user.timezone" "user.language" "user.country" "file.encoding" "os.name" "os.version"])
(defn- system-metrics
  "Metadata about the environment Metabase is running in: the JVM's max memory in megabytes, the number of
  available processors, and each property in `system-property-names` keyed by its name with `.` turned into `_`."
  []
  (let [runtime (Runtime/getRuntime)]
    (into {:max_memory (bytes->megabytes (.maxMemory runtime))
           :processors (.availableProcessors runtime)}
          (map (fn [property]
                 [(keyword (str/replace property \. \_))
                  (System/getProperty property)]))
          system-property-names)))
Combined Stats & Logic for sending them in | |
(defn legacy-anonymous-usage-stats
  "Generate a map of the usage stats for this instance: the instance settings plus the site UUID, a timestamp and
  a `:stats` map with one entry per metrics function."
  []
  (let [settings (instance-settings)
        stats    {:cache      (cache-metrics)
                  :collection (collection-metrics)
                  :dashboard  (dashboard-metrics)
                  :database   (database-metrics)
                  :execution  (execution-metrics)
                  :field      (field-metrics)
                  :group      (group-metrics)
                  :metric     (metric-metrics)
                  :pulse      (pulse-metrics)
                  :alert      (alert-metrics)
                  :question   (question-metrics)
                  :segment    (segment-metrics)
                  :system     (system-metrics)
                  :table      (table-metrics)
                  :user       (user-metrics)}]
    (assoc settings
           :uuid      (public-settings/site-uuid)
           :timestamp (t/offset-date-time)
           :stats     stats)))
(defn- ^:deprecated send-stats-deprecated!
  "Send `stats` to the Metabase tracking server by POSTing them as JSON to `metabase-usage-url`. Any failure is
  logged and swallowed."
  [stats]
  (let [request {:form-params stats, :content-type :json, :throw-entire-message? true}]
    (try
      (http/post metabase-usage-url request)
      (catch Throwable e
        (log/error e "Sending usage stats FAILED")))))
(defn- in-docker?
  "Is the current Metabase process running in a Docker container?

  Best-effort check: true when `/.dockerenv` exists, or when `/proc/self/cgroup` exists and one of its lines
  contains `docker`. An `IOException` while reading the cgroup file counts as 'not in Docker'."
  []
  (let [cgroup-file (io/file "/proc/self/cgroup")]
    (boolean
     (or (.exists (io/file "/.dockerenv"))
         (when (.exists cgroup-file)
           (try
             ;; `with-open` closes the reader on every path; a bare `(line-seq (io/reader ...))` would leak the
             ;; file handle. `some` realizes the lines it needs before the reader is closed.
             (with-open [rdr (io/reader cgroup-file)]
               (some #(re-find #"docker" %) (line-seq rdr)))
             (catch java.io.IOException _
               false)))))))
(defn- deployment-model
  "How this instance is deployed: \"cloud\" when hosted, else \"docker\" when running in a container, else \"jar\"."
  []
  (if (premium-features/is-hosted?)
    "cloud"
    (if (in-docker?)
      "docker"
      "jar")))
;; Length, in days after instance creation, of the window `sufficient-users?` uses when counting joined users.
(def ^:private activation-days 3)
(defn- sufficient-users?
  "Returns a Boolean indicating whether the number of non-internal users who joined within `activation-days` days
  of instance creation is greater than or equal to `num-users`."
  [num-users]
  (let [created-at                 (t/offset-date-time (setting/get :instance-creation))
        activation-deadline        (t/plus created-at (t/days activation-days))
        where-clause               [:and
                                    [:<= :date_joined activation-deadline]
                                    (mi/exclude-internal-content-hsql :model/User)]
        users-in-activation-period (t2/count :model/User {:where where-clause
                                                          :limit (inc num-users)})]
    (<= num-users users-in-activation-period)))
(defn- sufficient-queries?
  "Returns a Boolean indicating whether the number of queries recorded over non-sample content is greater than or
  equal to `num-queries`."
  [num-queries]
  (let [sample-db-id (t2/select-one-pk :model/Database :is_sample true)
        non-sample   [:or
                      [:not= :database_id sample-db-id]
                      [:= :database_id nil]]
        ;; QueryExecution can be large, so let's avoid counting everything
        query-ids    (t2/select-fn-set :id :model/QueryExecution {:where non-sample
                                                                  :limit (inc num-queries)})]
    (<= num-queries (count query-ids))))
(defn- completed-activation-signals?
  "If the current plan is Pro or Starter, returns a Boolean indicating whether the instance should be considered
  to have completed activation signals (enough users or enough queries for that plan). Returns nil for any other
  plan."
  []
  (let [plan         (premium-features/plan-alias)
        plan-prefix? (fn [prefix] (some-> plan (str/starts-with? prefix)))]
    (cond
      (plan-prefix? "pro")     (or (sufficient-users? 4) (sufficient-queries? 201))
      (plan-prefix? "starter") (or (sufficient-users? 2) (sufficient-queries? 101)))))
(defn m->kv-vec
  "Convert a map to a vector of maps with the string keys \"key\" and \"value\", one per entry of `m`. Each entry's
  key is converted with `name`."
  [m]
  (vec (for [[k v] m]
         {"key"   (name k)
          "value" v})))
(defn- snowplow-instance-attributes
  "Build the Snowplow `instance_attributes` list from `stats`: the system metrics (with `:user_language` reported
  as `:language`) plus plan, version, report timezone, deployment model, startup time and activation status,
  converted to key/value maps by `m->kv-vec`."
  [stats]
  (let [{:keys [version report_timezone startup_time_millis]} stats
        system-stats                                          (get-in stats [:stats :system])
        extra-attributes                                      {:metabase_plan                    (premium-features/plan-alias)
                                                               :metabase_version                 version
                                                               :language                         (:user_language system-stats)
                                                               :report_timezone                  report_timezone
                                                               :deployment_model                 (deployment-model)
                                                               :startup_time_millis              startup_time_millis
                                                               :has_activation_signals_completed (completed-activation-signals?)}]
    (-> system-stats
        (dissoc :user_language)
        (merge extra-attributes)
        m->kv-vec)))
(mu/defn- get-translation-count
  :- [:map [:ok :int] [:not-found :int] [:invalid-format :int] [:total :int]]
  "Read the entity-id translation counter setting and return it with a `:total` entry holding the sum of its
  values. This is meant to be called during the daily stats collection process. Reading does not reset the
  counter; see `clear-translation-count!` for that."
  []
  (let [counter (setting/get-value-of-type :json :entity-id-translation-counter)
        total   (reduce + (vals counter))]
    (assoc counter :total total)))
(mu/defn- clear-translation-count!
  "We want to reset the eid translation count on every stat ping, so we do it here. Stores
  `eid-translation/default-counter` in the `:entity-id-translation-counter` setting and returns that default."
  []
  (let [fresh-counter eid-translation/default-counter]
    (setting/set-value-of-type! :json :entity-id-translation-counter fresh-counter)
    fresh-counter))
(defn- categorize-query-execution
  "Classify a query execution by where it came from, based on its `:embedding_client`, `:executor_id` and
  `:context`. Returns one of \"sdk_embed\", \"interactive_embed\", \"static_embed\", \"public_link\" or \"internal\"."
  [{:keys [context embedding_client executor_id]}]
  (case embedding_client
    "embedding-sdk-react" "sdk_embed"
    ;; iframe embeds with an executor are interactive; without one they are static
    "embedding-iframe"    (if (some? executor_id)
                            "interactive_embed"
                            "static_embed")
    (if (some-> context name (str/starts-with? "public-"))
      "public_link"
      "internal")))
(defn- ->one-day-ago
  "The current offset date-time minus one day."
  []
  (-> (t/offset-date-time)
      (t/minus (t/days 1))))
(defn- ->snowplow-grouped-metric-info
  "Collect the raw data for the Snowplow grouped metrics: query executions counted per source category (all-time
  and for the last 24 hours, every category present even when its count is zero) and the entity-id translation
  counts."
  []
  (let [zero-counts     {"sdk_embed"         0
                         "interactive_embed" 0
                         "static_embed"      0
                         "public_link"       0
                         "internal"          0}
        count-by-source (fn [executions]
                          (merge zero-counts
                                 (update-vals (group-by categorize-query-execution executions) count)))
        all-executions  (t2/select [:model/QueryExecution :embedding_client :context :executor_id :started_at])
        cutoff          (->one-day-ago)
        ;; reuse the query data:
        recent          (filter #(t/after? (:started_at %) cutoff) all-executions)]
    {:query-executions     (count-by-source all-executions)
     :query-executions-24h (count-by-source recent)
     :eid-translations-24h (get-translation-count)}))
(defn- deep-string-keywords
  "Snowplow data will not work if you pass in keywords, but this will let us use keywords all over. Walks `data`
  and replaces every keyword, at any depth, with its snake_case name as a string."
  [data]
  (letfn [(stringify [x]
            (if-not (keyword? x)
              x
              (name (u/->snake_case_en x))))]
    (walk/postwalk stringify data)))
(mu/defn- snowplow-grouped-metrics
  :- [:sequential
      [:map
       ["name" :string]
       ["values" [:sequential [:map ["group" :string] ["value" :int]]]]
       ["tags" [:sequential :string]]]]
  "Turn the output of `->snowplow-grouped-metric-info` into Snowplow's grouped-metrics shape: one entry each for
  query executions by source (all-time and last 24h) and for entity-id translations, with all keywords stringified."
  [{:keys [eid-translations-24h query-executions query-executions-24h] :as _snowplow-grouped-metric-info}]
  (let [sources            ["interactive_embed" "internal" "public_link" "sdk_embed" "static_embed"]
        by-source          (fn [counts]
                             (mapv (fn [source]
                                     {:group source :value (get counts source)})
                                   sources))
        translation-groups (vec (for [[k v] eid-translations-24h]
                                  {:group k :value v}))]
    (deep-string-keywords
     [{:name   :query_executions_by_source
       :values (by-source query-executions)
       :tags   ["embedding"]}
      {:name   :query_executions_by_source_24h
       :values (by-source query-executions-24h)
       :tags   ["embedding"]}
      {:name   :entity_id_translations_last_24h
       :values translation-groups
       :tags   ["embedding"]}])))
(defn- ->snowplow-metric-info
  "Collects Snowplow metrics data that is not in the legacy stats format: counts of models, pivot tables, newly
  embedded dashboards, new users, new SCIM users and query executions in the last 24 hours, plus the total
  entity-id translation count.

  NOTE(review): this docstring used to say the function also clears the entity id translation count, but nothing
  here calls `clear-translation-count!` -- confirm that a caller resets the counter."
  []
  (let [one-day-ago (->one-day-ago)
        total-translation-count (:total (get-translation-count))]
    {:models                          (t2/count :model/Card :type :model :archived false)
     :new_embedded_dashboards         (t2/count :model/Dashboard
                                                :enable_embedding true
                                                :archived false
                                                :created_at [:>= one-day-ago])
     :new_users_last_24h              (t2/count :model/User
                                                :is_active true
                                                :date_joined [:>= one-day-ago])
     :pivot_tables                    (t2/count :model/Card :display :pivot :archived false)
     :query_executions_last_24h       (t2/count :model/QueryExecution :started_at [:>= one-day-ago])
     :entity_id_translations_last_24h total-translation-count
     :scim_users_last_24h             (t2/count :model/User :sso_source :scim
                                                :is_active true
                                                :date_joined [:>= one-day-ago])}))
(mu/defn- snowplow-metrics
  "Build the Snowplow `metrics` list: a vector of `{\"name\" ... \"value\" ... \"tags\" [...]}` maps drawn from the
  legacy `stats` map and from `metric-info` (the output of `->snowplow-metric-info`). Every value defaults to 0
  when missing, and tags are emitted as a sorted vector. Asserts that all tags are strings and no value is nil."
  [stats
   metric-info :- [:map
                   [:models :int]
                   [:new_embedded_dashboards :int]
                   [:new_users_last_24h :int]
                   [:pivot_tables :int]
                   [:query_executions_last_24h :int]
                   [:entity_id_translations_last_24h :int]]]
  (mapv
   (fn [[k v tags]]
     (assert (every? string? tags) "Tags must be strings in snowplow metrics.")
     (assert (some? v) "Cannot have a nil value in snowplow metrics.")
     {"name" (name k) "value" v "tags" (-> tags sort vec)})
   [[:above_goal_alerts               (get-in stats [:stats :alert :above_goal] 0)                      #{"alerts"}]
    [:alerts                          (get-in stats [:stats :alert :alerts] 0)                          #{"alerts"}]
    [:all_time_query_executions       (get-in stats [:stats :execution :executions] 0)                  #{"query_executions"}]
    [:analyzed_databases              (get-in stats [:stats :database :databases :analyzed] 0)          #{}]
    [:cache_average_entry_size        (get-in stats [:stats :cache :average_entry_size] 0)              #{"cache"}]
    [:cache_num_queries_cached        (get-in stats [:stats :cache :num_queries_cached_unbinned] 0)     #{"cache"}]
    [:cards_in_collections            (get-in stats [:stats :collection :cards_in_collections] 0)       #{"collections"}]
    [:cards_not_in_collections        (get-in stats [:stats :collection :cards_not_in_collections] 0)   #{"collections"}]
    [:collections                     (get-in stats [:stats :collection :collections] 0)                #{"collections"}]
    [:connected_databases             (get-in stats [:stats :database :databases :total] 0)             #{"databases"}]
    [:dashboards_with_params          (get-in stats [:stats :dashboard :with_params] 0)                 #{"dashboards"}]
    [:embedded_dashboards             (get-in stats [:stats :dashboard :embedded :total] 0)             #{"dashboards" "embedding"}]
    [:embedded_questions              (get-in stats [:stats :question :embedded :total] 0)              #{"questions" "embedding"}]
    [:entity_id_translations_last_24h (:entity_id_translations_last_24h metric-info 0)                  #{"embedding"}]
    [:first_time_only_alerts          (get-in stats [:stats :alert :first_time_only] 0)                 #{"alerts"}]
    [:metabase_fields                 (get-in stats [:stats :field :fields] 0)                          #{"fields"}]
    [:metrics                         (get-in stats [:stats :metric :metrics] 0)                        #{"metrics"}]
    [:models                          (:models metric-info 0)                                           #{}]
    [:native_questions                (get-in stats [:stats :question :questions :native] 0)            #{"questions"}]
    [:new_embedded_dashboards         (:new_embedded_dashboards metric-info 0)                          #{}]
    [:new_users_last_24h              (:new_users_last_24h metric-info 0)                               #{"users"}]
    [:permission_groups               (get-in stats [:stats :group :groups] 0)                          #{"permissions"}]
    [:pivot_tables                    (:pivot_tables metric-info 0)                                     #{}]
    [:public_dashboards               (get-in stats [:stats :dashboard :public :total] 0)               #{"dashboards"}]
    [:public_dashboards_with_params   (get-in stats [:stats :dashboard :public :with_params] 0)         #{"dashboards"}]
    [:public_questions                (get-in stats [:stats :question :public :total] 0)                #{"questions"}]
    [:public_questions_with_params    (get-in stats [:stats :question :public :with_params] 0)          #{"questions"}]
    ;; Query-builder questions are the non-native ones, which `question-metrics` tallies under `:gui`. Reading
    ;; `:total` here would simply duplicate the `:questions` metric below.
    [:query_builder_questions         (get-in stats [:stats :question :questions :gui] 0)               #{"questions"}]
    [:query_executions_last_24h       (:query_executions_last_24h metric-info 0)                        #{"query_executions"}]
    [:questions                       (get-in stats [:stats :question :questions :total] 0)             #{"questions"}]
    [:questions_with_params           (get-in stats [:stats :question :questions :with_params] 0)       #{"questions"}]
    [:segments                        (get-in stats [:stats :segment :segments] 0)                      #{"segments"}]
    [:tables                          (get-in stats [:stats :table :tables] 0)                          #{"tables"}]
    [:users                           (get-in stats [:stats :user :users :total] 0)                     #{"users"}]]))
(defn- whitelabeling-in-use?
  "Are any whitelabeling settings set to values other than their default? Checks every registered setting whose
  `:feature` is `:whitelabel` by calling its getter and comparing the result with its `:default`."
  []
  (->> (vals @setting/registered-settings)
       (filter #(= :whitelabel (:feature %)))
       (some (fn [{:keys [getter default]}]
               (not= (getter) default)))
       boolean))
(def csv-upload-version-availability
  "Map from driver engines to the first version ([major minor]) which introduced support for CSV uploads"
  {:postgres [47 0] :mysql [47 0] :redshift [49 6] :clickhouse [50 0]})
(defn- csv-upload-available?
  "Is CSV upload currently available to be used on this instance? True when at least one connected Database uses
  an engine listed in `csv-upload-version-availability` and the running version is at or above the version that
  introduced upload support for that engine. Returns false when the current version cannot be determined."
  []
  (let [major-version   (config/current-major-version)
        minor-version   (config/current-minor-version)
        engines         (t2/select-fn-set :engine :model/Database
                                          {:where [:in :engine (map name (keys csv-upload-version-availability))]})
        current-version [major-version minor-version]]
    (boolean
     (when (and major-version minor-version)
       (some (fn [engine]
               (when-let [required-version (csv-upload-version-availability engine)]
                 ;; Compare [major minor] as a unit. Checking major and minor independently would wrongly reject
                 ;; e.g. 50.0 against a requirement of 49.6 because 0 < 6.
                 (not (neg? (compare current-version required-version)))))
             engines)))))
(defn- ee-snowplow-features-data'
  "Placeholder entries for the features that need EE code to check: each one is reported as neither available nor
  enabled."
  []
  (for [feature [:sso-jwt :sso-saml :scim :sandboxes :email-allow-list]]
    {:name      feature
     :available false
     :enabled   false}))
(defenterprise ee-snowplow-features-data
  "OSS values to use for features which require calling EE code to check whether they are available/enabled."
  metabase-enterprise.stats
  []
  (ee-snowplow-features-data'))
(defn- snowplow-features-data
  "Feature entries that can be computed without EE code. Each entry is a map with `:name`, `:available` (can this
  instance use the feature?) and `:enabled` (is it actually in use?). `snowplow-features` later forces `:enabled`
  to false for anything that is not available."
  []
  [{:name      :email
    :available true
    :enabled   (email/email-configured?)}
   {:name      :slack
    :available true
    :enabled   (slack/slack-configured?)}
   {:name      :sso-google
    :available true
    :enabled   (google/google-auth-configured)}
   {:name      :sso-ldap
    :available true
    :enabled   (public-settings/ldap-enabled?)}
   {:name      :sample-data
    :available true
    :enabled   (t2/exists? Database, :is_sample true)}
   {:name      :interactive-embedding
    :available (premium-features/hide-embed-branding?)
    :enabled   (and
                (embed.settings/enable-embedding-interactive)
                (boolean (embed.settings/embedding-app-origins-interactive))
                (public-settings/sso-enabled?))}
   {:name      :static-embedding
    :available true
    :enabled   (and
                (embed.settings/enable-embedding-static)
                (or
                 (t2/exists? :model/Dashboard :enable_embedding true)
                 (t2/exists? :model/Card :enable_embedding true)))}
   {:name      :public-sharing
    :available true
    :enabled   (and
                (public-settings/enable-public-sharing)
                (or
                 (t2/exists? :model/Dashboard :public_uuid [:not= nil])
                 (t2/exists? :model/Card :public_uuid [:not= nil])))}
   {:name      :whitelabel
    :available (premium-features/enable-whitelabeling?)
    :enabled   (whitelabeling-in-use?)}
   {:name      :csv-upload
    :available (csv-upload-available?)
    :enabled   (t2/exists? :model/Database :uploads_enabled true)}
   {:name      :mb-analytics
    :available (premium-features/enable-audit-app?)
    :enabled   (premium-features/enable-audit-app?)}
   {:name      :advanced-permissions
    :available (premium-features/enable-advanced-permissions?)
    :enabled   (premium-features/enable-advanced-permissions?)}
   {:name      :serialization
    :available (premium-features/enable-serialization?)
    :enabled   (premium-features/enable-serialization?)}
   {:name      :official-collections
    :available (premium-features/enable-official-collections?)
    :enabled   (t2/exists? :model/Collection :authority_level "official")}
   {:name      :cache-granular-controls
    :available (premium-features/enable-cache-granular-controls?)
    :enabled   (t2/exists? :model/CacheConfig)}
   {:name      :attached-dwh
    :available (premium-features/has-attached-dwh?)
    :enabled   (premium-features/has-attached-dwh?)}
   {:name      :database-auth-providers
    :available (premium-features/enable-database-auth-providers?)
    :enabled   (premium-features/enable-database-auth-providers?)}
   {:name      :config-text-file
    :available (premium-features/enable-config-text-file?)
    :enabled   (some? (get env/env :mb-config-file-path))}
   {:name      :content-verification
    :available (premium-features/enable-content-verification?)
    :enabled   (t2/exists? :model/ModerationReview)}
   ;; Availability must come from this feature's own premium flag; it previously reused the content-verification
   ;; flag from the entry above (copy-paste).
   ;; NOTE(review): assumes `premium-features/enable-dashboard-subscription-filters?` is defined alongside the
   ;; other `enable-*?` predicates -- confirm.
   {:name      :dashboard-subscription-filters
    :available (premium-features/enable-dashboard-subscription-filters?)
    :enabled   (t2/exists? :model/Pulse {:where [:not= :parameters "[]"]})}
   {:name      :disable-password-login
    :available (premium-features/can-disable-password-login?)
    :enabled   (not (public-settings/enable-password-login))}
   {:name      :email-restrict-recipients
    :available (premium-features/enable-email-restrict-recipients?)
    :enabled   (not= (setting/get-value-of-type :keyword :user-visibility) :all)}
   {:name      :upload-management
    :available (premium-features/enable-upload-management?)
    :enabled   (t2/exists? :model/Table :is_upload true)}
   {:name      :snippet-collections
    :available (premium-features/enable-snippet-collections?)
    :enabled   (t2/exists? :model/Collection :namespace "snippets")}
   {:name      :cache-preemptive
    :available (premium-features/enable-preemptive-caching?)
    :enabled   (t2/exists? :model/CacheConfig :refresh_automatically true)}])
(defn- snowplow-features
  "Returns a vector of feature maps, built from `snowplow-features-data` followed by
  `ee-snowplow-features-data`, reshaped to match the expected Snowplow schema:

  - `:name` is converted from a keyword to a snake_case string
  - `:enabled` is forced to `false` whenever the feature is not `:available`
  - all map keys are stringified"
  []
  (letfn [(->snowplow-feature [{:keys [available] :as feature}]
            (-> feature
                ;; keyword -> string -> snake_case
                (update :name (comp u/->snake_case_en name))
                ;; Ensure that unavailable features are not reported as enabled
                (assoc :enabled (if available (:enabled feature) false))
                walk/stringify-keys))]
    (into []
          (map ->snowplow-feature)
          (concat (snowplow-features-data) (ee-snowplow-features-data)))))
(defn- snowplow-anonymous-usage-stats
  "Transforms `stats` into the string-keyed map required by the Snowplow schema for Metabase's snowplow collector.

  The returned map has the keys `analytics_uuid`, `features`, `grouped_metrics`, `instance_attributes`, `metrics`
  and `settings`. This function only builds the payload; it does not send it."
  [stats]
  (let [attributes     (snowplow-instance-attributes stats)
        plain-metrics  (snowplow-metrics stats (->snowplow-metric-info))
        binned-metrics (snowplow-grouped-metrics (->snowplow-grouped-metric-info))
        feature-list   (snowplow-features)
        uuid           (snowplow/analytics-uuid)]
    ;; `settings` is required by the json schema but is always sent as an empty vector here.
    {"analytics_uuid"      uuid
     "features"            feature-list
     "grouped_metrics"     binned-metrics
     "instance_attributes" attributes
     "metrics"             plain-metrics
     "settings"            []}))
(defn- generate-instance-stats!
  "Generate stats for this instance as data.

  Returns a map with two keys:

  - `:stats`          the result of `legacy-anonymous-usage-stats` with `:num_queries_cached_unbinned` removed
                      from `[:stats :cache]`
  - `:snowplow-stats` the result of `snowplow-anonymous-usage-stats` applied to the unmodified legacy stats"
  []
  (let [legacy-stats   (legacy-anonymous-usage-stats)
        ;; `:num_queries_cached_unbinned` is added to [[legacy-anonymous-usage-stats]]'s return value to make
        ;; computing [[snowplow-anonymous-usage-stats]] more efficient. It shouldn't be sent by
        ;; [[send-stats-deprecated!]].
        sendable-stats (update-in legacy-stats [:stats :cache] dissoc :num_queries_cached_unbinned)
        snowplow-stats (snowplow-anonymous-usage-stats legacy-stats)]
    {:stats          sendable-stats
     :snowplow-stats snowplow-stats}))
(defn- stats-post-cleanup
  "Cleanup step run at the end of [[phone-home-stats!]]; currently it only calls `clear-translation-count!`."
  []
  (clear-translation-count!))
(defn phone-home-stats!
  "Collect usage stats and phone them home.

  Does nothing (and returns nil) unless `public-settings/anon-tracking-enabled` is truthy. Otherwise it:

  1. generates the instance stats and measures how long that took, in whole seconds
  2. adds that duration to the Snowplow payload under `metadata` and runs the payload through
     `deep-string-keywords`
  3. asserts that the payload has exactly the expected set of top-level keys
  4. sends the legacy stats via `send-stats-deprecated!` and tracks the Snowplow `instance_stats` event
  5. runs `stats-post-cleanup`"
  []
  (when (public-settings/anon-tracking-enabled)
    (let [started-at-ms  (System/currentTimeMillis)
          generated      (generate-instance-stats!)
          finished-at-ms (System/currentTimeMillis)
          ;; integer division: sub-second remainders are dropped
          export-secs    (quot (- finished-at-ms started-at-ms) 1000)
          export-meta    [{"key"   "stats_export_time_seconds"
                           "value" export-secs}]
          snowplow-data  (deep-string-keywords
                          (assoc (:snowplow-stats generated) "metadata" export-meta))]
      (assert (= #{"analytics_uuid" "features" "grouped_metrics" "instance_attributes" "metadata" "metrics" "settings"}
                 (set (keys snowplow-data)))
              (str "Missing required keys in snowplow-data. got:" (sort (keys snowplow-data))))
      #_{:clj-kondo/ignore [:deprecated-var]}
      (send-stats-deprecated! (:stats generated))
      (snowplow/track-event! ::snowplow/instance_stats snowplow-data)
      (stats-post-cleanup))))