(ns metabase.models.database (:require [clojure.core.match :refer [match]] [medley.core :as m] [metabase.analytics.core :as analytics] [metabase.api.common :as api] [metabase.audit :as audit] [metabase.db :as mdb] [metabase.db.query :as mdb.query] [metabase.driver :as driver] [metabase.driver.impl :as driver.impl] [metabase.driver.util :as driver.u] [metabase.models.audit-log :as audit-log] [metabase.models.interface :as mi] [metabase.models.secret :as secret] [metabase.models.serialization :as serdes] [metabase.models.setting :as setting] [metabase.permissions.core :as perms] [metabase.premium-features.core :as premium-features :refer [defenterprise]] ;; Trying to use metabase.search would cause a circular reference ;_; [metabase.search.spec :as search.spec] [metabase.sync.schedules :as sync.schedules] [metabase.util :as u] [metabase.util.honey-sql-2 :as h2x] [metabase.util.i18n :refer [trs]] [metabase.util.log :as log] [metabase.util.quick-task :as quick-task] [methodical.core :as methodical] [toucan2.core :as t2] [toucan2.pipeline :as t2.pipeline] [toucan2.realize :as t2.realize] [toucan2.tools.with-temp :as t2.with-temp])) | |
;;; ----------------------------------------------- Entity & Lifecycle -----------------------------------------------
;; :model/Database is backed by the `metabase_database` application-database table.
(methodical/defmethod t2/table-name :model/Database [_model] :metabase_database)
;; Scrub secret properties from every Database instance returned by a select.
(methodical/defmethod t2.pipeline/results-transform [:toucan.result-type/instances :model/Database]
  [query-type model]
  (comp
   (next-method query-type model)
   ;; This is for safety - if a secret ever gets stored in details we don't want it to leak.
   ;; This will also help to secure properties that we set to secret in the future.
   (map secret/clean-secret-properties-from-database)))
;; Column transforms applied when reading/writing Database rows: `details` and `settings` are stored as encrypted
;; JSON, `engine`/`start_of_week` as keywords, the two schedule columns as validated cron strings.
(t2/deftransforms :model/Database
  {:details                     mi/transform-encrypted-json
   :engine                      mi/transform-keyword
   :metadata_sync_schedule      mi/transform-cron-string
   :cache_field_values_schedule mi/transform-cron-string
   :start_of_week               mi/transform-keyword
   :settings                    mi/transform-encrypted-json
   :dbms_version                mi/transform-json})
;; Automagically hydrate `:database` and `:db` keys with the :model/Database model.
(methodical/defmethod t2/model-for-automagic-hydration [:default :database] [_model _k] :model/Database)
(methodical/defmethod t2/model-for-automagic-hydration [:default :db] [_model _k] :model/Database)
(doto :model/Database
  (derive :metabase/model)
  (derive :hook/timestamped?)
  ;; Deliberately **not** deriving from `:hook/entity-id` because we should not be randomizing the `entity_id`s on
  ;; databases, tables or fields. Since the sync process can create them in multiple instances, randomizing them would
  ;; cause duplication rather than good matching if the two instances are later linked by serdes.
  #_(derive :hook/entity-id))
;; Test-only hook: runs before `with-temp` hands the temporary Database to the test body.
(methodical/defmethod t2.with-temp/do-with-temp* :before :model/Database
  [_model _explicit-attributes f]
  (fn [temp-object]
    ;; Grant All Users full perms on the temp-object so that tests don't have to manually set permissions
    (perms/set-database-permission! (perms/all-users-group) temp-object :perms/view-data :unrestricted)
    (perms/set-database-permission! (perms/all-users-group) temp-object :perms/create-queries :query-builder-and-native)
    (perms/set-database-permission! (perms/all-users-group) temp-object :perms/download-results :one-million-rows)
    (f temp-object)))
(defn- should-read-audit-db?
  "Audit Database should only be fetched if audit app is enabled. Despite the name, returns true when reads of
  `database-id` should be *blocked*: i.e. it is the audit DB but the audit-app premium feature is not enabled."
  [database-id]
  (and (not (premium-features/enable-audit-app?))
       (= database-id audit/audit-db-id)))
(defmethod mi/can-read? :model/Database
  ;; A user can "read" a Database when they hold a query-builder-or-better create-queries permission for it.
  ;; The audit DB is never readable unless the audit-app feature is enabled (see [[should-read-audit-db?]]).
  ([instance]
   (mi/can-read? :model/Database (u/the-id instance)))
  ([_model pk]
   (if (should-read-audit-db? pk)
     false
     (contains? #{:query-builder :query-builder-and-native}
                (perms/most-permissive-database-permission-for-user
                 api/*current-user-id*
                 :perms/create-queries
                 pk)))))
(defenterprise current-user-can-write-db?
  "OSS implementation. Returns a boolean whether the current user can write the given field."
  metabase-enterprise.advanced-permissions.common
  [_db-id]
  (mi/superuser?))
(defn- can-write?
  "Whether the current user may modify the Database with `db-id`. The audit Database is never writable; everything
  else defers to [[current-user-can-write-db?]] (enterprise-overridable, superuser-only in OSS)."
  [db-id]
  (if (= db-id audit/audit-db-id)
    false
    (current-user-can-write-db? db-id)))
;; Fix: the source as given has unbalanced delimiters -- the first arity body and the defmethod itself were each
;; missing a closing paren. Restored the balanced form; logic is unchanged.
(defmethod mi/can-write? :model/Database
  ;; Lack of permission to change database details will also exclude the `details` field from the HTTP response,
  ;; cf. the implementation of [[metabase.models.interface/to-json]] for `:model/Database`.
  ;; An attached DWH (`is_attached_dwh`) is never writable, even for users who could otherwise write the DB.
  ([{:keys [is_attached_dwh] :as instance}]
   (and (can-write? (u/the-id instance))
        (not is_attached_dwh)))
  ([_model pk]
   (and (can-write? pk)
        (not (:is_attached_dwh (t2/select-one :model/Database :id pk))))))
(defn- infer-db-schedules
  "Infer database schedule settings based on its options."
  [{:keys [details is_full_sync is_on_demand cache_field_values_schedule metadata_sync_schedule] :as database}]
  (match [(boolean (:let-user-control-scheduling details)) is_full_sync is_on_demand]
    ;; user does not control scheduling -> assign randomized default cron schedules for both sync steps
    [false _ _]
    (merge
     database
     (sync.schedules/schedule-map->cron-strings
      (sync.schedules/default-randomized-schedule)))

    ;; "Regularly on a schedule"
    ;; -> sync both steps, schedule should be provided
    [true true false]
    (do
      (assert (every? some? [cache_field_values_schedule metadata_sync_schedule]))
      database)

    ;; "Only when adding a new filter" or "Never, I'll do it myself"
    ;; -> Sync metadata only
    [true false _]
    ;; the schedules should only contain metadata_sync, but the FE might send both,
    ;; so we just manually nullify cache_field_values_schedule here
    (assoc database :cache_field_values_schedule nil)))
(defn- check-and-schedule-tasks-for-db!
  "(Re)schedule sync operation tasks for `database`. Best-effort: any error is logged, never thrown."
  [database]
  (try
    ;; this is done this way to avoid circular dependencies
    ((requiring-resolve 'metabase.sync.task.sync-databases/check-and-schedule-tasks-for-db!) database)
    (catch Throwable e
      (log/error e "Error scheduling tasks for DB"))))
(defn maybe-test-and-migrate-details!
  "When a driver has db-details to test and migrate: we loop through them until we find one that works and update
  the database with the working details. Returns the working details, or the original `details` when no candidate
  connects (or when the driver offers no candidates)."
  [{:keys [engine details] :as database}]
  (if-let [details-to-test (seq (driver/db-details-to-test-and-migrate (keyword engine) details))]
    (do
      (log/infof "Attempting to connect to %d possible legacy details" (count details-to-test))
      (loop [[test-details & tail] details-to-test]
        (if test-details
          (if (driver.u/can-connect-with-details? engine (assoc test-details :engine engine))
            (do
              (log/infof "Successfully connected, migrating to: %s" (pr-str test-details))
              (t2/update! :model/Database (:id database) {:details test-details})
              test-details)
            (recur tail))
          ;; if we go through the list and we can't find a working detail to test, keep original value
          details)))
    details))
(defn health-check-database!
  "Checks database health off-thread.
  - checks connectivity
  - cleans-up ambiguous legacy db-details
  Skips the audit and sample databases. Results are reported via the log and the
  :metabase-database/healthy / :metabase-database/unhealthy analytics counters."
  [{:keys [engine] :as database}]
  (when-not (or (:is_audit database) (:is_sample database))
    (log/info (u/format-color :cyan "Health check: queueing %s {:id %d}" (:name database) (:id database)))
    (quick-task/submit-task!
     (fn []
       ;; migrate any ambiguous legacy details first so we test the details we will actually keep
       (let [details (maybe-test-and-migrate-details! database)]
         (try
           (log/info (u/format-color :cyan "Health check: checking %s {:id %d}" (:name database) (:id database)))
           (if (driver.u/can-connect-with-details? engine (assoc details :engine engine))
             (do
               (log/info (u/format-color :green "Health check: success %s {:id %d}" (:name database) (:id database)))
               (analytics/inc! :metabase-database/healthy {:driver engine} 1))
             (do
               (log/warn (u/format-color :yellow "Health check: failure %s {:id %d}" (:name database) (:id database)))
               (analytics/inc! :metabase-database/unhealthy {:driver engine} 1)))
           (catch Throwable e
             (do
               (log/error e (u/format-color :red "Health check: failure with error %s {:id %d}" (:name database) (:id database)))
               (analytics/inc! :metabase-database/unhealthy {:driver engine} 1)))))))))
(defn check-health-and-schedule-tasks!
  "(Re)schedule sync operation tasks for any database which is not yet being synced regularly, and queue an
  off-thread health check for every database."
  []
  (doseq [database (t2/select :model/Database)]
    (health-check-database! database)
    (check-and-schedule-tasks-for-db! database)))
;; TODO - something like NSNotificationCenter in Objective-C would be really really useful here so things that want to
;; implement behavior when an object is deleted can do it without having to put code here
(defn- unschedule-tasks!
  "Unschedule any currently pending sync operation tasks for `database`. Best-effort: errors are logged, not thrown."
  [database]
  (try
    ;; resolved at runtime to avoid a circular dependency on the sync task namespace
    ((requiring-resolve 'metabase.sync.task.sync-databases/unschedule-tasks-for-db!) database)
    (catch Throwable e
      (log/error e "Error unscheduling tasks for DB."))))
;; TODO -- consider whether this should live HERE or inside the permissions module
(defn- set-new-database-permissions!
  "Set the initial group permissions for a newly-created `database`, inside a single transaction. The audit DB
  gets explicit view-only access for all non-admin groups; any other database gets the standard new-database
  permissions via [[perms/set-new-database-permissions!]]."
  [database]
  (t2/with-transaction [_conn]
    (let [all-users-group  (perms/all-users-group)
          non-magic-groups (perms/non-magic-groups)
          non-admin-groups (conj non-magic-groups all-users-group)]
      (if (:is_audit database)
        (doseq [group non-admin-groups]
          (perms/set-database-permission! group database :perms/view-data :unrestricted)
          (perms/set-database-permission! group database :perms/create-queries :no)
          (perms/set-database-permission! group database :perms/download-results :one-million-rows)
          (perms/set-database-permission! group database :perms/manage-table-metadata :no)
          (perms/set-database-permission! group database :perms/manage-database :no))
        (doseq [group non-admin-groups]
          (perms/set-new-database-permissions! group database))))))
;; After insert: grant initial permissions and schedule sync tasks; the inserted row is returned unchanged.
(t2/define-after-insert :model/Database
  [database]
  (u/prog1 database
    (set-new-database-permissions! database)
    ;; schedule the Database sync & analyze tasks. This will not do anything when coming
    ;; from [[metabase-enterprise.advanced-config.file/initialize!]], since the scheduler will not be up yet. Thus, we
    ;; call [[metabase.sync.task.sync-databases/check-and-schedule-tasks!]] from [[metabase.core.core/init!]] to
    ;; self-heal.
    (check-and-schedule-tasks-for-db! (t2.realize/realize database))))
(def ^:private ^:dynamic *normalizing-details*
  "Track whether we're calling [[driver/normalize-db-details]] already to prevent infinite recursion.
  [[driver/normalize-db-details]] is actually done for side effects!"
  false)
;; After select: attach the driver's feature set and let the driver normalize legacy `details`.
(t2/define-after-select :model/Database
  [{driver :engine, :as database}]
  (letfn [(normalize-details [db]
            ;; bind the guard so a nested select triggered by normalization doesn't recurse forever
            (binding [*normalizing-details* true]
              (driver/normalize-db-details
               driver
               (m/update-existing-in db [:details :auth-provider] keyword))))]
    (cond-> database
      ;; TODO - this is only really needed for API responses. This should be a `hydrate` thing instead!
      (driver.impl/registered? driver)
      (assoc :features (driver.u/features driver (t2.realize/realize database)))

      ;; only normalize when the driver is registered, details decrypted to a map, and we are not already
      ;; inside a [[driver/normalize-db-details]] call
      (and (driver.impl/registered? driver)
           (map? (:details database))
           (not *normalizing-details*))
      normalize-details)))
;; Before delete: unschedule sync tasks, clean up secrets, manually cascade-delete Fields, and notify the driver.
(t2/define-before-delete :model/Database
  [{id :id, driver :engine, :as database}]
  (unschedule-tasks! database)
  (secret/delete-orphaned-secrets! database)
  ;; We need to use toucan to delete the fields instead of cascading deletes because MySQL doesn't support columns with cascade delete
  ;; foreign key constraints in generated columns. #44866
  (when-some [table-ids (not-empty (t2/select-pks-vec :model/Table :db_id id))]
    (t2/delete! :model/Field :table_id [:in table-ids]))
  ;; best-effort driver notification: never block deletion on a notification failure
  (try
    (driver/notify-database-updated driver database)
    (catch Throwable e
      (log/error e "Error sending database deletion notification"))))
(defn- handle-uploads-enabled!
  "This function maintains the invariant that only one database can have uploads_enabled=true. Returns `db`
  unchanged; as a side effect, disables uploads (and clears upload settings) on whichever database currently has
  them enabled."
  [db]
  (when (:uploads_enabled db)
    (t2/update! :model/Database :uploads_enabled true {:uploads_enabled false :uploads_table_prefix nil :uploads_schema_name nil}))
  db)
;; Before update: validate engine/actions constraints and keep dependent fields (schedules, secrets, uploads,
;; json_unfolding) consistent with the incoming changes.
(t2/define-before-update :model/Database
  [database]
  (let [changes                     (t2/changes database)
        {new-engine   :engine
         new-settings :settings}    changes
        {is-sample?        :is_sample
         existing-settings :settings
         existing-engine   :engine} (t2/original database)
        new-engine                  (some-> new-engine keyword)]
    (if (and is-sample?
             new-engine
             (not= new-engine existing-engine))
      (throw (ex-info (trs "The engine on a sample database cannot be changed.")
                      {:status-code     400
                       :existing-engine existing-engine
                       :new-engine      new-engine}))
      (u/prog1 (cond-> database
                 ;; If the engine doesn't support nested field columns, `json_unfolding` must be nil
                 (and (some? (:details changes))
                      (not (driver.u/supports? (or new-engine existing-engine) :nested-field-columns database)))
                 (update :details dissoc :json_unfolding)

                 (or
                  ;; if there is any change to the let-user-control-scheduling setting
                  (some? (get-in changes [:details :let-user-control-scheduling]))
                  ;; if let-user-control-scheduling is already on, we should always try to re-infer the schedules
                  (get-in database [:details :let-user-control-scheduling])
                  ;; if there is a change to the schedules, make sure it respects the settings
                  (some some? [(:cache_field_values_schedule changes) (:metadata_sync_schedule changes)]))
                 infer-db-schedules

                 (some? (:details changes))
                 secret/handle-incoming-client-secrets!

                 (:uploads_enabled changes)
                 handle-uploads-enabled!)
        ;; This maintains a constraint that if a driver doesn't support actions, it can never be enabled
        ;; If we drop support for actions for a driver, we'd need to add a migration to disable actions for all databases
        (when (and (:database-enable-actions (or new-settings existing-settings))
                   (not (driver.u/supports? (or new-engine existing-engine) :actions database)))
          (throw (ex-info (trs "The database does not support actions.")
                          {:status-code     400
                           :existing-engine existing-engine
                           :new-engine      new-engine})))))))
(t2/define-after-update :model/Database
  [database]
  ;; This will not do anything when coming from [[metabase-enterprise.advanced-config.file/initialize!]], since the
  ;; scheduler will not be up yet. Thus, we call [[metabase.sync.task.sync-databases/check-and-schedule-tasks!]]
  ;; from [[metabase.core/init!]] to self-heal.
  (check-and-schedule-tasks-for-db! (t2.realize/realize database)))
;; Before insert: apply defaults (full sync on, on-demand off, empty details, "incomplete" sync status), then run
;; the standard pre-insert hooks: client-secret handling, the single-uploads-database invariant, schedule inference.
(t2/define-before-insert :model/Database
  [{:keys [details initial_sync_status], :as database}]
  (-> (merge {:is_full_sync true
              :is_on_demand false}
             database)
      (cond->
       (not details)             (assoc :details {})
       (not initial_sync_status) (assoc :initial_sync_status "incomplete"))
      secret/handle-incoming-client-secrets!
      handle-uploads-enabled!
      infer-db-schedules))
;; Databases are identified by name + engine for serialization-hash purposes.
(defmethod serdes/hash-fields :model/Database [_database] [:name :engine])
(defmethod mi/exclude-internal-content-hsql :model/Database
  ;; Honey SQL condition that excludes the sample and audit databases (Metabase-internal content).
  [_model & {:keys [table-alias]}]
  (let [maybe-alias #(h2x/identifier :field table-alias %)]
    [:not [:or (maybe-alias :is_sample) (maybe-alias :is_audit)]]))
;;; ---------------------------------------------- Hydration / Util Fns ----------------------------------------------
(defn tables
  "Return the active Tables for this Database, ordered by lower-cased display name.
  NOTE(review): the displaced original docstring appears truncated ('Return the ... only used in tests') --
  confirm whether this is intended for production use before relying on it outside tests."
  [{:keys [id]}]
  ;; TODO - do we want to include tables that should be `:hidden`?
  (t2/select :model/Table :db_id id :active true {:order-by [[:%lower.display_name :asc]]}))
(methodical/defmethod t2/batched-hydrate [:model/Database :tables]
  "Batch hydrate `Tables` for the given `Database`. Fetches the active Tables for all `databases` in one select,
  groups them by `:db_id`, and attaches each group under `k`; databases with no tables get `[]`."
  [_model k databases]
  (mi/instances-with-hydrated-data
   databases k
   #(group-by :db_id
              ;; TODO - do we want to include tables that should be `:hidden`?
              (t2/select :model/Table
                         :db_id [:in (map :id databases)]
                         :active true
                         {:order-by [[:db_id :asc] [:%lower.display_name :asc]]}))
   :id
   {:default []}))
(defn pk-fields
  "Return all the primary-key Fields (semantic type :type/PK) for `database`'s active Tables, or nil when the
  database has no active tables."
  [{:keys [id]}]
  ;; use the :model/... keywords instead of the quoted 'Table / 'Field symbols for consistency with the rest of
  ;; this namespace (both resolve to the same models)
  (let [table-ids (t2/select-pks-set :model/Table :db_id id, :active true)]
    (when (seq table-ids)
      (t2/select :model/Field :table_id [:in table-ids], :semantic_type (mdb.query/isa :type/PK)))))
;;; -------------------------------------------------- JSON Encoder --------------------------------------------------
(defn sensitive-fields-for-db
  "Gets all sensitive fields that should be redacted in API responses for a given database. Delegates to
  [[driver.u/sensitive-fields]] using the given database's driver (if valid), so refer to that for full details. If a
  valid driver can't be clearly determined, this simply returns the default set
  ([[driver.u/default-sensitive-fields]])."
  [database]
  ;; fix: the original bound `driver` and then redundantly called `database->driver` a second time when passing it
  ;; to `sensitive-fields`; reuse the binding instead
  (if-let [driver (when (not-empty database)
                    (driver.u/database->driver database))]
    (driver.u/sensitive-fields driver)
    driver.u/default-sensitive-fields))
(methodical/defmethod mi/to-json :model/Database
  "When encoding a Database as JSON remove the `details` for any User without write perms for the DB.
  Users with write perms can see the `details` but remove anything resembling a password. No one gets to see this in
  an API response!
  Also remove settings that the User doesn't have read perms for."
  [db json-generator]
  (next-method
   (let [db (if (not (mi/can-write? db))
              (do (log/debug "Fully redacting database details during json encoding.")
                  (dissoc db :details))
              (do (log/debug "Redacting sensitive fields within database details during json encoding.")
                  (-> db
                      (secret/to-json-hydrate-redacted-secrets)
                      (update :details (fn [details]
                                         ;; replace the value of every sensitive field that has one with the
                                         ;; protected-password placeholder so nothing password-like leaks
                                         (reduce
                                          #(m/update-existing %1 %2 (fn [v] (when v secret/protected-password)))
                                          details
                                          (sensitive-fields-for-db db)))))))]
     (update db :settings
             (fn [settings]
               (when (map? settings)
                 (u/prog1
                   (m/filter-keys
                    (fn [setting-name]
                      (try
                        (setting/can-read-setting? setting-name
                                                   (setting/current-user-readable-visibilities))
                        (catch Throwable e
                          ;; there is a known issue where exceptions are ignored when rendering the API response (#32822)
                          ;; If you see this error, you probably need to define a setting for `setting-name`.
                          ;; But ideally, we should resolve the above issue, and remove this try/catch
                          (log/errorf e "Error checking the readability of %s setting. The setting will be hidden in API response."
                                      setting-name)
                          ;; let's be conservative and hide it by default; if you want to see it,
                          ;; you need to define it :)
                          false)))
                    settings)
                   (when (not= <> settings)
                     (log/debug "Redacting non-user-readable database settings during json encoding.")))))))
   json-generator))
;;; ------------------------------------------------ Serialization ----------------------------------------------------

(defmethod serdes/make-spec "Database"
  [_model-name {:keys [include-database-secrets]}]
  {:copy [:auto_run_queries :cache_field_values_schedule :caveats :dbms_version
          :description :engine :entity_id :is_audit :is_attached_dwh :is_full_sync :is_on_demand :is_sample
          :metadata_sync_schedule :name :points_of_interest :refingerprint :settings :timezone :uploads_enabled
          :uploads_schema_name :uploads_table_prefix]
   :skip [;; deprecated field
          :cache_ttl]
   :transform {:created_at (serdes/date)
               ;; details should be imported if available regardless of options, but only exported when
               ;; `include-database-secrets` is set and the database is not an attached DWH
               :details {:export-with-context
                         (fn [current _ details]
                           (if (and include-database-secrets
                                    (not (:is_attached_dwh current)))
                             details
                             ::serdes/skip))
                         :import identity}
               :creator_id (serdes/fk :model/User)
               ;; freshly-imported databases are always marked as fully synced
               :initial_sync_status {:export identity :import (constantly "complete")}}})
(defmethod serdes/entity-id "Database"
  ;; Databases are identified in serialization by their name rather than an entity_id.
  [_ {:keys [name]}]
  name)
(defmethod serdes/generate-path "Database"
  ;; Databases are top-level serdes entities: the path is a single segment keyed by name.
  [_ {:keys [name]}]
  [{:model "Database" :id name}])
(defmethod serdes/load-find-local "Database"
  ;; Match an incoming serialized Database to a local one by name (the serdes `:id` for this model).
  [[{:keys [id]}]]
  (t2/select-one :model/Database :name id))
(defmethod serdes/storage-path "Database" [{:keys [name]} _]
  ;; ["databases" "db_name" "db_name"] directory for the database with same-named file inside.
  ["databases" name name])
;; Only id, name, and engine are recorded in the audit log for Database events.
(defmethod audit-log/model-details :model/Database [database _event-type] (select-keys database [:id :name :engine]))
(def ^{:arglists '([table-id])} table-id->database-id
  "Retrieve the Database ID for the Table with `table-id`. Memoized per application database."
  (mdb/memoize-for-application-db
   (fn [table-id]
     {:pre [(integer? table-id)]}
     (t2/select-one-fn :db_id :model/Table, :id table-id))))
;;; ------------------------------------------------- Search ----------------------------------------------------------
;; Search-index spec: databases are searchable by name and description, with initial sync status rendered.
(search.spec/define-spec "database"
  {:model :model/Database
   :attrs {:archived      false
           :collection-id false
           :creator-id    false
           ;; not sure if this is another bug
           :database-id   false
           :created-at    true
           :updated-at    true}
   :search-terms [:name :description]
   :render-terms {:initial-sync-status true}})