Logic for updating Metabase Table models from metadata fetched from a physical DB. | (ns metabase.sync.sync-metadata.tables (:require [clojure.data :as data] [clojure.set :as set] [medley.core :as m] [metabase.lib.schema.common :as lib.schema.common] [metabase.models.database :refer [Database]] [metabase.models.humanization :as humanization] [metabase.models.interface :as mi] [metabase.models.table :refer [Table]] [metabase.sync.fetch-metadata :as fetch-metadata] [metabase.sync.interface :as i] [metabase.sync.sync-metadata.metabase-metadata :as metabase-metadata] [metabase.sync.util :as sync-util] [metabase.util :as u] [metabase.util.log :as log] [metabase.util.malli :as mu] [metabase.util.malli.schema :as ms] [toucan2.core :as t2])) |
------------------------------------------------ "Crufty" Tables ------------------------------------------------- | |
Crufty tables are ones we know are from frameworks like Rails or Django and thus automatically mark as | |
Regular expressions that match Tables that should automatically given the | (def ^:private crufty-table-patterns #{;; Django #"^auth_group$" #"^auth_group_permissions$" #"^auth_permission$" #"^django_admin_log$" #"^django_content_type$" #"^django_migrations$" #"^django_session$" #"^django_site$" #"^south_migrationhistory$" #"^user_groups$" #"^user_user_permissions$" ;; Drupal #".*_cache$" #".*_revision$" #"^advagg_.*" #"^apachesolr_.*" #"^authmap$" #"^autoload_registry.*" #"^batch$" #"^blocked_ips$" #"^cache.*" #"^captcha_.*" #"^config$" #"^field_revision_.*" #"^flood$" #"^node_revision.*" #"^queue$" #"^rate_bot_.*" #"^registry.*" #"^router.*" #"^semaphore$" #"^sequences$" #"^sessions$" #"^watchdog$" ;; Rails / Active Record #"^schema_migrations$" #"^ar_internal_metadata$" ;; PostGIS #"^spatial_ref_sys$" ;; nginx #"^nginx_access_log$" ;; Liquibase #"^databasechangelog$" #"^databasechangeloglock$" ;; Lobos #"^lobos_migrations$" ;; MSSQL #"^syncobj_0x.*"}) |
Should we give newly created TABLE a | (mu/defn- is-crufty-table? [table-name] (some #(re-find % (u/lower-case-en table-name)) crufty-table-patterns)) |
---------------------------------------------------- Syncing ----------------------------------------------------- | |
If there is a version in the db-metadata update the DB to have that in the DB model | (mu/defn- update-database-metadata! [database :- i/DatabaseInstance db-metadata :- i/DatabaseMetadata] (log/infof "Found new version for DB: %s" (:version db-metadata)) (t2/update! Database (u/the-id database) {:details (assoc (:details database) :version (:version db-metadata))})) |
(defn- cruft-dependent-columns [table-name] ;; if this is a crufty table, mark initial sync as complete since we'll skip the subsequent sync steps (let [is-crufty? (is-crufty-table? table-name)] {:initial_sync_status (if is-crufty? "complete" "incomplete") :visibility_type (when is-crufty? :cruft)})) | |
Creates a new table in the database, ready to be synced. Throws an exception if there is already a table with the same name, schema and database ID. | (defn create-table! [database table] (t2/insert-returning-instance! Table (merge (cruft-dependent-columns (:name table)) {:active true :db_id (:id database) :schema (:schema table) :description (:description table) :database_require_filter (:database_require_filter table) :display_name (or (:display_name table) (humanization/name->human-readable-name (:name table))) :name (:name table)}))) |
Create a single new table in the database, or mark it as active if it already exists. | (defn create-or-reactivate-table! [database {schema :schema table-name :name :as table}] (if-let [existing-id (t2/select-one-pk Table :db_id (u/the-id database) :schema schema :name table-name :active false)] ;; if the table already exists but is marked *inactive*, mark it as *active* (t2/update! Table existing-id (assoc (cruft-dependent-columns (:name table)) :active true)) ;; otherwise create a new Table (create-table! database table))) |
TODO - should we make this logic case-insensitive like it is for fields? | |
Create | (mu/defn- create-or-reactivate-tables! [database :- i/DatabaseInstance new-tables :- [:set i/DatabaseMetadataTable]] (doseq [table new-tables] (log/info "Found new table:" (sync-util/name-for-logging (mi/instance Table table)))) (doseq [table new-tables] (create-or-reactivate-table! database table))) |
Mark any | (mu/defn- retire-tables! [database :- i/DatabaseInstance old-tables :- [:set [:map [:name ::lib.schema.common/non-blank-string] [:schema [:maybe ::lib.schema.common/non-blank-string]]]]] (log/info "Marking tables as inactive:" (for [table old-tables] (sync-util/name-for-logging (mi/instance Table table)))) (doseq [{schema :schema table-name :name :as _table} old-tables] (t2/update! Table {:db_id (u/the-id database) :schema schema :name table-name :active true} {:active false}))) |
Update the table metadata if it has changed. | (mu/defn- update-table-metadata-if-needed! [table-metadata :- i/DatabaseMetadataTable metabase-table :- (ms/InstanceOf :model/Table)] (log/infof "Updating table metadata for %s" (sync-util/name-for-logging metabase-table)) (let [to-update-keys [:description :database_require_filter :estimated_row_count] old-table (select-keys metabase-table to-update-keys) new-table (select-keys (merge (zipmap to-update-keys (repeat nil)) table-metadata) to-update-keys) [_ changes _] (data/diff old-table new-table) changes (cond-> changes ;; we only update the description if the initial state is nil ;; because don't want to override the user edited description if it exists (some? (:description old-table)) (dissoc changes :description))] (doseq [[k v] changes] (log/infof "%s of %s changed from %s to %s" k (sync-util/name-for-logging metabase-table) (get metabase-table k) v)) (when (seq changes) (t2/update! :model/Table (:id metabase-table) changes)))) |
(mu/defn- update-tables-metadata-if-needed! [table-metadatas :- [:set i/DatabaseMetadataTable] metabase-tables :- [:set (ms/InstanceOf :model/Table)]] (let [name+schema->table-metadata (m/index-by (juxt :name :schema) table-metadatas) name+schema->metabase-table (m/index-by (juxt :name :schema) metabase-tables)] (doseq [name+schema (set/intersection (set (keys name+schema->table-metadata)) (set (keys name+schema->metabase-table)))] (update-table-metadata-if-needed! (name+schema->table-metadata name+schema) (name+schema->metabase-table name+schema))))) | |
(mu/defn- table-set :- [:set i/DatabaseMetadataTable] "So there exist tables for the user and metabase metadata tables for internal usage by metabase. Get set of user tables only, excluding metabase metadata tables." [db-metadata :- i/DatabaseMetadata] (into #{} (remove metabase-metadata/is-metabase-metadata-table?) (:tables db-metadata))) | |
(mu/defn- db->our-metadata :- [:set (ms/InstanceOf :model/Table)] "Return information about what Tables we have for this DB in the Metabase application DB." [database :- i/DatabaseInstance] (set (t2/select [:model/Table :id :name :schema :description :database_require_filter :estimated_row_count] :db_id (u/the-id database) :active true))) | |
Sync the Tables recorded in the Metabase application database with the ones obtained by calling | (mu/defn sync-tables-and-database! ([database :- i/DatabaseInstance] (sync-tables-and-database! database (fetch-metadata/db-metadata database))) ([database :- i/DatabaseInstance db-metadata] ;; determine what's changed between what info we have and what's in the DB (let [db-tables (table-set db-metadata) name+schema #(select-keys % [:name :schema]) name+schema->db-table (m/index-by name+schema db-tables) our-metadata (db->our-metadata database) keep-name+schema-set (fn [metadata] (set (map name+schema metadata))) [new-tables old-tables] (data/diff (keep-name+schema-set (set (map name+schema db-tables))) (keep-name+schema-set (set (map name+schema our-metadata))))] ;; update database metadata from database (when (some? (:version db-metadata)) (sync-util/with-error-handling (format "Error creating/reactivating tables for %s" (sync-util/name-for-logging database)) (update-database-metadata! database db-metadata))) ;; create new tables as needed or mark them as active again (when (seq new-tables) (let [new-tables-info (set (map #(get name+schema->db-table (name+schema %)) new-tables))] (sync-util/with-error-handling (format "Error creating/reactivating tables for %s" (sync-util/name-for-logging database)) (create-or-reactivate-tables! database new-tables-info)))) ;; mark old tables as inactive (when (seq old-tables) (sync-util/with-error-handling (format "Error retiring tables for %s" (sync-util/name-for-logging database)) (retire-tables! database old-tables))) (sync-util/with-error-handling (format "Error updating table metadata for %s" (sync-util/name-for-logging database)) ;; we need to fetch the tables again because we might have retired tables in the previous steps (update-tables-metadata-if-needed! db-tables (db->our-metadata database))) {:updated-tables (+ (count new-tables) (count old-tables)) :total-tables (count our-metadata)}))) |