| |
| ( ns metabase-enterprise.audit-app.audit
( :require
[ babashka.fs :as fs ]
[ clojure.java.io :as io ]
[ clojure.string :as str ]
[ metabase-enterprise.serialization.cmd :as serialization.cmd ]
[ metabase.audit :as audit ]
[ metabase.db :as mdb ]
[ metabase.models.serialization :as serdes ]
[ metabase.models.setting :refer [ defsetting ] ]
[ metabase.plugins :as plugins ]
[ metabase.premium-features.core :refer [ defenterprise ] ]
[ metabase.sync.util :as sync-util ]
[ metabase.util :as u ]
[ metabase.util.files :as u.files ]
[ metabase.util.log :as log ]
[ toucan2.core :as t2 ] )
( :import
( java.nio.file Path )
( java.util.jar JarEntry JarFile ) ) )
|
|
| ( set! *warn-on-reflection* true )
|
|
(defn- running-from-jar?
  "Returns true iff we are running from a jar.

  `.getResource` will return a `java.net.URL`, and those start with \"jar:\" if and only if the app is running from a
  jar.

  More info: https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/Thread.html"
  []
  (let [loader   (.getContextClassLoader (Thread/currentThread))
        ;; the empty resource name is looked up on the context class loader
        root-url (.getResource loader "")]
    (str/starts-with? root-url "jar:")))
|
|
(defn- get-jar-path
  "Returns the path to the currently running jar file.

  Asserts that [[running-from-jar?]] is true first.

  More info: https://stackoverflow.com/questions/320542/how-to-get-the-path-of-a-running-jar-file"
  []
  (assert (running-from-jar?) "Can only get-jar-path when running from a jar.")
  ;; the location is read from the code source of the class of an empty map literal
  (let [code-source (.. (class {}) getProtectionDomain getCodeSource)
        location    (.getLocation code-source)]
    (.getPath (.toURI location))))
|
|
(defn copy-from-jar!
  "Recursively copies a subdirectory (at `resource-path`) from the jar at `jar-path` into `out-dir`.

  Scans every entry in the jar to see which ones are inside of `resource-path`, since there's no way to \"ls\" or list
  a directory inside of a jar's resources. Each matching entry is written to `out-dir` under its full entry name, so
  the `resource-path` prefix is preserved below `out-dir`.

  The jar is opened for the duration of the copy and closed afterwards, even if copying throws. Returns nil."
  [jar-path resource-path out-dir]
  ;; `with-open` guarantees the JarFile handle is released; the `doseq` is eager, so every entry is
  ;; read while the jar is still open.
  (with-open [jar-file (JarFile. (str jar-path))]
    (doseq [^JarEntry entry (iterator-seq (.entries jar-file))
            :let            [entry-name (.getName entry)]
            :when           (str/starts-with? entry-name resource-path)
            :let            [out-file (fs/path out-dir entry-name)]]
      (if (.isDirectory entry)
        (fs/create-dirs out-file)
        (do
          ;; entries are not guaranteed to be preceded by their directory entry, so make sure the
          ;; parent exists before writing the file
          (-> out-file fs/parent fs/create-dirs)
          (with-open [in  (.getInputStream jar-file entry)
                      out (io/output-stream (str out-file))]
            (io/copy in out)))))))
|
|
(def default-question-overview-entity-id
  "Default Question Overview (this is a dashboard) entity id."
  "jm7KgY6IuS6pQjkBZ7WUI")
|
|
(def default-dashboard-overview-entity-id
  "Default Dashboard Overview (this is a dashboard) entity id."
  "bJEYb0o5CXlfWFcIztDwJ")
|
|
(defn- install-database!
  "Creates the audit db, a clone of the app db used for auditing purposes.

  - This uses a weird ID because some tests were hardcoded to look for database with ID = 2, and inserting an extra db
    throws that off since these IDs are sequential.

  After inserting the Database row with the given `engine` and `id`, deletes every Permissions row whose `:object`
  matches `%/db/<id>/%`."
  [engine id]
  (let [audit-db-row  {:is_audit         true
                       :id               id
                       :name             "Internal Metabase Database"
                       :description      "Internal Audit DB used to power metabase analytics."
                       :engine           engine
                       :is_full_sync     true
                       :is_on_demand     false
                       :creator_id       nil
                       :auto_run_queries true}
        perms-pattern (str "%/db/" id "/%")]
    (t2/insert! :model/Database audit-db-row)
    (t2/delete! :model/Permissions {:where [:like :object perms-pattern]})))
|
|
(defn- adjust-audit-db-to-source!
  "When the app DB type is `:mysql` or `:h2`, rewrites the audit DB's metadata before analytics content is loaded:

  - the audit Database row gets `:engine \"postgres\"`;
  - on `:mysql`, every Table of the audit DB gets `:schema \"public\"`;
  - on `:h2`, Table schemas and names, and the names of the Fields of those Tables, are lower-cased.

  Does nothing for any other app DB type."
  [{audit-db-id :id}]
  (let [app-db-type (mdb/db-type)]
    (when (#{:mysql :h2} app-db-type)
      (t2/update! :model/Database audit-db-id {:engine "postgres"})
      (case app-db-type
        :mysql
        (t2/update! :model/Table {:db_id audit-db-id} {:schema "public"})

        :h2
        (let [audit-table-ids {:select [:id]
                               :from   [(t2/table-name :model/Table)]
                               :where  [:= :db_id audit-db-id]}]
          (t2/update! :model/Table {:db_id audit-db-id} {:schema [:lower :schema] :name [:lower :name]})
          (t2/update! :model/Field
                      {:table_id [:in audit-table-ids]}
                      {:name [:lower :name]})))
      (log/info "Adjusted Audit DB for loading Analytics Content"))))
|
|
(defn- adjust-audit-db-to-host!
  "When the audit DB's `:engine` differs from the app DB type, rewrites the audit DB's metadata to match the host:

  - the audit Database row's `:engine` is set to the name of the app DB type;
  - on `:mysql`, every Table of the audit DB gets a nil `:schema`;
  - on `:h2`, Table schemas and names, and the names of the Fields of those Tables, are upper-cased.

  Does nothing when the engine already matches."
  [{audit-db-id :id :keys [engine]}]
  (when-not (= engine (mdb/db-type))
    (t2/update! :model/Database audit-db-id {:engine (name (mdb/db-type))})
    (when (= :mysql (mdb/db-type))
      (t2/update! :model/Table {:db_id audit-db-id} {:schema nil}))
    (when (= :h2 (mdb/db-type))
      (let [audit-table-ids {:select [:id]
                             :from   [(t2/table-name :model/Table)]
                             :where  [:= :db_id audit-db-id]}]
        (t2/update! :model/Table {:db_id audit-db-id} {:schema [:upper :schema] :name [:upper :name]})
        (t2/update! :model/Field
                    {:table_id [:in audit-table-ids]}
                    {:name [:upper :name]})))
    (log/infof "Adjusted Audit DB to match host engine: %s" (name (mdb/db-type)))))
|
|
(def ^:private analytics-dir-resource
  "A resource dir containing analytics content created by Metabase to load into the app instance on startup.
  Resolved once, when this namespace is loaded, from the `instance_analytics` classpath resource."
  (io/resource "instance_analytics"))
|
|
(defn- instance-analytics-plugin-dir
  "The directory analytics content is unzipped or moved to, and subsequently loaded into the app from on startup:
  the `instance_analytics` child of the absolutized `plugins-dir`."
  [plugins-dir]
  (-> plugins-dir
      fs/absolutize
      (fs/path "instance_analytics")))
|
|
(def ^:private jar-resource-path
  "Entry-name prefix handed to [[copy-from-jar!]] to select the analytics content inside the jar."
  "instance_analytics/")
|
|
(defn- ia-content->plugins
  "Load instance analytics content (collections/dashboards/cards/etc.) from resources dir or a zip file
  and copies it into the provided directory (by default, plugins/instance_analytics).

  Any existing copy under `plugins-dir` is deleted first. When running from a jar the content is extracted from the
  jar with [[copy-from-jar!]]; otherwise the resource directory tree is copied directly."
  [plugins-dir]
  (let [target-dir (instance-analytics-plugin-dir plugins-dir)]
    ;; always start from a clean target directory
    (when (fs/exists? (u.files/relative-path target-dir))
      (fs/delete-tree (u.files/relative-path target-dir)))
    (if-not (running-from-jar?)
      (let [source-dir (fs/path analytics-dir-resource)]
        (log/info "The app is not running from a jar, starting copy...")
        (log/info (str "Copying " source-dir " -> " target-dir))
        (fs/copy-tree (u.files/relative-path source-dir)
                      (u.files/relative-path target-dir)
                      {:replace-existing true})
        (log/info "Copying complete."))
      (let [jar-path (get-jar-path)]
        (log/info "The app is running from a jar, starting copy...")
        (log/info (str "Copying " jar-path "::" jar-resource-path " -> " plugins-dir))
        ;; entries keep their `instance_analytics/` prefix, so copying into `plugins-dir` lands them in
        ;; the instance_analytics subdirectory
        (copy-from-jar! jar-path jar-resource-path plugins-dir)
        (log/info "Copying complete.")))))
|
|
;; Internal, read-only setting (no setter) that gates whether analytics content is loaded on startup.
(defsetting load-analytics-content
  "Whether or not we should load Metabase analytics content on startup. Defaults to true, but can be disabled via environment variable."
  :type       :boolean
  :default    true
  :visibility :internal
  :setter     :none
  :audit      :never
  :doc        "Setting this environment variable to false can also come in handy when migrating environments, as it can simplify the migration process.")
|
|
;; NOTE(review): `:constant` is not metadata the Clojure compiler acts on (`:const` is) — confirm whether
;; compile-time inlining was intended here.
(def ^:constant SKIP_CHECKSUM_FLAG
  "If `last-analytics-checksum` is set to this value, we will skip calculating checksums entirely and always reload the
  analytics data."
  -1)
|
|
(defn- should-skip-checksum?
  "True when `last-checksum` equals [[SKIP_CHECKSUM_FLAG]]."
  [last-checksum]
  (= last-checksum SKIP_CHECKSUM_FLAG))
|
|
(defn analytics-checksum
  "Hashes the contents of all non-dir files under the instance analytics directory inside the plugins dir.

  Each file is slurped and hashed (in parallel via `pmap`), and the per-file hashes are summed into a single number."
  []
  (let [^Path ia-dir (instance-analytics-plugin-dir (plugins/plugins-dir))
        files        (remove fs/directory? (file-seq (.toFile ia-dir)))
        file-hashes  (pmap (fn [file] (hash (slurp file))) files)]
    (reduce + file-hashes)))
|
|
(defn- should-load-audit?
  "Should we load audit data?

  Only when `load-analytics-content?` is truthy, and either checksums are being skipped (per
  [[should-skip-checksum?]]) or `last-checksum` differs from `current-checksum`."
  [load-analytics-content? last-checksum current-checksum]
  (let [checksum-changed? (not= last-checksum current-checksum)]
    (and load-analytics-content?
         (or (should-skip-checksum? last-checksum)
             checksum-changed?))))
|
|
(defn- get-last-and-current-checksum
  "Gets the previous and current checksum for the analytics directory, respecting the -1 flag for skipping checksums
  entirely.

  Returns a `[last current]` pair. When the stored checksum is the skip flag, both elements are the flag and the
  current checksum is not computed."
  []
  (let [previous (audit/last-analytics-checksum)]
    (if-not (should-skip-checksum? previous)
      [previous (analytics-checksum)]
      [SKIP_CHECKSUM_FLAG SKIP_CHECKSUM_FLAG])))
|
|
(defn- maybe-load-analytics-content!
  "Loads the bundled analytics content into the app, if the `instance_analytics` resource is available.

  Steps, in order:

  1. [[adjust-audit-db-to-source!]] rewrites the audit DB metadata for loading.
  2. [[ia-content->plugins]] copies the content into the plugins dir.
  3. If [[should-load-audit?]] says so (setting enabled, and checksum skipped or changed), the content is loaded with
     `serialization.cmd/v2-load-internal!`. The new checksum is stored only when the load report has no errors.
  4. The audit DB is re-selected and [[adjust-audit-db-to-host!]] restores its metadata to match the host.

  Step 4 runs in a `finally`, so the audit DB metadata is restored even when copying or loading throws; the exception
  still propagates. Returns nil."
  [audit-db]
  (when analytics-dir-resource
    (try
      (adjust-audit-db-to-source! audit-db)
      (ia-content->plugins (plugins/plugins-dir))
      (let [[last-checksum current-checksum] (get-last-and-current-checksum)]
        (when (should-load-audit? (load-analytics-content) last-checksum current-checksum)
          (let [ia-dir (str (instance-analytics-plugin-dir (plugins/plugins-dir)))
                _      (log/info (str "Loading Analytics Content from: " ia-dir))
                report (log/with-no-logs
                         (serialization.cmd/v2-load-internal! ia-dir
                                                              {:backfill? false}
                                                              :token-check? false
                                                              :require-initialized-db? false))]
            (if (not-empty (:errors report))
              (log/info (str "Error Loading Analytics Content: " (pr-str report)))
              (do
                (log/info (str "Loading Analytics Content Complete (" (count (:seen report)) ") entities loaded."))
                ;; only remember the checksum after a clean load, so a failed load is retried next time
                (audit/last-analytics-checksum! current-checksum))))))
      ;; keep the nil return value regardless of which branch ran above
      nil
      (finally
        ;; re-select: the row was modified by adjust-audit-db-to-source! and possibly by the load itself
        (when-let [audit-db (t2/select-one :model/Database :is_audit true)]
          (adjust-audit-db-to-host! audit-db))))))
|
|
(def ^:private audit-db-entity-id
  "Hard-coded `:entity_id` for the audit DB. Used to compute any missing `:entity_id`s for existing audit DBs."
  "audit__rP75CiURKZ-0pq")
|
|
(defn- entity-id-for-table
  "Computes an `:entity_id` for an audit `table` by hashing `[audit-db-entity-id \"public\" <lower-cased table name>]`
  and feeding that hash to [[u/generate-nano-id]]."
  [table]
  (let [table-name (u/lower-case-en (:name table))
        seed       (serdes/raw-hash [audit-db-entity-id "public" table-name])]
    (u/generate-nano-id seed)))
|
|
(defn- entity-id-for-field
  "Computes an `:entity_id` for an audit `field` by hashing `[table-eid <lower-cased field name>]` and feeding that
  hash to [[u/generate-nano-id]]."
  [table-eid field]
  (let [field-name (u/lower-case-en (:name field))
        seed       (serdes/raw-hash [table-eid field-name])]
    (u/generate-nano-id seed)))
|
|
(defn- backfill-entity-ids!
  "Databases, Tables and Fields did not originally have `:entity_id` fields. Now that they do (Jan 2025), we need to
  include `:entity_id`s on the exported audit DB checked into the Metabase repo and inlined in the (EE) JAR files.

  If we add new tables and fields in the future, they'll get randomly generated `:entity_id`s that will be
  exported and checked in.

  But for existing audit DBs, we can't do that! Serdes will backfill the `:entity_id`s based on its
  `serdes/hash-fields` mechanism, but `:engine` is part of the hash for Databases, since names can be duplicated
  with different engines! That's a mess, but it has happened in the wild so we have to support it.

  So we hard-code a NanoID for the audit Database, and then compute reproducible NanoIDs for all existing tables and
  fields by seeding [[u/generate-nano-id]] with the table and field names.

  No-op when `db` is nil. Tables and Fields that already have an `:entity_id` are left alone."
  [db]
  (when db
    (let [db-id (:id db)]
      (t2/update! :model/Database db-id {:entity_id audit-db-entity-id})
      (let [tables     (t2/select :model/Table :db_id db-id)
            ;; table id -> existing entity id, or a freshly computed one when missing
            table-eids (into {}
                             (for [table tables]
                               [(:id table) (or (:entity_id table)
                                                (entity-id-for-table table))]))]
        (doseq [{table-id :id, existing-eid :entity_id} tables
                :when (not existing-eid)]
          (t2/update! :model/Table table-id {:entity_id (get table-eids table-id)}))
        ;; skip the Field query entirely when there are no tables to put in the `:in` clause
        (when-let [table-ids (seq (map :id tables))]
          (doseq [field (t2/select :model/Field :table_id [:in table-ids] :entity_id nil)]
            (let [table-eid (get table-eids (:table_id field))]
              (t2/update! :model/Field (:id field)
                          {:entity_id (entity-id-for-field table-eid field)}))))))))
|
|
(defn- maybe-install-audit-db
  "Makes sure an audit Database row exists and matches the app DB type. Returns one of:

  - `::installed` when no audit DB existed and one was inserted;
  - `::updated` when an audit DB existed with a different engine and was adjusted to the host;
  - `::no-op` otherwise.

  An existing audit DB always has its entity ids backfilled first."
  []
  (if-let [audit-db (t2/select-one :model/Database :is_audit true)]
    (do
      (backfill-entity-ids! audit-db)
      (if (= (mdb/db-type) (:engine audit-db))
        ::no-op
        (u/prog1 ::updated
          (log/infof "App DB change detected. Changing Audit DB source to match: %s." (name (mdb/db-type)))
          (adjust-audit-db-to-host! audit-db))))
    (u/prog1 ::installed
      (log/info "Installing Audit DB...")
      (install-database! (mdb/db-type) audit/audit-db-id))))
|
|
(defenterprise ensure-audit-db-installed!
  "EE implementation of `ensure-db-installed!`. Installs audit db if it does not already exist, and loads audit
  content if it is available.

  Returns the result of [[maybe-install-audit-db]]."
  :feature :none
  []
  (let [install-result (maybe-install-audit-db)
        ;; select after installing so a freshly inserted audit DB is picked up
        audit-db       (t2/select-one :model/Database :is_audit true)
        guarded-load!  (sync-util/with-duplicate-ops-prevented
                        :sync-database audit-db
                        #(maybe-load-analytics-content! audit-db))]
    (guarded-load!)
    install-result))
|
|
| |