(ns metabase-enterprise.audit-app.audit (:require [babashka.fs :as fs] [clojure.java.io :as io] [clojure.string :as str] [metabase-enterprise.serialization.cmd :as serialization.cmd] [metabase.audit :as audit] [metabase.db :as mdb] [metabase.models.serialization :as serdes] [metabase.models.setting :refer [defsetting]] [metabase.plugins :as plugins] [metabase.premium-features.core :refer [defenterprise]] [metabase.sync.util :as sync-util] [metabase.util :as u] [metabase.util.files :as u.files] [metabase.util.log :as log] [toucan2.core :as t2]) (:import (java.nio.file Path) (java.util.jar JarEntry JarFile))) | |
(set! *warn-on-reflection* true) | |
(defn- running-from-jar?
  "Returns true iff we are running from a jar.

  `.getResource` returns a `java.net.URL`, and its string form starts with \"jar:\" if and only if the app is running
  from a jar. When the context class loader cannot resolve the empty resource name (`.getResource` returns nil) this
  returns false instead of throwing.

  More info: https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/lang/Thread.html"
  []
  (let [root-url (-> (Thread/currentThread)
                     (.getContextClassLoader)
                     (.getResource ""))]
    ;; `str/starts-with?` is declared to take a CharSequence, which a URL is not, so compare against the URL's
    ;; string form. `(str nil)` is "", so a missing resource yields false rather than an exception.
    (str/starts-with? (str root-url) "jar:")))
(defn- get-jar-path
  "Returns the path to the currently running jar file, taken from the code-source location of the class of `{}`.
  Asserts that the app is running from a jar.

  More info: https://stackoverflow.com/questions/320542/how-to-get-the-path-of-a-running-jar-file"
  []
  (assert (running-from-jar?) "Can only get-jar-path when running from a jar.")
  (let [code-source (.. (class {}) getProtectionDomain getCodeSource)]
    ;; Go through a URI (not the raw URL) to avoid problems with special characters in path.
    (.. code-source getLocation toURI getPath)))
(defn copy-from-jar!
  "Recursively copies a subdirectory (at resource-path) from the jar at jar-path into out-dir.

  Scans every entry in the jar to see which ones are inside of resource-path, since there's no way to \"ls\" or list
  a directory inside of a jar's resources. Entries keep their full in-jar name underneath out-dir.

  The jar is opened only for the duration of the copy and is closed afterwards, even if copying fails part-way.
  Returns nil."
  [jar-path resource-path out-dir]
  ;; A JarFile holds an open file handle, so it must be closed. `doseq` is eager, which guarantees every entry has
  ;; been read before `with-open` closes the jar.
  (with-open [jar-file (JarFile. (str jar-path))]
    (doseq [^JarEntry entry (iterator-seq (.entries jar-file))
            :let [entry-name (.getName entry)]
            :when (str/starts-with? entry-name resource-path)
            :let [out-file (fs/path out-dir entry-name)]]
      (if (.isDirectory entry)
        (fs/create-dirs out-file)
        (do
          ;; File entries may appear before (or without) their directory entry, so create the parent first.
          (-> out-file fs/parent fs/create-dirs)
          (with-open [in  (.getInputStream jar-file entry)
                      out (io/output-stream (str out-file))]
            (io/copy in out)))))))
(def default-question-overview-entity-id
  "Default Question Overview (this is a dashboard) entity id."
  "jm7KgY6IuS6pQjkBZ7WUI")
(def default-dashboard-overview-entity-id
  "Default Dashboard Overview (this is a dashboard) entity id."
  "bJEYb0o5CXlfWFcIztDwJ")
(defn- install-database!
  "Creates the audit db, a clone of the app db used for auditing purposes.

  Inserts a `:model/Database` row flagged `:is_audit` with the given `engine` and `id`, then deletes every
  `:model/Permissions` row whose `:object` contains `/db/<id>/`."
  [engine id]
  (let [audit-db-row     {:is_audit         true
                          :id               id
                          :name             "Internal Metabase Database"
                          :description      "Internal Audit DB used to power metabase analytics."
                          :engine           engine
                          :is_full_sync     true
                          :is_on_demand     false
                          :creator_id       nil
                          :auto_run_queries true}
        db-perms-pattern (str "%/db/" id "/%")]
    (t2/insert! :model/Database audit-db-row)
    ;; guard against someone manually deleting the audit-db entry, but not removing the audit-db permissions.
    (t2/delete! :model/Permissions {:where [:like :object db-perms-pattern]})))
(defn- adjust-audit-db-to-source!
  "Temporarily rewrites the audit DB metadata so it lines up with the serialized analytics content.

  Only acts when the app DB is MySQL or H2: the audit Database's engine is set to \"postgres\"; on MySQL its tables
  get schema \"public\"; on H2 its table schemas/names and field names are lower-cased."
  [{audit-db-id :id}]
  (let [app-db-type (mdb/db-type)]
    ;; We need to move back to a schema that matches the serialized data
    (when (#{:mysql :h2} app-db-type)
      (t2/update! :model/Database audit-db-id {:engine "postgres"})
      (case app-db-type
        :mysql
        (t2/update! :model/Table {:db_id audit-db-id} {:schema "public"})

        :h2
        (let [audit-table-ids {:select [:id]
                               :from   [(t2/table-name :model/Table)]
                               :where  [:= :db_id audit-db-id]}]
          (t2/update! :model/Table {:db_id audit-db-id} {:schema [:lower :schema] :name [:lower :name]})
          (t2/update! :model/Field {:table_id [:in audit-table-ids]} {:name [:lower :name]})))
      (log/info "Adjusted Audit DB for loading Analytics Content"))))
(defn- adjust-audit-db-to-host!
  "Rewrites the audit DB metadata to match the host app DB engine.

  Does nothing when the audit Database's `engine` already equals the app DB type. Otherwise sets the engine to the
  app DB type; on MySQL the tables' schema is cleared, on H2 table schemas/names and field names are upper-cased."
  [{audit-db-id :id :keys [engine]}]
  (let [host-db-type (mdb/db-type)
        host-engine  (name host-db-type)]
    (when-not (= engine host-db-type)
      ;; We need to move the loaded data back to the host db
      (t2/update! :model/Database audit-db-id {:engine host-engine})
      (case host-db-type
        :mysql
        (t2/update! :model/Table {:db_id audit-db-id} {:schema nil})

        :h2
        (let [audit-table-ids {:select [:id]
                               :from   [(t2/table-name :model/Table)]
                               :where  [:= :db_id audit-db-id]}]
          (t2/update! :model/Table {:db_id audit-db-id} {:schema [:upper :schema] :name [:upper :name]})
          (t2/update! :model/Field {:table_id [:in audit-table-ids]} {:name [:upper :name]}))

        ;; any other engine needs no table/field renaming
        nil)
      (log/infof "Adjusted Audit DB to match host engine: %s" host-engine))))
(def ^:private analytics-dir-resource
  "A resource dir containing analytics content created by Metabase to load into the app instance on startup.
  Resolved via `io/resource`, so it is nil when no `instance_analytics` resource is on the classpath."
  (io/resource "instance_analytics"))
(defn- instance-analytics-plugin-dir
  "The directory analytics content is unzipped or moved to, and subsequently loaded into the app from on startup:
  the `instance_analytics` child of the absolutized `plugins-dir`."
  [plugins-dir]
  (-> plugins-dir
      fs/absolutize
      (fs/path "instance_analytics")))
;; Entry-name prefix passed to `copy-from-jar!` to select the instance analytics entries inside the jar.
(def ^:private jar-resource-path "instance_analytics/")
(defn- ia-content->plugins
  "Copies the instance analytics content (collections/dashboards/cards/etc.) into the `instance_analytics`
  directory under `plugins-dir`, replacing whatever was there before.

  When running from a jar the content is extracted from the jar's `instance_analytics/` entries; otherwise it is
  copied from the resources directory."
  [plugins-dir]
  (let [ia-dir     (instance-analytics-plugin-dir plugins-dir)
        ia-dir-rel (u.files/relative-path ia-dir)]
    ;; Start from a clean slate so stale files from a previous version don't linger.
    (when (fs/exists? ia-dir-rel)
      (fs/delete-tree ia-dir-rel))
    (if-not (running-from-jar?)
      (let [in-path (fs/path analytics-dir-resource)]
        (log/info "The app is not running from a jar, starting copy...")
        (log/info (str "Copying " in-path " -> " ia-dir))
        (fs/copy-tree (u.files/relative-path in-path) ia-dir-rel {:replace-existing true})
        (log/info "Copying complete."))
      (let [path-to-jar (get-jar-path)]
        (log/info "The app is running from a jar, starting copy...")
        (log/info (str "Copying " path-to-jar "::" jar-resource-path " -> " plugins-dir))
        ;; Entry names already start with "instance_analytics/", so the target is plugins-dir itself.
        (copy-from-jar! path-to-jar jar-resource-path plugins-dir)
        (log/info "Copying complete.")))))
;; Internal boolean setting with no setter (`:setter :none`); `maybe-load-analytics-content!` reads it to decide
;; whether the bundled analytics content should be loaded.
(defsetting load-analytics-content
  "Whether or not we should load Metabase analytics content on startup. Defaults to true, but can be disabled via environment variable."
  :type       :boolean
  :default    true
  :visibility :internal
  :setter     :none
  :audit      :never
  :doc "Setting this environment variable to false can also come in handy when migrating environments, as it can simplify the migration process.")
(def ^:constant SKIP_CHECKSUM_FLAG
  "If the stored last analytics checksum is set to this value, the checksum of the analytics directory is not
  calculated at all and the checksum comparison is bypassed, so analytics content is reloaded whenever loading is
  enabled."
  -1)
(defn- should-skip-checksum?
  "True when `last-checksum` is the [[SKIP_CHECKSUM_FLAG]] sentinel."
  [last-checksum]
  (= last-checksum SKIP_CHECKSUM_FLAG))
(defn analytics-checksum
  "Hashes the contents of all non-dir files in the instance analytics plugin directory (under the plugins dir) and
  returns the sum of those hashes. Because the hashes are summed, the result does not depend on the order in which
  files are visited."
  []
  (let [^Path ia-dir  (instance-analytics-plugin-dir (plugins/plugins-dir))
        content-files (remove fs/directory? (file-seq (.toFile ia-dir)))
        file-hashes   (pmap (fn [f] (hash (slurp f))) content-files)]
    (reduce + file-hashes)))
(defn- should-load-audit?
  "Should we load audit data? Only when loading is enabled and either the checksum check is being skipped or the
  checksum has changed since the last load."
  [load-analytics-content? last-checksum current-checksum]
  (cond
    ;; loading disabled: hand back the falsy flag itself
    (not load-analytics-content?)         load-analytics-content?
    (should-skip-checksum? last-checksum) true
    :else                                 (not= last-checksum current-checksum)))
(defn- get-last-and-current-checksum
  "Gets the previous and current checksum for the analytics directory as `[last current]`, respecting the
  [[SKIP_CHECKSUM_FLAG]]: when the stored checksum is that sentinel, the directory is not hashed and both values
  are the sentinel."
  []
  (let [previous (audit/last-analytics-checksum)
        skip?    (should-skip-checksum? previous)]
    [(if skip? SKIP_CHECKSUM_FLAG previous)
     (if skip? SKIP_CHECKSUM_FLAG (analytics-checksum))]))
(defn- maybe-load-analytics-content!
  "Loads the bundled analytics content into the app, if the `instance_analytics` resource is available.

  Steps: adjust the audit DB metadata to match the serialized content, copy the content into the plugins dir, and,
  when [[should-load-audit?]] says so, load it through serialization and store the new checksum on success.
  Finally the audit DB metadata is adjusted back to the host engine. That last step runs in a `finally`, so a
  failure while copying or loading does not leave the audit DB stuck in its temporary \"source\" shape; the
  original exception still propagates. Returns nil."
  [audit-db]
  (when analytics-dir-resource
    (try
      (adjust-audit-db-to-source! audit-db)
      (ia-content->plugins (plugins/plugins-dir))
      (let [[last-checksum current-checksum] (get-last-and-current-checksum)]
        (when (should-load-audit? (load-analytics-content) last-checksum current-checksum)
          (log/info (str "Loading Analytics Content from: " (instance-analytics-plugin-dir (plugins/plugins-dir))))
          ;; The EE token might not have :serialization enabled, but audit features should still be able to use it.
          (let [report (log/with-no-logs
                         (serialization.cmd/v2-load-internal! (str (instance-analytics-plugin-dir (plugins/plugins-dir)))
                                                              {:backfill? false}
                                                              :token-check? false
                                                              :require-initialized-db? false))]
            (if (not-empty (:errors report))
              (log/info (str "Error Loading Analytics Content: " (pr-str report)))
              (do
                (log/info (str "Loading Analytics Content Complete (" (count (:seen report)) ") entities loaded."))
                ;; Only remember the checksum once the load succeeded, so a failed load is retried next time.
                (audit/last-analytics-checksum! current-checksum))))))
      ;; keep the return value independent of which branch ran above
      nil
      (finally
        ;; Re-select the audit DB: its engine may have been changed by `adjust-audit-db-to-source!`.
        (when-let [audit-db (t2/select-one :model/Database :is_audit true)]
          (adjust-audit-db-to-host! audit-db))))))
(def ^:private audit-db-entity-id
  "Hard-coded `entity_id` assigned to the audit Database; it is also the first component hashed when deriving the
  entity IDs of the audit tables."
  "audit__rP75CiURKZ-0pq")
(defn- entity-id-for-table
  "Computes a reproducible entity ID for an audit `table` by hashing the audit DB entity ID, the fixed schema
  \"public\" and the lower-cased table name, and using that hash to seed [[u/generate-nano-id]]."
  [table]
  (let [;; We use inconsistent upper and lower case table and field names for audit across AppDB engines; this uses
        ;; lower case everywhere to make the entity IDs effectively hard-coded.
        table-name (u/lower-case-en (:name table))
        ;; The hard-coded entity_ids saved in the serdes export used a schema of "public", so that's now hard-coded.
        ;; The schema (and spelling) used for the AppDB tables varies by engine, so it should not influence the idents.
        seed       [audit-db-entity-id "public" table-name]]
    (u/generate-nano-id (serdes/raw-hash seed))))
(defn- entity-id-for-field
  "Computes a reproducible entity ID for an audit `field` by hashing its table's entity ID (`table-eid`) together
  with the lower-cased field name, and using that hash to seed [[u/generate-nano-id]]."
  [table-eid field]
  ;; We use inconsistent upper and lower case table and field names for audit across AppDB engines; this uses lower
  ;; case to make the entity IDs effectively hard-coded.
  (let [field-name (u/lower-case-en (:name field))
        seed       [table-eid field-name]]
    (u/generate-nano-id (serdes/raw-hash seed))))
(defn- backfill-entity-ids!
  "Backfills `entity_id`s for the audit Database and its Tables and Fields. No-op when `db` is nil.

  The audit Database always gets the hard-coded [[audit-db-entity-id]]. Every table of that database that has no
  `entity_id` gets a reproducible NanoID computed from the table name, and every field of those tables whose
  `entity_id` is nil gets a reproducible NanoID computed from its table's entity ID and the field name. Tables and
  fields that already have an `entity_id` are left alone.

  NOTE(review): Databases, Tables and Fields did not originally have `entity_id`s; presumably the IDs must be
  reproducible (rather than random) so that existing audit DBs agree with the IDs used in the serialized analytics
  content - confirm against the serdes export."
  [db]
  (when db
    (t2/update! :model/Database (:id db) {:entity_id audit-db-entity-id})
    (let [tables        (t2/select :model/Table :db_id (:id db))
          ;; table id -> existing entity_id, or the computed one when the table has none yet
          table-id->eid (into {}
                              (for [table tables]
                                [(:id table) (or (:entity_id table)
                                                 (entity-id-for-table table))]))]
      (doseq [{table-id :id} (remove :entity_id tables)]
        (t2/update! :model/Table table-id {:entity_id (get table-id->eid table-id)}))
      ;; an empty `:in` list is avoided by skipping the field query when there are no tables
      (when-let [table-ids (seq (map :id tables))]
        (doseq [field (t2/select :model/Field :table_id [:in table-ids] :entity_id nil)]
          (let [table-eid (get table-id->eid (:table_id field))]
            (t2/update! :model/Field (:id field) {:entity_id (entity-id-for-field table-eid field)})))))))
(defn- maybe-install-audit-db
  "Makes sure an audit Database row exists and matches the app DB engine.

  Returns `::installed` when the audit DB had to be created, `::updated` when an existing audit DB's engine
  differed from the app DB type and was adjusted, and `::no-op` otherwise. An existing audit DB always has its
  entity IDs backfilled first."
  []
  (if-let [audit-db (t2/select-one :model/Database :is_audit true)]
    (do
      (backfill-entity-ids! audit-db)
      (if (= (mdb/db-type) (:engine audit-db))
        ::no-op
        (do
          (log/infof "App DB change detected. Changing Audit DB source to match: %s." (name (mdb/db-type)))
          (adjust-audit-db-to-host! audit-db)
          ::updated)))
    (do
      (log/info "Installing Audit DB...")
      (install-database! (mdb/db-type) audit/audit-db-id)
      ::installed)))
(defenterprise ensure-audit-db-installed!
  "EE implementation of `ensure-audit-db-installed!`. Installs or adjusts the audit DB via
  [[maybe-install-audit-db]], then loads the analytics content for it, and returns the result of
  [[maybe-install-audit-db]]."
  :feature :none
  []
  (u/prog1 (maybe-install-audit-db)
    (let [audit-db      (t2/select-one :model/Database :is_audit true)
          ;; prevent sync while loading
          guarded-load! (sync-util/with-duplicate-ops-prevented
                         :sync-database audit-db
                         #(maybe-load-analytics-content! audit-db))]
      (guarded-load!))))