| Logic responsible for doing deep 'analysis' of the data inside a database. This is significantly more expensive than the basic sync-metadata step, and involves things like running MBQL queries and fetching values to do things like determine Table row counts and infer field semantic types. | (ns metabase.sync.analyze (:require [metabase.sync.analyze.classify :as classify] [metabase.sync.analyze.fingerprint :as sync.fingerprint] [metabase.sync.interface :as i] [metabase.sync.util :as sync-util] [metabase.util :as u] [metabase.util.log :as log] [metabase.util.malli :as mu] [toucan2.core :as t2])) | 
| How does analysis decide which Fields should get analyzed? Good question. There are two situations in which Fields should get analyzed: 
 So how do we check all that? 
 So what happens during the next analysis? During the next analysis phase, Fields whose fingerprint is up-to-date will be skipped. However, if a new
fingerprint version is introduced, Fields that need it will be upgraded to it. We'll still only reclassify the
newly re-fingerprinted Fields, because we'll know to skip the ones from last time since their value of `last_analyzed` will already be set.
 | |
(mu/defn- update-fields-last-analyzed!
  "Set `last_analyzed` to the current time (`:%now`) on the Fields of `table` that match the conditions returned by
  `sync.fingerprint/incomplete-analysis-kvs`. Returns whatever `t2/update!` returns."
  [table :- i/TableInstance]
  (let [pending-kvs (sync.fingerprint/incomplete-analysis-kvs)
        ;; restrict the pending-analysis conditions to this one Table
        conditions  (merge pending-kvs {:table_id (:id table)})]
    (t2/update! :model/Field conditions {:last_analyzed :%now})))
(mu/defn- update-fields-last-analyzed-for-db!
  "Set `last_analyzed` to the current time (`:%now`) on the Fields that match the conditions returned by
  `sync.fingerprint/incomplete-analysis-kvs` and that belong to a Table of `database` satisfying
  `sync-util/sync-tables-clause`. Returns whatever `t2/update!` returns."
  [database :- i/DatabaseInstance]
  (let [pending-kvs    (sync.fingerprint/incomplete-analysis-kvs)
        ;; subquery selecting the IDs of this Database's Tables that pass the sync-tables filter
        table-id-query {:select [:id]
                        :from   [(t2/table-name :model/Table)]
                        :where  [:and sync-util/sync-tables-clause [:= :db_id (:id database)]]}
        conditions     (merge pending-kvs {:table_id [:in table-id-query]})]
    (t2/update! :model/Field conditions {:last_analyzed :%now})))
(mu/defn analyze-table!
  "Run the analysis sequence for a single `table`, in order: fingerprint its Fields, classify its Fields, classify
  the Table itself, then update `last_analyzed` on its Fields. Returns the result of that final update."
  [table :- i/TableInstance]
  (sync.fingerprint/fingerprint-fields! table)
  (classify/classify-fields! table)
  (classify/classify-table! table)
  ;; must run last: it stamps the Fields once the steps above have finished
  (update-fields-last-analyzed! table))
(defn- maybe-log-progress
  "Return a two-argument logging function `(fn [step table])`. Each call invokes `progress-bar-fn` once; when it
  yields a truthy value, an info-level message containing `step`, that value, and the Table's logging name is
  emitted. When it yields `nil`/`false`, nothing is logged."
  [progress-bar-fn]
  (fn [step table]
    (when-let [progress (progress-bar-fn)]
      (log/info (u/format-color 'blue "%s Analyzed %s %s" step progress (sync-util/name-for-logging table))))))
(defn- fingerprint-fields-summary
  "Build the one-line summary string for a fingerprinting step from its stats map, reading the keys
  `:fingerprints-attempted`, `:updated-fingerprints`, `:no-data-fingerprints` and `:failed-fingerprints`."
  [stats]
  (let [{attempted :fingerprints-attempted
         updated   :updated-fingerprints
         no-data   :no-data-fingerprints
         failed    :failed-fingerprints} stats]
    (format "Fingerprint updates attempted %d, updated %d, no data found %d, failed %d"
            attempted updated no-data failed)))
(defn- classify-fields-summary
  "Build the one-line summary string for a field-classification step from its stats map, reading the keys
  `:fields-classified` and `:fields-failed`."
  [stats]
  (let [{classified :fields-classified
         failed     :fields-failed} stats]
    (format "Total number of fields classified %d, %d failed" classified failed)))
(defn- classify-tables-summary
  "Build the one-line summary string for a table-classification step from its stats map, reading the keys
  `:total-tables` and `:tables-classified`."
  [stats]
  (let [{total   :total-tables
         updated :tables-classified} stats]
    (format "Total number of tables classified %d, %d updated" total updated)))
(defn- make-analyze-steps
  "Return the ordered vector of sync steps that make up a Database analysis: \"fingerprint-fields\",
  \"classify-fields\", then \"classify-tables\". Each step's function takes the Database and forwards `log-fn` to
  the underlying per-Database operation; each step is paired with its summary-formatting function."
  [log-fn]
  (let [fingerprint-step     (sync-util/create-sync-step
                              "fingerprint-fields"
                              (fn [database] (sync.fingerprint/fingerprint-fields-for-db! database log-fn))
                              fingerprint-fields-summary)
        classify-fields-step (sync-util/create-sync-step
                              "classify-fields"
                              (fn [database] (classify/classify-fields-for-db! database log-fn))
                              classify-fields-summary)
        classify-tables-step (sync-util/create-sync-step
                              "classify-tables"
                              (fn [database] (classify/classify-tables-for-db! database log-fn))
                              classify-tables-summary)]
    [fingerprint-step classify-fields-step classify-tables-step]))
(mu/defn analyze-db!
  "Run the analysis steps from `make-analyze-steps` against `database` inside an `:analyze` sync operation, then
  update `last_analyzed` on the Database's Fields. Returns the value produced by `sync-util/run-sync-operation`,
  not the result of the `last_analyzed` update."
  [database :- i/DatabaseInstance]
  (sync-util/sync-operation :analyze database (format "Analyze data for %s" (sync-util/name-for-logging database))
    ;; progress-bar total: three analysis steps per synced Table, plus one
    (sync-util/with-emoji-progress-bar [emoji-progress-bar (inc (* 3 (sync-util/sync-tables-count database)))]
      (let [log-fn  (maybe-log-progress emoji-progress-bar)
            results (sync-util/run-sync-operation "analyze" database (make-analyze-steps log-fn))]
        ;; stamp the Fields only after every analysis step has run
        (update-fields-last-analyzed-for-db! database)
        results))))
(mu/defn refingerprint-db!
  "Run a single \"refingerprinting fields\" sync step against `database` inside a `:refingerprint` sync operation,
  logging an info-level message for each Table the step reports. Returns the value produced by
  `sync-util/run-sync-operation`."
  [database :- i/DatabaseInstance]
  (sync-util/sync-operation :refingerprint database (format "Refingerprinting tables for %s" (sync-util/name-for-logging database))
    (letfn [(log-fn [step table]
              (log/info (u/format-color 'blue "%s Analyzed %s" step (sync-util/name-for-logging table))))]
      (let [refingerprint-step (sync-util/create-sync-step
                                "refingerprinting fields"
                                (fn [db] (sync.fingerprint/refingerprint-fields-for-db! db log-fn))
                                fingerprint-fields-summary)]
        (sync-util/run-sync-operation "refingerprint database" database [refingerprint-step])))))