Predefined MBQL queries for getting metadata about an external database. TODO -- these have nothing to do with the application database. This namespace should be renamed something like
| (ns metabase.db.metadata-queries (:require [medley.core :as m] [metabase.driver :as driver] [metabase.driver.util :as driver.u] [metabase.legacy-mbql.schema :as mbql.s] [metabase.legacy-mbql.schema.helpers :as helpers] [metabase.lib.ident :as lib.ident] [metabase.query-processor :as qp] [metabase.query-processor.interface :as qp.i] [metabase.util :as u] [metabase.util.malli :as mu] [metabase.util.malli.schema :as ms] [toucan2.core :as t2])) |
Given a partition field, returns the default value can be used to query. | (defn- partition-field->filter-form
[field]
(let [field-form [:field (:id field) {:base-type (:base_type field)}]]
(condp #(isa? %2 %1) (:base_type field)
:type/Number [:> field-form -9223372036854775808]
:type/Date [:> field-form "0001-01-01"]
:type/DateTime [:> field-form "0001-01-01T00:00:00"]))) |
Add a dummy filter for tables that require filters. Look into tables from source tables and all the joins. Currently this only apply to partitioned tables on bigquery that requires a partition filter. In the future we probably want this to be dispatched by database engine or handled by QP. | (defn add-required-filters-if-needed
[query]
(let [table-ids (->> (conj (keep :source-table (:joins query)) (:source-table query))
(filter pos-int?))
required-filter-fields (when (seq table-ids)
(t2/select :model/Field {:select [:f.*]
:from [[:metabase_field :f]]
:left-join [[:metabase_table :t] [:= :t.id :f.table_id]]
:where [:and
[:= :f.active true]
[:= :f.database_partitioned true]
[:= :t.active true]
[:= :t.database_require_filter true]
[:in :t.id table-ids]]}))
update-query-filter-fn (fn [existing-filter new-filter]
(if (some? existing-filter)
[:and existing-filter new-filter]
new-filter))]
(case (count required-filter-fields)
0
query
1
(update query :filter update-query-filter-fn (partition-field->filter-form (first required-filter-fields)))
;; > 1
(update query :filter update-query-filter-fn (into [:and] (map partition-field->filter-form required-filter-fields)))))) |
(defn- add-breakout-idents-if-needed [{:keys [breakout] :as inner-query}]
(m/assoc-some inner-query :breakout-idents (lib.ident/indexed-idents breakout))) | |
Runs the | (defn table-query
([table-id mbql-query]
(table-query table-id mbql-query nil))
([table-id mbql-query rff]
{:pre [(integer? table-id)]}
(binding [qp.i/*disable-qp-logging* true]
(qp/process-query
{:type :query
:database (t2/select-one-fn :db_id :model/Table table-id)
:query (-> mbql-query
(assoc :source-table table-id)
add-required-filters-if-needed
add-breakout-idents-if-needed)
:middleware {:disable-remaps? true}}
rff)))) |
Generate the MBQL query used to power FieldValues search in [[metabase.api.field/search-values]]. The actual query generated differs slightly based on whether the two Fields are the same Field. Note: the generated MBQL query assume that both | (defn search-values-query
[field search-field value limit]
(-> (table-query (:table_id field)
{:filter (when (some? value)
[:contains [:field (u/the-id search-field) nil] value {:case-sensitive false}])
;; if both fields are the same then make sure not to refer to it twice in the `:breakout` clause.
;; Otherwise this will break certain drivers like BigQuery that don't support duplicate
;; identifiers/aliases
:breakout (if (= (u/the-id field) (u/the-id search-field))
[[:field (u/the-id field) nil]]
[[:field (u/the-id field) nil]
[:field (u/the-id search-field) nil]])
:limit limit})
:data :rows)) |
Return the distinct count of | (defn field-distinct-count
[field & [limit]]
(-> (table-query (:table_id field) {:aggregation [[:distinct [:field (u/the-id field) nil]]]
:aggregation-idents (lib.ident/indexed-idents 1)
:limit limit})
:data :rows first first int)) |
Return the count of | (defn field-count
[field]
(-> (table-query (:table_id field) {:aggregation [[:count [:field (u/the-id field) nil]]]
:aggregation-idents (lib.ident/indexed-idents 1)})
:data :rows first first int)) |
The maximum number of values we should return when using | (def max-sample-rows 10000) |
Number of rows to sample for tables with nested (e.g., JSON) columns. | (def nested-field-sample-limit 500) |
Schema for | (def ^:private TableRowsSampleOptions
[:maybe
[:map
[:truncation-size {:optional true} :int]
[:limit {:optional true} :int]
[:order-by {:optional true} (helpers/distinct (helpers/non-empty [:sequential ::mbql.s/OrderBy]))]
[:rff {:optional true} fn?]]]) |
Identify text fields which can accept our substring optimization. JSON and XML fields are now marked as | (defn- text-field?
[{:keys [base_type semantic_type]}]
(and (= base_type :type/Text)
(not (isa? semantic_type :type/Structured)))) |
Returns the mbql query to query a table for sample rows | (defn- table-rows-sample-query
[table
fields
{:keys [truncation-size limit order-by] :or {limit max-sample-rows} :as _opts}]
(let [database (t2/select-one :model/Database (:db_id table))
driver (driver.u/database->driver database)
text-fields (filter text-field? fields)
field->expressions (when (and truncation-size (driver.u/supports? driver :expressions database))
(into {} (for [field text-fields]
[field [(str (gensym "substring"))
[:substring [:field (u/the-id field) nil]
1 truncation-size]]])))
expressions (into {} (vals field->expressions))]
{:database (:db_id table)
:type :query
:query (cond-> {:source-table (u/the-id table)
:expressions expressions
:expression-idents (update-vals expressions (fn [_] (u/generate-nano-id)))
:fields (vec (for [field fields]
(if-let [[expression-name _] (get field->expressions field)]
[:expression expression-name]
[:field (u/the-id field) nil])))
:limit limit}
order-by
(assoc :order-by order-by)
true
add-required-filters-if-needed)
:middleware {:format-rows? false
:skip-results-metadata? true}})) |
Run a basic MBQL query to fetch a sample of rows of FIELDS belonging to a TABLE. Options: a map of
| (mu/defn table-rows-sample
([table :- (ms/InstanceOf :model/Table)
fields :- [:sequential (ms/InstanceOf :model/Field)]
rff]
(table-rows-sample table fields rff nil))
([table :- (ms/InstanceOf :model/Table)
fields :- [:sequential (ms/InstanceOf :model/Field)]
rff :- fn?
opts :- TableRowsSampleOptions]
(let [query (table-rows-sample-query table fields opts)]
(qp/process-query query rff)))) |
(defmethod driver/table-rows-sample :default [_driver table fields rff opts] (table-rows-sample table fields rff opts)) | |