データの完全な再配置が必要です。プロジェクション内の式は理論的にはGROUP BYを使用できますが、集約を維持するにはマテリアライズドビューがより効果的です。また、クエリ最適化は、単純な再配置を使用するプロジェクション(すなわちSELECT * ORDER BY x)をより好む可能性が高いです。ユーザーは、この式内でカラムのサブセットを選択して、ストレージフットプリントを削減できます。
この例では、pickup_datetimeでソートされたNew York Taxi Dataデータセットを使用します。
SELECT
tip_amount,
trip_id,
dateDiff('minutes', pickup_datetime, dropoff_datetime) AS trip_duration_min
FROM nyc_taxi.trips WHERE tip_amount > 200 AND trip_duration_min > 0
ORDER BY tip_amount, trip_id ASC
SELECT
tip_amount,
trip_id,
dateDiff('minutes', pickup_datetime, dropoff_datetime) AS trip_duration_min
FROM nyc_taxi.trips WHERE tip_amount > 200 AND trip_duration_min > 0
ORDER BY tip_amount, trip_id ASC
ALTER TABLE nyc.trips_with_projection MATERIALIZE PROJECTION prj_tip_amount
プロジェクションを追加したので、もう一度クエリを実行しましょう:
SELECT
tip_amount,
trip_id,
dateDiff('minutes', pickup_datetime, dropoff_datetime) AS trip_duration_min
FROM nyc_taxi.trips_with_projection WHERE tip_amount > 200 AND trip_duration_min > 0
ORDER BY tip_amount, trip_id ASC
SELECT
tables,
query,
query_duration_ms::String || ' ms' AS query_duration,
formatReadableQuantity(read_rows) AS read_rows,
projections
FROM clusterAllReplicas(default, system.query_log)
WHERE (type = 'QueryFinish') AND (tables = ['default.uk_price_paid_with_projections'])
ORDER BY initial_query_start_time DESC
LIMIT 2
FORMAT Vertical
Row 1:
──────
tables: ['uk.uk_price_paid_with_projections']
query: SELECT
county,
avg(price)
FROM uk_price_paid_with_projections
GROUP BY county
ORDER BY avg(price) DESC
LIMIT 3
query_duration: 5 ms
read_rows: 132.00
projections: ['uk.uk_price_paid_with_projections.prj_gby_county']
Row 2:
──────
tables: ['uk.uk_price_paid_with_projections']
query: SELECT
county,
price
FROM uk_price_paid_with_projections
WHERE town = 'LONDON'
ORDER BY price DESC
LIMIT 3
SETTINGS log_queries=1
query_duration: 11 ms
read_rows: 2.29 million
projections: ['uk.uk_price_paid_with_projections.prj_obj_town_price']
2 rows in set. Elapsed: 0.006 sec.
元のテーブル(およびパフォーマンス)を保持するために、再びCREATE ASとINSERT INTO SELECTを使用してテーブルのコピーを作成します。
CREATE TABLE uk.uk_price_paid_with_projections_v2 AS uk.uk_price_paid;
INSERT INTO uk.uk_price_paid_with_projections_v2 SELECT * FROM uk.uk_price_paid;
ALTER TABLE uk.uk_price_paid_with_projections_v2
ADD PROJECTION projection_by_year_district_town
(
SELECT
toYear(date),
district,
town,
avg(price),
sum(price),
count()
GROUP BY
toYear(date),
district,
town
)
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 1000000, 80)
FROM uk.uk_price_paid_with_projections_v2
GROUP BY year
ORDER BY year ASC
SETTINGS optimize_use_projections=0
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 1000000, 80)
FROM uk.uk_price_paid_with_projections_v2
GROUP BY year
ORDER BY year ASC
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 2000000, 100)
FROM uk.uk_price_paid_with_projections_v2
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year ASC
SETTINGS optimize_use_projections=0
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 2000000, 100)
FROM uk.uk_price_paid_with_projections_v2
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year ASC
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk.uk_price_paid_with_projections_v2
WHERE toYear(date) >= 2020
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
SETTINGS optimize_use_projections=0
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk.uk_price_paid_with_projections_v2
WHERE toYear(date) >= 2020
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
CREATE TABLE page_views
(
id UInt64,
event_date Date,
user_id UInt32,
url String,
region String,
PROJECTION region_proj
(
SELECT _part_offset ORDER BY region
),
PROJECTION user_id_proj
(
SELECT _part_offset ORDER BY user_id
)
)
ENGINE = MergeTree
ORDER BY (event_date, id)
SETTINGS
index_granularity = 1, -- one row per granule
max_bytes_to_merge_at_max_space_in_pool = 1; -- disable merge
その後、テーブルにデータを挿入します:
INSERT INTO page_views VALUES (
1, '2025-07-01', 101, 'https://example.com/page1', 'europe');
INSERT INTO page_views VALUES (
2, '2025-07-01', 102, 'https://example.com/page2', 'us_west');
INSERT INTO page_views VALUES (
3, '2025-07-02', 106, 'https://example.com/page3', 'us_west');
INSERT INTO page_views VALUES (
4, '2025-07-02', 107, 'https://example.com/page4', 'us_west');
INSERT INTO page_views VALUES (
5, '2025-07-03', 104, 'https://example.com/page5', 'asia');