庄同学(魏庄)
Check the server start time
select hostName(), fromUnixTimestamp(toUnixTimestamp(now()) - uptime()) as uptime from system.one
Check data size on disk
SELECT count(), formatReadableSize(sum(total_bytes)) FROM system.tables WHERE engine LIKE '%MergeTree'
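As a related check (an optional sketch, not part of the original notes), free and total space per disk can be read from system.disks:
SELECT name, path, formatReadableSize(free_space) AS free, formatReadableSize(total_space) AS total, round((1 - (free_space / total_space)) * 100, 2) AS used_percent FROM system.disks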
Check whether there are distributed tables:
SELECT name, engine FROM system.tables WHERE (name NOT IN (SELECT splitByChar('\'', engine_full)[6] FROM system.tables WHERE engine = 'Distributed')) AND (database != 'system') AND (engine NOT IN ('MaterializedView', 'View', 'OSS')) AND (engine != 'Distributed')
Top 10 slow queries
SELECT type, event_time, query_duration_ms, initial_query_id, formatReadableSize(memory_usage) AS memory, `ProfileEvents.Values`[indexOf(`ProfileEvents.Names`, 'UserTimeMicroseconds')] AS userCPU, `ProfileEvents.Values`[indexOf(`ProfileEvents.Names`, 'SystemTimeMicroseconds')] AS systemCPU, normalizedQueryHash(query) AS normalized_query_hash, substring(normalizeQuery(query) AS query, 1, 100) FROM system.query_log ORDER BY query_duration_ms DESC LIMIT 10 FORMAT Vertical
Failed SQL queries:
- type (Enum8) — Type of an event that occurred when executing the query. Values:
- 'QueryStart' = 1 — Successful start of query execution.
- 'QueryFinish' = 2 — Successful end of query execution.
- 'ExceptionBeforeStart' = 3 — Exception before the start of query execution.
- 'ExceptionWhileProcessing' = 4 — Exception during the query execution.
SELECT type, query_start_time, query_duration_ms, query_id, query_kind, is_initial_query, normalizeQuery(query) AS normalized_query, concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, formatReadableSize(memory_usage) AS `memory usage`, exception, concat('\n', stack_trace) AS stack_trace, user, initial_user, multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, client_hostname, databases, tables, columns, used_aggregate_functions, used_aggregate_function_combinators, used_database_engines, used_data_type_families, used_dictionaries, used_formats, used_functions, used_storages, used_table_functions, thread_ids, ProfileEvents, Settings FROM system.query_log WHERE type IN ['3', '4'] ORDER BY query_start_time DESC LIMIT 1 FORMAT Vertical
The error information table
SELECT name, code, value, last_error_time, last_error_message, last_error_trace AS remote FROM system.errors LIMIT 1 FORMAT Vertical
Check whether new parts are being written
SELECT event_time, event_time_microseconds, rows FROM system.part_log WHERE (table = 'xxx') AND (event_type IN ['NewPart']) ORDER BY event_time ASC LIMIT 10
Check merge progress:
SELECT hostName(), database, table, round(elapsed, 0) AS time, round(progress, 4) AS percent, formatReadableTimeDelta((elapsed / progress) - elapsed) AS ETA, num_parts, formatReadableSize(memory_usage) AS memory_usage, result_part_name FROM system.merges ORDER BY (elapsed / percent) - elapsed ASC FORMAT Vertical
To troubleshoot part merge errors, check system.part_log:
SELECT event_date, event_type, table, error AS error_code, errorCodeToName(error) AS error_code_name, count() AS c FROM system.part_log WHERE (error_code != 0) AND (event_date > (now() - toIntervalMonth(1))) GROUP BY event_date, event_type, error, table ORDER BY event_date DESC, event_type ASC, table ASC, error ASC
Check for long-running mutations (ALTER queries are also called mutations):
SELECT database, table, mutation_id, command, create_time, parts_to_do_names, parts_to_do, is_done, latest_failed_part, latest_fail_time, latest_fail_reason FROM clusterAllReplicas(default, system.mutations) WHERE NOT is_done ORDER BY create_time DESC
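If a mutation found above turns out to be permanently stuck (for example latest_fail_reason keeps repeating), it can be cancelled with KILL MUTATION. A minimal sketch; database, table and mutation_id are placeholders taken from the query above:
KILL MUTATION WHERE (database = 'default') AND (table = 'xxx') AND (mutation_id = 'mutation_123.txt')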
Disk usage by table:
SELECT hostName(), database, table, sum(rows) AS rows, formatReadableSize(sum(bytes_on_disk)) AS total_bytes_on_disk, formatReadableSize(sum(data_compressed_bytes)) AS total_data_compressed_bytes, formatReadableSize(sum(data_uncompressed_bytes)) AS total_data_uncompressed_bytes, round(sum(data_compressed_bytes) / sum(data_uncompressed_bytes), 3) AS compression_ratio FROM system.parts WHERE database != 'system' GROUP BY hostName(), database, table ORDER BY sum(bytes_on_disk) DESC FORMAT Vertical
When querying system tables, the query runs only on the local node. To cover all shards or replicas in the cluster, use the clusterAllReplicas table function.
SELECT hostName(), is_initial_query, query_id, initial_query_id, query FROM clusterAllReplicas('default', system.processes) FORMAT Vertical

Row 1:
──────
hostName():       c-mint-mb-85-server-0
is_initial_query: 1
query_id:         c8f1cfb2-7eed-4ecd-a303-cc20ef5d9d0f
initial_query_id: c8f1cfb2-7eed-4ecd-a303-cc20ef5d9d0f
query:            SELECT hostName(), is_initial_query, query_id, initial_query_id, query FROM clusterAllReplicas('default', system.processes) FORMAT Vertical
Monitoring inserts:
Asynchronous inserts: async inserts are the best practice for large batch writes. When async_insert is enabled, new parts are not created on every INSERT; instead the buffer is flushed to the table once async_insert_busy_timeout_ms or async_insert_max_data_size is exceeded.
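For reference, a minimal sketch of enabling async inserts for a single INSERT (table name and values are hypothetical; wait_for_async_insert controls whether the client waits for the flush to complete):
INSERT INTO my_table SETTINGS async_insert = 1, wait_for_async_insert = 1 VALUES (1, 'hello'), (2, 'world')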
New parts written in the last 2 hours (async and sync inserts alike):
SELECT count() AS new_parts, toStartOfMinute(event_time) AS modification_time_m, table, sum(rows) AS total_written_rows, formatReadableSize(sum(size_in_bytes)) AS total_bytes_on_disk FROM clusterAllReplicas(default, system.part_log) WHERE (event_type = 'NewPart') AND (event_time > (now() - toIntervalHour(2))) GROUP BY modification_time_m, table ORDER BY modification_time_m ASC, table DESC
Asynchronous inserts:
SELECT event_time, query, database, table, bytes, status, flush_time FROM clusterAllReplicas(default, system.asynchronous_insert_log) ORDER BY flush_time DESC
Monitoring synchronous bulk-request inserts; each insert should write at least 1,000 rows.
SELECT toStartOfMinute(event_time) AS event_time_m, count(*) AS nb_bulk_inserts FROM clusterAllReplicas(default, system.query_log) WHERE (query ILIKE '%insert%') AND (query_kind = 'Insert') AND (type = 'QueryFinish') AND (NOT (Settings['async_insert']) = '1') AND (event_time > (now() - toIntervalDay(3))) GROUP BY event_time_m ORDER BY event_time_m ASC
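A minimal sketch of such a bulk insert (hypothetical table and columns), batching many rows into a single INSERT statement rather than issuing one statement per row; in practice each batch should contain at least ~1,000 rows:
INSERT INTO my_table (id, ts, value) VALUES (1, now(), 0.5), (2, now(), 0.7), (3, now(), 0.9)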
Bytes and rows written
SELECT toStartOfMinute(event_time) AS modification_time_m, table, sum(rows) AS total_written_rows, formatReadableSize(sum(size_in_bytes)) AS total_bytes_on_disk FROM clusterAllReplicas(default, system.part_log) WHERE (event_type = 'NewPart') AND (event_time > (now() - toIntervalDay(3))) GROUP BY modification_time_m, table ORDER BY modification_time_m ASC, table DESC
Average insert duration
SELECT toStartOfMinute(event_time) AS event_time_m, count() AS count_batches, avg(query_duration_ms) AS avg_duration FROM clusterAllReplicas(default, system.query_log) WHERE (query_kind = 'Insert') AND (type != 'QueryStart') AND (event_time > (now() - toIntervalDay(2))) GROUP BY event_time_m ORDER BY event_time_m ASC

SELECT toStartOfHour(event_time) AS event_time_h, count() AS count_batches, quantile(0.25)(query_duration_ms), quantile(0.5)(query_duration_ms), quantile(0.75)(query_duration_ms), quantile(0.95)(query_duration_ms), quantile(0.99)(query_duration_ms) FROM clusterAllReplicas(default, system.query_log) WHERE (query_kind = 'Insert') AND (type != 'QueryStart') AND (event_time > (now() - toIntervalDay(2))) GROUP BY event_time_h ORDER BY event_time_h ASC
Memory and CPU usage
SELECT event_time, formatReadableSize(memory_usage) AS memory, ProfileEvents['UserTimeMicroseconds'] AS userCPU, ProfileEvents['SystemTimeMicroseconds'] AS systemCPU, replaceRegexpAll(query, CHAR(10), ''), initial_query_id FROM clusterAllReplicas(default, system.query_log) WHERE (query_kind = 'Insert') AND (type = 'QueryFinish') ORDER BY memory_usage DESC LIMIT 10
Maximum parts per partition
SELECT toStartOfMinute(event_time) AS event_time_m, avg(value) AS avg_MaxPartCountForPartition FROM clusterAllReplicas(default, system.asynchronous_metric_log) WHERE (event_time > (now() - toIntervalDay(1))) AND (metric = 'MaxPartCountForPartition') GROUP BY event_time_m ORDER BY event_time_m ASC
If the query above points to a potential problem, the following SQL shows which partition of which table has the most parts. Too many parts puts heavy pressure on the system during merges.
SELECT concat(database, '.', table) AS table, count() AS parts_per_partition, partition_id FROM clusterAllReplicas(default, system.parts) WHERE active AND (database != 'system') GROUP BY database, table, partition_id HAVING parts_per_partition > 1 ORDER BY parts_per_partition DESC
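If one partition is clearly the offender, one possible (and heavy) remediation is to trigger a merge manually; a sketch with a placeholder table and partition id, to be used sparingly since it forces a full merge:
OPTIMIZE TABLE default.my_table PARTITION ID '202301' FINAL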
Once the problematic partition and parts are known, check memory consumption:
SELECT event_date, argMax(table, peak_memory_usage) AS table, argMax(event_time, peak_memory_usage) AS event_time, formatReadableSize(MAX(peak_memory_usage)) AS max_peak_memory_usage FROM clusterAllReplicas(default, system.part_log) WHERE peak_memory_usage > 0 GROUP BY event_date ORDER BY event_date DESC
Count part events with errors
SELECT event_date, event_type, table, error, errorCodeToName(error) AS error_code, COUNT() FROM clusterAllReplicas(default, system.part_log) WHERE (database = 'default') AND (error > 0) GROUP BY event_date, event_type, error, error_code, table
Monitoring SELECT queries:
Table statistics overview:
SELECT table, sum(rows) AS rows, max(modification_time) AS latest_modification, formatReadableSize(sum(bytes)) AS data_size, formatReadableSize(sum(primary_key_bytes_in_memory)) AS primary_keys_size, any(engine) AS engine, sum(bytes) AS bytes_size FROM clusterAllReplicas(default, system.parts) WHERE active GROUP BY database, table ORDER BY bytes_size DESC
Most expensive queries
SELECT type, query_start_time, formatReadableTimeDelta(query_duration_ms) AS query_duration, query_id, query_kind, is_initial_query, query, concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, formatReadableSize(memory_usage) AS `memory usage`, exception, concat('\n', stack_trace) AS stack_trace, user, initial_user, multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, client_hostname, databases, tables, columns, used_aggregate_functions, used_dictionaries, used_formats, used_functions, used_table_functions, ProfileEvents.Names, ProfileEvents.Values, Settings.Names, Settings.Values FROM system.query_log WHERE (type != 'QueryStart') AND (query_kind = 'Select') AND (event_date >= (today() - 1)) AND (event_time >= (now() - toIntervalDay(1))) ORDER BY query_duration_ms DESC LIMIT 10
Compare the metrics of two queries before and after a change (requires clickhouse-client)
Find the query_id of the SQL in question:

SELECT query_id, query, formatReadableTimeDelta(query_duration_ms) AS query_duration FROM clusterAllReplicas(default, system.query_log) WHERE (type != 'QueryStart') AND (query_kind = 'Select') AND (event_time >= (now() - toIntervalHour(1))) ORDER BY event_time DESC LIMIT 10

Then substitute the two query ids into the following statement:

WITH query_id = '...query_id_old_version...' AS first, query_id = '...query_id_new_version...' AS second SELECT PE.Names AS metric, anyIf(PE.Values, first) AS v1, anyIf(PE.Values, second) AS v2 FROM clusterAllReplicas(default, system.query_log) ARRAY JOIN ProfileEvents AS PE WHERE (first OR second) AND (event_date = today()) AND (type = 2) GROUP BY metric HAVING v1 != v2 ORDER BY (v2 - v1) / (v1 + v2) ASC, v2 ASC, metric ASC
Optimizing a single SQL query
First install clickhouse-client; Docker is recommended:
docker run -it --entrypoint clickhouse-client clickhouse/clickhouse-server --host xxxxx --secure --user default --password your-password

To use a specific client version:

docker run -it --entrypoint clickhouse-client clickhouse/clickhouse-server:22.12 --host xxxxx --secure --user default --password your-password
Enable trace logging: SET send_logs_level = 'trace'
Assume the UK property price dataset has already been loaded following the guide on the official website.
SELECT county, price FROM uk_price_paid WHERE town = 'LONDON' ORDER BY price DESC LIMIT 3

Query id: 31bc412a-411d-4717-95c1-97ac0b5e22ff

┌─county─────────┬─────price─┐
│ GREATER LONDON │ 594300000 │
│ GREATER LONDON │ 569200000 │
│ GREATER LONDON │ 523000000 │
└────────────────┴───────────┘

3 rows in set. Elapsed: 1.206 sec. Processed 27.84 million rows, 44.74 MB (23.08 million rows/s., 37.09 MB/s.)
Although the query is already fairly fast, we expect that adding a projection will speed it up further.
ALTER TABLE uk_price_paid ADD PROJECTION uk_price_paid_projection (SELECT * ORDER BY town, price)

ALTER TABLE uk_price_paid MATERIALIZE PROJECTION uk_price_paid_projection

Run the query again:

SELECT county, price FROM uk_price_paid WHERE town = 'LONDON' ORDER BY price DESC LIMIT 3

Query id: f5931796-62d1-4577-9a80-dbaf21a43049

┌─county─────────┬─────price─┐
│ GREATER LONDON │ 594300000 │
│ GREATER LONDON │ 569200000 │
│ GREATER LONDON │ 448500000 │
└────────────────┴───────────┘

3 rows in set. Elapsed: 0.028 sec. Processed 2.18 million rows, 13.09 MB (78.30 million rows/s., 470.20 MB/s.)
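To confirm the projection was actually used, one option (assuming a ClickHouse version where system.query_log exposes a projections column) is to look up the second query id in the log:
SELECT query_id, query_duration_ms, projections FROM system.query_log WHERE (query_id = 'f5931796-62d1-4577-9a80-dbaf21a43049') AND (type = 'QueryFinish')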
Average query duration and request count
SELECT toStartOfHour(event_time) AS event_time_h, count() AS count_m, avg(query_duration_ms) AS avg_duration FROM clusterAllReplicas(default, system.query_log) WHERE (query_kind = 'Select') AND (type != 'QueryStart') AND (event_time > (now() - toIntervalDay(3))) GROUP BY event_time_h ORDER BY event_time_h ASC
Queries grouped by client or user; to group by user, replace client_name with user (a sketch of that variant follows the query below):
SELECT toStartOfMinute(event_time) AS event_time_m, if(empty(client_name), 'unknown_or_http', client_name) AS client_name, count(), query_kind FROM clusterAllReplicas(default, system.query_log) WHERE (type = 'QueryStart') AND (event_time > (now() - toIntervalMinute(10))) AND (query_kind = 'Select') GROUP BY event_time_m, client_name, query_kind ORDER BY event_time_m DESC, count() ASC LIMIT 100
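For grouping by user instead of by client, a sketch of the same query with client_name replaced by the user column:
SELECT toStartOfMinute(event_time) AS event_time_m, user, count(), query_kind FROM clusterAllReplicas(default, system.query_log) WHERE (type = 'QueryStart') AND (event_time > (now() - toIntervalMinute(10))) AND (query_kind = 'Select') GROUP BY event_time_m, user, query_kind ORDER BY event_time_m DESC, count() ASC LIMIT 100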
Troubleshooting TOO_MANY_SIMULTANEOUS_QUERIES
This error often occurs when handling a large number of concurrent SELECTs; the limit is controlled by the max_concurrent_queries setting. The time_delta column helps determine whether queries are stuck.
SELECT formatReadableTimeDelta(elapsed) AS time_delta, * FROM clusterAllReplicas(default, system.processes) WHERE query ILIKE 'SELECT%' ORDER BY time_delta DESC
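To gauge how close each node is to the limit, a simple sketch that counts currently running queries per host (compare the result against your configured max_concurrent_queries):
SELECT hostName(), count() AS running_queries FROM clusterAllReplicas(default, system.processes) GROUP BY hostName() ORDER BY running_queries DESC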
If queries are indeed stuck, query system.stack_trace to see the stack traces causing the blockage:
SELECT thread_id, query_id, arrayStringConcat(arrayMap((x, y) -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') AS n_trace FROM clusterAllReplicas(default, system.stack_trace) WHERE query_id IS NOT NULL SETTINGS allow_introspection_functions = 1