kl个人博客 首页>>分布式,架构>>Sentry Snuba Clickhoue 数据源分析

Sentry Snuba Clickhoue 数据源分析

Clickhouse 库表介绍

Sentry 和数据分析相关的表,只有四张,如下:

  • errors_local : 存储所有的 issue 数据

  • transactions_local :存储所有的 trace 、span 数据,以及自定义的 measurements

  • sessions_raw_local :存储 session 的原始数据信息,崩溃率的原始数据

  • sessions_hourly_local :存储 session 的小时粒度的统计聚合信息

分析案

需求1:以下 SQL 用于实时的分析【启动开屏广告 p95 耗时】


SELECT
    extract(`release`, '[0-9]+\.[0-9]+\.[0-9]') AS `version`,
    (quantile(0.95)(if(has(measurements.key, 'tap_app_start_cold_time'), arrayElement(measurements.value, indexOf(measurements.key, 'tap_app_start_cold_time')), NULL) as fps)) as avg_fps
FROM 
    transactions_local
WHERE
    `start_ts` BETWEEN timestamp_sub(now(), INTERVAL 28 DAY) and now()
    AND fps > 0
    AND `project_id` IN (148)
    AND transaction_op = 'tap.biz.page.load'
    AND version NOT IN ('2.53.1', '2.54.0')
    AND `environment` IN ('rnd','release')
GROUP BY version

需求2:按天持久化【自定义埋点数据】。

针对这个进阶需求,可以采用 Clickhouse 的物化视图功能(CREATE VIEW | ClickHouse Docs  )对源表进行聚合处理成物化表。然后查询物化图即可。

首先创建一张表,用于存储物化视图的结果


CREATE TABLE overview_daily.`transactions_daily_local`
(
    `daytime` DateTime('Asia/Shanghai'),
    `project_id` UInt64,
    `major_release` String,
    `os` String,
    `area` String,
    `app_start_cold_time_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
    `app_start_cold_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
    `first_page_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
    `gamedetail_page_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8),
    `ui_show_duration_p95` AggregateFunction (quantilesIf (0.5, 0.95), Float64, UInt8)
)
ENGINE = AggregatingMergeTree()
PARTITION BY toMonday(daytime)
ORDER BY (project_id,
 major_release,
 daytime)
SETTINGS index_granularity = 8192;
物化视图创建:


#---- app_start_cold_time_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_app_start_cold_time_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'tap_app_start_cold_time'),
arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')),
0) as temp_duration,(temp_duration !=4294967295))) as app_start_cold_time_duration_p95
FROM
`default`.transactions_local
WHERE
transaction_op = 'tap.biz.page.load'
    AND `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release')
    and arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')) > 0
GROUP BY
major_release,
daytime,
project_id;

#---- app_start_cold_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_app_start_cold_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'app_start_cold'),
                                   arrayElement(measurements.value,indexOf(measurements.key,'app_start_cold')),0) 
                                as temp_duration,(temp_duration !=4294967295))) as app_start_cold_duration_p95
FROM
`default`.transactions_local
WHERE
     `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release')
    and arrayElement(measurements.value,
indexOf(measurements.key,
'app_start_cold')) > 0
GROUP BY
major_release,
daytime,
project_id

#------ ui_show_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_ui_show_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'tap_loading_more_ui_show_total'),
                                   arrayElement(measurements.value,indexOf(measurements.key,'tap_loading_more_ui_show_total')),0) 
                                as temp_duration,(temp_duration !=4294967295))) as ui_show_duration_p95
FROM
`default`.transactions_local
WHERE
     `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release','rnd')
    AND transaction_op = 'tap.loading.more'
    and arrayElement(measurements.value,
indexOf(measurements.key,
'tap_loading_more_ui_show_total')) > 0
GROUP BY
major_release,
daytime,
project_id

#------- gamedetail_page_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_gamedetail_page_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
    (quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295)))  AS gamedetail_page_duration_p95
FROM
`default`.transactions_local
WHERE
     `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release')
    AND transaction_op = 'ui.load'
    AND `transaction_name` in ('GameDetailNewPager','GameDetailPager','GameDetail.GameDetailV2ViewController','TapTap.TapGameDetailViewController', 'TapTap.TapGameDetailInfoViewController', 'TapGameDetailViewController')
GROUP BY
major_release,
daytime,
project_id

#----------- first_page_duration_p95 的物化视图
CREATE MATERIALIZED VIEW overview_daily.transactions_daily_mv_first_page_duration_p95 to overview_daily.transactions_daily_local AS
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
    (quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295)))  AS first_page_duration_p95
FROM
`default`.transactions_local
WHERE
     `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release')
    AND transaction_op = 'ui.load'
    AND `transaction_name` in ('AdvPageActivity', 'MainAct', 'HomePageActivity', 'TapHomeFeedListViewController', 'TapTap.TapHomeFeedListViewController', 'TapTap.TapHomeFeedsListViewController', 'HomeModule.ForYouViewController')
GROUP BY
major_release,
daytime,
project_id
物化视图创建后,只会对新增的数据生效,如果要处理历史数据,需要手动插入。如:


#从历史结果表里插入
INSERT INTO
  overview_daily.transactions_daily_local (
    daytime,
    major_release,
    os,
    area,
    app_start_cold_duration_p95,
    first_page_duration_p95,
    gamedetail_page_duration_p95,
    ui_show_duration_p95
  )
select
  daytime,
  major_release,
  os,
  area,
  quantilesIfState (0.5, 0.95) (
    app_start_cold_duration_p95,
    (app_start_cold_duration_p95 != 4294967295)
    
  ) as app_start_cold_duration_p95,
  quantilesIfState (0.5, 0.95) (
    first_page_duration_p95,(first_page_duration_p95 != 4294967295)
  ) as first_page_duration_p95,
  quantilesIfState (0.5, 0.95) (
    gamedetail_page_duration_p95,
    (gamedetail_page_duration_p95 != 4294967295)
    
  ) as gamedetail_page_duration_p95,
  quantilesIfState (0.5, 0.95) (
    ui_show_duration_p95,
    (ui_show_duration_p95 != 4294967295)
    
  ) as ui_show_duration_p95
FROM
  apm.sentry_overview_daily
GROUP BY daytime , major_release , os , area 


#---- app_start_cold_time_duration_p95
INSERT INTO
  overview_daily.transactions_daily_local (
    daytime,
      project_id,
    
    os,
    area,
      major_release,
    app_start_cold_time_duration_p95
  )
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,
'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,
'tap_app_start_cold_time'),
arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')),
0) as fps,(fps !=4294967295))) as app_start_cold_time_duration_p95
FROM
`default`.transactions_local
WHERE
arrayElement(measurements.value,
indexOf(measurements.key,
'tap_app_start_cold_time')) > 0
AND transaction_op = 'tap.biz.page.load'
AND major_release NOT IN ('2.53.1', '2.54.0')
    AND `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release')
GROUP BY
major_release,
daytime,
project_id;

#---- ui_show_duration_p95
INSERT INTO
  overview_daily.transactions_daily_local (
    daytime,
      project_id,
    
    os,
    area,
      major_release,
    ui_show_duration_p95
  )
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
(quantilesIfState(0.5,0.95)(if(has(measurements.key,'tap_loading_more_ui_show_total'),
                                   arrayElement(measurements.value,indexOf(measurements.key,'tap_loading_more_ui_show_total')),0) 
                                as temp_duration,(temp_duration !=4294967295))) as ui_show_duration_p95
FROM
`default`.transactions_local
WHERE
     `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release','rnd')
    AND transaction_op = 'tap.loading.more'
    and arrayElement(measurements.value,
indexOf(measurements.key,
'tap_loading_more_ui_show_total')) > 0
    and daytime ='2023-06-07 00:00:00'
GROUP BY
major_release,
daytime,
project_id


#------- gamedetail_page_duration_p95、
INSERT INTO
  overview_daily.transactions_daily_local (
    daytime,
      project_id,
    
    os,
    area,
      major_release,
    gamedetail_page_duration_p95
  )
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
    (quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295)))  AS gamedetail_page_duration_p95
FROM
`default`.transactions_local
WHERE
     `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release')
    AND transaction_op = 'ui.load'
    AND `transaction_name` in ('GameDetailNewPager','GameDetailPager','GameDetail.GameDetailV2ViewController','TapTap.TapGameDetailViewController', 'TapTap.TapGameDetailInfoViewController', 'TapGameDetailViewController')
    and daytime ='2023-06-07 00:00:00'
GROUP BY
major_release,
daytime,
project_id

#----------- first_page_duration_p95
INSERT INTO
  overview_daily.transactions_daily_local (
    daytime,
      project_id,
    
    os,
    area,
      major_release,
    first_page_duration_p95
  )
SELECT
toStartOfDay(start_ts, 'Asia/Shanghai') AS daytime,
project_id,
    multiIf( project_id in [148,175],'android', project_id in[140,92,177],'ios','N/A')  AS os,
    multiIf( project_id in [148,92,177],'cn', project_id in[175,140],'intl','N/A')  AS area,
extract(`release`,'[0-9]+\.[0-9]+\.[0-9]') AS major_release,
    (quantilesIfState(0.5,0.95)(toFloat64(duration),( duration!= 4294967295)))  AS first_page_duration_p95
FROM
`default`.transactions_local
WHERE
     `project_id` IN (148, 175, 92, 177, 140)
    AND `environment` IN ('release')
    AND transaction_op = 'ui.load'
    AND `transaction_name` in ('AdvPageActivity', 'MainAct', 'HomePageActivity', 'TapHomeFeedListViewController', 'TapTap.TapHomeFeedListViewController', 'TapTap.TapHomeFeedsListViewController', 'HomeModule.ForYouViewController')
    and daytime ='2023-06-07 00:00:00'
GROUP BY
major_release,
daytime,
project_id
后续分析直接查询物化视图即可

select project_id FROM  overview_daily.transactions_daily_mv_local

注意:AggregateFunction 类型标注的字段,需要使用 quantilesIfMerge 函数查询,比如


SELECT
  daytime AS time,
   major_release,
  quantilesIfMerge(0.95)(app_start_cold_time_duration_p95)[1] AS `duration_p95`
FROM
  overview_daily.transactions_daily_local
WHERE
  daytime >= '2023-05-07 00:00:00'
  AND os = 'android'
  AND area = 'cn'
GROUP BY
  `time`,
  major_release
HAVING duration_p95>=0

需求3:按天持久化 session 、session_crashed 数据

 依然采用 Clickhouse 的物化视图功能(CREATE VIEW | ClickHouse Docs  )对源表进行聚合处理成物化表。然后查询物化图即可。

首先创建一张表,用于存储物化视图的结果,注意:目标表的表引擎需要使用 SummingMergeTree ,用于实时的聚合



CREATE TABLE overview_daily.`sessions_daily_local`
(
    `daytime` DateTime('Asia/Shanghai'),
    `major_release` String,
    `os` String,
    `area` String,
    `total_session_crashed` Nullable(Float64), 
    `total_session` Nullable(Float64)
)
ENGINE = SummingMergeTree()
PARTITION BY toMonday(daytime)
ORDER BY (os,
 major_release,
 area,
 daytime)
SETTINGS index_granularity = 8192;
然后创建物化视图


CREATE MATERIALIZED VIEW overview_daily.sessions_daily_mv_local to overview_daily.sessions_daily_local AS
SELECT
  toStartOfDay (`started`, 'Asia/Shanghai') AS `daytime`,
  extract(`release`, '[0-9]+\.[0-9]+\.[0-9]+') AS `major_release`,
  multiIf (
    `project_id` in (148, 175),
    'android',
    `project_id` in (92, 177, 140),
    'ios',
    'N/A'
  ) AS os,
  multiIf (
    `project_id` in (148, 92, 177),
    'cn',
    `project_id` in (175, 140),
    'intl',
    'N/A'
  ) AS area,
  plus (
    countIfMerge (sessions_crashed),
    sumIfMerge (sessions_crashed_preaggr)
  ) AS total_session_crashed,
  plus (
    countIfMerge (sessions),
    sumIfMerge (sessions_preaggr)
  ) AS total_session
FROM
  default.sessions_hourly_local
WHERE
   `project_id` IN (148, 175, 92, 177, 140)
  AND `environment` IN ('release')
GROUP BY
  `daytime`,
  `major_release`,
  os,
  area
插入历史记录的数据


INSERT INTO overview_daily.sessions_daily_local
SELECT
  toStartOfDay (`started`, 'Asia/Shanghai') AS `daytime`,
  extract(`release`, '[0-9]+\.[0-9]+\.[0-9]+') AS `major_release`,
  multiIf (
    `project_id` in (148, 175),
    'android',
    `project_id` in (92, 177, 140),
    'ios',
    'N/A'
  ) AS os,
  multiIf (
    `project_id` in (148, 92, 177),
    'cn',
    `project_id` in (175, 140),
    'intl',
    'N/A'
  ) AS area,
  plus (
    countIfMerge (sessions_crashed),
    sumIfMerge (sessions_crashed_preaggr)
  ) AS total_session_crashed,
  plus (
    countIfMerge (sessions),
    sumIfMerge (sessions_preaggr)
  ) AS total_session
FROM
  default.sessions_hourly_local
WHERE
   `project_id` IN (148, 175, 92, 177, 140)
  AND `environment` IN ('release')
  and daytime = '2023-06-06 00:00:00'
GROUP BY
  `daytime`,
  `major_release`,
  os,
  area

需求4:按 5 分钟粒度持久化 crash_count 、unique_user_count 数据

 依然采用 Clickhouse 的物化视图功能(CREATE VIEW | ClickHouse Docs  )对源表进行聚合处理成物化表。然后查询物化图即可。

首先创建一张表,用于存储物化视图的结果,注意:目标表的表引擎需要使用 SummingMergeTree ,用于实时的聚合


CREATE TABLE overview_daily.`errors_5min_local`
(
    `mintime` DateTime('Asia/Shanghai'),
    `os` String,
    `area` String,
    `crash_count` Float64,
    `unique_user_count` Float64
)
ENGINE = SummingMergeTree()
PARTITION BY toMonday(mintime)
ORDER BY (os,
 area,
 mintime)
SETTINGS index_granularity = 8192;
创建物化视图


CREATE MATERIALIZED VIEW overview_daily.errors_5min_mv_local to overview_daily.`errors_5min_local` AS
SELECT toStartOfFiveMinute(`timestamp`, 'Asia/Shanghai') as `mintime`,
            multiIf(`project_id` in (148, 175), 'android', `project_id` in (92,177, 140), 'ios','N/A') AS os,
            multiIf(`project_id` in (148, 92,177), 'cn', `project_id` in (175,140), 'intl','N/A') AS area,
            count(*) AS crash_count,
            (ifNull(uniq((nullIf(user, '') AS _snuba_user)), 0) AS unique_user_count)
        FROM default.errors_local 
        WHERE 
             equals(deleted, 0) AND equals((type AS _snuba_type), 'error') 
            and `project_id`  in (148, 175, 92, 177, 140)
            AND equals(ifNull((level AS `_snuba_tags[level]`), ''), 'fatal') 
            AND `environment` in ('release')
        GROUP BY `mintime`,os,area
插入历史记录


#从历史结果表里插入
INSERT INTO
  overview_daily.`errors_5min_local` (
    mintime,
    os,
    area,
    crash_count, 
    unique_user_count
  )
select
  time as mintime,
    os,
    area,
    crash_count, 
    unique_user_count
FROM
  apm.sentry_crash_user_5min 

#------ errors_local 表导入

INSERT INTO overview_daily.errors_5min_local
SELECT toStartOfFiveMinute(`timestamp`, 'Asia/Shanghai') as `mintime`,
            multiIf(`project_id` in (148, 175), 'android', `project_id` in (92,177, 140), 'ios','N/A') AS os,
            multiIf(`project_id` in (148, 92,177), 'cn', `project_id` in (175,140), 'intl','N/A') AS area,
            count(*) AS crash_count,
            (ifNull(uniq((nullIf(user, '') AS _snuba_user)), 0) AS unique_user_count)
        FROM default.errors_local 
        PREWHERE `project_id`  in (148, 175, 92, 177, 140)
        WHERE 
             equals(deleted, 0) AND equals((type AS _snuba_type), 'error') 
            AND equals(ifNull((level AS `_snuba_tags[level]`), ''), 'fatal') 
            AND `environment` in ('release')
            and mintime > '2023-06-09 00:00:00' 
        GROUP BY `mintime`,os,area

kl个人博客