by Nicolas JeanselmePerformanceQuery per second (QPS)sourcetype=ib:dns:query:by_member index=ib_dns host="$grid_member_var$"| bucket span=10m _time | stats sum(QCOUNT) as QPM by _time | timechart bins=1000 eval(avg(QPM)/600) as QPS| interpolate 1200$time.earliest$$time.latest$Cache hit ratio (CHR) %sourcetype=ib:dns:query:cache_hit_rate index=ib_dns host="$grid_member_var$" | eval PERCENT=if(HITS+MISSES > 0,(HITS*100/(HITS+MISSES)),0) | bucket span=10m _time | timechart bins=1000 avg(PERCENT) as CHR by display_name | interpolate 1200$time.earliest$$time.latest$Latency in mssourcetype=ib:dns:perf index=ib_dns host="$grid_member_var$"| bucket span=1m _time | timechart bins=1000 avg(LATENCY) as Latency_ms| interpolate 120$time.earliest$$time.latest$Max QPS at 100% Cache Hit Ratio (CHR)| makeresults | eval max=$qps_threshold$ | table max0QPS, CHR and Max QPS for this CHR comparisonsourcetype=ib:dns:query:by_member index=ib_dns host="$grid_member_var$"| bucket span=10m _time | stats sum(QCOUNT) as QPM by _time | timechart bins=1000 eval(avg(QPM)/600) as QPS| interpolate 1200 | join _time [search sourcetype=ib:dns:query:cache_hit_rate index=ib_dns host="$grid_member_var$" | eval PERCENT=if(HITS+MISSES > 0,(HITS*100/(HITS+MISSES)),0) | bucket span=10m _time | timechart bins=1000 avg(PERCENT) as CHR | interpolate 1200] | eval Max_QPS=$qps_threshold$ | eval Max_QPS_for_this_CHR=100/((100-CHR)/($qps_threshold$/10)+CHR/$qps_threshold$)$time.earliest$$time.latest$DNS engine usage % (Max QPS for CHR vs QPS), CPU & CHRsourcetype=ib:dns:query:by_member index=ib_dns host="$grid_member_var$"| bucket span=10m _time | stats sum(QCOUNT) as QPM by _time | timechart bins=1000 eval(avg(QPM)/600) as QPS| interpolate 1200 | join _time [search sourcetype=ib:dns:query:cache_hit_rate index=ib_dns host="$grid_member_var$" | eval PERCENT=if(HITS+MISSES > 0,(HITS*100/(HITS+MISSES)),0) | bucket span=10m _time | timechart bins=1000 avg(PERCENT) as CHR | interpolate 1200| join _time [search 
sourcetype=ib:system index=ib_system " 1 " host="$grid_member_var$" | bucket span=10m _time | timechart bins=1000 avg(CPU_PERCENT) as CPU| interpolate 1200]] | eval Max_QPS=$qps_threshold$ | eval Max_QPS_for_this_CHR=100/((100-CHR)/($qps_threshold$/10)+CHR/$qps_threshold$) | eval DNS_engine_usage=(QPS*100/Max_QPS_for_this_CHR) | fields _time, DNS_engine_usage, CHR, CPU |forecast DNS_engine_usage future_timespan=90 as DNS_engine_prediction$time.earliest$$time.latest$Memory usagesourcetype=ib:system index=ib_system " 2 " host="$grid_member_var$" | timechart bins=1000 avg(MEMORY_PERCENT) as Mem$time.earliest$$time.latest$DNS Engine maximum load %sourcetype=ib:dns:query:by_member index=ib_dns host="$grid_member_var$"| bucket span=10m _time | stats sum(QCOUNT) as QPM by _time | timechart bins=1000 eval(avg(QPM)/600) as QPS| interpolate 1200 | join _time [search sourcetype=ib:dns:query:cache_hit_rate index=ib_dns (HITS=0 OR MISSES=0) host="$grid_member_var$" | eval PERCENT=if(HITS+MISSES > 0,(HITS*100/(HITS+MISSES)),0) | bucket span=10m _time | timechart bins=1000 avg(PERCENT) as CHR | interpolate 1200| join _time [search sourcetype=ib:system index=ib_system " 1 " host="$grid_member_var$" | bucket span=10m _time | timechart bins=1000 avg(CPU_PERCENT) as CPU| interpolate 1200]] | eval Max_QPS=$qps_threshold$ | eval Max_QPS_for_this_CHR=100/((100-CHR)/($qps_threshold$/10)+CHR/$qps_threshold$) | eval DNS_engine_usage=(QPS*100/Max_QPS_for_this_CHR) | stats max(DNS_engine_usage)$time.earliest$$time.latest$DNS Indicators - Top
Top 10 DNS Clientsindex=ib_dns_summary report=si_dns_top_clients orig_host="$grid_member_var$" | stats sum(COUNT) as CLIENT_QUERIES by CLIENT | eventstats sum(CLIENT_QUERIES) as TOTAL | sort 10 -CLIENT_QUERIES | eval percent=round(CLIENT_QUERIES*100/TOTAL,6) | rename CLIENT as Client, CLIENT_QUERIES as count | fields Client, count, percent$time.earliest$$time.latest$
Top 10 Requested FQDNindex=ib_dns_summary report=si_dns_requested_domain orig_host="$grid_member_var$" | stats sum(COUNT) as FQDN_TOTAL by FQDN | eventstats sum(FQDN_TOTAL) as TOTAL | sort 10 -FQDN_TOTAL | eval percent=round(FQDN_TOTAL*100/TOTAL, 6) | rename FQDN_TOTAL as count, FQDN as "Domain Name" | fields "Domain Name", count, percent$time.earliest$$time.latest$
Top 10 Requested FQDN not in Alexa 2000index=ib_dns_summary report=si_dns_requested_domain orig_host="$grid_member_var$" | lookup alexa2000global.csv fqdn AS FQDN OUTPUTNEW fqdn AS match | where isnull(match) | stats sum(COUNT) AS FQDN_TOTAL BY FQDN | sort 10 -FQDN_TOTAL | rename FQDN_TOTAL AS count, FQDN AS "Domain Name"$time.earliest$$time.latest$
DNS Engine IndicatorsRecursion client quotaindex=ib_syslog host="$grid_member_var$" "Recursion client quota" | timechart avg(used) AS used, avg(max) AS max, avg(soft_limit) AS soft_limit, avg(s_over) AS s_over, avg(hard_limit) AS hard_limit, avg(h_over) AS h_over, avg(low_pri) AS low_pri$time.earliest$$time.latest$Clients per Queryindex=ib_syslog host="$grid_member_var$" "clients per query" | timechart avg(limit) AS limit, avg(max) AS max, avg(avg) AS avg, avg(soft_limit) AS soft_limit, avg(limit_over) AS limit_over, avg(hard_limit) AS hard_limit, avg(h_over) AS h_over, avg(est_max_req) AS "est_max_req"$time.earliest$$time.latest$DNS recursive cache sizeindex=ib_syslog host="$grid_member_var$" "Recursion cache view" | timechart avg(size) AS size$time.earliest$$time.latest$Recursion quota reachedindex=ib_syslog host="$grid_member_var$" "quota reached" | timechart count$time.earliest$$time.latest$
Top 10 client IP reached quotaindex=ib_syslog "quota reached" host="$grid_member_var$" | top limit=10 client_ip$time.earliest$$time.latest$
Top 10 FQDN quota reachedindex=ib_syslog "quota reached" host="$grid_member_var$" | top limit=10 fqdn$time.earliest$$time.latest$
DNS engine messages by severity over timeindex=ib_syslog host="$grid_member_var$" process=named named | timechart count BY severity$time.earliest$$time.latest$DNS engine messages by severityindex=ib_syslog host="$grid_member_var$" process=named named | top limit=8 severity$time.earliest$$time.latest$
Top 5 DNS engine messages by severityindex=ib_syslog host="$grid_member_var$" process=named named | top limit=5 message BY severity$time.earliest$$time.latest$
RPZ feed update statusindex=ib_syslog host="$grid_member_var$" named rpz zone NOT DBRef | rex "zone '?(?<rpz_zone>[^ '/]*)" | stats latest(_time) as time, latest(message) by rpz_zone | sort -time | eval time=strftime(time, "%d-%m-%Y %H:%M:%S") | rename rpz_zone AS "RPZ Zone", latest(message) as "Last message"$time.earliest$$time.latest$
DNS problem indicatorsRequest time-outsindex=ib_dns_summary orig_host="$grid_member_var$" report=si_top_timeout_queries | timechart sum(COUNT) AS Timeouts$time.earliest$$time.latest$
Top 10 time-outs domainsindex=ib_dns_summary report=si_top_timeout_queries orig_host="$grid_member_var$" | stats sum(COUNT) as SFT_QUERIES by NAME | eventstats sum(SFT_QUERIES) as COUNT_SUM | sort 10 -SFT_QUERIES | eval TIMEOUT_PERCENT=round(SFT_QUERIES*100/COUNT_SUM, 6) | rename NAME as "Domain Name", SFT_QUERIES as "count", TIMEOUT_PERCENT as "percent" | fields "Domain Name", "count", "percent"$time.earliest$$time.latest$
Top 10 domains resolved after disabling EDNSindex=ib_syslog host="$grid_member_var$" "success resolving" "after disabling EDNS" | top limit=10 fqdn$time.earliest$$time.latest$
Requests resolved after reducing EDNS to 512index=ib_syslog host="$grid_member_var$" "success resolving" "after reducing the advertised EDNS UDP packet size to 512 octets" | timechart count$time.earliest$$time.latest$
Top 10 domains resolved after reducing EDNS to 512index=ib_syslog host="$grid_member_var$" "success resolving" "after reducing the advertised EDNS UDP packet size to 512 octets" | top limit=10 fqdn$time.earliest$$time.latest$
LAME delegationsindex=ib_syslog host="$grid_member_var$" "lame server resolving" | timechart count$time.earliest$$time.latest$
Top 10 LAME delegations domainsindex=ib_syslog host="$grid_member_var$" "lame server resolving" | top limit=10 fqdn$time.earliest$$time.latest$
Top 10 Unexpected FORMERR return code domainsindex=ib_syslog host="$grid_member_var$" "FORMERR resolving" | top limit=10 fqdn$time.earliest$$time.latest$
Security related indicatorsFetches per server eventsindex=ib_syslog host="$grid_member_var$" "adb: quota " | timechart count$time.earliest$$time.latest$
Top 10 fetches per server IPsindex=ib_syslog host="$grid_member_var$" "adb: quota " | top limit=10 fetches_server_ip$time.earliest$$time.latest$
Fetches per zone eventsindex=ib_syslog host="$grid_member_var$" "too many simultaneous fetches for " | timechart count$time.earliest$$time.latest$
Top 10 fetches per zone FQDNsindex=ib_syslog host="$grid_member_var$" "too many simultaneous fetches for " | top limit=10 fetches_zone_name$time.earliest$$time.latest$