Pull request 1928: 1453-stats-tests

Updates #1453.

Squashed commit of the following:

commit f08f68ef5493dad03d3eb120d886f2df1af28be6
Merge: b70b088af 54aee2272
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Tue Aug 8 19:04:06 2023 +0300

    Merge branch 'master' into 1453-stats-tests

commit b70b088af0fdc7d6d048d688160048bad1fceb12
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Aug 3 19:32:04 2023 +0300

    stats: imp code

commit c341012ba61894c255c1868624be1cac0d26a6fa
Merge: a2ac8c34e 5eb3cd0f9
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Aug 3 13:36:24 2023 +0300

    Merge branch 'master' into 1453-stats-tests

commit a2ac8c34ee32606ca5e259c3e2a47db0dd5858de
Author: Ildar Kamalov <ik@adguard.com>
Date:   Thu Aug 3 13:25:12 2023 +0300

    client: add top upstreams and average processing time tables

commit 11118947f9bf945be0b056f8475cf3b848c6e66e
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Tue Aug 1 17:24:57 2023 +0300

    stats: imp docs

commit 904cf81d02a1f327b9647fa7ad9e181cfabb68a4
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Mon Jul 31 17:34:06 2023 +0300

    stats: imp code

commit 34f0c96dd5865d1470385322a88842dd0b3d996d
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Mon Jul 31 15:43:46 2023 +0300

    all: imp docs

commit 2cb2d0d8bef3580f64bc25c414fe9b5ea6b9f997
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Fri Jul 28 17:24:31 2023 +0300

    all: imp code

commit 5251a899fecc21e50a0ba06042f96f5b404e196a
Merge: b6c2b12d4 300821a7f
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Jul 27 20:34:39 2023 +0300

    Merge branch 'master' into 1453-stats-tests

commit b6c2b12d4425012efd73549c3a426735f3a677cd
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Jul 27 20:32:18 2023 +0300

    stats: imp code

commit 5546b82a78326f9cc6d8c87df5083f8fc66a0178
Merge: 8a3d6b1b4 5f8fa006c
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Jul 27 14:24:01 2023 +0300

    Merge branch 'master' into 1453-stats-tests

commit 8a3d6b1b49ce189f95adfa7406a34108e885e676
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Jul 27 14:17:47 2023 +0300

    all: imp code

commit 2a48001e275e3cdcf70e13e1c9cebd4e502f3259
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Tue Jul 25 18:27:20 2023 +0300

    all: imp docs

commit 3dd21890175af32a3368378f7e013383f6d040ec
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Tue Jul 25 16:00:39 2023 +0300

    all: imp naming

commit 6124456fc3149b71f6bd58d35ecf24eb6cf40d5d
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Jul 20 16:15:56 2023 +0300

    all: add upstreams avg processing time

commit 187ad0c77a81c9fd95c24e23141355db2e83e50d
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Tue Jul 18 16:42:19 2023 +0300

    all: add top upstreams
This commit is contained in:
Stanislav Chzhen
2023-08-09 14:33:52 +03:00
parent 54aee22720
commit c47509fabc
19 changed files with 686 additions and 140 deletions

View File

@@ -11,17 +11,19 @@ import (
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/stringutil"
"go.etcd.io/bbolt"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
)
// TODO(a.garipov): Rewrite all of this. Add proper error handling and
// inspection. Improve logging. Decrease complexity.
const (
// maxDomains is the max number of top domains to return.
maxDomains = 100
// maxClients is the max number of top clients to return.
maxClients = 100
// maxUpstreams is the max number of top upstreams to return.
maxUpstreams = 100
)
// UnitIDGenFunc is the signature of a function that generates a unique ID for
@@ -63,11 +65,30 @@ type Entry struct {
// Domain is the domain name requested.
Domain string
// Upstream is the upstream DNS server.
Upstream string
// Result is the result of processing the request.
Result Result
// Time is the duration of the request processing in milliseconds.
Time uint32
// Time is the duration of the request processing.
Time time.Duration
}
// validate returs an error if entry is not valid.
func (e *Entry) validate() (err error) {
switch {
case e.Result == 0:
return errors.Error("result code is not set")
case e.Result >= resultLast:
return fmt.Errorf("unknown result code %d", e.Result)
case e.Domain == "":
return errors.Error("domain is empty")
case e.Client == "":
return errors.Error("client is empty")
default:
return nil
}
}
// unit collects the statistics data for a specific period of time.
@@ -82,6 +103,13 @@ type unit struct {
// clients stores the number of requests from each client.
clients map[string]uint64
// upstreamsResponses stores the number of responses from each upstream.
upstreamsResponses map[string]uint64
// upstreamsTimeSum stores the sum of processing time in microseconds of
// responses from each upstream.
upstreamsTimeSum map[string]uint64
// nResult stores the number of requests grouped by it's result.
nResult []uint64
@@ -95,7 +123,7 @@ type unit struct {
// nTotal stores the total number of requests.
nTotal uint64
// timeSum stores the sum of processing time in milliseconds of each request
// timeSum stores the sum of processing time in microseconds of each request
// written by the unit.
timeSum uint64
}
@@ -103,11 +131,13 @@ type unit struct {
// newUnit allocates the new *unit.
func newUnit(id uint32) (u *unit) {
return &unit{
domains: map[string]uint64{},
blockedDomains: map[string]uint64{},
clients: map[string]uint64{},
nResult: make([]uint64, resultLast),
id: id,
domains: map[string]uint64{},
blockedDomains: map[string]uint64{},
clients: map[string]uint64{},
upstreamsResponses: map[string]uint64{},
upstreamsTimeSum: map[string]uint64{},
nResult: make([]uint64, resultLast),
id: id,
}
}
@@ -135,10 +165,17 @@ type unitDB struct {
// Clients is the number of requests from each client.
Clients []countPair
// UpstreamsResponses is the number of responses from each upstream.
UpstreamsResponses []countPair
// UpstreamsTimeSum is the sum of processing time in microseconds of
// responses from each upstream.
UpstreamsTimeSum []countPair
// NTotal is the total number of requests.
NTotal uint64
// TimeAvg is the average of processing times in milliseconds of all the
// TimeAvg is the average of processing times in microseconds of all the
// requests in the unit.
TimeAvg uint32
}
@@ -218,12 +255,14 @@ func (u *unit) serialize() (udb *unitDB) {
}
return &unitDB{
NTotal: u.nTotal,
NResult: append([]uint64{}, u.nResult...),
Domains: convertMapToSlice(u.domains, maxDomains),
BlockedDomains: convertMapToSlice(u.blockedDomains, maxDomains),
Clients: convertMapToSlice(u.clients, maxClients),
TimeAvg: timeAvg,
NTotal: u.nTotal,
NResult: append([]uint64{}, u.nResult...),
Domains: convertMapToSlice(u.domains, maxDomains),
BlockedDomains: convertMapToSlice(u.blockedDomains, maxDomains),
Clients: convertMapToSlice(u.clients, maxClients),
UpstreamsResponses: convertMapToSlice(u.upstreamsResponses, maxUpstreams),
UpstreamsTimeSum: convertMapToSlice(u.upstreamsTimeSum, maxUpstreams),
TimeAvg: timeAvg,
}
}
@@ -262,21 +301,29 @@ func (u *unit) deserialize(udb *unitDB) {
u.domains = convertSliceToMap(udb.Domains)
u.blockedDomains = convertSliceToMap(udb.BlockedDomains)
u.clients = convertSliceToMap(udb.Clients)
u.upstreamsResponses = convertSliceToMap(udb.UpstreamsResponses)
u.upstreamsTimeSum = convertSliceToMap(udb.UpstreamsTimeSum)
u.timeSum = uint64(udb.TimeAvg) * udb.NTotal
}
// add adds new data to u. It's safe for concurrent use.
func (u *unit) add(res Result, domain, cli string, dur uint64) {
u.nResult[res]++
if res == RNotFiltered {
u.domains[domain]++
func (u *unit) add(e *Entry) {
u.nResult[e.Result]++
if e.Result == RNotFiltered {
u.domains[e.Domain]++
} else {
u.blockedDomains[domain]++
u.blockedDomains[e.Domain]++
}
u.clients[cli]++
u.timeSum += dur
u.clients[e.Client]++
t := uint64(e.Time.Microseconds())
u.timeSum += t
u.nTotal++
if e.Upstream != "" {
u.upstreamsResponses[e.Upstream]++
u.upstreamsTimeSum[e.Upstream] += t
}
}
// flushUnitToDB puts udb to the database at id.
@@ -390,9 +437,11 @@ func (s *StatsCtx) getData(limit uint32) (StatsResp, bool) {
return StatsResp{
TimeUnits: "days",
TopBlocked: []topAddrs{},
TopClients: []topAddrs{},
TopQueried: []topAddrs{},
TopBlocked: []topAddrs{},
TopClients: []topAddrs{},
TopQueried: []topAddrs{},
TopUpstreamsResponses: []topAddrs{},
TopUpstreamsAvgTime: []topAddrsFloat{},
BlockedFiltering: []uint64{},
DNSQueries: []uint64{},
@@ -416,21 +465,35 @@ func (s *StatsCtx) getData(limit uint32) (StatsResp, bool) {
log.Fatalf("len(dnsQueries) != limit: %d %d", len(dnsQueries), limit)
}
return s.dataFromUnits(units, dnsQueries, firstID, timeUnit), true
}
// dataFromUnits collects and returns the statistics data.
func (s *StatsCtx) dataFromUnits(
units []*unitDB,
dnsQueries []uint64,
firstID uint32,
timeUnit TimeUnit,
) (resp StatsResp) {
topUpstreamsResponses, topUpstreamsAvgTime := topUpstreamsPairs(units)
data := StatsResp{
DNSQueries: dnsQueries,
BlockedFiltering: statsCollector(units, firstID, timeUnit, func(u *unitDB) (num uint64) { return u.NResult[RFiltered] }),
ReplacedSafebrowsing: statsCollector(units, firstID, timeUnit, func(u *unitDB) (num uint64) { return u.NResult[RSafeBrowsing] }),
ReplacedParental: statsCollector(units, firstID, timeUnit, func(u *unitDB) (num uint64) { return u.NResult[RParental] }),
TopQueried: topsCollector(units, maxDomains, s.ignored, func(u *unitDB) (pairs []countPair) { return u.Domains }),
TopBlocked: topsCollector(units, maxDomains, s.ignored, func(u *unitDB) (pairs []countPair) { return u.BlockedDomains }),
TopClients: topsCollector(units, maxClients, nil, topClientPairs(s)),
DNSQueries: dnsQueries,
BlockedFiltering: statsCollector(units, firstID, timeUnit, func(u *unitDB) (num uint64) { return u.NResult[RFiltered] }),
ReplacedSafebrowsing: statsCollector(units, firstID, timeUnit, func(u *unitDB) (num uint64) { return u.NResult[RSafeBrowsing] }),
ReplacedParental: statsCollector(units, firstID, timeUnit, func(u *unitDB) (num uint64) { return u.NResult[RParental] }),
TopQueried: topsCollector(units, maxDomains, s.ignored, func(u *unitDB) (pairs []countPair) { return u.Domains }),
TopBlocked: topsCollector(units, maxDomains, s.ignored, func(u *unitDB) (pairs []countPair) { return u.BlockedDomains }),
TopUpstreamsResponses: topUpstreamsResponses,
TopUpstreamsAvgTime: topUpstreamsAvgTime,
TopClients: topsCollector(units, maxClients, nil, topClientPairs(s)),
}
// Total counters:
sum := unitDB{
NResult: make([]uint64, resultLast),
}
timeN := 0
var timeN uint32
for _, u := range units {
sum.NTotal += u.NTotal
sum.TimeAvg += u.TimeAvg
@@ -450,7 +513,7 @@ func (s *StatsCtx) getData(limit uint32) (StatsResp, bool) {
data.NumReplacedParental = sum.NResult[RParental]
if timeN != 0 {
data.AvgProcessingTime = float64(sum.TimeAvg/uint32(timeN)) / 1000000
data.AvgProcessingTime = microsecondsToSeconds(float64(sum.TimeAvg / timeN))
}
data.TimeUnits = "hours"
@@ -458,7 +521,7 @@ func (s *StatsCtx) getData(limit uint32) (StatsResp, bool) {
data.TimeUnits = "days"
}
return data, true
return data
}
func topClientPairs(s *StatsCtx) (pg pairsGetter) {
@@ -474,3 +537,66 @@ func topClientPairs(s *StatsCtx) (pg pairsGetter) {
return clients
}
}
// topUpstreamsPairs returns sorted lists of number of total responses and the
// average of processing time for each upstream.
func topUpstreamsPairs(
units []*unitDB,
) (topUpstreamsResponses []topAddrs, topUpstreamsAvgTime []topAddrsFloat) {
upstreamsResponses := topAddrs{}
upstreamsTimeSum := topAddrsFloat{}
for _, u := range units {
for _, cp := range u.UpstreamsResponses {
upstreamsResponses[cp.Name] += cp.Count
}
for _, cp := range u.UpstreamsTimeSum {
upstreamsTimeSum[cp.Name] += float64(cp.Count)
}
}
upstreamsAvgTime := topAddrsFloat{}
for u, n := range upstreamsResponses {
total := upstreamsTimeSum[u]
if total != 0 {
upstreamsAvgTime[u] = microsecondsToSeconds(total / float64(n))
}
}
upstreamsPairs := convertMapToSlice(upstreamsResponses, maxUpstreams)
topUpstreamsResponses = convertTopSlice(upstreamsPairs)
return topUpstreamsResponses, prepareTopUpstreamsAvgTime(upstreamsAvgTime)
}
// microsecondsToSeconds converts microseconds to seconds.
//
// NOTE: Frontend expects time duration in seconds as floating-point number
// with double precision.
func microsecondsToSeconds(n float64) (r float64) {
const micro = 1e-6
return n * micro
}
// prepareTopUpstreamsAvgTime returns sorted list of average processing times
// of the DNS requests from each upstream.
func prepareTopUpstreamsAvgTime(
upstreamsAvgTime topAddrsFloat,
) (topUpstreamsAvgTime []topAddrsFloat) {
keys := maps.Keys(upstreamsAvgTime)
slices.SortFunc(keys, func(a, b string) (sortsBefore bool) {
return upstreamsAvgTime[a] > upstreamsAvgTime[b]
})
topUpstreamsAvgTime = make([]topAddrsFloat, 0, len(upstreamsAvgTime))
for _, k := range keys {
topUpstreamsAvgTime = append(topUpstreamsAvgTime, topAddrsFloat{k: upstreamsAvgTime[k]})
}
return topUpstreamsAvgTime
}