Pull request: 3890 fix anonymization

Merge in DNS/adguard-home from 3890-fix-stats to master

Updates #3890.

Squashed commit of the following:

commit a77a6204bc8a58f62a4fac70efdcae4267a64810
Merge: 834493a2 90e65b66
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 17:22:16 2021 +0300

    Merge branch 'master' into 3890-fix-stats

commit 834493a22ae79199efcc44e0715e2ac6f6272963
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 17:09:30 2021 +0300

    querylog: load once

commit b8000e7ba7a998fcd4553230ec5e5f9c90106e31
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 16:54:41 2021 +0300

    querylog: fix docs

commit 7db99ccfa19b58100950c11d67b23bca7af3e5cb
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 16:51:31 2021 +0300

    querylog: imp docs

commit 2a84650bd7ac5195730a7ab47b9562a83f721499
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 15:48:09 2021 +0300

    querylog: imp anonymization

commit 0f63feb1ff5f006fc528c3b681ef3b9d2199581e
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 14:44:37 2021 +0300

    all: imp code & docs

commit c4ccdcbb7248897edd178fd5cb77127e39ada73d
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 14:24:30 2021 +0300

    all: log changes

commit 60bb777a5aff36bba129a078fa11ae566298178a
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Mon Dec 6 14:08:41 2021 +0300

    all: use atomic value

commit c45886bd20eee2212b42686ff369830d8c08fe36
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Tue Nov 30 18:50:02 2021 +0300

    all: anonymize separately
This commit is contained in:
Eugene Burkov
2021-12-06 17:26:43 +03:00
parent 90e65b662c
commit d2cf3233b8
16 changed files with 279 additions and 111 deletions

View File

@@ -244,3 +244,59 @@ func TestDecodeLogEntry_backwardCompatability(t *testing.T) {
})
}
}
// BenchmarkAnonymizeIP compares AnonymizeIP with the copy()-based
// anonymizeIPSlow on IPv4, IPv4-mapped, IPv6, and malformed addresses.  After
// each run it also checks that the address has been masked as expected.
func BenchmarkAnonymizeIP(b *testing.B) {
	benchCases := []struct {
		name string
		ip   net.IP
		want net.IP
	}{{
		name: "v4",
		ip:   net.IP{1, 2, 3, 4},
		want: net.IP{1, 2, 0, 0},
	}, {
		name: "v4_mapped",
		ip:   net.IP{1, 2, 3, 4}.To16(),
		want: net.IP{1, 2, 0, 0}.To16(),
	}, {
		name: "v6",
		ip: net.IP{
			0xa, 0xb, 0x0, 0x0,
			0x0, 0xb, 0xa, 0x9,
			0x8, 0x7, 0x6, 0x5,
			0x4, 0x3, 0x2, 0x1,
		},
		want: net.IP{
			0xa, 0xb, 0x0, 0x0,
			0x0, 0xb, 0x0, 0x0,
			0x0, 0x0, 0x0, 0x0,
			0x0, 0x0, 0x0, 0x0,
		},
	}, {
		name: "invalid",
		ip:   net.IP{1, 2, 3},
		want: net.IP{1, 2, 3},
	}}

	for _, bc := range benchCases {
		// Both functions modify their argument in place, so every
		// sub-benchmark gets its own copy of the input.  With a shared slice
		// the second sub-benchmark would only ever see an already anonymized
		// address and its assertion would pass even if the function under
		// test did nothing.
		fastIP := append(net.IP(nil), bc.ip...)
		slowIP := append(net.IP(nil), bc.ip...)

		b.Run(bc.name, func(b *testing.B) {
			b.ReportAllocs()

			for i := 0; i < b.N; i++ {
				AnonymizeIP(fastIP)
			}

			assert.Equal(b, bc.want, fastIP)
		})

		b.Run(bc.name+"_slow", func(b *testing.B) {
			b.ReportAllocs()

			for i := 0; i < b.N; i++ {
				anonymizeIPSlow(slowIP)
			}

			assert.Equal(b, bc.want, slowIP)
		})
	}
}

View File

@@ -3,6 +3,7 @@ package querylog
import (
"encoding/json"
"fmt"
"net"
"net/http"
"net/url"
"strconv"
@@ -12,6 +13,7 @@ import (
"github.com/AdguardTeam/golibs/jsonutil"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/stringutil"
"github.com/AdguardTeam/golibs/timeutil"
"golang.org/x/net/idna"
)
@@ -88,23 +90,59 @@ func (l *queryLog) handleQueryLogInfo(w http.ResponseWriter, r *http.Request) {
}
}
// anonymizeIPSlow anonymizes ip in place using copy().  For an IPv4 address,
// including an IPv4-mapped 16-byte one, the last two bytes are zeroed.  For any
// other 16-byte address the last ten bytes are zeroed.  Slices of any other
// length are left untouched.
//
// It is kept only as the reference implementation that BenchmarkAnonymizeIP
// compares AnonymizeIP against.
func anonymizeIPSlow(ip net.IP) {
	v4 := ip.To4()

	switch {
	case v4 != nil:
		// To4 returns a view into ip, so writing to v4 modifies ip itself.
		copy(v4[net.IPv4len-2:], []byte{0, 0})
	case len(ip) == net.IPv6len:
		copy(ip[net.IPv6len-10:], []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})
	}
}
// AnonymizeIP anonymizes the client address ip in place.  For an IPv4 address,
// including an IPv4-mapped 16-byte one, the last two bytes are zeroed.  For any
// other 16-byte address the last ten bytes are zeroed.  Slices of any other
// length are not valid addresses and are left untouched.
func AnonymizeIP(ip net.IP) {
	// Plain element assignments are used on purpose, since they compile into
	// more efficient code than copy().  See BenchmarkAnonymizeIP.
	if v4 := ip.To4(); v4 != nil {
		// To4 returns a view into ip, so writing to v4 modifies ip itself.
		v4[net.IPv4len-2] = 0
		v4[net.IPv4len-1] = 0

		return
	}

	if len(ip) != net.IPv6len {
		return
	}

	ip[net.IPv6len-10] = 0
	ip[net.IPv6len-9] = 0
	ip[net.IPv6len-8] = 0
	ip[net.IPv6len-7] = 0
	ip[net.IPv6len-6] = 0
	ip[net.IPv6len-5] = 0
	ip[net.IPv6len-4] = 0
	ip[net.IPv6len-3] = 0
	ip[net.IPv6len-2] = 0
	ip[net.IPv6len-1] = 0
}
// Set configuration
func (l *queryLog) handleQueryLogConfig(w http.ResponseWriter, r *http.Request) {
d := qlogConfig{}
req, err := jsonutil.DecodeObject(&d, r.Body)
d := &qlogConfig{}
req, err := jsonutil.DecodeObject(d, r.Body)
if err != nil {
httpError(r, w, http.StatusBadRequest, "%s", err)
return
}
ivl := time.Duration(24*d.Interval) * time.Hour
ivl := time.Duration(float64(timeutil.Day) * d.Interval)
if req.Exists("interval") && !checkInterval(ivl) {
httpError(r, w, http.StatusBadRequest, "Unsupported interval")
return
}
defer l.conf.ConfigModified()
l.lock.Lock()
// copy data, modify it, then activate. Other threads (readers) don't need to use this lock.
defer l.lock.Unlock()
// Copy data, modify it, then activate. Other threads (readers) don't need
// to use this lock.
conf := *l.conf
if req.Exists("enabled") {
conf.Enabled = d.Enabled
@@ -113,12 +151,13 @@ func (l *queryLog) handleQueryLogConfig(w http.ResponseWriter, r *http.Request)
conf.RotationIvl = ivl
}
if req.Exists("anonymize_client_ip") {
conf.AnonymizeClientIP = d.AnonymizeClientIP
if conf.AnonymizeClientIP = d.AnonymizeClientIP; conf.AnonymizeClientIP {
l.anonymizer.Store(AnonymizeIP)
} else {
l.anonymizer.Store(nil)
}
}
l.conf = &conf
l.lock.Unlock()
l.conf.ConfigModified()
}
// "value" -> value, return TRUE

View File

@@ -2,46 +2,30 @@ package querylog
import (
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghnet"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/netutil"
"github.com/miekg/dns"
"golang.org/x/net/idna"
)
// TODO(a.garipov): Use a proper structured approach here.
// getClientIP returns the client address to be stored.  If anonymization is
// disabled in the configuration or ip is nil, ip is returned as is.  Otherwise
// the result is ip masked with a 16-bit prefix for IPv4 addresses and a 112-bit
// prefix for all other addresses.
func (l *queryLog) getClientIP(ip net.IP) (clientIP net.IP) {
	const (
		v4PrefixLen = 16
		v6PrefixLen = 112
	)

	if !l.conf.AnonymizeClientIP || ip == nil {
		return ip
	}

	ones, bits := v6PrefixLen, 128
	if ip.To4() != nil {
		ones, bits = v4PrefixLen, 32
	}

	return ip.Mask(net.CIDRMask(ones, bits))
}
// jobject is an alias for a JSON object, that is a map from string keys to
// arbitrary values.  It is used to build the query log entries that are
// converted to JSON.
type jobject = map[string]interface{}
// entriesToJSON converts query log entries to JSON.
func (l *queryLog) entriesToJSON(entries []*logEntry, oldest time.Time) (res jobject) {
data := []jobject{}
data := make([]jobject, 0, len(entries))
// the elements order is already reversed (from newer to older)
for i := 0; i < len(entries); i++ {
entry := entries[i]
jsonEntry := l.logEntryToJSONEntry(entry)
// The elements order is already reversed to be from newer to older.
for _, entry := range entries {
jsonEntry := l.entryToJSON(entry, l.anonymizer.Load())
data = append(data, jsonEntry)
}
@@ -56,7 +40,7 @@ func (l *queryLog) entriesToJSON(entries []*logEntry, oldest time.Time) (res job
return res
}
func (l *queryLog) logEntryToJSONEntry(entry *logEntry) (jsonEntry jobject) {
func (l *queryLog) entryToJSON(entry *logEntry, anonFunc aghnet.IPMutFunc) (jsonEntry jobject) {
var msg *dns.Msg
if len(entry.Answer) > 0 {
@@ -81,16 +65,21 @@ func (l *queryLog) logEntryToJSONEntry(entry *logEntry) (jsonEntry jobject) {
log.Debug("translating %q into unicode: %s", hostname, err)
}
eip := netutil.CloneIP(entry.IP)
anonFunc(eip)
jsonEntry = jobject{
"reason": entry.Result.Reason.String(),
"elapsedMs": strconv.FormatFloat(entry.Elapsed.Seconds()*1000, 'f', -1, 64),
"time": entry.Time.Format(time.RFC3339Nano),
"client": l.getClientIP(entry.IP),
"client_info": entry.client,
"client": eip,
"client_proto": entry.ClientProto,
"upstream": entry.Upstream,
"question": question,
}
if eip.Equal(entry.IP) {
jsonEntry["client_info"] = entry.client
}
if entry.ClientID != "" {
jsonEntry["client_id"] = entry.ClientID

View File

@@ -9,6 +9,7 @@ import (
"sync"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghnet"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
@@ -36,6 +37,8 @@ type queryLog struct {
fileFlushLock sync.Mutex // synchronize a file-flushing goroutine and main thread
flushPending bool // don't start another goroutine while the previous one is still running
fileWriteLock sync.Mutex
anonymizer *aghnet.IPMut
}
// ClientProto values are names of the client protocols.
@@ -162,7 +165,7 @@ func (l *queryLog) Add(params AddParams) {
now := time.Now()
entry := logEntry{
IP: l.getClientIP(params.ClientIP),
IP: params.ClientIP,
Time: now,
Result: *params.Result,

View File

@@ -6,6 +6,7 @@ import (
"path/filepath"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghnet"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
@@ -67,6 +68,9 @@ type Config struct {
// AnonymizeClientIP tells if the query log should anonymize clients' IP
// addresses.
AnonymizeClientIP bool
// Anonymizer processes the IP addresses to anonymize those if needed.
Anonymizer *aghnet.IPMut
}
// AddParams - parameters for Add()
@@ -115,7 +119,8 @@ func newQueryLog(conf Config) (l *queryLog) {
l = &queryLog{
findClient: findClient,
logFile: filepath.Join(conf.BaseDir, queryLogFileName),
logFile: filepath.Join(conf.BaseDir, queryLogFileName),
anonymizer: conf.Anonymizer,
}
l.conf = &Config{}