Pull request 1837: AG-21462-imp-safebrowsing-parental

Merge in DNS/adguard-home from AG-21462-imp-safebrowsing-parental to master

Squashed commit of the following:

commit 85016d4f1105e21a407efade0bd45b8362808061
Merge: 0e61edade 620b51e3e
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 27 16:36:30 2023 +0300

    Merge branch 'master' into AG-21462-imp-safebrowsing-parental

commit 0e61edadeff34f6305e941c1db94575c82f238d9
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 27 14:51:37 2023 +0300

    filtering: imp tests

commit 994255514cc0f67dfe33d5a0892432e8924d1e36
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 27 11:13:19 2023 +0300

    filtering: fix typo

commit 96d1069573171538333330d6af94ef0f4208a9c4
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 27 11:00:18 2023 +0300

    filtering: imp code more

commit c2a5620b04c4a529eea69983f1520cd2bc82ea9b
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Wed Apr 26 19:13:26 2023 +0300

    all: add todo

commit e5dcc2e9701f8bccfde6ef8c01a4a2e7eb31599e
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Wed Apr 26 14:36:08 2023 +0300

    all: imp code more

commit b6e734ccbeda82669023f6578481260b7c1f7161
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Tue Apr 25 15:01:56 2023 +0300

    filtering: imp code

commit 530648dadf836c1a4bd9917e0d3b47256fa8ff52
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Mon Apr 24 20:06:36 2023 +0300

    all: imp code

commit 49fa6e587052a40bb431fea457701ee860493527
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Mon Apr 24 14:57:19 2023 +0300

    all: rm safe browsing ctx

commit bbcb66cb03e18fa875e3c33cf16295892739e507
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Fri Apr 21 17:54:18 2023 +0300

    filtering: add cache item

commit cb7c9fffe8c4ff5e7a21ca912c223c799f61385f
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 20 18:43:02 2023 +0300

    filtering: fix hashes

commit 153fec46270212af03f3631bfb42c5d680c4e142
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 20 16:15:15 2023 +0300

    filtering: add test cases

commit 09372f92bbb1fc082f1b1283594ee589100209c5
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Thu Apr 20 15:38:05 2023 +0300

    filtering: imp code

commit 466bc26d524ea6d1c3efb33692a7785d39e491ca
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Wed Apr 19 18:38:40 2023 +0300

    filtering: add tests

commit 24365ecf8c60512fdac65833ee603c80864ae018
Author: Stanislav Chzhen <s.chzhen@adguard.com>
Date:   Wed Apr 19 11:38:57 2023 +0300

    filtering: add hashprefix
This commit is contained in:
Stanislav Chzhen
2023-04-27 16:39:35 +03:00
parent 620b51e3ea
commit 381f2f651d
9 changed files with 762 additions and 599 deletions

View File

@@ -0,0 +1,130 @@
package hashprefix
import (
"encoding/binary"
"time"
"github.com/AdguardTeam/golibs/log"
)
// expirySize is the size of expiry in cacheItem.
const expirySize = 8
// cacheItem represents an item that we will store in the cache.
type cacheItem struct {
// expiry is the time when cacheItem will expire.
expiry time.Time
// hashes is the hashed hostnames.
hashes []hostnameHash
}
// toCacheItem decodes cacheItem from data. data must be at least equal to
// expiry size.
func toCacheItem(data []byte) *cacheItem {
t := time.Unix(int64(binary.BigEndian.Uint64(data)), 0)
data = data[expirySize:]
hashes := make([]hostnameHash, len(data)/hashSize)
for i := 0; i < len(data); i += hashSize {
var hash hostnameHash
copy(hash[:], data[i:i+hashSize])
hashes = append(hashes, hash)
}
return &cacheItem{
expiry: t,
hashes: hashes,
}
}
// fromCacheItem encodes cacheItem into data.
func fromCacheItem(item *cacheItem) (data []byte) {
data = make([]byte, len(item.hashes)*hashSize+expirySize)
expiry := item.expiry.Unix()
binary.BigEndian.PutUint64(data[:expirySize], uint64(expiry))
for _, v := range item.hashes {
// nolint:looppointer // The subsilce is used for a copy.
data = append(data, v[:]...)
}
return data
}
// findInCache finds hashes in the cache. If nothing found returns list of
// hashes, prefixes of which will be sent to upstream.
func (c *Checker) findInCache(
hashes []hostnameHash,
) (found, blocked bool, hashesToRequest []hostnameHash) {
now := time.Now()
i := 0
for _, hash := range hashes {
// nolint:looppointer // The subsilce is used for a safe cache lookup.
data := c.cache.Get(hash[:prefixLen])
if data == nil {
hashes[i] = hash
i++
continue
}
item := toCacheItem(data)
if now.After(item.expiry) {
hashes[i] = hash
i++
continue
}
if ok := findMatch(hashes, item.hashes); ok {
return true, true, nil
}
}
if i == 0 {
return true, false, nil
}
return false, false, hashes[:i]
}
// storeInCache caches hashes.
func (c *Checker) storeInCache(hashesToRequest, respHashes []hostnameHash) {
hashToStore := make(map[prefix][]hostnameHash)
for _, hash := range respHashes {
var pref prefix
// nolint:looppointer // The subsilce is used for a copy.
copy(pref[:], hash[:])
hashToStore[pref] = append(hashToStore[pref], hash)
}
for pref, hash := range hashToStore {
// nolint:looppointer // The subsilce is used for a safe cache lookup.
c.setCache(pref[:], hash)
}
for _, hash := range hashesToRequest {
// nolint:looppointer // The subsilce is used for a safe cache lookup.
pref := hash[:prefixLen]
val := c.cache.Get(pref)
if val == nil {
c.setCache(pref, nil)
}
}
}
// setCache stores hash in cache.
func (c *Checker) setCache(pref []byte, hashes []hostnameHash) {
item := &cacheItem{
expiry: time.Now().Add(c.cacheTime),
hashes: hashes,
}
c.cache.Set(pref, fromCacheItem(item))
log.Debug("%s: stored in cache: %v", c.svc, pref)
}

View File

@@ -0,0 +1,245 @@
// Package hashprefix used for safe browsing and parent control.
package hashprefix
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
"time"
"github.com/AdguardTeam/dnsproxy/upstream"
"github.com/AdguardTeam/golibs/cache"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/netutil"
"github.com/AdguardTeam/golibs/stringutil"
"github.com/miekg/dns"
"golang.org/x/exp/slices"
"golang.org/x/net/publicsuffix"
)
const (
// prefixLen is the length of the hash prefix of the filtered hostname.
prefixLen = 2
// hashSize is the size of hashed hostname.
hashSize = sha256.Size
// hexSize is the size of hexadecimal representation of hashed hostname.
hexSize = hashSize * 2
)
// prefix is the type of the SHA256 hash prefix used to match against the
// domain-name database.
type prefix [prefixLen]byte
// hostnameHash is the hashed hostname.
//
// TODO(s.chzhen): Split into prefix and suffix.
type hostnameHash [hashSize]byte
// findMatch returns true if one of the a hostnames matches one of the b.
func findMatch(a, b []hostnameHash) (matched bool) {
for _, hash := range a {
if slices.Contains(b, hash) {
return true
}
}
return false
}
// Config is the configuration structure for safe browsing and parental
// control.
type Config struct {
// Upstream is the upstream DNS server.
Upstream upstream.Upstream
// ServiceName is the name of the service.
ServiceName string
// TXTSuffix is the TXT suffix for DNS request.
TXTSuffix string
// CacheTime is the time period to store hash.
CacheTime time.Duration
// CacheSize is the maximum size of the cache. If it's zero, cache size is
// unlimited.
CacheSize uint
}
type Checker struct {
// upstream is the upstream DNS server.
upstream upstream.Upstream
// cache stores hostname hashes.
cache cache.Cache
// svc is the name of the service.
svc string
// txtSuffix is the TXT suffix for DNS request.
txtSuffix string
// cacheTime is the time period to store hash.
cacheTime time.Duration
}
// New returns Checker.
func New(conf *Config) (c *Checker) {
return &Checker{
upstream: conf.Upstream,
cache: cache.New(cache.Config{
EnableLRU: true,
MaxSize: conf.CacheSize,
}),
svc: conf.ServiceName,
txtSuffix: conf.TXTSuffix,
cacheTime: conf.CacheTime,
}
}
// Check returns true if request for the host should be blocked.
func (c *Checker) Check(host string) (ok bool, err error) {
hashes := hostnameToHashes(host)
found, blocked, hashesToRequest := c.findInCache(hashes)
if found {
log.Debug("%s: found %q in cache, blocked: %t", c.svc, host, blocked)
return blocked, nil
}
question := c.getQuestion(hashesToRequest)
log.Debug("%s: checking %s: %s", c.svc, host, question)
req := (&dns.Msg{}).SetQuestion(question, dns.TypeTXT)
resp, err := c.upstream.Exchange(req)
if err != nil {
return false, fmt.Errorf("getting hashes: %w", err)
}
matched, receivedHashes := c.processAnswer(hashesToRequest, resp, host)
c.storeInCache(hashesToRequest, receivedHashes)
return matched, nil
}
// hostnameToHashes returns hashes that should be checked by the hash prefix
// filter.
func hostnameToHashes(host string) (hashes []hostnameHash) {
// subDomainNum defines how many labels should be hashed to match against a
// hash prefix filter.
const subDomainNum = 4
pubSuf, icann := publicsuffix.PublicSuffix(host)
if !icann {
// Check the full private domain space.
pubSuf = ""
}
nDots := 0
i := strings.LastIndexFunc(host, func(r rune) (ok bool) {
if r == '.' {
nDots++
}
return nDots == subDomainNum
})
if i != -1 {
host = host[i+1:]
}
sub := netutil.Subdomains(host)
for _, s := range sub {
if s == pubSuf {
break
}
sum := sha256.Sum256([]byte(s))
hashes = append(hashes, sum)
}
return hashes
}
// getQuestion combines hexadecimal encoded prefixes of hashed hostnames into
// string.
func (c *Checker) getQuestion(hashes []hostnameHash) (q string) {
b := &strings.Builder{}
for _, hash := range hashes {
// nolint:looppointer // The subsilce is used for safe hex encoding.
stringutil.WriteToBuilder(b, hex.EncodeToString(hash[:prefixLen]), ".")
}
stringutil.WriteToBuilder(b, c.txtSuffix)
return b.String()
}
// processAnswer returns true if DNS response matches the hash, and received
// hashed hostnames from the upstream.
func (c *Checker) processAnswer(
hashesToRequest []hostnameHash,
resp *dns.Msg,
host string,
) (matched bool, receivedHashes []hostnameHash) {
txtCount := 0
for _, a := range resp.Answer {
txt, ok := a.(*dns.TXT)
if !ok {
continue
}
txtCount++
receivedHashes = c.appendHashesFromTXT(receivedHashes, txt, host)
}
log.Debug("%s: received answer for %s with %d TXT count", c.svc, host, txtCount)
matched = findMatch(hashesToRequest, receivedHashes)
if matched {
log.Debug("%s: matched %s", c.svc, host)
return true, receivedHashes
}
return false, receivedHashes
}
// appendHashesFromTXT appends received hashed hostnames.
func (c *Checker) appendHashesFromTXT(
hashes []hostnameHash,
txt *dns.TXT,
host string,
) (receivedHashes []hostnameHash) {
log.Debug("%s: received hashes for %s: %v", c.svc, host, txt.Txt)
for _, t := range txt.Txt {
if len(t) != hexSize {
log.Debug("%s: wrong hex size %d for %s %s", c.svc, len(t), host, t)
continue
}
buf, err := hex.DecodeString(t)
if err != nil {
log.Debug("%s: decoding hex string %s: %s", c.svc, t, err)
continue
}
var hash hostnameHash
copy(hash[:], buf)
hashes = append(hashes, hash)
}
return hashes
}

View File

@@ -0,0 +1,248 @@
package hashprefix
import (
"crypto/sha256"
"encoding/hex"
"strings"
"testing"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghtest"
"github.com/AdguardTeam/golibs/cache"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
)
const (
cacheTime = 10 * time.Minute
cacheSize = 10000
)
func TestChcker_getQuestion(t *testing.T) {
const suf = "sb.dns.adguard.com."
// test hostnameToHashes()
hashes := hostnameToHashes("1.2.3.sub.host.com")
assert.Len(t, hashes, 3)
hash := sha256.Sum256([]byte("3.sub.host.com"))
hexPref1 := hex.EncodeToString(hash[:prefixLen])
assert.True(t, slices.Contains(hashes, hash))
hash = sha256.Sum256([]byte("sub.host.com"))
hexPref2 := hex.EncodeToString(hash[:prefixLen])
assert.True(t, slices.Contains(hashes, hash))
hash = sha256.Sum256([]byte("host.com"))
hexPref3 := hex.EncodeToString(hash[:prefixLen])
assert.True(t, slices.Contains(hashes, hash))
hash = sha256.Sum256([]byte("com"))
assert.False(t, slices.Contains(hashes, hash))
c := &Checker{
svc: "SafeBrowsing",
txtSuffix: suf,
}
q := c.getQuestion(hashes)
assert.Contains(t, q, hexPref1)
assert.Contains(t, q, hexPref2)
assert.Contains(t, q, hexPref3)
assert.True(t, strings.HasSuffix(q, suf))
}
func TestHostnameToHashes(t *testing.T) {
testCases := []struct {
name string
host string
wantLen int
}{{
name: "basic",
host: "example.com",
wantLen: 1,
}, {
name: "sub_basic",
host: "www.example.com",
wantLen: 2,
}, {
name: "private_domain",
host: "foo.co.uk",
wantLen: 1,
}, {
name: "sub_private_domain",
host: "bar.foo.co.uk",
wantLen: 2,
}, {
name: "private_domain_v2",
host: "foo.blogspot.co.uk",
wantLen: 4,
}, {
name: "sub_private_domain_v2",
host: "bar.foo.blogspot.co.uk",
wantLen: 4,
}}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
hashes := hostnameToHashes(tc.host)
assert.Len(t, hashes, tc.wantLen)
})
}
}
func TestChecker_storeInCache(t *testing.T) {
c := &Checker{
svc: "SafeBrowsing",
cacheTime: cacheTime,
}
conf := cache.Config{}
c.cache = cache.New(conf)
// store in cache hashes for "3.sub.host.com" and "host.com"
// and empty data for hash-prefix for "sub.host.com"
hashes := []hostnameHash{}
hash := sha256.Sum256([]byte("sub.host.com"))
hashes = append(hashes, hash)
var hashesArray []hostnameHash
hash4 := sha256.Sum256([]byte("3.sub.host.com"))
hashesArray = append(hashesArray, hash4)
hash2 := sha256.Sum256([]byte("host.com"))
hashesArray = append(hashesArray, hash2)
c.storeInCache(hashes, hashesArray)
// match "3.sub.host.com" or "host.com" from cache
hashes = []hostnameHash{}
hash = sha256.Sum256([]byte("3.sub.host.com"))
hashes = append(hashes, hash)
hash = sha256.Sum256([]byte("sub.host.com"))
hashes = append(hashes, hash)
hash = sha256.Sum256([]byte("host.com"))
hashes = append(hashes, hash)
found, blocked, _ := c.findInCache(hashes)
assert.True(t, found)
assert.True(t, blocked)
// match "sub.host.com" from cache
hashes = []hostnameHash{}
hash = sha256.Sum256([]byte("sub.host.com"))
hashes = append(hashes, hash)
found, blocked, _ = c.findInCache(hashes)
assert.True(t, found)
assert.False(t, blocked)
// Match "sub.host.com" from cache. Another hash for "host.example" is not
// in the cache, so get data for it from the server.
hashes = []hostnameHash{}
hash = sha256.Sum256([]byte("sub.host.com"))
hashes = append(hashes, hash)
hash = sha256.Sum256([]byte("host.example"))
hashes = append(hashes, hash)
found, _, hashesToRequest := c.findInCache(hashes)
assert.False(t, found)
hash = sha256.Sum256([]byte("sub.host.com"))
ok := slices.Contains(hashesToRequest, hash)
assert.False(t, ok)
hash = sha256.Sum256([]byte("host.example"))
ok = slices.Contains(hashesToRequest, hash)
assert.True(t, ok)
c = &Checker{
svc: "SafeBrowsing",
cacheTime: cacheTime,
}
c.cache = cache.New(cache.Config{})
hashes = []hostnameHash{}
hash = sha256.Sum256([]byte("sub.host.com"))
hashes = append(hashes, hash)
c.cache.Set(hash[:prefixLen], make([]byte, expirySize+hashSize))
found, _, _ = c.findInCache(hashes)
assert.False(t, found)
}
func TestChecker_Check(t *testing.T) {
const hostname = "example.org"
testCases := []struct {
name string
wantBlock bool
}{{
name: "sb_no_block",
wantBlock: false,
}, {
name: "sb_block",
wantBlock: true,
}, {
name: "pc_no_block",
wantBlock: false,
}, {
name: "pc_block",
wantBlock: true,
}}
for _, tc := range testCases {
c := New(&Config{
CacheTime: cacheTime,
CacheSize: cacheSize,
})
// Prepare the upstream.
ups := aghtest.NewBlockUpstream(hostname, tc.wantBlock)
var numReq int
onExchange := ups.OnExchange
ups.OnExchange = func(req *dns.Msg) (resp *dns.Msg, err error) {
numReq++
return onExchange(req)
}
c.upstream = ups
t.Run(tc.name, func(t *testing.T) {
// Firstly, check the request blocking.
hits := 0
res := false
res, err := c.Check(hostname)
require.NoError(t, err)
if tc.wantBlock {
assert.True(t, res)
hits++
} else {
require.False(t, res)
}
// Check the cache state, check the response is now cached.
assert.Equal(t, 1, c.cache.Stats().Count)
assert.Equal(t, hits, c.cache.Stats().Hit)
// There was one request to an upstream.
assert.Equal(t, 1, numReq)
// Now make the same request to check the cache was used.
res, err = c.Check(hostname)
require.NoError(t, err)
if tc.wantBlock {
assert.True(t, res)
} else {
require.False(t, res)
}
// Check the cache state, it should've been used.
assert.Equal(t, 1, c.cache.Stats().Count)
assert.Equal(t, hits+1, c.cache.Stats().Hit)
// Check that there were no additional requests.
assert.Equal(t, 1, numReq)
})
}
}