Pull request: 3815 fix hosts container rewrites

Merge in DNS/adguard-home from 3815-weird-rewrites to master

Updates #3815.

Squashed commit of the following:

commit d217db9f5632a3fba5a37fc6ac7b90b8d97fe1cf
Merge: 006b67b9 9c8e0875
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Tue Nov 16 16:08:41 2021 +0300

    Merge branch 'master' into 3815-weird-rewrites

commit 006b67b93199f3818396ad782d90aba32da74092
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Tue Nov 16 15:49:50 2021 +0300

    filtering: fix doc

commit 7ffafcedc7275b007977a539bd63ab20a758eecc
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Tue Nov 16 14:17:41 2021 +0300

    all: imp hosts container more

commit b60deddec988762c61060cabad1340a37b154dbb
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Sun Nov 14 19:06:16 2021 +0300

    all: log changes

commit 37c76f478e0db90b3840a931d79465eefeea7945
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Sun Nov 14 18:14:21 2021 +0300

    aghnet: imp hosts container

commit 187251c364f6d23ba7166906b5394a0299657b76
Author: Eugene Burkov <E.Burkov@AdGuard.COM>
Date:   Sun Nov 14 16:16:41 2021 +0300

    all: merge hosts container more
This commit is contained in:
Eugene Burkov
2021-11-16 16:16:38 +03:00
parent 9c8e087544
commit 4a4b4715ca
7 changed files with 179 additions and 124 deletions

View File

@@ -17,12 +17,13 @@ import (
"github.com/AdguardTeam/golibs/stringutil"
"github.com/AdguardTeam/urlfilter"
"github.com/AdguardTeam/urlfilter/filterlist"
"github.com/AdguardTeam/urlfilter/rules"
"github.com/miekg/dns"
)
// DefaultHostsPaths returns the slice of paths default for the operating system
// to files and directories which are containing the hosts database. The result
// is intended to use within fs.FS so the initial slash is omitted.
// is intended to be used within fs.FS so the initial slash is omitted.
func DefaultHostsPaths() (paths []string) {
return defaultHostsPaths()
}
@@ -42,9 +43,10 @@ type HostsContainer struct {
// engine serves rulesStrg.
engine *urlfilter.DNSEngine
// Updates is the channel for receiving updated hosts. The receivable map's
// values has a type of slice of strings.
// updates is the channel for receiving updated hosts.
updates chan *netutil.IPMap
// last is the set of hosts that was cached within last detected change.
last *netutil.IPMap
// fsys is the working file system to read hosts files from.
fsys fs.FS
@@ -81,6 +83,7 @@ func NewHostsContainer(
hc = &HostsContainer{
engLock: &sync.RWMutex{},
updates: make(chan *netutil.IPMap, 1),
last: &netutil.IPMap{},
fsys: fsys,
w: w,
patterns: patterns,
@@ -127,17 +130,20 @@ func (hc *HostsContainer) MatchRequest(
hc.engLock.RLock()
defer hc.engLock.RUnlock()
return hc.engine.MatchRequest(req)
res, ok = hc.engine.MatchRequest(req)
return res, ok
}
// Close implements the io.Closer interface for *HostsContainer.
func (hc *HostsContainer) Close() (err error) {
log.Debug("%s: closing hosts container", hostsContainerPref)
log.Debug("%s: closing", hostsContainerPref)
return errors.Annotate(hc.w.Close(), "%s: closing: %w", hostsContainerPref)
}
// Upd returns the channel into which the updates are sent.
// Upd returns the channel into which the updates are sent. The values of the
// received map are guaranteed to be of type *stringutil.Set.
func (hc *HostsContainer) Upd() (updates <-chan *netutil.IPMap) {
return hc.updates
}
@@ -185,11 +191,18 @@ type hostsParser struct {
table *netutil.IPMap
}
// parseHostsFile is a aghtest.FileWalker for parsing the files with hosts
// syntax. It never signs to stop the walking.
// newHostsParser returns a fresh parser with an empty rules builder and a new
// IP map.  The length of the previously cached table, hc.last, is passed to
// netutil.NewIPMap, presumably as a size hint for the new table.
func (hc *HostsContainer) newHostsParser() (hp *hostsParser) {
	prevLen := hc.last.Len()

	hp = &hostsParser{
		rules: new(strings.Builder),
		table: netutil.NewIPMap(prevLen),
	}

	return hp
}
// parseFile is an aghos.FileWalker for parsing the files with hosts syntax. It
// never signals to stop walking and never returns any additional patterns.
//
// See man hosts(5).
func (hp hostsParser) parseHostsFile(
func (hp *hostsParser) parseFile(
r io.Reader,
) (patterns []string, cont bool, err error) {
s := bufio.NewScanner(r)
@@ -208,7 +221,7 @@ func (hp hostsParser) parseHostsFile(
}
// parseLine parses the line having the hosts syntax ignoring invalid ones.
func (hp hostsParser) parseLine(line string) (ip net.IP, hosts []string) {
func (hp *hostsParser) parseLine(line string) (ip net.IP, hosts []string) {
line = strings.TrimSpace(line)
fields := strings.Fields(line)
if len(fields) < 2 {
@@ -240,20 +253,24 @@ loop:
}
// add returns true if the pair of ip and host wasn't added to the hp before.
func (hp hostsParser) add(ip net.IP, host string) (added bool) {
func (hp *hostsParser) add(ip net.IP, host string) (added bool) {
v, ok := hp.table.Get(ip)
hosts, _ := v.([]string)
if ok && stringutil.InSlice(hosts, host) {
hosts, _ := v.(*stringutil.Set)
switch {
case ok && hosts.Has(host):
return false
case hosts == nil:
hosts = stringutil.NewSet(host)
hp.table.Set(ip, hosts)
default:
hosts.Add(host)
}
hp.table.Set(ip, append(hosts, host))
return true
}
// addPair puts the pair of ip and host to the rules builder if needed.
func (hp hostsParser) addPair(ip net.IP, host string) {
func (hp *hostsParser) addPair(ip net.IP, host string) {
arpa, err := netutil.IPToReversedAddr(ip)
if err != nil {
return
@@ -269,61 +286,110 @@ func (hp hostsParser) addPair(ip net.IP, host string) {
qtype = "A"
}
stringutil.WriteToBuilder(
hp.rules,
"||",
host,
"^$dnsrewrite=NOERROR;",
qtype,
";",
ip.String(),
"\n",
"||",
arpa,
"^$dnsrewrite=NOERROR;PTR;",
dns.Fqdn(host),
"\n",
const (
nl = "\n"
sc = ";"
rewriteSuccess = "$dnsrewrite=NOERROR" + sc
rewriteSuccessPTR = rewriteSuccess + "PTR" + sc
)
ipStr := ip.String()
fqdn := dns.Fqdn(host)
for _, ruleData := range [...][]string{{
// A/AAAA.
rules.MaskStartURL,
host,
rules.MaskSeparator,
rewriteSuccess,
qtype,
sc,
ipStr,
nl,
}, {
// PTR.
rules.MaskStartURL,
arpa,
rules.MaskSeparator,
rewriteSuccessPTR,
fqdn,
nl,
}} {
stringutil.WriteToBuilder(hp.rules, ruleData...)
}
log.Debug("%s: added ip-host pair %q/%q", hostsContainerPref, ip, host)
}
// equalSet reports whether the hosts table just parsed by hp holds exactly the
// same IP-to-hosts mapping as target.  The values of both maps are asserted to
// be of type *stringutil.Set.
func (hp *hostsParser) equalSet(target *netutil.IPMap) (ok bool) {
	// Tables of different sizes can never be equal, so skip the walk.
	if target.Len() != hp.table.Len() {
		return false
	}

	// mismatch becomes true as soon as an address is missing from target or
	// its set of hosts differs from the parsed one.
	mismatch := false
	hp.table.Range(func(ip net.IP, parsed interface{}) (cont bool) {
		known, found := target.Get(ip)
		if !found {
			mismatch = true

			return false
		}

		mismatch = !known.(*stringutil.Set).Equal(parsed.(*stringutil.Set))

		// Keep walking only while the tables still agree.
		return !mismatch
	})

	// The tables are equal when the walk found no discrepancy, which includes
	// the case of two empty tables.
	return !mismatch
}
// sendUpd tries to send the parsed data to the ch.
func (hp hostsParser) sendUpd(ch chan *netutil.IPMap) {
func (hp *hostsParser) sendUpd(ch chan *netutil.IPMap) {
log.Debug("%s: sending upd", hostsContainerPref)
upd := hp.table
select {
case ch <- hp.table:
case ch <- upd:
// Updates are delivered. Go on.
case <-ch:
ch <- upd
log.Debug("%s: replaced the last update", hostsContainerPref)
case ch <- upd:
// The previous update was just read and the next one pushed. Go on.
default:
log.Debug("%s: the buffer is full", hostsContainerPref)
log.Debug("%s: the channel is broken", hostsContainerPref)
}
}
// newStrg creates a new rules storage from parsed data.
func (hp hostsParser) newStrg() (s *filterlist.RuleStorage, err error) {
func (hp *hostsParser) newStrg() (s *filterlist.RuleStorage, err error) {
return filterlist.NewRuleStorage([]filterlist.RuleList{&filterlist.StringRuleList{
ID: 1,
ID: -1,
RulesText: hp.rules.String(),
IgnoreCosmetic: true,
}})
}
// refresh gets the data from specified files and propagates the updates.
// refresh gets the data from specified files and propagates the updates if
// needed.
func (hc *HostsContainer) refresh() (err error) {
log.Debug("%s: refreshing", hostsContainerPref)
hp := hostsParser{
rules: &strings.Builder{},
table: netutil.NewIPMap(0),
hp := hc.newHostsParser()
if _, err = aghos.FileWalker(hp.parseFile).Walk(hc.fsys, hc.patterns...); err != nil {
return fmt.Errorf("refreshing : %w", err)
}
_, err = aghos.FileWalker(hp.parseHostsFile).Walk(hc.fsys, hc.patterns...)
if err != nil {
return fmt.Errorf("updating: %w", err)
}
if hp.equalSet(hc.last) {
log.Debug("%s: no updates detected", hostsContainerPref)
return nil
}
defer hp.sendUpd(hc.updates)
hc.last = hp.table.ShallowClone()
var rulesStrg *filterlist.RuleStorage
if rulesStrg, err = hp.newStrg(); err != nil {
return fmt.Errorf("initializing rules storage: %w", err)

View File

@@ -12,6 +12,7 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/aghtest"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/netutil"
"github.com/AdguardTeam/golibs/stringutil"
"github.com/AdguardTeam/urlfilter"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert"
@@ -164,11 +165,14 @@ func TestHostsContainer_Refresh(t *testing.T) {
},
}
eventsCh := make(chan struct{}, 1)
// event is a convenient alias for an empty struct{} to emit test events.
type event = struct{}
eventsCh := make(chan event, 1)
t.Cleanup(func() { close(eventsCh) })
w := &aghtest.FSWatcher{
OnEvents: func() (e <-chan struct{}) { return eventsCh },
OnEvents: func() (e <-chan event) { return eventsCh },
OnAdd: func(name string) (err error) {
assert.Equal(t, dirname, name)
@@ -181,7 +185,7 @@ func TestHostsContainer_Refresh(t *testing.T) {
require.NoError(t, err)
t.Cleanup(func() { require.ErrorIs(t, hc.Close(), closeCalled) })
checkRefresh := func(t *testing.T, wantHosts []string) {
checkRefresh := func(t *testing.T, wantHosts *stringutil.Set) {
upd, ok := <-hc.Upd()
require.True(t, ok)
require.NotNil(t, upd)
@@ -191,26 +195,30 @@ func TestHostsContainer_Refresh(t *testing.T) {
v, ok := upd.Get(knownIP)
require.True(t, ok)
var hosts []string
hosts, ok = v.([]string)
var hosts *stringutil.Set
hosts, ok = v.(*stringutil.Set)
require.True(t, ok)
require.Len(t, hosts, len(wantHosts))
assert.Equal(t, wantHosts, hosts)
assert.True(t, hosts.Equal(wantHosts))
}
t.Run("initial_refresh", func(t *testing.T) {
checkRefresh(t, []string{knownHost})
checkRefresh(t, stringutil.NewSet(knownHost))
})
testFS[p2] = &fstest.MapFile{
Data: []byte(strings.Join([]string{knownIP.String(), knownAlias}, sp) + nl),
}
eventsCh <- struct{}{}
eventsCh <- event{}
t.Run("second_refresh", func(t *testing.T) {
checkRefresh(t, []string{knownHost, knownAlias})
checkRefresh(t, stringutil.NewSet(knownHost, knownAlias))
})
eventsCh <- event{}
t.Run("no_changes_refresh", func(t *testing.T) {
assert.Empty(t, hc.Upd())
})
}
@@ -218,7 +226,7 @@ func TestHostsContainer_MatchRequest(t *testing.T) {
var (
ip4 = net.IP{127, 0, 0, 1}
ip6 = net.IP{
0, 0, 0, 0,
128, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1,
@@ -236,9 +244,9 @@ func TestHostsContainer_MatchRequest(t *testing.T) {
gsfs := fstest.MapFS{
filename: &fstest.MapFile{Data: []byte(
strings.Join([]string{ip4.String(), hostname4, hostname4a}, sp) + nl +
strings.Join([]string{ip6.String(), hostname6}, sp) + nl +
strings.Join([]string{"256.256.256.256", "fakebroadcast"}, sp) + nl,
ip4.String() + " " + hostname4 + " " + hostname4a + nl +
ip6.String() + " " + hostname6 + nl +
`256.256.256.256 fakebroadcast` + nl,
)},
}
@@ -265,6 +273,15 @@ func TestHostsContainer_MatchRequest(t *testing.T) {
Hostname: hostname4,
DNSType: dns.TypeA,
},
}, {
name: "a_for_aaaa",
want: []interface{}{
ip4.To16(),
},
req: urlfilter.DNSRequest{
Hostname: hostname4,
DNSType: dns.TypeAAAA,
},
}, {
name: "aaaa",
want: []interface{}{ip6},
@@ -408,7 +425,7 @@ func TestUniqueRules_AddPair(t *testing.T) {
const knownHost = "host1"
ipToHost := netutil.NewIPMap(0)
ipToHost.Set(knownIP, []string{knownHost})
ipToHost.Set(knownIP, *stringutil.NewSet(knownHost))
testCases := []struct {
name string
@@ -422,10 +439,11 @@ func TestUniqueRules_AddPair(t *testing.T) {
"||4.3.2.1.in-addr.arpa^$dnsrewrite=NOERROR;PTR;host2.\n",
ip: knownIP,
}, {
name: "existing_one",
host: knownHost,
wantRules: "",
ip: knownIP,
name: "existing_one",
host: knownHost,
wantRules: "||" + knownHost + "^$dnsrewrite=NOERROR;A;1.2.3.4\n" +
"||4.3.2.1.in-addr.arpa^$dnsrewrite=NOERROR;PTR;host1.\n",
ip: knownIP,
}, {
name: "new_ip",
host: knownHost,