all: sync with master; upd chlog

This commit is contained in:
Ainar Garipov
2024-04-02 20:22:19 +03:00
parent ce9bb588ed
commit 6fb2aee210
57 changed files with 1363 additions and 873 deletions

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghhttp"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/AdGuardHome/internal/schedule"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/urlfilter/rules"
@@ -28,7 +29,7 @@ func initBlockedServices() {
for i, s := range blockedServices {
netRules := make([]*rules.NetworkRule, 0, len(s.Rules))
for _, text := range s.Rules {
rule, err := rules.NewNetworkRule(text, BlockedSvcsListID)
rule, err := rules.NewNetworkRule(text, rulelist.URLFilterIDBlockedService)
if err != nil {
log.Error("parsing blocked service %q rule %q: %s", s.ID, text, err)

View File

@@ -30,7 +30,7 @@ func (d *DNSFilter) processDNSRewrites(dnsr []*rules.NetworkRule) (res Result) {
if dr.NewCNAME != "" {
// NewCNAME rules have a higher priority than other rules.
rules = []*ResultRule{{
FilterListID: int64(nr.GetFilterListID()),
FilterListID: nr.GetFilterListID(),
Text: nr.RuleText,
}}
@@ -46,14 +46,14 @@ func (d *DNSFilter) processDNSRewrites(dnsr []*rules.NetworkRule) (res Result) {
dnsrr.RCode = dr.RCode
dnsrr.Response[dr.RRType] = append(dnsrr.Response[dr.RRType], dr.Value)
rules = append(rules, &ResultRule{
FilterListID: int64(nr.GetFilterListID()),
FilterListID: nr.GetFilterListID(),
Text: nr.RuleText,
})
default:
// RcodeRefused and other such codes have higher priority. Return
// immediately.
rules = []*ResultRule{{
FilterListID: int64(nr.GetFilterListID()),
FilterListID: nr.GetFilterListID(),
Text: nr.RuleText,
}}
dnsrr = &DNSRewriteResult{

View File

@@ -13,20 +13,15 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/aghrenameio"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/container"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/stringutil"
)
// filterDir is the subdirectory of a data directory to store downloaded
// filters.
const filterDir = "filters"
// nextFilterID is a way to seed a unique ID generation.
//
// TODO(e.burkov): Use more deterministic approach.
var nextFilterID = time.Now().Unix()
// FilterYAML represents a filter list in the configuration file.
//
// TODO(e.burkov): Investigate if the field ordering is important.
@@ -50,7 +45,10 @@ func (filter *FilterYAML) unload() {
// Path to the filter contents
func (filter *FilterYAML) Path(dataDir string) string {
return filepath.Join(dataDir, filterDir, strconv.FormatInt(filter.ID, 10)+".txt")
return filepath.Join(
dataDir,
filterDir,
strconv.FormatInt(int64(filter.ID), 10)+".txt")
}
// ensureName sets provided title or default name for the filter if it doesn't
@@ -217,7 +215,10 @@ func (d *DNSFilter) loadFilters(array []FilterYAML) {
for i := range array {
filter := &array[i] // otherwise we're operating on a copy
if filter.ID == 0 {
filter.ID = assignUniqueFilterID()
newID := d.idGen.next()
log.Info("filtering: warning: filter at index %d has no id; assigning to %d", i, newID)
filter.ID = newID
}
if !filter.Enabled {
@@ -233,7 +234,7 @@ func (d *DNSFilter) loadFilters(array []FilterYAML) {
}
func deduplicateFilters(filters []FilterYAML) (deduplicated []FilterYAML) {
urls := stringutil.NewSet()
urls := container.NewMapSet[string]()
lastIdx := 0
for _, filter := range filters {
@@ -247,22 +248,6 @@ func deduplicateFilters(filters []FilterYAML) (deduplicated []FilterYAML) {
return filters[:lastIdx]
}
// Set the next filter ID to max(filter.ID) + 1
func updateUniqueFilterID(filters []FilterYAML) {
for _, filter := range filters {
if nextFilterID < filter.ID {
nextFilterID = filter.ID + 1
}
}
}
// TODO(e.burkov): Improve this inexhaustible source of races.
func assignUniqueFilterID() int64 {
value := nextFilterID
nextFilterID++
return value
}
// tryRefreshFilters is like [refreshFilters], but backs down if the update is
// already going on.
//
@@ -608,7 +593,7 @@ func (d *DNSFilter) EnableFilters(async bool) {
func (d *DNSFilter) enableFiltersLocked(async bool) {
filters := make([]Filter, 1, len(d.conf.Filters)+len(d.conf.WhitelistFilters)+1)
filters[0] = Filter{
ID: CustomListID,
ID: rulelist.URLFilterIDCustom,
Data: []byte(strings.Join(d.conf.UserRules, "\n")),
}

View File

@@ -20,11 +20,11 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/aghhttp"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/container"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/hostsfile"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/mathutil"
"github.com/AdguardTeam/golibs/stringutil"
"github.com/AdguardTeam/golibs/syncutil"
"github.com/AdguardTeam/urlfilter"
"github.com/AdguardTeam/urlfilter/filterlist"
@@ -32,19 +32,6 @@ import (
"github.com/miekg/dns"
)
// The IDs of built-in filter lists.
//
// Keep in sync with client/src/helpers/constants.js.
// TODO(d.kolyshev): Add RewritesListID and don't forget to keep in sync.
const (
CustomListID = -iota
SysHostsListID
BlockedSvcsListID
ParentalListID
SafeBrowsingListID
SafeSearchListID
)
// ServiceEntry - blocked service array element
type ServiceEntry struct {
Name string
@@ -232,6 +219,9 @@ type Checker interface {
// DNSFilter matches hostnames and DNS requests against filtering rules.
type DNSFilter struct {
// idGen is used to generate IDs for package urlfilter.
idGen *idGenerator
// bufPool is a pool of buffers used for filtering-rule list parsing.
bufPool *syncutil.Pool[[]byte]
@@ -278,7 +268,7 @@ type Filter struct {
Data []byte `yaml:"-"`
// ID is automatically assigned when a filter is added, using the ID
// generator of the DNS filter.
ID int64 `yaml:"id"`
ID rulelist.URLFilterID `yaml:"id"`
}
// Reason holds an enum detailing why it was filtered or not filtered
@@ -530,11 +520,13 @@ func (d *DNSFilter) ParentalBlockHost() (host string) {
type ResultRule struct {
// Text is the text of the rule.
Text string `json:",omitempty"`
// IP is the host IP. It is nil unless the rule uses the
// /etc/hosts syntax or the reason is FilteredSafeSearch.
IP netip.Addr `json:",omitempty"`
// FilterListID is the ID of the rule's filter list.
FilterListID int64 `json:",omitempty"`
FilterListID rulelist.URLFilterID `json:",omitempty"`
}
// Result contains the result of a request check.
@@ -637,7 +629,7 @@ func (d *DNSFilter) processRewrites(host string, qtype uint16) (res Result) {
res.Reason = Rewritten
cnames := stringutil.NewSet()
cnames := container.NewMapSet[string]()
origHost := host
for matched && len(rewrites) > 0 && rewrites[0].Type == dns.TypeCNAME {
rw := rewrites[0]
@@ -705,7 +697,7 @@ func matchBlockedServicesRules(
ruleText := rule.Text()
res.Rules = []*ResultRule{{
FilterListID: int64(rule.GetFilterListID()),
FilterListID: rule.GetFilterListID(),
Text: ruleText,
}}
@@ -970,7 +962,7 @@ func makeResult(matchedRules []rules.Rule, reason Reason) (res Result) {
resRules := make([]*ResultRule, len(matchedRules))
for i, mr := range matchedRules {
resRules[i] = &ResultRule{
FilterListID: int64(mr.GetFilterListID()),
FilterListID: mr.GetFilterListID(),
Text: mr.Text(),
}
}
@@ -991,6 +983,7 @@ func InitModule() {
// be non-nil.
func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
d = &DNSFilter{
idGen: newIDGenerator(int32(time.Now().Unix())),
bufPool: syncutil.NewSlicePool[byte](rulelist.DefaultRuleBufSize),
refreshLock: &sync.Mutex{},
safeBrowsingChecker: c.SafeBrowsingChecker,
@@ -1054,8 +1047,8 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
d.conf.Filters = deduplicateFilters(d.conf.Filters)
d.conf.WhitelistFilters = deduplicateFilters(d.conf.WhitelistFilters)
updateUniqueFilterID(d.conf.Filters)
updateUniqueFilterID(d.conf.WhitelistFilters)
d.idGen.fix(d.conf.Filters)
d.idGen.fix(d.conf.WhitelistFilters)
return d, nil
}
@@ -1139,7 +1132,7 @@ func (d *DNSFilter) checkSafeBrowsing(
res = Result{
Rules: []*ResultRule{{
Text: "adguard-malware-shavar",
FilterListID: SafeBrowsingListID,
FilterListID: rulelist.URLFilterIDSafeBrowsing,
}},
Reason: FilteredSafeBrowsing,
IsFiltered: true,
@@ -1171,7 +1164,7 @@ func (d *DNSFilter) checkParental(
res = Result{
Rules: []*ResultRule{{
Text: "parental CATEGORY_BLACKLISTED",
FilterListID: ParentalListID,
FilterListID: rulelist.URLFilterIDParentalControl,
}},
Reason: FilteredParental,
IsFiltered: true,

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"net/netip"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/hostsfile"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/netutil"
@@ -66,7 +67,7 @@ func hostsRewrites(
vals = append(vals, name)
rls = append(rls, &ResultRule{
Text: fmt.Sprintf("%s %s", addr, name),
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
})
}
@@ -84,7 +85,7 @@ func hostsRewrites(
}
rls = append(rls, &ResultRule{
Text: fmt.Sprintf("%s %s", addr, host),
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
})
}

View File

@@ -8,6 +8,7 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/aghnet"
"github.com/AdguardTeam/AdGuardHome/internal/aghtest"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/testutil"
"github.com/AdguardTeam/urlfilter/rules"
"github.com/miekg/dns"
@@ -71,7 +72,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypeA,
wantRules: []*ResultRule{{
Text: "1.2.3.4 v4.host.example",
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: []rules.RRValue{addrv4},
}, {
@@ -80,7 +81,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypeAAAA,
wantRules: []*ResultRule{{
Text: "::1 v6.host.example",
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: []rules.RRValue{addrv6},
}, {
@@ -89,7 +90,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypeAAAA,
wantRules: []*ResultRule{{
Text: "::ffff:1.2.3.4 mapped.host.example",
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: []rules.RRValue{addrMapped},
}, {
@@ -98,7 +99,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypePTR,
wantRules: []*ResultRule{{
Text: "1.2.3.4 v4.host.example",
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: []rules.RRValue{"v4.host.example"},
}, {
@@ -107,7 +108,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypePTR,
wantRules: []*ResultRule{{
Text: "::ffff:1.2.3.4 mapped.host.example",
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: []rules.RRValue{"mapped.host.example"},
}, {
@@ -134,7 +135,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypeAAAA,
wantRules: []*ResultRule{{
Text: fmt.Sprintf("%s v4.host.example", addrv4),
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: nil,
}, {
@@ -143,7 +144,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypeA,
wantRules: []*ResultRule{{
Text: fmt.Sprintf("%s v6.host.example", addrv6),
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: nil,
}, {
@@ -164,7 +165,7 @@ func TestDNSFilter_CheckHost_hostsContainer(t *testing.T) {
dtyp: dns.TypeA,
wantRules: []*ResultRule{{
Text: "4.3.2.1 v4.host.with-dup",
FilterListID: SysHostsListID,
FilterListID: rulelist.URLFilterIDEtcHosts,
}},
wantResps: []rules.RRValue{addrv4Dup},
}}

View File

@@ -13,6 +13,7 @@ import (
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghhttp"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
"github.com/miekg/dns"
@@ -86,7 +87,7 @@ func (d *DNSFilter) handleFilteringAddURL(w http.ResponseWriter, r *http.Request
Name: fj.Name,
white: fj.Whitelist,
Filter: Filter{
ID: assignUniqueFilterID(),
ID: d.idGen.next(),
},
}
@@ -307,12 +308,12 @@ func (d *DNSFilter) handleFilteringRefresh(w http.ResponseWriter, r *http.Reques
}
type filterJSON struct {
URL string `json:"url"`
Name string `json:"name"`
LastUpdated string `json:"last_updated,omitempty"`
ID int64 `json:"id"`
RulesCount uint32 `json:"rules_count"`
Enabled bool `json:"enabled"`
URL string `json:"url"`
Name string `json:"name"`
LastUpdated string `json:"last_updated,omitempty"`
ID rulelist.URLFilterID `json:"id"`
RulesCount uint32 `json:"rules_count"`
Enabled bool `json:"enabled"`
}
type filteringConfig struct {
@@ -388,8 +389,8 @@ func (d *DNSFilter) handleFilteringConfig(w http.ResponseWriter, r *http.Request
}
type checkHostRespRule struct {
Text string `json:"text"`
FilterListID int64 `json:"filter_list_id"`
Text string `json:"text"`
FilterListID rulelist.URLFilterID `json:"filter_list_id"`
}
type checkHostResp struct {
@@ -412,7 +413,7 @@ type checkHostResp struct {
// FilterID is the ID of the rule's filter list.
//
// Deprecated: Use Rules[*].FilterListID.
FilterID int64 `json:"filter_id"`
FilterID rulelist.URLFilterID `json:"filter_id"`
}
func (d *DNSFilter) handleCheckHost(w http.ResponseWriter, r *http.Request) {

View File

@@ -0,0 +1,74 @@
package filtering
import (
"fmt"
"sync/atomic"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/container"
"github.com/AdguardTeam/golibs/log"
)
// idGenerator produces IDs for filtering-rule lists. It is meant to stay
// broadly compatible with the legacy ID scheme of AdGuard Home.
//
// TODO(a.garipov): Get rid of this once we switch completely to the new
// rule-list architecture.
type idGenerator struct {
	// current holds either the seed or the most recently issued ID. It is
	// only accessed through atomic operations.
	current *atomic.Int32
}
// newIDGenerator creates an ID generator whose counter starts at seed. The
// counter is incremented before an ID is handed out, so seed itself is never
// returned as an ID.
func newIDGenerator(seed int32) (g *idGenerator) {
	counter := &atomic.Int32{}
	counter.Store(seed)

	return &idGenerator{
		current: counter,
	}
}
// next atomically increments the counter and returns the resulting value as an
// ID. It panics if the resulting value is negative. It is safe for concurrent
// use.
func (g *idGenerator) next() (id rulelist.URLFilterID) {
	n := g.current.Add(1)
	if n >= 0 {
		return rulelist.URLFilterID(n)
	}

	panic(fmt.Errorf("invalid current id value %d", n))
}
// fix rewrites the IDs in flts in place so that every filter has a non-zero ID
// and no two filters in flts share one. A filter with a zero ID gets a newly
// generated one. A filter whose ID has already been seen earlier in flts is
// reassigned a newly generated ID, and the reassignment is logged.
func (g *idGenerator) fix(flts []FilterYAML) {
	seen := container.NewMapSet[rulelist.URLFilterID]()
	for i := range flts {
		flt := &flts[i]
		if flt.ID == 0 {
			flt.ID = g.next()
		}

		curID := flt.ID
		if !seen.Has(curID) {
			seen.Add(curID)

			continue
		}

		// Keep generating until the new ID does not clash with anything seen
		// so far.
		var newID rulelist.URLFilterID
		for {
			newID = g.next()
			if !seen.Has(newID) {
				break
			}
		}

		log.Info(
			"filtering: warning: filter at index %d has duplicate id %d; reassigning to %d",
			i,
			curID,
			newID,
		)

		flt.ID = newID
		seen.Add(newID)
	}
}

View File

@@ -0,0 +1,88 @@
package filtering
import (
"testing"
"github.com/AdguardTeam/AdGuardHome/internal/aghalg"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/stretchr/testify/assert"
)
func TestIDGenerator_Fix(t *testing.T) {
t.Parallel()
testCases := []struct {
name string
in []FilterYAML
}{{
name: "nil",
in: nil,
}, {
name: "empty",
in: []FilterYAML{},
}, {
name: "one_zero",
in: []FilterYAML{{}},
}, {
name: "two_zeros",
in: []FilterYAML{{}, {}},
}, {
name: "many_good",
in: []FilterYAML{{
Filter: Filter{
ID: 1,
},
}, {
Filter: Filter{
ID: 2,
},
}, {
Filter: Filter{
ID: 3,
},
}},
}, {
name: "two_dups",
in: []FilterYAML{{
Filter: Filter{
ID: 1,
},
}, {
Filter: Filter{
ID: 3,
},
}, {
Filter: Filter{
ID: 1,
},
}, {
Filter: Filter{
ID: 2,
},
}},
}}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
g := newIDGenerator(1)
g.fix(tc.in)
assertUniqueIDs(t, tc.in)
})
}
}
// assertUniqueIDs is a test helper that fails the test if any two filters in
// flts have the same ID.
func assertUniqueIDs(t testing.TB, flts []FilterYAML) {
	t.Helper()

	checker := aghalg.UniqChecker[rulelist.URLFilterID]{}
	for i := range flts {
		checker.Add(flts[i].ID)
	}

	err := checker.Validate()
	assert.NoError(t, err)
}

View File

@@ -7,8 +7,8 @@ import (
"strings"
"sync"
"github.com/AdguardTeam/golibs/container"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/stringutil"
"github.com/AdguardTeam/urlfilter"
"github.com/AdguardTeam/urlfilter/filterlist"
"github.com/AdguardTeam/urlfilter/rules"
@@ -85,7 +85,7 @@ func (s *DefaultStorage) MatchRequest(dReq *urlfilter.DNSRequest) (rws []*rules.
}
// TODO(a.garipov): Check cnames for cycles on initialization.
cnames := stringutil.NewSet()
cnames := container.NewMapSet[string]()
host := dReq.Hostname
for len(rrules) > 0 && rrules[0].DNSRewrite != nil && rrules[0].DNSRewrite.NewCNAME != "" {
rule := rrules[0]

View File

@@ -0,0 +1,254 @@
package rulelist
import (
"context"
"fmt"
"net/http"
"sync"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/urlfilter"
"github.com/AdguardTeam/urlfilter/filterlist"
"github.com/c2h5oh/datasize"
)
// Engine is a single DNS filter built from one or more rule lists. It holds
// the filtering engine that combines those rule lists.
//
// TODO(a.garipov): Merge with [TextEngine] in some way?
type Engine struct {
	// mu guards engine and storage.
	//
	// TODO(a.garipov): See if anything else should be protected.
	mu *sync.RWMutex

	// engine is the filtering engine. It is nil until the first successful
	// storage reset performed by a refresh.
	engine *urlfilter.DNSEngine

	// storage is the filtering-rule storage backing engine. It is kept here
	// so that it can be closed.
	storage *filterlist.RuleStorage

	// name is the human-readable name of the engine, like "allowed", "blocked",
	// or "custom".
	name string

	// filters is the data about the rule-list filters in this engine.
	filters []*Filter
}
// EngineConfig contains the settings for a rule-list filtering engine that is
// created by combining refreshable filters.
type EngineConfig struct {
	// Name is the human-readable name of this engine, like "allowed",
	// "blocked", or "custom".
	Name string

	// Filters is the data about the rule lists in this engine. The elements
	// of this slice must not be referenced from anywhere else.
	Filters []*Filter
}
// NewEngine creates a rule-list filtering engine from c. The returned engine
// has not been refreshed yet, so a refresh should be performed before it is
// used.
func NewEngine(c *EngineConfig) (e *Engine) {
	e = &Engine{
		mu: &sync.RWMutex{},
	}
	e.name, e.filters = c.Name, c.Filters

	return e
}
// Close closes the rule storage of the engine, if there is one. It is a no-op
// for an engine that has no storage yet.
func (e *Engine) Close() (err error) {
	e.mu.Lock()
	defer e.mu.Unlock()

	if e.storage != nil {
		if err = e.storage.Close(); err != nil {
			return fmt.Errorf("closing engine %q: %w", e.name, err)
		}
	}

	return nil
}
// FilterRequest matches req against the current DNS filtering engine and
// returns the result of that match.
func (e *Engine) FilterRequest(
	req *urlfilter.DNSRequest,
) (res *urlfilter.DNSResult, hasMatched bool) {
	engine := e.currentEngine()

	return engine.MatchRequest(req)
}
// currentEngine returns the filtering engine that is set at the moment of the
// call, reading it under the read lock.
func (e *Engine) currentEngine() (engine *urlfilter.DNSEngine) {
	e.mu.RLock()
	engine = e.engine
	e.mu.RUnlock()

	return engine
}
// Refresh updates all enabled rule lists in e and, unless the update is
// interrupted, replaces the engine's rule storage with one built from the
// lists that were updated successfully. ctx is used for cancellation.
// parseBuf, cli, cacheDir, and maxSize are used for updates of rule-list
// filters; see [Filter.Refresh].
//
// If there are no enabled filters, Refresh logs that and returns nil without
// touching the current storage. If the update is interrupted by the context
// being canceled or its deadline being exceeded, the current storage is also
// left in place and the timeout error is returned. Otherwise, err joins all
// errors from the individual filter updates.
//
// TODO(a.garipov): Unexport and test in an internal test or through engine
// tests.
func (e *Engine) Refresh(
	ctx context.Context,
	parseBuf []byte,
	cli *http.Client,
	cacheDir string,
	maxSize datasize.ByteSize,
) (err error) {
	defer func() { err = errors.Annotate(err, "updating engine %q: %w", e.name) }()

	var filtersToRefresh []*Filter
	for _, f := range e.filters {
		if f.enabled {
			filtersToRefresh = append(filtersToRefresh, f)
		}
	}

	if len(filtersToRefresh) == 0 {
		log.Info("filtering: updating engine %q: no rule-list filters", e.name)

		return nil
	}

	engRefr := &engineRefresh{
		httpCli:    cli,
		cacheDir:   cacheDir,
		engineName: e.name,
		parseBuf:   parseBuf,
		maxSize:    maxSize,
	}

	// Only process the enabled filters, so that disabled ones are neither
	// updated nor added to the rule storage.
	ruleLists, errs := engRefr.process(ctx, filtersToRefresh)
	if isOneTimeoutError(errs) {
		// Return the timeout error itself; the named result err is still nil
		// at this point.  Don't wrap the error any further since it's
		// informative enough as is.
		return errs[0]
	}

	storage, err := filterlist.NewRuleStorage(ruleLists)
	if err != nil {
		errs = append(errs, fmt.Errorf("creating rule storage: %w", err))

		return errors.Join(errs...)
	}

	e.resetStorage(storage)

	return errors.Join(errs...)
}
// resetStorage installs storage and a new engine built on it into e, and then
// closes the storage that was in use before, if any. An error from closing the
// previous storage is logged and not returned.
func (e *Engine) resetStorage(storage *filterlist.RuleStorage) {
	e.mu.Lock()
	defer e.mu.Unlock()

	old := e.storage

	newEngine := urlfilter.NewDNSEngine(storage)
	e.storage = storage
	e.engine = newEngine

	if old != nil {
		if err := old.Close(); err != nil {
			log.Error("filtering: engine %q: closing old storage: %s", e.name, err)
		}
	}
}
// isOneTimeoutError reports whether errs consists of exactly one error and
// that error is, or wraps, [context.Canceled] or [context.DeadlineExceeded].
func isOneTimeoutError(errs []error) (ok bool) {
	if len(errs) != 1 {
		return false
	}

	for _, target := range []error{context.Canceled, context.DeadlineExceeded} {
		if errors.Is(errs[0], target) {
			return true
		}
	}

	return false
}
// engineRefresh holds the parameters of a single ongoing engine refresh.
type engineRefresh struct {
	// httpCli is the HTTP client passed to each filter update.
	httpCli *http.Client

	// cacheDir is the cache directory passed to each filter update.
	cacheDir string

	// engineName is the name of the engine being refreshed. It is only used
	// in log messages.
	engineName string

	// parseBuf is the parsing buffer passed to each filter update.
	parseBuf []byte

	// maxSize is the size limit passed to each filter update.
	maxSize datasize.ByteSize
}
// process runs updates of all given rule-list filters, in order. ruleLists
// contains the rule lists of the filters that were updated without an error.
//
// All errors are logged as they appear, since the update can take a significant
// amount of time. errs contains all errors that happened during the update,
// unless the context is canceled or its deadline is reached, in which case
// ruleLists is nil and errs only contains a single timeout error.
//
// TODO(a.garipov): Think of a better way to communicate the timeout condition?
func (r *engineRefresh) process(
	ctx context.Context,
	filters []*Filter,
) (ruleLists []filterlist.RuleList, errs []error) {
	ruleLists = make([]filterlist.RuleList, 0, len(filters))
	for i, f := range filters {
		// Check for cancellation before each filter, so that a refresh stops
		// between filters rather than starting another update.
		select {
		case <-ctx.Done():
			return nil, []error{fmt.Errorf("timeout after updating %d filters: %w", i, ctx.Err())}
		default:
			// Go on.
		}

		err := r.processFilter(ctx, f)
		if err == nil {
			ruleLists = append(ruleLists, f.ruleList)

			continue
		}

		errs = append(errs, err)

		// Also log immediately, since the update can take a lot of time.  The
		// format has no trailing newline, like the other log calls here.
		log.Error(
			"filtering: updating engine %q: rule list %s from url %q: %s",
			r.engineName,
			f.uid,
			f.url,
			err,
		)
	}

	return ruleLists, errs
}
// processFilter refreshes the single rule-list filter f and logs whether its
// checksum has changed compared to the one it had before the refresh.
func (r *engineRefresh) processFilter(ctx context.Context, f *Filter) (err error) {
	oldChecksum := f.checksum

	res, err := f.Refresh(ctx, r.parseBuf, r.httpCli, r.cacheDir, r.maxSize)
	switch {
	case err != nil:
		return fmt.Errorf("updating %s: %w", f.uid, err)
	case res.Checksum == oldChecksum:
		log.Info("filtering: engine %q: filter %q: no change", r.engineName, f.uid)
	default:
		log.Info(
			"filtering: updated engine %q: filter %q: %d bytes, %d rules",
			r.engineName,
			f.uid,
			res.BytesWritten,
			res.RulesCount,
		)
	}

	return nil
}

View File

@@ -0,0 +1,63 @@
package rulelist_test
import (
"context"
"net/http"
"testing"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/testutil"
"github.com/AdguardTeam/urlfilter"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestEngine_Refresh checks that an engine built from a file-based and an
// HTTP-based filter matches hosts from both rule lists after a refresh.
func TestEngine_Refresh(t *testing.T) {
	cacheDir := t.TempDir()

	fileURL, srvURL := newFilterLocations(t, cacheDir, testRuleTextBlocked, testRuleTextBlocked2)

	filters := []*rulelist.Filter{
		newFilter(t, fileURL, "File Filter"),
		newFilter(t, srvURL, "HTTP Filter"),
	}

	eng := rulelist.NewEngine(&rulelist.EngineConfig{
		Name:    "Engine",
		Filters: filters,
	})
	require.NotNil(t, eng)
	testutil.CleanupAndRequireSuccess(t, eng.Close)

	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
	t.Cleanup(cancel)

	parseBuf := make([]byte, rulelist.DefaultRuleBufSize)
	httpCli := &http.Client{
		Timeout: testTimeout,
	}

	err := eng.Refresh(ctx, parseBuf, httpCli, cacheDir, rulelist.DefaultMaxRuleListSize)
	require.NoError(t, err)

	// The first host is checked first, then the second one.
	for _, host := range []string{"blocked.example", "blocked-2.example"} {
		fltRes, hasMatched := eng.FilterRequest(&urlfilter.DNSRequest{
			Hostname: host,
			Answer:   false,
			DNSType:  dns.TypeA,
		})
		assert.True(t, hasMatched)

		require.NotNil(t, fltRes)
	}
}

View File

@@ -14,7 +14,6 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/aghrenameio"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/ioutil"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/urlfilter/filterlist"
"github.com/c2h5oh/datasize"
)
@@ -52,8 +51,6 @@ type Filter struct {
checksum uint32
// enabled, if true, means that this rule-list filter is used for filtering.
//
// TODO(a.garipov): Take into account.
enabled bool
}
@@ -106,6 +103,11 @@ func NewFilter(c *FilterConfig) (f *Filter, err error) {
// Refresh updates the data in the rule-list filter. parseBuf is the initial
// buffer used to parse information from the data. cli and maxSize are only
// used when f is a URL-based list.
//
// TODO(a.garipov): Unexport and test in an internal test or through engine
// tests.
//
// TODO(a.garipov): Consider not returning parseRes.
func (f *Filter) Refresh(
ctx context.Context,
parseBuf []byte,
@@ -300,39 +302,3 @@ func (f *Filter) Close() (err error) {
return f.ruleList.Close()
}
// filterUpdate represents a single ongoing rule-list filter update.
//
//lint:ignore U1000 TODO(a.garipov): Use.
type filterUpdate struct {
httpCli *http.Client
cacheDir string
name string
parseBuf []byte
maxSize datasize.ByteSize
}
// process runs an update of a single rule-list.
func (u *filterUpdate) process(ctx context.Context, f *Filter) (err error) {
prevChecksum := f.checksum
parseRes, err := f.Refresh(ctx, u.parseBuf, u.httpCli, u.cacheDir, u.maxSize)
if err != nil {
return fmt.Errorf("updating %s: %w", f.uid, err)
}
if prevChecksum == parseRes.Checksum {
log.Info("filtering: filter %q: filter %q: no change", u.name, f.uid)
return nil
}
log.Info(
"filtering: updated filter %q: filter %q: %d bytes, %d rules",
u.name,
f.uid,
parseRes.BytesWritten,
parseRes.RulesCount,
)
return nil
}

View File

@@ -2,9 +2,7 @@ package rulelist_test
import (
"context"
"io"
"net/http"
"net/http/httptest"
"net/url"
"os"
"path/filepath"
@@ -20,23 +18,8 @@ func TestFilter_Refresh(t *testing.T) {
cacheDir := t.TempDir()
uid := rulelist.MustNewUID()
initialFile := filepath.Join(cacheDir, "initial.txt")
initialData := []byte(
testRuleTextTitle +
testRuleTextBlocked,
)
writeErr := os.WriteFile(initialFile, initialData, 0o644)
require.NoError(t, writeErr)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
pt := testutil.PanicT{}
_, err := io.WriteString(w, testRuleTextTitle+testRuleTextBlocked)
require.NoError(pt, err)
}))
srvURL, urlErr := url.Parse(srv.URL)
require.NoError(t, urlErr)
const fltData = testRuleTextTitle + testRuleTextBlocked
fileURL, srvURL := newFilterLocations(t, cacheDir, fltData, fltData)
testCases := []struct {
url *url.URL
@@ -56,7 +39,7 @@ func TestFilter_Refresh(t *testing.T) {
name: "file",
url: &url.URL{
Scheme: "file",
Path: initialFile,
Path: fileURL.Path,
},
wantNewErrMsg: "",
}, {

View File

@@ -25,6 +25,24 @@ const DefaultMaxRuleListSize = 64 * datasize.MB
// urlfilter.
type URLFilterID = int
// The IDs of built-in filter lists.
//
// NOTE: Do not change without the need for it and keep in sync with
// client/src/helpers/constants.js.
//
// TODO(a.garipov): Add type [URLFilterID] once it is used consistently in
// package filtering.
//
// TODO(d.kolyshev): Add URLFilterIDLegacyRewrite here and to the UI.
const (
URLFilterIDCustom URLFilterID = 0
URLFilterIDEtcHosts URLFilterID = -1
URLFilterIDBlockedService URLFilterID = -2
URLFilterIDParentalControl URLFilterID = -3
URLFilterIDSafeBrowsing URLFilterID = -4
URLFilterIDSafeSearch URLFilterID = -5
)
// UID is the type for the unique IDs of filtering-rule lists.
type UID uuid.UUID

View File

@@ -1,11 +1,19 @@
package rulelist_test
import (
"io"
"net/http"
"net/http/httptest"
"net/url"
"os"
"path/filepath"
"sync/atomic"
"testing"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/testutil"
"github.com/stretchr/testify/require"
)
func TestMain(m *testing.M) {
@@ -35,3 +43,70 @@ const (
// See https://github.com/AdguardTeam/AdGuardHome/issues/6003.
testRuleTextCosmetic = "||cosmetic.example## :has-text(/\u200c/i)\n"
)
// urlFilterIDCounter is the atomic counter from which newURLFilterID derives
// unique filter IDs.
var urlFilterIDCounter = new(atomic.Int32)

// newURLFilterID increments the shared counter and returns its new value as a
// URLFilterID.
func newURLFilterID() (id rulelist.URLFilterID) {
	next := urlFilterIDCounter.Add(1)

	return rulelist.URLFilterID(next)
}
// newFilter is a test helper that creates an enabled filter with the given URL
// and name, a new UID, and a new URL-filter ID. It does not register the
// closing of the filter using t.Cleanup; callers must do that either directly
// or by using the filter in an engine.
func newFilter(t testing.TB, u *url.URL, name string) (f *rulelist.Filter) {
	t.Helper()

	conf := &rulelist.FilterConfig{
		URL:         u,
		Name:        name,
		UID:         rulelist.MustNewUID(),
		URLFilterID: newURLFilterID(),
		Enabled:     true,
	}

	f, err := rulelist.NewFilter(conf)
	require.NoError(t, err)

	return f
}
// newFilterLocations is a test helper that sets up both the filtering-rule list
// file and the HTTP server. The file is written into cacheDir and contains
// fileData; the server responds with httpData. fileURL is the file-scheme URL
// of the file, and srvURL is the URL of the server.
//
// It also registers file removal and server stopping using t.Cleanup.
func newFilterLocations(
	t testing.TB,
	cacheDir string,
	fileData string,
	httpData string,
) (fileURL, srvURL *url.URL) {
	// Mark as a helper, so that failures are reported at the caller's line.
	t.Helper()

	filePath := filepath.Join(cacheDir, "initial.txt")
	err := os.WriteFile(filePath, []byte(fileData), 0o644)
	require.NoError(t, err)

	testutil.CleanupAndRequireSuccess(t, func() (err error) {
		return os.Remove(filePath)
	})

	fileURL = &url.URL{
		Scheme: "file",
		Path:   filePath,
	}

	srv := newStringHTTPServer(httpData)
	t.Cleanup(srv.Close)

	srvURL, err = url.Parse(srv.URL)
	require.NoError(t, err)

	return fileURL, srvURL
}
// newStringHTTPServer starts and returns a new test HTTP server that writes s
// as the body of every response. A failure to write the body causes a panic
// within the handler.
func newStringHTTPServer(s string) (srv *httptest.Server) {
	var handler http.HandlerFunc = func(w http.ResponseWriter, _ *http.Request) {
		pt := testutil.PanicT{}

		_, writeErr := io.WriteString(w, s)
		require.NoError(pt, writeErr)
	}

	return httptest.NewServer(handler)
}

View File

@@ -0,0 +1,98 @@
package rulelist
import (
"fmt"
"strings"
"sync"
"github.com/AdguardTeam/urlfilter"
"github.com/AdguardTeam/urlfilter/filterlist"
)
// TextEngine is a single DNS filter built from a list of rules given as text.
type TextEngine struct {
	// mu guards engine and storage.
	mu *sync.RWMutex

	// engine is the filtering engine used to match requests.
	engine *urlfilter.DNSEngine

	// storage is the filtering-rule storage backing engine. It is kept here
	// so that it can be closed.
	storage *filterlist.RuleStorage

	// name is the human-readable name of the engine, like "custom".
	name string
}
// TextEngineConfig contains the settings for a rule-list filtering engine that
// is created from the text of filtering rules.
type TextEngineConfig struct {
	// Name is the human-readable name of this engine, like "allowed",
	// "blocked", or "custom".
	Name string

	// Rules are the filtering rules for this engine in text form. They are
	// joined with newlines when the engine is created.
	Rules []string

	// ID is the ID to use inside a URL-filter engine.
	ID URLFilterID
}
// NewTextEngine creates a rule-list filtering engine directly from the rule
// texts in c, ignoring cosmetic rules. The returned engine is ready to use
// and should not be refreshed.
func NewTextEngine(c *TextEngineConfig) (e *TextEngine, err error) {
	ruleList := &filterlist.StringRuleList{
		RulesText:      strings.Join(c.Rules, "\n"),
		ID:             c.ID,
		IgnoreCosmetic: true,
	}

	storage, err := filterlist.NewRuleStorage([]filterlist.RuleList{ruleList})
	if err != nil {
		return nil, fmt.Errorf("creating rule storage: %w", err)
	}

	e = &TextEngine{
		mu:      &sync.RWMutex{},
		engine:  urlfilter.NewDNSEngine(storage),
		storage: storage,
		name:    c.Name,
	}

	return e, nil
}
// FilterRequest matches req against the DNS filtering engine and returns the
// result of that match. The engine is read under the read lock, and the
// matching itself is done after the lock is released.
func (e *TextEngine) FilterRequest(
	req *urlfilter.DNSRequest,
) (res *urlfilter.DNSResult, hasMatched bool) {
	e.mu.RLock()
	engine := e.engine
	e.mu.RUnlock()

	return engine.MatchRequest(req)
}
// Close closes the rule storage backing e.  If e has no storage, Close does
// nothing and returns nil.  Any error from the storage is wrapped with the
// name of the engine.
func (e *TextEngine) Close() (err error) {
	e.mu.Lock()
	defer e.mu.Unlock()

	storage := e.storage
	if storage == nil {
		return nil
	}

	if closeErr := storage.Close(); closeErr != nil {
		return fmt.Errorf("closing text engine %q: %w", e.name, closeErr)
	}

	return nil
}

View File

@@ -0,0 +1,40 @@
package rulelist_test
import (
"testing"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/testutil"
"github.com/AdguardTeam/urlfilter"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewTextEngine checks that an engine created from rule text matches a
// request for a blocked hostname and reports the configured filter-list ID on
// the matched network rule.
func TestNewTextEngine(t *testing.T) {
	eng, err := rulelist.NewTextEngine(&rulelist.TextEngineConfig{
		Name: "RulesEngine",
		Rules: []string{
			testRuleTextTitle,
			testRuleTextBlocked,
		},
		ID: testURLFilterID,
	})
	require.NoError(t, err)
	require.NotNil(t, eng)

	// Make sure that the engine is closed, and closed successfully, when the
	// test ends.
	testutil.CleanupAndRequireSuccess(t, eng.Close)

	fltReq := &urlfilter.DNSRequest{
		Hostname: "blocked.example",
		Answer:   false,
		DNSType:  dns.TypeA,
	}

	fltRes, hasMatched := eng.FilterRequest(fltReq)
	assert.True(t, hasMatched)

	// Use require for the nil checks, since the assertion below dereferences
	// both pointers.
	require.NotNil(t, fltRes)
	require.NotNil(t, fltRes.NetworkRule)

	// The expected value goes first, so that a failure message labels the
	// values correctly.
	assert.Equal(t, testURLFilterID, fltRes.NetworkRule.FilterListID)
}

View File

@@ -14,6 +14,7 @@ import (
"time"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/cache"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/urlfilter"
@@ -98,7 +99,7 @@ func NewDefault(
cacheTTL: cacheTTL,
}
err = ss.resetEngine(filtering.SafeSearchListID, conf)
err = ss.resetEngine(rulelist.URLFilterIDSafeSearch, conf)
if err != nil {
// Don't wrap the error, because it's informative enough as is.
return nil, err
@@ -234,7 +235,7 @@ func (ss *Default) newResult(
) (res *filtering.Result, err error) {
res = &filtering.Result{
Rules: []*filtering.ResultRule{{
FilterListID: filtering.SafeSearchListID,
FilterListID: rulelist.URLFilterIDSafeSearch,
}},
Reason: filtering.FilteredSafeSearch,
IsFiltered: true,
@@ -368,7 +369,7 @@ func (ss *Default) Update(conf filtering.SafeSearchConfig) (err error) {
ss.mu.Lock()
defer ss.mu.Unlock()
err = ss.resetEngine(filtering.SafeSearchListID, conf)
err = ss.resetEngine(rulelist.URLFilterIDSafeSearch, conf)
if err != nil {
// Don't wrap the error, because it's informative enough as is.
return err

View File

@@ -9,6 +9,7 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/aghtest"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/safesearch"
"github.com/AdguardTeam/golibs/testutil"
"github.com/miekg/dns"
@@ -69,7 +70,7 @@ func TestDefault_CheckHost_yandex(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Equal(t, yandexIP, res.Rules[0].IP)
assert.EqualValues(t, filtering.SafeSearchListID, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
}
}
@@ -89,7 +90,7 @@ func TestDefault_CheckHost_yandexAAAA(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Empty(t, res.Rules[0].IP)
assert.EqualValues(t, filtering.SafeSearchListID, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
}
func TestDefault_CheckHost_google(t *testing.T) {
@@ -128,7 +129,7 @@ func TestDefault_CheckHost_google(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Equal(t, wantIP, res.Rules[0].IP)
assert.EqualValues(t, filtering.SafeSearchListID, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
})
}
}
@@ -180,7 +181,7 @@ func TestDefault_CheckHost_duckduckgoAAAA(t *testing.T) {
require.Len(t, res.Rules, 1)
assert.Empty(t, res.Rules[0].IP)
assert.EqualValues(t, filtering.SafeSearchListID, res.Rules[0].FilterListID)
assert.Equal(t, rulelist.URLFilterIDSafeSearch, res.Rules[0].FilterListID)
}
func TestDefault_Update(t *testing.T) {