all: sync with master

This commit is contained in:
Eugene Burkov
2024-12-05 16:00:18 +03:00
parent 54f3a5f990
commit 3f95db98d3
143 changed files with 3476 additions and 2959 deletions

View File

@@ -9,7 +9,7 @@ import (
"testing"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghhttp"
"github.com/AdguardTeam/golibs/netutil/urlutil"
"github.com/AdguardTeam/golibs/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -33,7 +33,7 @@ func serveHTTPLocally(t *testing.T, h http.Handler) (urlStr string) {
require.IsType(t, (*net.TCPAddr)(nil), addr)
return (&url.URL{
Scheme: aghhttp.SchemeHTTP,
Scheme: urlutil.SchemeHTTP,
Host: addr.String(),
}).String()
}

View File

@@ -1057,7 +1057,7 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
}
}
err = aghos.MkdirAll(filepath.Join(d.conf.DataDir, filterDir), aghos.DefaultPermDir)
err = os.MkdirAll(filepath.Join(d.conf.DataDir, filterDir), aghos.DefaultPermDir)
if err != nil {
d.Close()

View File

@@ -13,10 +13,10 @@ import (
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghhttp"
"github.com/AdguardTeam/AdGuardHome/internal/aghos"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/netutil/urlutil"
"github.com/miekg/dns"
)
@@ -26,7 +26,7 @@ func (d *DNSFilter) validateFilterURL(urlStr string) (err error) {
if filepath.IsAbs(urlStr) {
urlStr = filepath.Clean(urlStr)
_, err = aghos.Stat(urlStr)
_, err = os.Stat(urlStr)
if err != nil {
// Don't wrap the error since it's informative enough as is.
return err
@@ -41,19 +41,14 @@ func (d *DNSFilter) validateFilterURL(urlStr string) (err error) {
u, err := url.ParseRequestURI(urlStr)
if err != nil {
// Don't wrap the error since it's informative enough as is.
// Don't wrap the error, because it's informative enough as is.
return err
}
if s := u.Scheme; s != aghhttp.SchemeHTTP && s != aghhttp.SchemeHTTPS {
return &url.Error{
Op: "Check scheme",
URL: urlStr,
Err: fmt.Errorf("only %v allowed", []string{
aghhttp.SchemeHTTP,
aghhttp.SchemeHTTPS,
}),
}
err = urlutil.ValidateHTTPURL(u)
if err != nil {
// Don't wrap the error, because it's informative enough as is.
return err
}
return nil

View File

@@ -3,11 +3,12 @@ package rulelist
import (
"context"
"fmt"
"log/slog"
"net/http"
"sync"
"github.com/AdguardTeam/golibs/errors"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/logutil/slogutil"
"github.com/AdguardTeam/urlfilter"
"github.com/AdguardTeam/urlfilter/filterlist"
"github.com/c2h5oh/datasize"
@@ -18,6 +19,9 @@ import (
//
// TODO(a.garipov): Merge with [TextEngine] in some way?
type Engine struct {
// logger is used to log the operation of the engine and its refreshes.
logger *slog.Logger
// mu protects engine and storage.
//
// TODO(a.garipov): See if anything else should be protected.
@@ -29,8 +33,7 @@ type Engine struct {
// storage is the filtering-rule storage. It is saved here to close it.
storage *filterlist.RuleStorage
// name is the human-readable name of the engine, like "allowed", "blocked",
// or "custom".
// name is the human-readable name of the engine.
name string
// filters is the data about rule filters in this engine.
@@ -40,12 +43,15 @@ type Engine struct {
// EngineConfig is the configuration for rule-list filtering engines created by
// combining refreshable filters.
type EngineConfig struct {
// Name is the human-readable name of this engine, like "allowed",
// "blocked", or "custom".
// Logger is used to log the operation of the engine. It must not be nil.
Logger *slog.Logger
// Name is the human-readable name of the engine; see [EngineNameAllow] and
// similar constants.
Name string
// Filters is the data about rule lists in this engine. There must be no
// other references to the elements of this slice.
// other references to the items of this slice. Each item must not be nil.
Filters []*Filter
}
@@ -53,6 +59,7 @@ type EngineConfig struct {
// refreshed, so a refresh should be performed before use.
func NewEngine(c *EngineConfig) (e *Engine) {
return &Engine{
logger: c.Logger,
mu: &sync.RWMutex{},
name: c.Name,
filters: c.Filters,
@@ -85,7 +92,7 @@ func (e *Engine) FilterRequest(
}
// currentEngine returns the current filtering engine.
func (e *Engine) currentEngine() (enging *urlfilter.DNSEngine) {
func (e *Engine) currentEngine() (engine *urlfilter.DNSEngine) {
e.mu.RLock()
defer e.mu.RUnlock()
@@ -96,7 +103,7 @@ func (e *Engine) currentEngine() (enging *urlfilter.DNSEngine) {
// parseBuf, cli, cacheDir, and maxSize are used for updates of rule-list
// filters; see [Filter.Refresh].
//
// TODO(a.garipov): Unexport and test in an internal test or through enigne
// TODO(a.garipov): Unexport and test in an internal test or through engine
// tests.
func (e *Engine) Refresh(
ctx context.Context,
@@ -115,20 +122,20 @@ func (e *Engine) Refresh(
}
if len(filtersToRefresh) == 0 {
log.Info("filtering: updating engine %q: no rule-list filters", e.name)
e.logger.InfoContext(ctx, "updating: no rule-list filters")
return nil
}
engRefr := &engineRefresh{
httpCli: cli,
cacheDir: cacheDir,
engineName: e.name,
parseBuf: parseBuf,
maxSize: maxSize,
logger: e.logger,
httpCli: cli,
cacheDir: cacheDir,
parseBuf: parseBuf,
maxSize: maxSize,
}
ruleLists, errs := engRefr.process(ctx, e.filters)
ruleLists, errs := engRefr.process(ctx, filtersToRefresh)
if isOneTimeoutError(errs) {
// Don't wrap the error since it's informative enough as is.
return err
@@ -141,14 +148,14 @@ func (e *Engine) Refresh(
return errors.Join(errs...)
}
e.resetStorage(storage)
e.resetStorage(ctx, storage)
return errors.Join(errs...)
}
// resetStorage sets e.storage and e.engine and closes the previous storage.
// Errors from closing the previous storage are logged.
func (e *Engine) resetStorage(storage *filterlist.RuleStorage) {
func (e *Engine) resetStorage(ctx context.Context, storage *filterlist.RuleStorage) {
e.mu.Lock()
defer e.mu.Unlock()
@@ -161,7 +168,7 @@ func (e *Engine) resetStorage(storage *filterlist.RuleStorage) {
err := prevStorage.Close()
if err != nil {
log.Error("filtering: engine %q: closing old storage: %s", e.name, err)
e.logger.WarnContext(ctx, "closing old storage", slogutil.KeyError, err)
}
}
@@ -179,11 +186,11 @@ func isOneTimeoutError(errs []error) (ok bool) {
// engineRefresh represents a single ongoing engine refresh.
type engineRefresh struct {
httpCli *http.Client
cacheDir string
engineName string
parseBuf []byte
maxSize datasize.ByteSize
logger *slog.Logger
httpCli *http.Client
cacheDir string
parseBuf []byte
maxSize datasize.ByteSize
}
// process runs updates of all given rule-list filters. All errors are logged
@@ -216,12 +223,12 @@ func (r *engineRefresh) process(
errs = append(errs, err)
// Also log immediately, since the update can take a lot of time.
log.Error(
"filtering: updating engine %q: rule list %s from url %q: %s\n",
r.engineName,
f.uid,
f.url,
err,
r.logger.ErrorContext(
ctx,
"updating rule list",
"uid", f.uid,
"url", f.url,
slogutil.KeyError, err,
)
}
@@ -237,17 +244,17 @@ func (r *engineRefresh) processFilter(ctx context.Context, f *Filter) (err error
}
if prevChecksum == parseRes.Checksum {
log.Info("filtering: engine %q: filter %q: no change", r.engineName, f.uid)
r.logger.InfoContext(ctx, "no change in filter", "uid", f.uid)
return nil
}
log.Info(
"filtering: updated engine %q: filter %q: %d bytes, %d rules",
r.engineName,
f.uid,
parseRes.BytesWritten,
parseRes.RulesCount,
r.logger.InfoContext(
ctx,
"filter updated",
"uid", f.uid,
"bytes", parseRes.BytesWritten,
"rules", parseRes.RulesCount,
)
return nil

View File

@@ -5,6 +5,7 @@ import (
"testing"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/logutil/slogutil"
"github.com/AdguardTeam/golibs/testutil"
"github.com/AdguardTeam/urlfilter"
"github.com/miekg/dns"
@@ -13,6 +14,8 @@ import (
)
func TestEngine_Refresh(t *testing.T) {
t.Parallel()
cacheDir := t.TempDir()
fileURL, srvURL := newFilterLocations(t, cacheDir, testRuleTextBlocked, testRuleTextBlocked2)
@@ -21,6 +24,7 @@ func TestEngine_Refresh(t *testing.T) {
httpFlt := newFilter(t, srvURL, "HTTP Filter")
eng := rulelist.NewEngine(&rulelist.EngineConfig{
Logger: slogutil.NewDiscardLogger(),
Name: "Engine",
Filters: []*rulelist.Filter{fileFlt, httpFlt},
})

View File

@@ -105,7 +105,7 @@ func NewFilter(c *FilterConfig) (f *Filter, err error) {
// buffer used to parse information from the data. cli and maxSize are only
// used when f is a URL-based list.
//
// TODO(a.garipov): Unexport and test in an internal test or through enigne
// TODO(a.garipov): Unexport and test in an internal test or through engine
// tests.
//
// TODO(a.garipov): Consider not returning parseRes.

View File

@@ -8,12 +8,15 @@ import (
"testing"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/netutil/urlutil"
"github.com/AdguardTeam/golibs/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFilter_Refresh(t *testing.T) {
t.Parallel()
cacheDir := t.TempDir()
uid := rulelist.MustNewUID()
@@ -37,7 +40,7 @@ func TestFilter_Refresh(t *testing.T) {
}, {
name: "file",
url: &url.URL{
Scheme: "file",
Scheme: urlutil.SchemeFile,
Path: fileURL.Path,
},
wantNewErrMsg: "",
@@ -49,6 +52,8 @@ func TestFilter_Refresh(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
f, err := rulelist.NewFilter(&rulelist.FilterConfig{
URL: tc.url,
Name: tc.name,

View File

@@ -71,3 +71,10 @@ var _ fmt.Stringer = UID{}
func (id UID) String() (s string) {
	// Convert to the underlying UUID type and delegate to its String method.
	u := uuid.UUID(id)

	return u.String()
}
// Common engine names. [NewStorage] uses them as the Name values of the
// engines it creates.
const (
// EngineNameAllow is the name of the engine that [NewStorage] creates from
// the allow filters. It is also the value of the "engine" logger attribute
// of that engine.
EngineNameAllow = "allow"
// EngineNameBlock is the name of the engine that [NewStorage] creates from
// the block filters. It is also the value of the "engine" logger attribute
// of that engine.
EngineNameBlock = "block"
// EngineNameCustom is the name of the text engine that [NewStorage] creates
// from the user's custom rules.
EngineNameCustom = "custom"
)

View File

@@ -6,20 +6,16 @@ import (
"net/http/httptest"
"net/url"
"os"
"path/filepath"
"sync/atomic"
"testing"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/netutil/urlutil"
"github.com/AdguardTeam/golibs/testutil"
"github.com/stretchr/testify/require"
)
func TestMain(m *testing.M) {
testutil.DiscardLogOutput(m)
}
// testTimeout is the common timeout for tests.
const testTimeout = 1 * time.Second
@@ -31,6 +27,7 @@ const testTitle = "Test Title"
// Common rule texts for tests.
const (
testRuleTextAllowed = "||allowed.example^\n"
testRuleTextBadTab = "||bad-tab-and-comment.example^\t# A comment.\n"
testRuleTextBlocked = "||blocked.example^\n"
testRuleTextBlocked2 = "||blocked-2.example^\n"
@@ -79,8 +76,16 @@ func newFilterLocations(
fileData string,
httpData string,
) (fileURL, srvURL *url.URL) {
filePath := filepath.Join(cacheDir, "initial.txt")
err := os.WriteFile(filePath, []byte(fileData), 0o644)
t.Helper()
f, err := os.CreateTemp(cacheDir, "")
require.NoError(t, err)
err = f.Close()
require.NoError(t, err)
filePath := f.Name()
err = os.WriteFile(filePath, []byte(fileData), 0o644)
require.NoError(t, err)
testutil.CleanupAndRequireSuccess(t, func() (err error) {
@@ -88,7 +93,7 @@ func newFilterLocations(
})
fileURL = &url.URL{
Scheme: "file",
Scheme: urlutil.SchemeFile,
Path: filePath,
}

View File

@@ -0,0 +1,112 @@
package rulelist
import (
"context"
"fmt"
"log/slog"
"net/http"
"sync"
"github.com/AdguardTeam/golibs/errors"
"github.com/c2h5oh/datasize"
)
// Storage contains the main filtering engines, including the allowlist, the
// blocklist, and the user's custom filtering rules.
type Storage struct {
// refreshMu makes sure that only one update takes place at a time.
refreshMu *sync.Mutex
// allow is the engine created from [StorageConfig.AllowFilters].
allow *Engine
// block is the engine created from [StorageConfig.BlockFilters].
block *Engine
// custom is the text engine created from [StorageConfig.CustomRules].
custom *TextEngine
// httpCli is the HTTP client passed to the allow and block engines on
// refresh; see [StorageConfig.HTTPClient].
httpCli *http.Client
// cacheDir is the cache directory path passed to the allow and block
// engines on refresh; see [StorageConfig.CacheDir].
cacheDir string
// parseBuf is the buffer passed to the allow and block engines on refresh.
// Both engines receive the same buffer; refreshes run under refreshMu.
parseBuf []byte
// maxSize is the size limit passed to the allow and block engines on
// refresh; see [StorageConfig.MaxRuleListTextSize].
maxSize datasize.ByteSize
}
// StorageConfig is the configuration for the filtering-engine storage.
type StorageConfig struct {
// Logger is used to log the operation of the storage. It must not be nil.
// [NewStorage] derives the loggers of the allow and block engines from it
// by adding the "engine" attribute.
Logger *slog.Logger
// HTTPClient is the HTTP client used to perform updates of rule lists.
// It must not be nil.
HTTPClient *http.Client
// CacheDir is the path to the directory used to cache rule-list files.
// It must be set.
CacheDir string
// AllowFilters are the filtering-rule lists used to exclude domain names
// from the filtering. Each item must not be nil.
AllowFilters []*Filter
// BlockFilters are the filtering-rule lists used to block domain names.
// Each item must not be nil.
BlockFilters []*Filter
// CustomRules contains custom rules of the user. They have priority over
// both allowlist and blocklist rules.
CustomRules []string
// MaxRuleListTextSize is the maximum size of a rule-list file. It must be
// greater than zero.
MaxRuleListTextSize datasize.ByteSize
}
// NewStorage returns a new filtering-engine storage built from c. None of the
// engines are refreshed by this call, so a refresh should be performed before
// the storage is used. c must not be nil.
func NewStorage(c *StorageConfig) (s *Storage, err error) {
	// Build the custom engine first, since it is the only step that can fail.
	textConf := &TextEngineConfig{
		Name:  EngineNameCustom,
		Rules: c.CustomRules,
		ID:    URLFilterIDCustom,
	}

	customEng, err := NewTextEngine(textConf)
	if err != nil {
		return nil, fmt.Errorf("creating custom engine: %w", err)
	}

	// Each rule-list engine gets its own logger tagged with the engine name.
	allowEng := NewEngine(&EngineConfig{
		Logger:  c.Logger.With("engine", EngineNameAllow),
		Name:    EngineNameAllow,
		Filters: c.AllowFilters,
	})
	blockEng := NewEngine(&EngineConfig{
		Logger:  c.Logger.With("engine", EngineNameBlock),
		Name:    EngineNameBlock,
		Filters: c.BlockFilters,
	})

	s = &Storage{
		refreshMu: &sync.Mutex{},
		allow:     allowEng,
		block:     blockEng,
		custom:    customEng,
		httpCli:   c.HTTPClient,
		cacheDir:  c.CacheDir,
		parseBuf:  make([]byte, DefaultRuleBufSize),
		maxSize:   c.MaxRuleListTextSize,
	}

	return s, nil
}
// Close closes all underlying engines: the allow and block rule-list engines
// as well as the custom-rule text engine. Every engine is closed even if
// closing another one fails; the resulting errors are joined.
func (s *Storage) Close() (err error) {
	// Don't wrap the errors since they are informative enough as is.
	//
	// The custom text engine keeps its own rule storage, so it is closed here
	// as well to avoid leaving that storage open.
	return errors.Join(
		s.allow.Close(),
		s.block.Close(),
		s.custom.Close(),
	)
}
// Refresh updates the allow and block engines in s, one after the other, and
// returns the joined errors of both updates. Concurrent calls are serialized.
//
// TODO(a.garipov): Refresh allow and block separately?
func (s *Storage) Refresh(ctx context.Context) (err error) {
	s.refreshMu.Lock()
	defer s.refreshMu.Unlock()

	// The block engine is refreshed even if the allow engine has failed.
	allowErr := s.allow.Refresh(ctx, s.parseBuf, s.httpCli, s.cacheDir, s.maxSize)
	blockErr := s.block.Refresh(ctx, s.parseBuf, s.httpCli, s.cacheDir, s.maxSize)

	// Don't wrap the errors since they are informative enough as is.
	return errors.Join(allowErr, blockErr)
}

View File

@@ -0,0 +1,49 @@
package rulelist_test
import (
"net/http"
"testing"
"github.com/AdguardTeam/AdGuardHome/internal/filtering/rulelist"
"github.com/AdguardTeam/golibs/logutil/slogutil"
"github.com/AdguardTeam/golibs/testutil"
"github.com/c2h5oh/datasize"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestStorage_Refresh checks that a storage with one file-based allow filter,
// one file-based block filter, and one custom rule can be created and
// refreshed without errors.
func TestStorage_Refresh(t *testing.T) {
	t.Parallel()

	dir := t.TempDir()

	// Only the file locations are used here; the HTTP ones are discarded.
	allowURL, _ := newFilterLocations(t, dir, testRuleTextAllowed, "")
	allowFlt := newFilter(t, allowURL, "Allowed 1")

	blockURL, _ := newFilterLocations(t, dir, testRuleTextBlocked, "")
	blockFlt := newFilter(t, blockURL, "Blocked 1")

	conf := &rulelist.StorageConfig{
		Logger:              slogutil.NewDiscardLogger(),
		HTTPClient:          &http.Client{Timeout: testTimeout},
		CacheDir:            dir,
		AllowFilters:        []*rulelist.Filter{allowFlt},
		BlockFilters:        []*rulelist.Filter{blockFlt},
		CustomRules:         []string{testRuleTextBlocked2},
		MaxRuleListTextSize: 1 * datasize.KB,
	}

	strg, err := rulelist.NewStorage(conf)
	require.NoError(t, err)

	// Close the storage when the test finishes.
	testutil.CleanupAndRequireSuccess(t, strg.Close)

	ctx := testutil.ContextWithTimeout(t, testTimeout)
	err = strg.Refresh(ctx)
	assert.NoError(t, err)
}

View File

@@ -20,15 +20,15 @@ type TextEngine struct {
// storage is the filtering-rule storage. It is saved here to close it.
storage *filterlist.RuleStorage
// name is the human-readable name of the engine, like "custom".
// name is the human-readable name of the engine.
name string
}
// TextEngineConfig is the configuration for a rule-list filtering engine
// created from a filtering rule text.
type TextEngineConfig struct {
// Name is the human-readable name of this engine, like "allowed",
// "blocked", or "custom".
// Name is the human-readable name of the engine; see [EngineNameAllow] and
// similar constants.
Name string
// Rules is the text of the filtering rules for this engine.

View File

@@ -12,6 +12,8 @@ import (
)
func TestNewTextEngine(t *testing.T) {
t.Parallel()
eng, err := rulelist.NewTextEngine(&rulelist.TextEngineConfig{
Name: "RulesEngine",
Rules: []string{