Compare commits

...

20 Commits

Author SHA1 Message Date
Dimitry Kolyshev
94458c5658 Merge remote-tracking branch 'origin/master' into 3389-querylog-export
# Conflicts:
#	CHANGELOG.md
2023-06-22 13:28:17 +04:00
Dimitry Kolyshev
6fea435d89 Merge remote-tracking branch 'origin/master' into 3389-querylog-export
# Conflicts:
#	CHANGELOG.md
2023-06-15 14:40:06 +04:00
Dimitry Kolyshev
05706bd7ea querylog: bench search 2023-06-15 14:38:44 +04:00
Dimitry Kolyshev
00327757e1 querylog: bench search 2023-06-14 12:07:16 +04:00
Dimitry Kolyshev
5f0e53ded7 home: write timeout middleware 2023-06-14 10:51:17 +04:00
Dimitry Kolyshev
5cd4ce766d Merge remote-tracking branch 'origin/master' into 3389-querylog-export 2023-06-14 10:08:11 +04:00
Dimitry Kolyshev
e695fd9885 querylog: timeout revert 2023-06-14 10:07:22 +04:00
Dimitry Kolyshev
c43053e7d2 querylog: timeout 2023-06-13 14:36:31 +04:00
Dimitry Kolyshev
86e25944b3 querylog: imp code 2023-06-13 13:08:15 +04:00
Dimitry Kolyshev
fd7260f6de Merge remote-tracking branch 'origin/master' into 3389-querylog-export
# Conflicts:
#	CHANGELOG.md
2023-06-13 10:38:17 +04:00
Dimitry Kolyshev
c591e46254 querylog: imp code 2023-06-02 15:26:04 +03:00
Dimitry Kolyshev
66d9ea7cca querylog: imp docs 2023-06-02 15:15:37 +03:00
Dimitry Kolyshev
dafc785845 querylog: imp code docs 2023-06-02 12:09:50 +03:00
Dimitry Kolyshev
e9b17891bb Merge remote-tracking branch 'origin/master' into 3389-querylog-export 2023-06-02 12:00:57 +03:00
Dimitry Kolyshev
0b27f048a7 querylog: imp code 2023-05-31 16:26:56 +03:00
Dimitry Kolyshev
649454e77b querylog: imp code 2023-05-31 15:11:33 +03:00
Dimitry Kolyshev
ca22d8524d all: imp docs 2023-05-31 15:11:15 +03:00
Dimitry Kolyshev
07f4f0474c all: imp docs 2023-05-31 12:06:13 +03:00
Dimitry Kolyshev
8813e135b6 querylog: export 2023-05-31 11:49:15 +03:00
Dimitry Kolyshev
f4f2c11eb9 all: export querylog docs 2023-05-29 13:29:41 +03:00
9 changed files with 442 additions and 9 deletions

View File

@@ -25,6 +25,9 @@ NOTE: Add new changes BELOW THIS COMMENT.
### Added
- The new HTTP API, `GET /control/querylog/export`, which can be used to
export query log items. See `openapi/openapi.yaml` for the full description
([#3389]).
- The ability to set inactivity periods for filtering blocked services in the
configuration file ([#951]). The UI changes are coming in the upcoming
releases.
@@ -89,6 +92,7 @@ In this release, the schema version has changed from 20 to 21.
[#951]: https://github.com/AdguardTeam/AdGuardHome/issues/951
[#1577]: https://github.com/AdguardTeam/AdGuardHome/issues/1577
[#3389]: https://github.com/AdguardTeam/AdGuardHome/issues/3389
[#5910]: https://github.com/AdguardTeam/AdGuardHome/issues/5910
[#5913]: https://github.com/AdguardTeam/AdGuardHome/issues/5913

View File

@@ -3,13 +3,13 @@ package home
import (
"io"
"net/http"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/aghio"
"github.com/AdguardTeam/golibs/log"
)
// middlerware is a wrapper function signature.
// middleware is a wrapper function signature.
type middleware func(http.Handler) http.Handler
// withMiddlewares consequently wraps h with all the middlewares.
@@ -75,3 +75,48 @@ func limitRequestBody(h http.Handler) (limited http.Handler) {
h.ServeHTTP(w, rr)
})
}
// Response write timeouts used by the write timeout middleware.
const (
	// defaultWriteTimeout bounds the time allowed for writing a response to
	// an ordinary request.
	defaultWriteTimeout = time.Minute

	// longerWriteTimeout bounds the time allowed for writing a response for
	// the APIs that are expected to produce long-running responses.
	longerWriteTimeout = 5 * time.Minute
)
// expectsLongTimeoutRequests reports whether r must be served with the longer
// write timeout.  Only GET requests to the query log export API qualify.  These
// are exceptions for poorly designed current APIs as well as APIs that are
// designed to expect large files and requests.  Remove once the new, better
// APIs are up.
//
// TODO(d.kolyshev): This could be achieved with [http.NewResponseController]
// with go v1.20.
func expectsLongTimeoutRequests(r *http.Request) (ok bool) {
	// The method is checked first, so the URL is only inspected for GET
	// requests.
	return r.Method == http.MethodGet && r.URL.Path == "/control/querylog/export"
}
// addWriteTimeout wraps underlying handler h, adding a response write timeout.
func addWriteTimeout(h http.Handler) (limited http.Handler) {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var handler http.Handler
if expectsLongTimeoutRequests(r) {
handler = http.TimeoutHandler(h, longerWriteTimeout, "write timeout exceeded")
} else {
handler = http.TimeoutHandler(h, defaultWriteTimeout, "write timeout exceeded")
}
handler.ServeHTTP(w, r)
})
}
// limitHandler wraps underlying handler h with default limits, such as request
// body limit and write timeout.
func limitHandler(h http.Handler) (limited http.Handler) {
return limitRequestBody(addWriteTimeout(h))
}

View File

@@ -25,11 +25,13 @@ const (
// readTimeout is the maximum duration for reading the entire request,
// including the body.
readTimeout = 60 * time.Second
// readHdrTimeout is the amount of time allowed to read request headers.
readHdrTimeout = 60 * time.Second
// writeTimeout is the maximum duration before timing out writes of the
// response.
writeTimeout = 60 * time.Second
// response. This limit is overwritten by [addWriteTimeout] middleware.
writeTimeout = 10 * time.Minute
)
type webConfig struct {
@@ -169,7 +171,7 @@ func (web *webAPI) start() {
errs := make(chan error, 2)
// Use an h2c handler to support unencrypted HTTP/2, e.g. for proxies.
hdlr := h2c.NewHandler(withMiddlewares(Context.mux, limitRequestBody), &http2.Server{})
hdlr := h2c.NewHandler(withMiddlewares(Context.mux, limitHandler), &http2.Server{})
// Create a new instance, because the Web is not usable after Shutdown.
hostStr := web.conf.BindHost.String()
@@ -254,7 +256,7 @@ func (web *webAPI) tlsServerLoop() {
CipherSuites: Context.tlsCipherIDs,
MinVersion: tls.VersionTLS12,
},
Handler: withMiddlewares(Context.mux, limitRequestBody),
Handler: withMiddlewares(Context.mux, limitHandler),
ReadTimeout: web.conf.ReadTimeout,
ReadHeaderTimeout: web.conf.ReadHeaderTimeout,
WriteTimeout: web.conf.WriteTimeout,
@@ -288,7 +290,7 @@ func (web *webAPI) mustStartHTTP3(address string) {
CipherSuites: Context.tlsCipherIDs,
MinVersion: tls.VersionTLS12,
},
Handler: withMiddlewares(Context.mux, limitRequestBody),
Handler: withMiddlewares(Context.mux, limitHandler),
}
log.Debug("web: starting http/3 server")

108
internal/querylog/csv.go Normal file
View File

@@ -0,0 +1,108 @@
package querylog
import (
"strconv"
"strings"
"time"
"github.com/AdguardTeam/golibs/log"
"github.com/miekg/dns"
)
// csvRow is an alias for the fixed-size array that holds the fields of a
// single CSV row.
type csvRow = [18]string

// csvHeaderRow is the CSV header row.  It is an array, not a slice, and
// contains the column names in the same order in which the values of a row
// are emitted.
var csvHeaderRow = csvRow{
	// Answer data.
	"ans_dnssec", "ans_rcode", "ans_type", "ans_value",
	// Cache and client data.
	"cached", "client_ip", "client_id",
	// Request processing data.
	"ecs", "elapsed", "filter_id", "filter_rule", "proto",
	// Question data.
	"qclass", "qname", "qtype",
	// Result data.
	"reason", "time", "upstream",
}
// toCSV converts e into a CSV row.  The values are placed in the same order as
// the column names in csvHeaderRow.  The filter columns are only filled when
// the result is filtered and has at least one rule, in which case the first
// rule is used.
func (e *logEntry) toCSV() (out *csvRow) {
	var ruleListID, ruleText string
	if res := &e.Result; res.IsFiltered && len(res.Rules) > 0 {
		first := res.Rules[0]
		ruleListID = strconv.FormatInt(first.FilterListID, 10)
		ruleText = first.Text
	}

	ans := ansData(e)

	// The elapsed time is exported in milliseconds.
	elapsedMs := strconv.FormatFloat(e.Elapsed.Seconds()*1000, 'f', -1, 64)

	out = &csvRow{
		strconv.FormatBool(e.AuthenticatedData),
		ans.rCode,
		ans.typ,
		ans.value,
		strconv.FormatBool(e.Cached),
		e.IP.String(),
		e.ClientID,
		e.ReqECS,
		elapsedMs,
		ruleListID,
		ruleText,
		string(e.ClientProto),
		e.QClass,
		e.QHost,
		e.QType,
		e.Result.Reason.String(),
		e.Time.Format(time.RFC3339Nano),
		e.Upstream,
	}

	return out
}
// csvAnswer holds the answer-related fields of a CSV row.
type csvAnswer struct {
	// rCode is the string form of the response code of the answer message.
	rCode string

	// typ is the string form of the type of the first answer record.
	typ string

	// value is the string form of the first answer record with its header
	// part removed.
	value string
}

// ansData extracts the answer-related CSV fields from entry.  The result is
// empty if entry has no answer data or if the data cannot be unpacked; in the
// latter case the failure is logged at the debug level.  If the message
// contains no answer records, only the response code is set.
func ansData(entry *logEntry) (out csvAnswer) {
	raw := entry.Answer
	if len(raw) == 0 {
		return out
	}

	msg := &dns.Msg{}
	err := msg.Unpack(raw)
	if err != nil {
		log.Debug("querylog: failed to unpack dns msg answer: %v: %s", raw, err)

		return out
	}

	out.rCode = dns.RcodeToString[msg.Rcode]

	if len(msg.Answer) > 0 {
		first := msg.Answer[0]
		hdr := first.Header()

		out.typ = dns.TypeToString[hdr.Rrtype]

		// Drop the header part of the record string, since it's mostly
		// unnecessary in the log.
		out.value = strings.TrimPrefix(first.String(), hdr.String())
	}

	return out
}

View File

@@ -0,0 +1,73 @@
package querylog
import (
"net"
"testing"
"time"
"github.com/AdguardTeam/AdGuardHome/internal/filtering"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// testDate is a fixed point in time used by the tests.
var testDate = time.Date(2022, 1, 1, 0, 0, 0, 0, time.UTC)

// TestLogEntry_toCSV checks that a log entry is converted into the expected
// CSV row.
func TestLogEntry_toCSV(t *testing.T) {
	rr, err := dns.NewRR("www.example.org. IN A 127.0.0.1")
	require.NoError(t, err)

	respMsg := &dns.Msg{Answer: []dns.RR{rr}}
	packed, err := respMsg.Pack()
	require.NoError(t, err)

	testCases := []struct {
		entry *logEntry
		want  *csvRow
		name  string
	}{{
		name: "simple",
		entry: &logEntry{
			Time:              testDate,
			QHost:             "test.host",
			QType:             "A",
			QClass:            "IN",
			ReqECS:            "",
			ClientID:          "test-client-id",
			ClientProto:       ClientProtoDoH,
			Upstream:          "https://test.upstream:443/dns-query",
			Answer:            packed,
			OrigAnswer:        nil,
			IP:                net.IP{1, 2, 3, 4},
			Result:            filtering.Result{},
			Elapsed:           500 * time.Millisecond,
			Cached:            false,
			AuthenticatedData: false,
		},
		want: &csvRow{
			"false",
			"NOERROR",
			"A",
			"127.0.0.1",
			"false",
			"1.2.3.4",
			"test-client-id",
			"",
			"500",
			"",
			"",
			"doh",
			"IN",
			"test.host",
			"A",
			"NotFilteredNotFound",
			"2022-01-01T00:00:00Z",
			"https://test.upstream:443/dns-query",
		},
	}}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.entry.toCSV()
			assert.Equal(t, tc.want, got)
		})
	}
}

View File

@@ -1,6 +1,7 @@
package querylog
import (
"encoding/csv"
"encoding/json"
"fmt"
"math"
@@ -14,6 +15,7 @@ import (
"github.com/AdguardTeam/AdGuardHome/internal/aghalg"
"github.com/AdguardTeam/AdGuardHome/internal/aghhttp"
"github.com/AdguardTeam/AdGuardHome/internal/aghnet"
"github.com/AdguardTeam/golibs/httphdr"
"github.com/AdguardTeam/golibs/log"
"github.com/AdguardTeam/golibs/stringutil"
"github.com/AdguardTeam/golibs/timeutil"
@@ -62,6 +64,7 @@ func (l *queryLog) initWeb() {
l.conf.HTTPRegister(http.MethodGet, "/control/querylog", l.handleQueryLog)
l.conf.HTTPRegister(http.MethodPost, "/control/querylog_clear", l.handleQueryLogClear)
l.conf.HTTPRegister(http.MethodGet, "/control/querylog/config", l.handleGetQueryLogConfig)
l.conf.HTTPRegister(http.MethodGet, "/control/querylog/export", l.handleQueryLogExport)
l.conf.HTTPRegister(
http.MethodPut,
"/control/querylog/config/update",
@@ -96,6 +99,73 @@ func (l *queryLog) handleQueryLog(w http.ResponseWriter, r *http.Request) {
_ = aghhttp.WriteJSONResponse(w, r, resp)
}
// exportChunkSize is the number of entries requested in one search-flush
// iteration of the query log export.
//
// TODO(a.meshkov): Consider making configurable.
const exportChunkSize = 500

// handleQueryLogExport is the handler for the GET /control/querylog/export
// HTTP API.  It parses the search criteria from the URL query, writes the CSV
// header row, and then writes the matching entries in chunks of
// exportChunkSize, flushing the CSV writer after every chunk.
//
// The export stops as soon as a write or a flush fails or the request context
// is done, so that no further searches are performed for a response that
// cannot be delivered.  Errors that occur after the header row has been
// flushed are only logged.
func (l *queryLog) handleQueryLogExport(w http.ResponseWriter, r *http.Request) {
	searchCriteria, err := parseSearchCriteria(r.URL.Query())
	if err != nil {
		aghhttp.Error(r, w, http.StatusBadRequest, "parsing params: %s", err)

		return
	}

	params := &searchParams{
		limit:          exportChunkSize,
		searchCriteria: searchCriteria,
	}

	w.Header().Set(httphdr.ContentType, "text/csv; charset=UTF-8; header=present")
	w.Header().Set(httphdr.ContentDisposition, "attachment;filename=data.csv")

	// logErr logs an error that can no longer be reported to the client.
	//
	// TODO(a.garipov): Set Trailer X-Error header.
	logErr := func(msg string, err error) {
		log.Error("%s %s %s: %s: %s", r.Method, r.Host, r.URL, msg, err)
	}

	csvWriter := csv.NewWriter(w)

	// Write header.
	if err = csvWriter.Write(csvHeaderRow[:]); err != nil {
		aghhttp.Error(r, w, http.StatusInternalServerError, "writing csv header: %s", err)

		return
	}

	// The CSV writer is buffered, so write errors may only surface on flush.
	csvWriter.Flush()
	if err = csvWriter.Error(); err != nil {
		logErr("writing csv header", err)

		return
	}

	ctx := r.Context()
	for ctx.Err() == nil {
		var entries []*logEntry
		func() {
			// Hold the lock only for the duration of a single search.
			l.confMu.RLock()
			defer l.confMu.RUnlock()

			// The second result of search is not used by the export.
			entries, _ = l.search(params)
		}()

		if len(entries) == 0 {
			return
		}

		params.offset += params.limit

		for _, entry := range entries {
			row := entry.toCSV()
			if err = csvWriter.Write(row[:]); err != nil {
				logErr("writing csv record", err)

				return
			}
		}

		// Check the flush result right away instead of discovering the
		// failure only after another search.
		csvWriter.Flush()
		if err = csvWriter.Error(); err != nil {
			logErr("writing csv", err)

			return
		}
	}

	log.Debug("querylog: export interrupted: %s", ctx.Err())
}
// handleQueryLogClear is the handler for the POST /control/querylog/clear HTTP
// API.
func (l *queryLog) handleQueryLogClear(_ http.ResponseWriter, _ *http.Request) {
@@ -360,6 +430,17 @@ func parseSearchParams(r *http.Request) (p *searchParams, err error) {
p.maxFileScanEntries = 0
}
p.searchCriteria, err = parseSearchCriteria(q)
if err != nil {
// Don't wrap the error, because it's informative enough as is.
return nil, err
}
return p, nil
}
// parseSearchCriteria parses a list of search criteria from the query.
func parseSearchCriteria(q url.Values) (searchCriteria []searchCriterion, err error) {
for _, v := range []struct {
urlField string
ct criterionType
@@ -378,9 +459,9 @@ func parseSearchParams(r *http.Request) (p *searchParams, err error) {
}
if ok {
p.searchCriteria = append(p.searchCriteria, c)
searchCriteria = append(searchCriteria, c)
}
}
return p, nil
return searchCriteria, nil
}

View File

@@ -93,3 +93,67 @@ func TestQueryLog_Search_findClient(t *testing.T) {
assert.Equal(t, knownClientName, gotClient.Name)
}
// BenchmarkQueryLog_Search compares the speed of search with limit-offset
// parameters and the one with oldenThan timestamp specified.
func BenchmarkQueryLog_Search(b *testing.B) {
l, err := newQueryLog(Config{
Enabled: true,
RotationIvl: timeutil.Day,
MemSize: 100,
BaseDir: b.TempDir(),
})
require.NoError(b, err)
const (
entNum = 100000
firstPageDomain = "first.example.org"
secondPageDomain = "second.example.org"
)
// Add entries to the log.
for i := 0; i < entNum; i++ {
addEntry(l, secondPageDomain, net.IPv4(1, 1, 1, 1), net.IPv4(2, 2, 2, 1))
}
// Write them to the first file.
require.NoError(b, l.flushLogBuffer())
// Add more to the in-memory part of log.
for i := 0; i < entNum; i++ {
addEntry(l, firstPageDomain, net.IPv4(1, 1, 1, 1), net.IPv4(2, 2, 2, 1))
}
b.Run("limit_offset", func(b *testing.B) {
params := newSearchParams()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
params.offset += params.limit
_, _ = l.search(params)
}
})
b.Run("timestamp", func(b *testing.B) {
params := newSearchParams()
params.olderThan = time.Now().Add(-1 * time.Hour)
b.ReportAllocs()
for i := 0; i < b.N; i++ {
params.olderThan = params.olderThan.Add(1 * time.Minute)
_, _ = l.search(params)
}
})
// Most recent result, on a MBP15:
//
// goos: darwin
// goarch: amd64
// pkg: github.com/AdguardTeam/AdGuardHome/internal/querylog
// cpu: Intel(R) Core(TM) i7-8750H CPU @ 2.20GHz
// BenchmarkQueryLog_Search
// BenchmarkQueryLog_Search/limit_offset
// BenchmarkQueryLog_Search/limit_offset-12 547 2066079 ns/op 2325019 B/op 26633 allocs/op
// BenchmarkQueryLog_Search/timestamp
// BenchmarkQueryLog_Search/timestamp-12 1303 2028888 ns/op 2219337 B/op 25194 allocs/op
}

View File

@@ -6,6 +6,17 @@
## v0.107.30: API changes
### New HTTP API 'GET /control/querylog/export'
* The new `GET /control/querylog/export` HTTP API allows exporting query log
items as a CSV file.  The response is CSV data with a header row, in the following format:
```csv
ans_dnssec,ans_rcode,ans_type,ans_value,cached,client_ip,client_id,ecs,elapsed,filter_id,filter_rule,proto,qclass,qname,qtype,reason,time,upstream
false,NOERROR,A,192.168.1.1,false,127.0.0.1,,,0.097409,,,,IN,example.com,A,Rewrite,2023-01-30T12:21:13.947563+07:00,
false,NOERROR,A,45.33.2.79,false,127.0.0.1,,,482.967871,,,,IN,test.com,A,NotFilteredNotFound,2022-12-13T12:18:04.964403+07:00,https://dns10.quad9.net:443/dns-query
```
### `POST /control/version.json` and `GET /control/dhcp/interfaces` content type
* The value of the `Content-Type` header in the `POST /control/version.json` and

View File

@@ -313,6 +313,51 @@
'responses':
'200':
'description': 'OK.'
'/querylog/export':
'get':
'tags':
- 'log'
'description': >
Returns a CSV file stream with the following fields, sorted a-z:
ans_dnssec, ans_rcode, ans_type, ans_value, cached, client_ip,
client_id, ecs, elapsed, filter_id, filter_rule, proto, qclass, qname,
qtype, reason, time, upstream.  The list of fields is subject to change.
The content is UTF-8 encoded; fields are quoted where necessary.
'operationId': 'getQueryLogExport'
'summary': 'Get DNS server query log items in a CSV stream.'
'parameters':
- 'name': 'search'
'in': 'query'
'description': 'Filter by domain name or client IP'
'schema':
'type': 'string'
- 'name': 'response_status'
'in': 'query'
'description': 'Filter by response status'
'schema':
'type': 'string'
'enum':
- 'all'
- 'filtered'
- 'blocked'
- 'blocked_safebrowsing'
- 'blocked_parental'
- 'whitelisted'
- 'rewritten'
- 'safe_search'
- 'processed'
'responses':
'200':
'description': 'OK.'
'content':
'text/csv':
'schema':
'type': 'string'
'example': >
ans_dnssec,ans_rcode,ans_type,ans_value,cached,client_ip,client_id,ecs,elapsed,filter_id,filter_rule,proto,qclass,qname,qtype,reason,time,upstream
false,NOERROR,A,192.168.1.1,false,127.0.0.1,,,0.097409,,,,IN,example.com,A,Rewrite,2023-01-30T12:21:13.947563+07:00,
false,NOERROR,A,45.33.2.79,false,127.0.0.1,,,482.967871,,,,IN,test.com,A,NotFilteredNotFound,2022-12-13T12:18:04.964403+07:00,https://dns10.quad9.net:443/dns-query
'/stats':
'get':
'tags':