248 lines
4.9 KiB
Go
248 lines
4.9 KiB
Go
package filters
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/AdguardTeam/AdGuardHome/util"
|
|
"github.com/AdguardTeam/golibs/log"
|
|
)
|
|
|
|
// isPrintableText reports whether data consists only of printable text.
//
// The check is done byte by byte: ASCII control characters (anything below
// the space character, plus DEL, 0x7f) are rejected, except for LF, CR and
// TAB.  Bytes with the high bit set are accepted as is, which lets multi-byte
// UTF-8 sequences through without validating them.
func isPrintableText(data []byte) bool {
	for _, b := range data {
		switch b {
		case '\n', '\r', '\t':
			// the only control characters that are allowed
			continue
		}

		if b < ' ' || b == 0x7f {
			return false
		}
	}

	return true
}
|
|
|
|
// Download filter data
|
|
// Return nil on success. Set f.Path to a file path, or "" if the file was not modified
|
|
func (fs *filterStg) downloadFilter(f *Filter) error {
|
|
log.Debug("Filters: Downloading filter from %s", f.URL)
|
|
|
|
// create temp file
|
|
tmpFile, err := ioutil.TempFile(filepath.Join(fs.conf.FilterDir), "")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
if tmpFile != nil {
|
|
_ = tmpFile.Close()
|
|
_ = os.Remove(tmpFile.Name())
|
|
}
|
|
}()
|
|
|
|
// create data reader object
|
|
var reader io.Reader
|
|
if filepath.IsAbs(f.URL) {
|
|
f, err := os.Open(f.URL)
|
|
if err != nil {
|
|
return fmt.Errorf("open file: %s", err)
|
|
}
|
|
defer f.Close()
|
|
reader = f
|
|
} else {
|
|
req, err := http.NewRequest("GET", f.URL, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(f.LastModified) != 0 {
|
|
req.Header.Add("If-Modified-Since", f.LastModified)
|
|
}
|
|
|
|
resp, err := fs.conf.HTTPClient.Do(req)
|
|
if resp != nil && resp.Body != nil {
|
|
defer resp.Body.Close()
|
|
}
|
|
if err != nil {
|
|
f.networkError = true
|
|
return err
|
|
}
|
|
|
|
if resp.StatusCode == 304 { // "NOT_MODIFIED"
|
|
log.Debug("Filters: filter %s isn't modified since %s",
|
|
f.URL, f.LastModified)
|
|
f.LastUpdated = time.Now()
|
|
f.Path = ""
|
|
return nil
|
|
|
|
} else if resp.StatusCode != 200 {
|
|
err := fmt.Errorf("Filters: Couldn't download filter from %s: status code: %d",
|
|
f.URL, resp.StatusCode)
|
|
return err
|
|
}
|
|
|
|
f.LastModified = resp.Header.Get("Last-Modified")
|
|
|
|
reader = resp.Body
|
|
}
|
|
|
|
// parse and validate data, write to a file
|
|
err = writeFile(f, reader, tmpFile)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Closing the file before renaming it is necessary on Windows
|
|
_ = tmpFile.Close()
|
|
fname := fs.filePath(*f)
|
|
err = os.Rename(tmpFile.Name(), fname)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
tmpFile = nil // prevent from deleting this file in "defer" handler
|
|
|
|
log.Debug("Filters: saved filter %s at %s", f.URL, fname)
|
|
f.Path = fname
|
|
f.LastUpdated = time.Now()
|
|
return nil
|
|
}
|
|
|
|
func gatherUntil(dst []byte, dstLen int, src []byte, until int) int {
|
|
num := util.MinInt(len(src), until-dstLen)
|
|
return copy(dst[dstLen:], src[:num])
|
|
}
|
|
|
|
// isHTML reports whether buf looks like an HTML document, i.e. whether it
// contains "<html" or "<!doctype" anywhere in it.  The comparison is
// case-insensitive.
func isHTML(buf []byte) bool {
	lowered := strings.ToLower(string(buf))

	for _, marker := range []string{"<html", "<!doctype"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}

	return false
}
|
|
|
|
// Read file data and count the number of rules
|
|
func parseFilter(f *Filter, reader io.Reader) error {
|
|
ruleCount := 0
|
|
r := bufio.NewReader(reader)
|
|
|
|
log.Debug("Filters: parsing %s", f.URL)
|
|
|
|
var err error
|
|
for err == nil {
|
|
var line string
|
|
line, err = r.ReadString('\n')
|
|
if err != nil && err != io.EOF {
|
|
return err
|
|
}
|
|
|
|
line = strings.TrimSpace(line)
|
|
|
|
if len(line) == 0 ||
|
|
line[0] == '#' ||
|
|
line[0] == '!' {
|
|
continue
|
|
}
|
|
|
|
ruleCount++
|
|
}
|
|
|
|
log.Debug("Filters: %s: %d rules", f.URL, ruleCount)
|
|
|
|
f.RuleCount = uint64(ruleCount)
|
|
return nil
|
|
}
|
|
|
|
// Read data, parse, write to a file
|
|
func writeFile(f *Filter, reader io.Reader, outFile *os.File) error {
|
|
ruleCount := 0
|
|
buf := make([]byte, 64*1024)
|
|
total := 0
|
|
var chunk []byte
|
|
|
|
firstChunk := make([]byte, 4*1024)
|
|
firstChunkLen := 0
|
|
|
|
for {
|
|
n, err := reader.Read(buf)
|
|
if err != nil && err != io.EOF {
|
|
return err
|
|
}
|
|
total += n
|
|
|
|
if !isPrintableText(buf[:n]) {
|
|
return fmt.Errorf("data contains non-printable characters")
|
|
}
|
|
|
|
if firstChunk != nil {
|
|
// gather full buffer firstChunk and perform its data tests
|
|
firstChunkLen += gatherUntil(firstChunk, firstChunkLen, buf[:n], len(firstChunk))
|
|
|
|
if firstChunkLen == len(firstChunk) ||
|
|
err == io.EOF {
|
|
|
|
if isHTML(firstChunk[:firstChunkLen]) {
|
|
return fmt.Errorf("data is HTML, not plain text")
|
|
}
|
|
|
|
firstChunk = nil
|
|
}
|
|
}
|
|
|
|
_, err2 := outFile.Write(buf[:n])
|
|
if err2 != nil {
|
|
return err2
|
|
}
|
|
|
|
chunk = append(chunk, buf[:n]...)
|
|
s := string(chunk)
|
|
for len(s) != 0 {
|
|
i, line := splitNext(&s, '\n')
|
|
if i < 0 && err != io.EOF {
|
|
// no more lines in the current chunk
|
|
break
|
|
}
|
|
chunk = []byte(s)
|
|
|
|
if len(line) == 0 ||
|
|
line[0] == '#' ||
|
|
line[0] == '!' {
|
|
continue
|
|
}
|
|
|
|
ruleCount++
|
|
}
|
|
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
}
|
|
|
|
log.Debug("Filters: updated filter %s: %d bytes, %d rules",
|
|
f.URL, total, ruleCount)
|
|
|
|
f.RuleCount = uint64(ruleCount)
|
|
return nil
|
|
}
|
|
|
|
// splitNext cuts the leading part of *data up to the first occurrence of the
// separator byte by.
//
// It returns the position of the separator within the original string and the
// part preceding it with surrounding whitespace trimmed; *data is advanced
// past the separator.  If the separator is not found, the returned position is
// negative, the whole (trimmed) string is returned, and *data becomes "".
func splitNext(data *string, by byte) (int, string) {
	input := *data

	pos := strings.IndexByte(input, by)
	if pos < 0 {
		// no separator: everything that is left is the last part
		*data = ""
		return pos, strings.TrimSpace(input)
	}

	*data = input[pos+1:]
	return pos, strings.TrimSpace(input[:pos])
}
|