chg: fix: html detector
This commit is contained in:
@@ -2,9 +2,7 @@ package utils
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"regexp"
|
||||||
|
|
||||||
"golang.org/x/net/html"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Filter filters a slice based on a predicate function.
|
// Filter filters a slice based on a predicate function.
|
||||||
@@ -84,10 +82,19 @@ func StableUniq(s []string) []string {
|
|||||||
return uniqValues
|
return uniqValues
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
doctypeRegex = regexp.MustCompile(`(?i)<!DOCTYPE\s+html>`)
|
||||||
|
htmlTagRegex = regexp.MustCompile(`(?i)<html[\s\S]*?>[\s\S]*?</html>`)
|
||||||
|
bodyTagRegex = regexp.MustCompile(`(?i)<body[\s\S]*?>[\s\S]*?</body>`)
|
||||||
|
)
|
||||||
|
|
||||||
func IsValidHTML(input string) bool {
|
func IsValidHTML(input string) bool {
|
||||||
r := strings.NewReader(input)
|
// Check for <!DOCTYPE>, <html>, or <body> tags
|
||||||
_, err := html.Parse(r)
|
if !doctypeRegex.MatchString(input) && !htmlTagRegex.MatchString(input) && !bodyTagRegex.MatchString(input) {
|
||||||
return err == nil
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// FormatBytes formats a byte size into a human-readable string.
|
// FormatBytes formats a byte size into a human-readable string.
|
||||||
|
|||||||
Reference in New Issue
Block a user