From 9bb98beb6156f02b6cbab2d3ebc371e0afdced65 Mon Sep 17 00:00:00 2001
From: Felipe Marinho
Date: Wed, 30 Jul 2025 14:16:56 +0000
Subject: [PATCH] chg: fix: html detector

---
 utils/util.go | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/utils/util.go b/utils/util.go
index 23dfadc..96eeb43 100644
--- a/utils/util.go
+++ b/utils/util.go
@@ -2,9 +2,7 @@ package utils
 
 import (
 	"fmt"
-	"strings"
-
-	"golang.org/x/net/html"
+	"regexp"
 )
 
 // Filter filters a slice based on a predicate function.
@@ -84,10 +82,19 @@ func StableUniq(s []string) []string {
 	return uniqValues
 }
 
+var (
+	doctypeRegex = regexp.MustCompile(`(?i)<!DOCTYPE\s+html[^>]*>`)
+	htmlTagRegex = regexp.MustCompile(`(?i)<html[^>]*>[\s\S]*?</html>`)
+	bodyTagRegex = regexp.MustCompile(`(?i)<body[^>]*>[\s\S]*?</body>`)
+)
+
 func IsValidHTML(input string) bool {
-	r := strings.NewReader(input)
-	_, err := html.Parse(r)
-	return err == nil
+	// Check for <!DOCTYPE>, <html>, or <body> tags
+	if !doctypeRegex.MatchString(input) && !htmlTagRegex.MatchString(input) && !bodyTagRegex.MatchString(input) {
+		return false
+	}
+
+	return true
 }
 
 // FormatBytes formats a byte size into a human-readable string.