From bc53fbab1a21397223399f3f9e494a40f3b3f097 Mon Sep 17 00:00:00 2001
From: Douglas Paz
Date: Wed, 16 Jul 2025 15:15:38 -0300
Subject: [PATCH] fix: utils IsValidHTML, uses golang net html parse (#28)

---
 utils/util.go | 50 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/utils/util.go b/utils/util.go
index fe1d462..bd435ff 100644
--- a/utils/util.go
+++ b/utils/util.go
@@ -1,6 +1,11 @@
 package utils
 
-import "regexp"
+import (
+	"io"
+	"strings"
+
+	"golang.org/x/net/html"
+)
 
 func Filter[A any](arr []A, f func(A) bool) []A {
 	var res []A
@@ -14,19 +19,32 @@ func Filter[A any](arr []A, f func(A) bool) []A {
 }
 
+// IsValidHTML reports whether input looks like a complete HTML document: it
+// must contain a doctype declaration, an html start tag and a body start tag.
+// Tag names are matched case-insensitively and may carry attributes; closing
+// tags are not required.
+//
+// html.Parse is deliberately not used here: it applies HTML5 error recovery and
+// builds a tree for arbitrary text, so its error is practically always nil.
 func IsValidHTML(input string) bool {
-	// Check for declaration (case-insensitive)
-	doctypeRegex := regexp.MustCompile(`(?i)`)
-	if !doctypeRegex.MatchString(input) {
-		return false
-	}
-
-	// Check for and tags (case-insensitive)
-	htmlTagRegex := regexp.MustCompile(`(?i)[\s\S]*?`)
-	if !htmlTagRegex.MatchString(input) {
-		return false
-	}
-
-	// Check for and tags (case-insensitive)
-	bodyTagRegex := regexp.MustCompile(`(?i)[\s\S]*?`)
-	return bodyTagRegex.MatchString(input)
+	var hasDoctype, hasHTML, hasBody bool
+
+	z := html.NewTokenizer(strings.NewReader(input))
+	for {
+		switch z.Next() {
+		case html.ErrorToken:
+			// io.EOF is the normal end of input; any other error means the
+			// scan stopped early, so the input is rejected.
+			return z.Err() == io.EOF && hasDoctype && hasHTML && hasBody
+		case html.DoctypeToken:
+			hasDoctype = true
+		case html.StartTagToken:
+			// TagName yields the lower-cased name, so HTML and html match alike.
+			name, _ := z.TagName()
+			switch string(name) {
+			case "html":
+				hasHTML = true
+			case "body":
+				hasBody = true
+			}
+		}
+	}
 }