package utils

import (
	"fmt"
	"regexp"
	"strings"
	"sync"
)

// commonTLDs lists the top-level domains (with their leading dot) that are
// combined with every known site name.
//
// Order matters: Go's regexp picks the first alternative that matches, so a
// TLD that is a prefix of another one (".co" vs ".com") must come after the
// longer one, otherwise "site.com" would be matched as "site.co" and leave a
// stray "m" behind.
var commonTLDs = []string{
	".com",
	".net",
	".org",
	".info",
	".biz",
	".co",
	".io",
	".xyz",
	".me",
	".tv",
	".cc",
	".us",
	".online",
	".site",
	".la",
	".se",
	".to",
}

// commonSubdomains lists the host prefixes tried in front of each site name.
var commonSubdomains = []string{
	"", // no prefix
	"www.",
}

// commonWebsiteSLDs lists the second-level domain names of the known sites.
// Matching is case-insensitive, so the letter case used here is irrelevant.
var commonWebsiteSLDs = []string{
	"bludv",
	"torrentdosfilmes",
	"comando",
	"comandotorrents",
	"comandohds",
	"redetorrent",
	"torrenting",
	"baixarfilmesdubladosviatorrent",
	"hidratorrents",
	"wolverdonfilmes",
	"starckfilmes",
	"rapidotorrents",
	"sitedetorrents",
	"vamostorrent",
	"AZTORRENTS",
}

// websitePatterns are fmt templates for the final regular expressions. The
// single %s verb is replaced by the alternation of all known host names.
// They are applied in order, so the more specific "[ ACESSE host ]" form is
// removed before the generic "[ host ]" / bare "host" form.
var websitePatterns = []string{
	`\[\s*ACESSE\s+%s\s*\]`,
	`\[?\s*%s(\s*\])?`,
}

// regexesOnce guards the one-time lazy compilation of regexes.
var regexesOnce sync.Once

// regexes holds the compiled website patterns; populated by getRegexes.
var regexes []*regexp.Regexp

// getRegexes returns the compiled website-removal expressions, building them
// on first use. It is safe for concurrent callers because compilation happens
// inside regexesOnce.
//
// Every subdomain/name/TLD combination becomes one alternative of a single
// case-insensitive group. Each alternative is passed through regexp.QuoteMeta
// so that the dots in host names match only a literal '.'; without that,
// "comando.se" would also match ordinary text such as "Comando Se".
func getRegexes() []*regexp.Regexp {
	regexesOnce.Do(func() {
		hosts := make([]string, 0, len(commonSubdomains)*len(commonWebsiteSLDs)*len(commonTLDs))
		for _, prefix := range commonSubdomains {
			for _, name := range commonWebsiteSLDs {
				for _, tld := range commonTLDs {
					hosts = append(hosts, regexp.QuoteMeta(prefix+name+tld))
				}
			}
		}

		// The (?i) flag sits directly in front of the group, so it applies
		// from the host alternation onward; literal text that precedes it in
		// a template (e.g. "ACESSE") stays case-sensitive.
		websites := "(?i)(" + strings.Join(hosts, "|") + ")"

		compiled := make([]*regexp.Regexp, 0, len(websitePatterns))
		for _, pattern := range websitePatterns {
			compiled = append(compiled, regexp.MustCompile(fmt.Sprintf(pattern, websites)))
		}
		regexes = compiled
	})
	return regexes
}

// RemoveKnownWebsites removes references to known websites from title and
// returns the result with leading and trailing whitespace trimmed.
//
// A reference is any combination of the known subdomains, site names and
// TLDs, optionally wrapped in brackets, for example "[ ACESSE bludv.com ]",
// "[ bludv.se ]" or a bare "bludv.xyz". Host names are matched
// case-insensitively and their dots are matched literally.
func RemoveKnownWebsites(title string) string {
	for _, re := range getRegexes() {
		title = re.ReplaceAllString(title, "")
	}
	return strings.TrimSpace(title)
}