Files
torrent-indexer/utils/website.go
Felipe Marinho 455f734c8a Feat/Add post processors + refactor project (#37)
* chg: feat: clean known patterns from title

* chg: refactor: remove duplicated code, and improve maintainability

* chg: feat: add audio tagging post-processor

* chg: refactor: add generic parallelMap function

* chg: refactor: move more function to common locations

* chg: docs: add func docs
2025-07-24 01:03:38 -03:00

98 lines
1.8 KiB
Go

package utils
import (
"fmt"
"regexp"
"strings"
"sync"
)
// commonTLDs holds the domain suffixes (leading dot included) that are
// combined with commonSubdomains and commonWebsiteSLDs to enumerate the host
// names stripped from titles.
//
// Order matters: getRegexes emits the entries into a regex alternation in
// slice order and Go's regexp prefers earlier alternatives, so ".com" must
// stay ahead of its prefix ".co" for the longer suffix to be tried first.
var commonTLDs = []string{
	".com", ".net", ".org", ".info", ".biz", ".co",
	".io", ".xyz", ".me", ".tv", ".cc", ".us",
	".online", ".site", ".la", ".se", ".to",
}
// commonSubdomains holds the host prefixes tried in front of every known
// site name: the empty string (bare domain) and "www.".
var commonSubdomains = []string{"", "www."}
// commonWebsiteSLDs holds the second-level domain names of the sites whose
// references are stripped from titles. The regexes built from this list
// carry the (?i) flag, so the letter case of an entry does not affect
// matching.
var commonWebsiteSLDs = []string{
	"bludv", "torrentdosfilmes",
	"comando", "comandotorrents", "comandohds",
	"redetorrent", "torrenting",
	"baixarfilmesdubladosviatorrent",
	"hidratorrents", "wolverdonfilmes", "starckfilmes",
	"rapidotorrents", "sitedetorrents", "vamostorrent",
	"AZTORRENTS",
}
// websitePatterns holds the regex templates used to strip site references.
// Each template contains a single %s verb that getRegexes fills with the
// alternation of all known host names.
//
// RemoveKnownWebsites applies the compiled patterns in slice order, so the
// more specific "ACESSE" form comes first; otherwise the generic form would
// consume the host name and leave "[ ACESSE" behind.
var websitePatterns = []string{
// Bracketed call-to-action, e.g. "[ ACESSE bludv.com ]".
`\[\s*ACESSE\s+%s\s*\]`,
// Bare host name with optional surrounding brackets, e.g. "[ bludv.se ]"
// or "bludv.xyz"; any whitespace directly before the host is consumed too.
`\[?\s*%s(\s*\])?`,
}
// regexesOnce guards the one-time compilation performed by getRegexes.
var regexesOnce sync.Once

// regexes caches the compiled website-removal expressions. It is populated
// by getRegexes and should only be read through that function.
var regexes []*regexp.Regexp

// getRegexes returns the compiled website-removal expressions, building them
// on the first call and returning the cached slice afterwards.
//
// Every combination of commonSubdomains, commonWebsiteSLDs and commonTLDs is
// turned into one alternative of a case-insensitive group, which is then
// substituted into each template of websitePatterns. The (?i) flag is
// inserted where the %s verb sits, so template text placed before the verb
// (such as "ACESSE") stays case-sensitive.
//
// Templates are compiled with regexp.MustCompile, so an invalid template
// panics on first use.
func getRegexes() []*regexp.Regexp {
	regexesOnce.Do(func() {
		hosts := make([]string, 0, len(commonSubdomains)*len(commonWebsiteSLDs)*len(commonTLDs))
		for _, prefix := range commonSubdomains {
			for _, name := range commonWebsiteSLDs {
				for _, tld := range commonTLDs {
					// Quote the host so "." matches only a literal dot.
					// Unescaped, "comando.to" would also match
					// "Comando To" and mangle ordinary titles.
					hosts = append(hosts, regexp.QuoteMeta(prefix+name+tld))
				}
			}
		}
		websitesStr := "(?i)(" + strings.Join(hosts, "|") + ")"

		regexes = make([]*regexp.Regexp, 0, len(websitePatterns))
		for _, pattern := range websitePatterns {
			regexes = append(regexes, regexp.MustCompile(fmt.Sprintf(pattern, websitesStr)))
		}
	})
	return regexes
}
// RemoveKnownWebsites strips references to known websites from title and
// returns the result with leading and trailing whitespace trimmed.
//
// Each expression returned by getRegexes is applied in order, and every match
// is replaced with the empty string. This covers forms such as
// "[ ACESSE bludv.com ]", "[ bludv.se ]" and a bare "bludv.xyz".
func RemoveKnownWebsites(title string) string {
	cleaned := title
	for _, pattern := range getRegexes() {
		cleaned = pattern.ReplaceAllString(cleaned, "")
	}
	return strings.TrimSpace(cleaned)
}