Feat/Add post processors + refactor project (#37)
* chg: feat: clean known patterns from title * chg: refactor: remove duplicated code, and improve maintainability * chg: feat: add audio tagging post-processor * chg: refactor: add generic parallelMap function * chg: refactor: move more function to common locations * chg: docs: add func docs
This commit is contained in:
97
utils/website.go
Normal file
97
utils/website.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// commonTLDs lists the top-level-domain suffixes, each including its leading
// dot, that getRegexes appends to every entry of commonWebsiteSLDs when it
// builds the known-website alternation.
//
// The entries are emitted as regexp alternatives in this order, so a suffix
// that is a prefix of another one (".co" vs ".com") has to come after the
// longer suffix.
var commonTLDs = []string{
	".com", ".net", ".org", ".info", ".biz", ".co",
	".io", ".xyz", ".me", ".tv", ".cc", ".us",
	".online", ".site", ".la", ".se", ".to",
}
|
||||
|
||||
// commonSubdomains lists the host prefixes that getRegexes puts in front of
// every entry of commonWebsiteSLDs. The empty string stands for "no prefix",
// so the bare domain is covered as well as its "www." form.
var commonSubdomains = []string{"", "www."}
|
||||
|
||||
// commonWebsiteSLDs lists the second-level domain names (the part between an
// optional subdomain and the TLD) of the websites whose references are
// stripped from titles.
//
// Letter case is not significant here: getRegexes wraps the generated
// alternation in a case-insensitive group.
var commonWebsiteSLDs = []string{
	"bludv", "torrentdosfilmes",
	"comando", "comandotorrents", "comandohds",
	"redetorrent", "torrenting",
	"baixarfilmesdubladosviatorrent",
	"hidratorrents", "wolverdonfilmes", "starckfilmes",
	"rapidotorrents", "sitedetorrents", "vamostorrent",
	"AZTORRENTS",
}
|
||||
|
||||
// websitePatterns holds the regexp templates applied to a title. Each
// template contains a single %s verb, which getRegexes replaces with the
// alternation of all known website hosts. The templates are applied in the
// order listed.
var websitePatterns = []string{
	// Bracketed call to action, e.g. "[ ACESSE bludv.com ]".
	`\[\s*ACESSE\s+%s\s*\]`,

	// Host on its own, optionally preceded by "[" and/or followed by "]",
	// e.g. "[ bludv.se ]" or "bludv.xyz".
	`\[?\s*%s(\s*\])?`,
}
|
||||
|
||||
var (
	// regexesOnce guards the one-time construction of regexes in getRegexes.
	regexesOnce sync.Once

	// regexes caches the compiled website expressions; it stays nil until
	// getRegexes has run for the first time.
	regexes []*regexp.Regexp
)
|
||||
|
||||
func getRegexes() []*regexp.Regexp {
|
||||
regexesOnce.Do(func() {
|
||||
var websites strings.Builder
|
||||
websites.WriteString("(?i)(")
|
||||
for _, prefix := range commonSubdomains {
|
||||
for _, name := range commonWebsiteSLDs {
|
||||
for _, tld := range commonTLDs {
|
||||
websites.WriteString(fmt.Sprintf("%s%s%s|", prefix, name, tld))
|
||||
}
|
||||
}
|
||||
}
|
||||
// remove the last pipe character
|
||||
websites.WriteString(")")
|
||||
|
||||
websitesStr := websites.String()
|
||||
websitesStr = strings.Replace(websitesStr, "|)", ")", 1)
|
||||
|
||||
for _, pattern := range websitePatterns {
|
||||
regexes = append(regexes, regexp.MustCompile(fmt.Sprintf(pattern, websitesStr)))
|
||||
}
|
||||
})
|
||||
return regexes
|
||||
}
|
||||
|
||||
// RemoveKnownWebsites removes known website patterns from the title.
|
||||
// It uses a set of common prefixes, names, and TLDs to identify and remove
|
||||
// website references from the title.
|
||||
// It also removes any common patterns like "[ ACESSE bludv.com ]" or
|
||||
// "[ bludv.se ]" or "bludv.xyz".
|
||||
func RemoveKnownWebsites(title string) string {
|
||||
regexes := getRegexes()
|
||||
for _, re := range regexes {
|
||||
title = re.ReplaceAllString(title, "")
|
||||
}
|
||||
title = strings.TrimSpace(title)
|
||||
return title
|
||||
}
|
||||
Reference in New Issue
Block a user