Feat/Rede torrents (#34)
* new: feat: add brand new redetorrent.com indexer * chg: refactor: create common package * chg: fix: comandohds formatting * chg: fix: dual audio detection * chg: fix: parsing issues * chg: refactor: remove duplicated code * chg: refactor: move test funcs to common file
This commit is contained in:
187
api/common.go
Normal file
187
api/common.go
Normal file
@@ -0,0 +1,187 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/felipemarinho97/torrent-indexer/schema"
|
||||
)
|
||||
|
||||
func getPublishedDateFromMeta(document *goquery.Document) time.Time {
|
||||
var date time.Time
|
||||
//<meta property="article:published_time" content="2019-08-23T13:20:57+00:00">
|
||||
datePublished := strings.TrimSpace(document.Find("meta[property=\"article:published_time\"]").AttrOr("content", ""))
|
||||
|
||||
if datePublished != "" {
|
||||
date, _ = time.Parse(time.RFC3339, datePublished)
|
||||
}
|
||||
|
||||
return date
|
||||
}
|
||||
|
||||
type datePattern struct {
|
||||
regex *regexp.Regexp
|
||||
layout string
|
||||
}
|
||||
|
||||
var datePatterns = []datePattern{
|
||||
{regexp.MustCompile(`\d{4}-\d{2}-\d{2}`), "2006-01-02"},
|
||||
{regexp.MustCompile(`\d{2}-\d{2}-\d{4}`), "02-01-2006"},
|
||||
{regexp.MustCompile(`\d{2}/\d{2}/\d{4}`), "02/01/2006"},
|
||||
}
|
||||
|
||||
// getPublishedDateFromRawString extracts the date from a raw string using predefined patterns.
|
||||
func getPublishedDateFromRawString(dateStr string) time.Time {
|
||||
for _, p := range datePatterns {
|
||||
match := p.regex.FindString(dateStr)
|
||||
|
||||
if match != "" {
|
||||
date, err := time.Parse(p.layout, match)
|
||||
if err == nil {
|
||||
return date.UTC()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return time.Time{}
|
||||
}
|
||||
|
||||
// getSeparator returns the separator used in the string.
|
||||
// It checks for common separators like "|", ",", "/", and " e "
|
||||
func getSeparator(s string) string {
|
||||
if strings.Contains(s, "|") {
|
||||
return "|"
|
||||
} else if strings.Contains(s, ",") {
|
||||
return ","
|
||||
} else if strings.Contains(s, "/") {
|
||||
return "/"
|
||||
} else if strings.Contains(s, " e ") {
|
||||
return " e "
|
||||
}
|
||||
return " "
|
||||
}
|
||||
|
||||
// findAudioFromText extracts audio languages from a given text.
|
||||
// It looks for patterns like "Áudio: Português, Inglês" or "Idioma: Português, Inglês"
|
||||
func findAudioFromText(text string) []schema.Audio {
|
||||
var audio []schema.Audio
|
||||
re := regexp.MustCompile(`(.udio|Idioma):.?(.*)`)
|
||||
audioMatch := re.FindStringSubmatch(text)
|
||||
if len(audioMatch) > 0 {
|
||||
sep := getSeparator(audioMatch[2])
|
||||
langs_raw := strings.Split(audioMatch[2], sep)
|
||||
for _, lang := range langs_raw {
|
||||
lang = strings.TrimSpace(lang)
|
||||
a := schema.GetAudioFromString(lang)
|
||||
if a != nil {
|
||||
audio = append(audio, *a)
|
||||
} else {
|
||||
fmt.Println("unknown language:", lang)
|
||||
}
|
||||
}
|
||||
}
|
||||
return audio
|
||||
}
|
||||
|
||||
// findYearFromText extracts the year from a given text.
|
||||
// It looks for patterns like "Lançamento: 2001" in the title.
|
||||
func findYearFromText(text string, title string) (year string) {
|
||||
re := regexp.MustCompile(`Lançamento: (.*)`)
|
||||
yearMatch := re.FindStringSubmatch(text)
|
||||
if len(yearMatch) > 0 {
|
||||
year = yearMatch[1]
|
||||
}
|
||||
|
||||
if year == "" {
|
||||
re = regexp.MustCompile(`\((\d{4})\)`)
|
||||
yearMatch := re.FindStringSubmatch(title)
|
||||
if len(yearMatch) > 0 {
|
||||
year = yearMatch[1]
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(year)
|
||||
}
|
||||
|
||||
// findSizesFromText extracts sizes from a given text.
|
||||
// It looks for patterns like "Tamanho: 1.26 GB" or "Tamanho: 700 MB".
|
||||
func findSizesFromText(text string) []string {
|
||||
var sizes []string
|
||||
// everything that ends with GB or MB, using ',' or '.' as decimal separator
|
||||
re := regexp.MustCompile(`(\d+[\.,]?\d+) ?(GB|MB)`)
|
||||
sizesMatch := re.FindAllStringSubmatch(text, -1)
|
||||
if len(sizesMatch) > 0 {
|
||||
for _, size := range sizesMatch {
|
||||
sizes = append(sizes, size[0])
|
||||
}
|
||||
}
|
||||
return sizes
|
||||
}
|
||||
|
||||
// getIMDBLink extracts the IMDB link from a given link.
|
||||
// It looks for patterns like "https://www.imdb.com/title/tt1234567/".
|
||||
// Returns an error if no valid IMDB link is found.
|
||||
func getIMDBLink(link string) (string, error) {
|
||||
var imdbLink string
|
||||
re := regexp.MustCompile(`https://www.imdb.com(/[a-z]{2})?/title/(tt\d+)/?`)
|
||||
|
||||
matches := re.FindStringSubmatch(link)
|
||||
if len(matches) > 0 {
|
||||
imdbLink = matches[0]
|
||||
} else {
|
||||
return "", fmt.Errorf("no imdb link found")
|
||||
}
|
||||
return imdbLink, nil
|
||||
}
|
||||
|
||||
// appendAudioISO639_2Code appends the audio languages to the title in ISO 639-2 code format.
|
||||
// It formats the title to include the audio languages in parentheses.
|
||||
// Example: "Movie Title (eng, por)"
|
||||
func appendAudioISO639_2Code(title string, a []schema.Audio) string {
|
||||
if len(a) > 0 {
|
||||
audio := []string{}
|
||||
for _, lang := range a {
|
||||
audio = append(audio, lang.String())
|
||||
}
|
||||
title = fmt.Sprintf("%s (%s)", title, strings.Join(audio, ", "))
|
||||
}
|
||||
return title
|
||||
}
|
||||
|
||||
// getAudioFromTitle extracts audio languages from the release title.
|
||||
// It checks for common patterns like "nacional", "dual", or "dublado"
|
||||
func getAudioFromTitle(releaseTitle string, audioFromContent []schema.Audio) []schema.Audio {
|
||||
magnetAudio := []schema.Audio{}
|
||||
isNacional := strings.Contains(strings.ToLower(releaseTitle), "nacional")
|
||||
if isNacional {
|
||||
magnetAudio = append(magnetAudio, schema.AudioPortuguese)
|
||||
}
|
||||
|
||||
if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") {
|
||||
magnetAudio = append(magnetAudio, audioFromContent...)
|
||||
// if Portuguese audio is not in the audio slice, append it
|
||||
if !slices.Contains(magnetAudio, schema.AudioPortuguese) {
|
||||
magnetAudio = append(magnetAudio, schema.AudioPortuguese)
|
||||
}
|
||||
} else if len(audioFromContent) > 1 {
|
||||
// remove portuguese audio, and append to magnetAudio
|
||||
for _, a := range audioFromContent {
|
||||
if a != schema.AudioPortuguese {
|
||||
magnetAudio = append(magnetAudio, a)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
magnetAudio = append(magnetAudio, audioFromContent...)
|
||||
}
|
||||
|
||||
// order and uniq the audio slice
|
||||
slices.SortFunc(magnetAudio, func(a, b schema.Audio) int {
|
||||
return strings.Compare(a.String(), b.String())
|
||||
})
|
||||
magnetAudio = slices.Compact(magnetAudio)
|
||||
|
||||
return magnetAudio
|
||||
}
|
||||
Reference in New Issue
Block a user