Feat/Rede torrents (#34)

* new: feat: add brand new redetorrent.com indexer * chg: refactor: create common package * chg: fix: comandohds formatting * chg: fix: dual audio detection * chg: fix: parsing issues * chg: refactor: remove duplicated code * chg: refactor: move test funcs to common file
2025-07-22 14:57:05 -03:00
parent 0b21e3b1e7
commit 782f9d6b3f
13 changed files with 762 additions and 327 deletions
--- a/api/common.go
+++ b/api/common.go
@@ -0,0 +1,187 @@
+package handler
+
+import (
+	"fmt"
+	"regexp"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/felipemarinho97/torrent-indexer/schema"
+)
+
+func getPublishedDateFromMeta(document *goquery.Document) time.Time {
+	var date time.Time
+	//<meta property="article:published_time" content="2019-08-23T13:20:57+00:00">
+	datePublished := strings.TrimSpace(document.Find("meta[property=\"article:published_time\"]").AttrOr("content", ""))
+
+	if datePublished != "" {
+		date, _ = time.Parse(time.RFC3339, datePublished)
+	}
+
+	return date
+}
+
+type datePattern struct {
+	regex  *regexp.Regexp
+	layout string
+}
+
+var datePatterns = []datePattern{
+	{regexp.MustCompile(`\d{4}-\d{2}-\d{2}`), "2006-01-02"},
+	{regexp.MustCompile(`\d{2}-\d{2}-\d{4}`), "02-01-2006"},
+	{regexp.MustCompile(`\d{2}/\d{2}/\d{4}`), "02/01/2006"},
+}
+
+// getPublishedDateFromRawString extracts the date from a raw string using predefined patterns.
+func getPublishedDateFromRawString(dateStr string) time.Time {
+	for _, p := range datePatterns {
+		match := p.regex.FindString(dateStr)
+
+		if match != "" {
+			date, err := time.Parse(p.layout, match)
+			if err == nil {
+				return date.UTC()
+			}
+		}
+	}
+
+	return time.Time{}
+}
+
+// getSeparator returns the separator used in the string.
+// It checks for common separators like "|", ",", "/", and " e "
+func getSeparator(s string) string {
+	if strings.Contains(s, "|") {
+		return "|"
+	} else if strings.Contains(s, ",") {
+		return ","
+	} else if strings.Contains(s, "/") {
+		return "/"
+	} else if strings.Contains(s, " e ") {
+		return " e "
+	}
+	return " "
+}
+
+// findAudioFromText extracts audio languages from a given text.
+// It looks for patterns like "Áudio: Português, Inglês" or "Idioma: Português, Inglês"
+func findAudioFromText(text string) []schema.Audio {
+	var audio []schema.Audio
+	re := regexp.MustCompile(`(.udio|Idioma):.?(.*)`)
+	audioMatch := re.FindStringSubmatch(text)
+	if len(audioMatch) > 0 {
+		sep := getSeparator(audioMatch[2])
+		langs_raw := strings.Split(audioMatch[2], sep)
+		for _, lang := range langs_raw {
+			lang = strings.TrimSpace(lang)
+			a := schema.GetAudioFromString(lang)
+			if a != nil {
+				audio = append(audio, *a)
+			} else {
+				fmt.Println("unknown language:", lang)
+			}
+		}
+	}
+	return audio
+}
+
+// findYearFromText extracts the year from a given text.
+// It looks for patterns like "Lançamento: 2001" in the title.
+func findYearFromText(text string, title string) (year string) {
+	re := regexp.MustCompile(`Lançamento: (.*)`)
+	yearMatch := re.FindStringSubmatch(text)
+	if len(yearMatch) > 0 {
+		year = yearMatch[1]
+	}
+
+	if year == "" {
+		re = regexp.MustCompile(`\((\d{4})\)`)
+		yearMatch := re.FindStringSubmatch(title)
+		if len(yearMatch) > 0 {
+			year = yearMatch[1]
+		}
+	}
+	return strings.TrimSpace(year)
+}
+
+// findSizesFromText extracts sizes from a given text.
+// It looks for patterns like "Tamanho: 1.26 GB" or "Tamanho: 700 MB".
+func findSizesFromText(text string) []string {
+	var sizes []string
+	// everything that ends with GB or MB, using ',' or '.' as decimal separator
+	re := regexp.MustCompile(`(\d+[\.,]?\d+) ?(GB|MB)`)
+	sizesMatch := re.FindAllStringSubmatch(text, -1)
+	if len(sizesMatch) > 0 {
+		for _, size := range sizesMatch {
+			sizes = append(sizes, size[0])
+		}
+	}
+	return sizes
+}
+
+// getIMDBLink extracts the IMDB link from a given link.
+// It looks for patterns like "https://www.imdb.com/title/tt1234567/".
+// Returns an error if no valid IMDB link is found.
+func getIMDBLink(link string) (string, error) {
+	var imdbLink string
+	re := regexp.MustCompile(`https://www.imdb.com(/[a-z]{2})?/title/(tt\d+)/?`)
+
+	matches := re.FindStringSubmatch(link)
+	if len(matches) > 0 {
+		imdbLink = matches[0]
+	} else {
+		return "", fmt.Errorf("no imdb link found")
+	}
+	return imdbLink, nil
+}
+
+// appendAudioISO639_2Code appends the audio languages to the title in ISO 639-2 code format.
+// It formats the title to include the audio languages in parentheses.
+// Example: "Movie Title (eng, por)"
+func appendAudioISO639_2Code(title string, a []schema.Audio) string {
+	if len(a) > 0 {
+		audio := []string{}
+		for _, lang := range a {
+			audio = append(audio, lang.String())
+		}
+		title = fmt.Sprintf("%s (%s)", title, strings.Join(audio, ", "))
+	}
+	return title
+}
+
+// getAudioFromTitle extracts audio languages from the release title.
+// It checks for common patterns like "nacional", "dual", or "dublado"
+func getAudioFromTitle(releaseTitle string, audioFromContent []schema.Audio) []schema.Audio {
+	magnetAudio := []schema.Audio{}
+	isNacional := strings.Contains(strings.ToLower(releaseTitle), "nacional")
+	if isNacional {
+		magnetAudio = append(magnetAudio, schema.AudioPortuguese)
+	}
+
+	if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") {
+		magnetAudio = append(magnetAudio, audioFromContent...)
+		// if Portuguese audio is not in the audio slice, append it
+		if !slices.Contains(magnetAudio, schema.AudioPortuguese) {
+			magnetAudio = append(magnetAudio, schema.AudioPortuguese)
+		}
+	} else if len(audioFromContent) > 1 {
+		// remove portuguese audio, and append to magnetAudio
+		for _, a := range audioFromContent {
+			if a != schema.AudioPortuguese {
+				magnetAudio = append(magnetAudio, a)
+			}
+		}
+	} else {
+		magnetAudio = append(magnetAudio, audioFromContent...)
+	}
+
+	// order and uniq the audio slice
+	slices.SortFunc(magnetAudio, func(a, b schema.Audio) int {
+		return strings.Compare(a.String(), b.String())
+	})
+	magnetAudio = slices.Compact(magnetAudio)
+
+	return magnetAudio
+}