package handler import ( "bytes" "context" "fmt" "io" "regexp" "slices" "strings" "time" "github.com/PuerkitoBio/goquery" "github.com/felipemarinho97/torrent-indexer/schema" ) // getDocument retrieves a document from the cache or makes a request to get it. // It first checks the Redis cache for the document body. func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) { // try to get from redis first docCache, err := i.redis.Get(ctx, link) if err == nil { i.metrics.CacheHits.WithLabelValues("document_body").Inc() fmt.Printf("returning from long-lived cache: %s\n", link) return goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(docCache))) } defer i.metrics.CacheMisses.WithLabelValues("document_body").Inc() resp, err := i.requester.GetDocument(ctx, link) if err != nil { return nil, err } defer resp.Close() body, err := io.ReadAll(resp) if err != nil { return nil, err } // set cache err = i.redis.Set(ctx, link, body) if err != nil { fmt.Println(err) } doc, err := goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(body))) if err != nil { return nil, err } return doc, nil } func getPublishedDateFromMeta(document *goquery.Document) time.Time { var date time.Time // datePublished := strings.TrimSpace(document.Find("meta[property=\"article:published_time\"]").AttrOr("content", "")) if datePublished != "" { date, _ = time.Parse(time.RFC3339, datePublished) } return date } type datePattern struct { regex *regexp.Regexp layout string } var datePatterns = []datePattern{ {regexp.MustCompile(`\d{4}-\d{2}-\d{2}`), "2006-01-02"}, {regexp.MustCompile(`\d{2}-\d{2}-\d{4}`), "02-01-2006"}, {regexp.MustCompile(`\d{2}/\d{2}/\d{4}`), "02/01/2006"}, } // getPublishedDateFromRawString extracts the date from a raw string using predefined patterns. func getPublishedDateFromRawString(dateStr string) time.Time { for _, p := range datePatterns { match := p.regex.FindString(dateStr) if match != "" { date, err := time.Parse(p.layout, match) if err == nil { return date.UTC() } } } return time.Time{} } // getSeparator returns the separator used in the string. // It checks for common separators like "|", ",", "/", and " e " func getSeparator(s string) string { if strings.Contains(s, "|") { return "|" } else if strings.Contains(s, ",") { return "," } else if strings.Contains(s, "/") { return "/" } else if strings.Contains(s, " e ") { return " e " } return " " } // findAudioFromText extracts audio languages from a given text. // It looks for patterns like "Áudio: Português, Inglês" or "Idioma: Português, Inglês" func findAudioFromText(text string) []schema.Audio { var audio []schema.Audio re := regexp.MustCompile(`(.udio|Idioma):.?(.*)`) audioMatch := re.FindStringSubmatch(text) if len(audioMatch) > 0 { sep := getSeparator(audioMatch[2]) langs_raw := strings.Split(audioMatch[2], sep) for _, lang := range langs_raw { lang = strings.TrimSpace(lang) a := schema.GetAudioFromString(lang) if a != nil { audio = append(audio, *a) } else { fmt.Println("unknown language:", lang) } } } return audio } // findYearFromText extracts the year from a given text. // It looks for patterns like "Lançamento: 2001" in the title. func findYearFromText(text string, title string) (year string) { re := regexp.MustCompile(`Lançamento: (.*)`) yearMatch := re.FindStringSubmatch(text) if len(yearMatch) > 0 { year = yearMatch[1] } if year == "" { re = regexp.MustCompile(`\((\d{4})\)`) yearMatch := re.FindStringSubmatch(title) if len(yearMatch) > 0 { year = yearMatch[1] } } return strings.TrimSpace(year) } // findSizesFromText extracts sizes from a given text. // It looks for patterns like "Tamanho: 1.26 GB" or "Tamanho: 700 MB". func findSizesFromText(text string) []string { var sizes []string // everything that ends with GB or MB, using ',' or '.' as decimal separator re := regexp.MustCompile(`(\d+[\.,]?\d+) ?(GB|MB)`) sizesMatch := re.FindAllStringSubmatch(text, -1) if len(sizesMatch) > 0 { for _, size := range sizesMatch { sizes = append(sizes, size[0]) } } return sizes } // getIMDBLink extracts the IMDB link from a given link. // It looks for patterns like "https://www.imdb.com/title/tt1234567/". // Returns an error if no valid IMDB link is found. func getIMDBLink(link string) (string, error) { var imdbLink string re := regexp.MustCompile(`https://www.imdb.com(/[a-z]{2})?/title/(tt\d+)/?`) matches := re.FindStringSubmatch(link) if len(matches) > 0 { imdbLink = matches[0] } else { return "", fmt.Errorf("no imdb link found") } return imdbLink, nil } // appendAudioISO639_2Code appends the audio languages to the title in ISO 639-2 code format. // It formats the title to include the audio languages in parentheses. // Example: "Movie Title (eng, por)" func appendAudioISO639_2Code(title string, a []schema.Audio) string { if len(a) > 0 { audio := []string{} for _, lang := range a { audio = append(audio, lang.String()) } title = fmt.Sprintf("%s (%s)", title, strings.Join(audio, ", ")) } return title } // getAudioFromTitle extracts audio languages from the release title. // It checks for common patterns like "nacional", "dual", or "dublado" func getAudioFromTitle(releaseTitle string, audioFromContent []schema.Audio) []schema.Audio { magnetAudio := []schema.Audio{} isNacional := strings.Contains(strings.ToLower(releaseTitle), "nacional") if isNacional { magnetAudio = append(magnetAudio, schema.AudioPortuguese) } if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { magnetAudio = append(magnetAudio, audioFromContent...) // if Portuguese audio is not in the audio slice, append it if !slices.Contains(magnetAudio, schema.AudioPortuguese) { magnetAudio = append(magnetAudio, schema.AudioPortuguese) } } else if len(audioFromContent) > 1 { // remove portuguese audio, and append to magnetAudio for _, a := range audioFromContent { if a != schema.AudioPortuguese { magnetAudio = append(magnetAudio, a) } } } else { magnetAudio = append(magnetAudio, audioFromContent...) } // order and uniq the audio slice slices.SortFunc(magnetAudio, func(a, b schema.Audio) int { return strings.Compare(a.String(), b.String()) }) magnetAudio = slices.Compact(magnetAudio) return magnetAudio }