Files
torrent-indexer/api/bludv.go
Felipe Marinho d9141c8df7 new: feat: add magnet-metadata-api post processor (#39)
* new: feat: add magnet-metadata-api post processor

* chg: fix: lint issue

* chg: chore: comment optional containers

* chg: fix: remove redundant check
2025-07-29 12:34:37 -03:00

258 lines
6.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package handler
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/felipemarinho97/torrent-indexer/magnet"
"github.com/felipemarinho97/torrent-indexer/schema"
goscrape "github.com/felipemarinho97/torrent-indexer/scrape"
"github.com/felipemarinho97/torrent-indexer/utils"
)
var bludv = IndexerMeta{
Label: "bludv",
URL: "https://bludv.xyz/",
SearchURL: "?s=",
PagePattern: "page/%s",
}
func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
start := time.Now()
metadata := bludv
defer func() {
i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
}()
ctx := r.Context()
// supported query params: q, season, episode, page, filter_results
q := r.URL.Query().Get("q")
page := r.URL.Query().Get("page")
// URL encode query param
q = url.QueryEscape(q)
url := metadata.URL
if page != "" {
url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
} else {
url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
}
fmt.Println("URL:>", url)
resp, err := i.requester.GetDocument(ctx, url)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
if err != nil {
fmt.Println(err)
}
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
return
}
defer resp.Close()
doc, err := goquery.NewDocumentFromReader(resp)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
if err != nil {
fmt.Println(err)
}
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
return
}
var links []string
doc.Find(".post").Each(func(i int, s *goquery.Selection) {
// get link from h2.entry-title > a
link, _ := s.Find("div.title > a").Attr("href")
links = append(links, link)
})
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsBluDV(ctx, i, link)
})
// Apply post-processors
postProcessedTorrents := indexedTorrents
for _, processor := range i.postProcessors {
postProcessedTorrents = processor(i, r, postProcessedTorrents)
}
w.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w).Encode(Response{
Results: postProcessedTorrents,
Count: len(postProcessedTorrents),
})
if err != nil {
fmt.Println(err)
}
}
func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.IndexedTorrent, error) {
var indexedTorrents []schema.IndexedTorrent
doc, err := getDocument(ctx, i, link)
if err != nil {
return nil, err
}
article := doc.Find(".post")
title := strings.Replace(article.Find(".title > h1").Text(), " - Download", "", -1)
textContent := article.Find("div.content")
date := getPublishedDate(doc)
magnets := textContent.Find("a[href^=\"magnet\"]")
var magnetLinks []string
magnets.Each(func(i int, s *goquery.Selection) {
magnetLink, _ := s.Attr("href")
magnetLinks = append(magnetLinks, magnetLink)
})
adwareLinks := textContent.Find("a[href^=\"https://www.seuvideo.xyz\"]")
adwareLinks.Each(func(_ int, s *goquery.Selection) {
href, _ := s.Attr("href")
// extract querysting "id" from url
parsedUrl, err := url.Parse(href)
if err != nil {
fmt.Println(err)
return
}
magnetLink := parsedUrl.Query().Get("id")
magnetLinkDecoded, err := utils.DecodeAdLink(magnetLink)
if err != nil {
fmt.Printf("failed to decode ad link \"%s\": %v\n", href, err)
return
}
// if decoded magnet link is indeed a magnet link, append it
if strings.HasPrefix(magnetLinkDecoded, "magnet:") {
magnetLinks = append(magnetLinks, magnetLinkDecoded)
} else if !strings.Contains(magnetLinkDecoded, "watch.brplayer") {
fmt.Printf("WARN: link \"%s\" decoding resulted in non-magnet link: %s\n", href, magnetLinkDecoded)
}
})
var audio []schema.Audio
var year string
var size []string
article.Find("div.content p").Each(func(i int, s *goquery.Selection) {
// pattern:
// Título Traduzido: Fundação
// Título Original: Foundation
// IMDb: 7,5
// Ano de Lançamento: 2023
// Gênero: Ação | Aventura | Ficção
// Formato: MKV
// Qualidade: WEB-DL
// Áudio: Português | Inglês
// Idioma: Português | Inglês
// Legenda: Português
// Tamanho:
// Qualidade de Áudio: 10
// Qualidade de Vídeo: 10
// Duração: 59 Min.
// Servidor: Torrent
text := s.Text()
audio = append(audio, findAudioFromText(text)...)
y := findYearFromText(text, title)
if y != "" {
year = y
}
size = append(size, findSizesFromText(text)...)
})
// find any link from imdb
imdbLink := ""
article.Find("div.content a").Each(func(i int, s *goquery.Selection) {
link, _ := s.Attr("href")
_imdbLink, err := getIMDBLink(link)
if err == nil {
imdbLink = _imdbLink
}
})
size = utils.StableUniq(size)
var chanIndexedTorrent = make(chan schema.IndexedTorrent)
// for each magnet link, create a new indexed torrent
for it, magnetLink := range magnetLinks {
it := it
go func(it int, magnetLink string) {
magnet, err := magnet.ParseMagnetUri(magnetLink)
if err != nil {
fmt.Println(err)
}
releaseTitle := magnet.DisplayName
infoHash := magnet.InfoHash.String()
trackers := magnet.Trackers
magnetAudio := getAudioFromTitle(releaseTitle, audio)
peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers)
if err != nil {
fmt.Println(err)
}
title := processTitle(title, magnetAudio)
// if the number of sizes is equal to the number of magnets, then assign the size to each indexed torrent in order
var mySize string
if len(size) == len(magnetLinks) {
mySize = size[it]
}
if mySize == "" {
go func() {
_, _ = i.magnetMetadataAPI.FetchMetadata(ctx, magnetLink)
}()
}
ixt := schema.IndexedTorrent{
Title: releaseTitle,
OriginalTitle: title,
Details: link,
Year: year,
IMDB: imdbLink,
Audio: magnetAudio,
MagnetLink: magnetLink,
Date: date,
InfoHash: infoHash,
Trackers: trackers,
LeechCount: peer,
SeedCount: seed,
Size: mySize,
}
chanIndexedTorrent <- ixt
}(it, magnetLink)
}
for i := 0; i < len(magnetLinks); i++ {
it := <-chanIndexedTorrent
indexedTorrents = append(indexedTorrents, it)
}
return indexedTorrents, nil
}
func getPublishedDate(document *goquery.Document) time.Time {
var date time.Time
//<meta property="article:published_time" content="2019-08-23T13:20:57+00:00">
datePublished := strings.TrimSpace(document.Find("meta[property=\"article:published_time\"]").AttrOr("content", ""))
if datePublished != "" {
date, _ = time.Parse(time.RFC3339, datePublished)
}
return date
}