Files
torrent-indexer/api/comandohds.go
Felipe Marinho d9141c8df7 new: feat: add magnet-metadata-api post processor (#39)
* new: feat: add magnet-metadata-api post processor

* chg: fix: lint issue

* chg: chore: comment optional containers

* chg: fix: remove redundant check
2025-07-29 12:34:37 -03:00

227 lines
5.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package handler
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/felipemarinho97/torrent-indexer/magnet"
"github.com/felipemarinho97/torrent-indexer/schema"
goscrape "github.com/felipemarinho97/torrent-indexer/scrape"
"github.com/felipemarinho97/torrent-indexer/utils"
)
var comandohds = IndexerMeta{
Label: "comandohds",
URL: "https://comandohds.org/",
SearchURL: "?s=",
PagePattern: "page/%s",
}
var title_re = regexp.MustCompile(`^[(Filme)|(Série)\s]+`)
func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Request) {
start := time.Now()
metadata := comandohds
defer func() {
i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
}()
ctx := r.Context()
// supported query params: q, page, filter_results
q := r.URL.Query().Get("q")
page := r.URL.Query().Get("page")
// URL encode query param
q = url.QueryEscape(q)
url := metadata.URL
if q != "" {
url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
} else if page != "" {
url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
}
fmt.Println("URL:>", url)
resp, err := i.requester.GetDocument(ctx, url)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
if err != nil {
fmt.Println(err)
}
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
return
}
defer resp.Close()
doc, err := goquery.NewDocumentFromReader(resp)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
if err != nil {
fmt.Println(err)
}
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
return
}
var links []string
doc.Find(".post").Each(func(i int, s *goquery.Selection) {
link, _ := s.Find("div.title > a").Attr("href")
links = append(links, link)
})
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsComandoHDs(ctx, i, link)
})
// Apply post-processors
postProcessedTorrents := indexedTorrents
for _, processor := range i.postProcessors {
postProcessedTorrents = processor(i, r, postProcessedTorrents)
}
w.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w).Encode(Response{
Results: postProcessedTorrents,
Count: len(postProcessedTorrents),
})
if err != nil {
fmt.Println(err)
}
}
func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]schema.IndexedTorrent, error) {
var indexedTorrents []schema.IndexedTorrent
doc, err := getDocument(ctx, i, link)
if err != nil {
return nil, err
}
article := doc.Find("article")
title := title_re.ReplaceAllString(article.Find(".main_title > h1").Text(), "")
textContent := article.Find("div.content")
date := getPublishedDateFromMeta(doc)
magnets := textContent.Find("a[href^=\"magnet\"]")
var magnetLinks []string
magnets.Each(func(i int, s *goquery.Selection) {
magnetLink, _ := s.Attr("href")
magnetLinks = append(magnetLinks, magnetLink)
})
var audio []schema.Audio
var year string
var size []string
article.Find("div.content p").Each(func(i int, s *goquery.Selection) {
// pattern:
// »INFORMAÇÕES«
// Titulo Traduzido: O Guerreiro Banido
// Titulo Original: 天龍八部之喬峰傳
// <picture />: 5.7
// Ano de Lançamento: 2023
// Gênero: Ação
// Formato: MKV
// Qualidade: WEB-DL
// Idioma: Português | Inglês
// Legenda: Português
// Tamanho: GB
// Qualidade Áudio e Vídeo: 10
// Duração: 130 Min
// Servidor: Torrent
text := s.Text()
audio = append(audio, findAudioFromText(text)...)
y := findYearFromText(text, title)
if y != "" {
year = y
}
size = append(size, findSizesFromText(text)...)
})
// find any link from imdb
imdbLink := ""
article.Find("div.content a").Each(func(i int, s *goquery.Selection) {
link, _ := s.Attr("href")
_imdbLink, err := getIMDBLink(link)
if err == nil {
imdbLink = _imdbLink
}
})
size = utils.StableUniq(size)
var chanIndexedTorrent = make(chan schema.IndexedTorrent)
// for each magnet link, create a new indexed torrent
for it, magnetLink := range magnetLinks {
it := it
go func(it int, magnetLink string) {
magnet, err := magnet.ParseMagnetUri(magnetLink)
if err != nil {
fmt.Println(err)
}
releaseTitle := strings.TrimSpace(magnet.DisplayName)
infoHash := magnet.InfoHash.String()
trackers := magnet.Trackers
for i, tracker := range trackers {
trackers[i] = strings.TrimSpace(tracker)
}
magnetAudio := getAudioFromTitle(releaseTitle, audio)
peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers)
if err != nil {
fmt.Println(err)
}
title := processTitle(title, magnetAudio)
// if the number of sizes is equal to the number of magnets, then assign the size to each indexed torrent in order
var mySize string
if len(size) == len(magnetLinks) {
mySize = size[it]
}
if mySize == "" {
go func() {
_, _ = i.magnetMetadataAPI.FetchMetadata(ctx, magnetLink)
}()
}
ixt := schema.IndexedTorrent{
Title: releaseTitle,
OriginalTitle: title,
Details: link,
Year: year,
IMDB: imdbLink,
Audio: magnetAudio,
MagnetLink: magnetLink,
Date: date,
InfoHash: infoHash,
Trackers: trackers,
LeechCount: peer,
SeedCount: seed,
Size: mySize,
}
chanIndexedTorrent <- ixt
}(it, magnetLink)
}
for i := 0; i < len(magnetLinks); i++ {
it := <-chanIndexedTorrent
indexedTorrents = append(indexedTorrents, it)
}
return indexedTorrents, nil
}