From 472fedc5651a2bbebdd8c2671142904ee8392dc5 Mon Sep 17 00:00:00 2001 From: Douglas Paz Date: Wed, 16 Jul 2025 15:32:27 -0300 Subject: [PATCH] feat: add comandohds indexer (#31) Co-authored-by: Felipe Marinho --- api/comandohds.go | 263 ++++++++++++++++++++++++++++++++++++++++++++++ api/index.go | 11 ++ main.go | 1 + 3 files changed, 275 insertions(+) create mode 100644 api/comandohds.go diff --git a/api/comandohds.go b/api/comandohds.go new file mode 100644 index 0000000..480914b --- /dev/null +++ b/api/comandohds.go @@ -0,0 +1,263 @@ +package handler + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/url" + "slices" + "strings" + "time" + "regexp" + + "github.com/PuerkitoBio/goquery" + "github.com/hbollon/go-edlib" + + "github.com/felipemarinho97/torrent-indexer/magnet" + "github.com/felipemarinho97/torrent-indexer/schema" + goscrape "github.com/felipemarinho97/torrent-indexer/scrape" + "github.com/felipemarinho97/torrent-indexer/utils" +) + +var comandohds = IndexerMeta{ + URL: "https://comandohds.org/", + SearchURL: "?s=", +} + +var title_re = regexp.MustCompile(`^[(Filme)|(Série)\s]+`) + +func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Request) { + start := time.Now() + defer func() { + i.metrics.IndexerDuration.WithLabelValues("comandohds").Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues("comandohds").Inc() + }() + + ctx := r.Context() + // supported query params: q, page, filter_results + q := r.URL.Query().Get("q") + page := r.URL.Query().Get("page") + + // URL encode query param + q = url.QueryEscape(q) + url := comandohds.URL + if q != "" { + url = fmt.Sprintf("%s%s%s", url, comandohds.SearchURL, q) + } else if page != "" { + url = fmt.Sprintf("%spage/%s", url, page) + } + + fmt.Println("URL:>", url) + resp, err := i.requester.GetDocument(ctx, url) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + if err != nil { + fmt.Println(err) + } + i.metrics.IndexerErrors.WithLabelValues("comandohds").Inc() + return + } + defer resp.Close() + + doc, err := goquery.NewDocumentFromReader(resp) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + if err != nil { + fmt.Println(err) + } + + i.metrics.IndexerErrors.WithLabelValues("comandohds").Inc() + return + } + + var links []string + doc.Find(".post").Each(func(i int, s *goquery.Selection) { + link, _ := s.Find("div.title > a").Attr("href") + links = append(links, link) + }) + + var itChan = make(chan []schema.IndexedTorrent) + var errChan = make(chan error) + indexedTorrents := []schema.IndexedTorrent{} + for _, link := range links { + go func(link string) { + torrents, err := getTorrentsComandoHDs(ctx, i, link) + if err != nil { + fmt.Println(err) + errChan <- err + } + itChan <- torrents + }(link) + } + + for i := 0; i < len(links); i++ { + select { + case torrents := <-itChan: + indexedTorrents = append(indexedTorrents, torrents...) + case err := <-errChan: + fmt.Println(err) + } + } + + for i, it := range indexedTorrents { + jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") + qLower := strings.ToLower(q) + splitLength := 2 + indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) + } + + // remove the ones with zero similarity + if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { + indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { + return it.Similarity > 0 + }) + } + + // sort by similarity + slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { + return int((j.Similarity - i.Similarity) * 1000) + }) + + // send to search index + go func() { + _ = i.search.IndexTorrents(indexedTorrents) + }() + + w.Header().Set("Content-Type", "application/json") + err = json.NewEncoder(w).Encode(Response{ + Results: indexedTorrents, + Count: len(indexedTorrents), + }) + if err != nil { + fmt.Println(err) + } +} + +func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]schema.IndexedTorrent, error) { + var indexedTorrents []schema.IndexedTorrent + doc, err := getDocument(ctx, i, link) + if err != nil { + return nil, err + } + + article := doc.Find("article") + title := title_re.ReplaceAllString(article.Find(".main_title > h1").Text(), "") + textContent := article.Find("div.content") + date := getPublishedDateTDF(doc) + magnets := textContent.Find("a[href^=\"magnet\"]") + var magnetLinks []string + magnets.Each(func(i int, s *goquery.Selection) { + magnetLink, _ := s.Attr("href") + magnetLinks = append(magnetLinks, magnetLink) + }) + + var audio []schema.Audio + var year string + var size []string + article.Find("div.content p").Each(func(i int, s *goquery.Selection) { + // pattern: + // »INFORMAÇÕES« + // Titulo Traduzido: O Guerreiro Banido + // Titulo Original: 天龍八部之喬峰傳 + // : 5.7 + // Ano de Lançamento: 2023 + // Gênero: Ação + // Formato: MKV + // Qualidade: WEB-DL + // Idioma: Português | Inglês + // Legenda: Português + // Tamanho: – GB + // Qualidade Áudio e Vídeo: 10 + // Duração: 130 Min + // Servidor: Torrent + text := s.Text() + + audio = append(audio, findAudioFromText(text)...) + y := findYearFromText(text, title) + if y != "" { + year = y + } + size = append(size, findSizesFromText(text)...) + }) + + // find any link from imdb + imdbLink := "" + article.Find("div.content a").Each(func(i int, s *goquery.Selection) { + link, _ := s.Attr("href") + _imdbLink, err := getIMDBLink(link) + if err == nil { + imdbLink = _imdbLink + } + }) + + size = stableUniq(size) + + var chanIndexedTorrent = make(chan schema.IndexedTorrent) + + // for each magnet link, create a new indexed torrent + for it, magnetLink := range magnetLinks { + it := it + go func(it int, magnetLink string) { + magnet, err := magnet.ParseMagnetUri(magnetLink) + if err != nil { + fmt.Println(err) + } + releaseTitle := magnet.DisplayName + infoHash := magnet.InfoHash.String() + trackers := magnet.Trackers + magnetAudio := []schema.Audio{} + if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { + magnetAudio = append(magnetAudio, audio...) + } else if len(audio) > 1 { + // remove portuguese audio, and append to magnetAudio + for _, a := range audio { + if a != schema.AudioPortuguese { + magnetAudio = append(magnetAudio, a) + } + } + } else { + magnetAudio = append(magnetAudio, audio...) + } + + peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) + if err != nil { + fmt.Println(err) + } + + title := processTitle(title, magnetAudio) + + // if the number of sizes is equal to the number of magnets, then assign the size to each indexed torrent in order + var mySize string + if len(size) == len(magnetLinks) { + mySize = size[it] + } + + ixt := schema.IndexedTorrent{ + Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + OriginalTitle: title, + Details: link, + Year: year, + IMDB: imdbLink, + Audio: magnetAudio, + MagnetLink: magnetLink, + Date: date, + InfoHash: infoHash, + Trackers: trackers, + LeechCount: peer, + SeedCount: seed, + Size: mySize, + } + chanIndexedTorrent <- ixt + }(it, magnetLink) + } + + for i := 0; i < len(magnetLinks); i++ { + it := <-chanIndexedTorrent + indexedTorrents = append(indexedTorrents, it) + } + + return indexedTorrents, nil +} diff --git a/api/index.go b/api/index.go index 6763c5b..39f9f4c 100644 --- a/api/index.go +++ b/api/index.go @@ -77,6 +77,17 @@ func HandlerIndex(w http.ResponseWriter, r *http.Request) { }, }, }, + "/indexers/comandohds": []map[string]interface{}{ + { + "method": "GET", + "page": "page number", + "description": "Indexer for Comando HDs", + "query_params": map[string]string{ + "q": "search query", + "filter_results": "if results with similarity equals to zero should be filtered (true/false)", + }, + }, + }, "/indexers/starck-filmes": []map[string]interface{}{ { "method": "GET", diff --git a/main.go b/main.go index 64f81e8..4fb7675 100644 --- a/main.go +++ b/main.go @@ -49,6 +49,7 @@ func main() { indexerMux.HandleFunc("/indexers/comando_torrents", indexers.HandlerComandoIndexer) indexerMux.HandleFunc("/indexers/torrent-dos-filmes", indexers.HandlerTorrentDosFilmesIndexer) indexerMux.HandleFunc("/indexers/bludv", indexers.HandlerBluDVIndexer) + indexerMux.HandleFunc("/indexers/comandohds", indexers.HandlerComandoHDsIndexer) indexerMux.HandleFunc("/indexers/starck-filmes", indexers.HandlerStarckFilmesIndexer) indexerMux.HandleFunc("/indexers/manual", indexers.HandlerManualIndexer) indexerMux.HandleFunc("/search", search.SearchTorrentHandler)