package handler import ( "context" "encoding/json" "fmt" "net/http" "net/url" "regexp" "strings" "time" "github.com/PuerkitoBio/goquery" "github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" goscrape "github.com/felipemarinho97/torrent-indexer/scrape" "github.com/felipemarinho97/torrent-indexer/utils" ) var rede_torrent = IndexerMeta{ Label: "rede_torrent", URL: "https://redetorrent.com/", SearchURL: "index.php?s=", PagePattern: "%s", } func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Request) { start := time.Now() metadata := rede_torrent defer func() { i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds()) i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc() }() ctx := r.Context() // supported query params: q, season, episode, page, filter_results q := r.URL.Query().Get("q") page := r.URL.Query().Get("page") // URL encode query param q = url.QueryEscape(q) url := metadata.URL if q != "" { url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q) } else if page != "" { url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page) } fmt.Println("URL:>", url) resp, err := i.requester.GetDocument(ctx, url) if err != nil { w.WriteHeader(http.StatusInternalServerError) err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) if err != nil { fmt.Println(err) } i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } defer resp.Close() doc, err := goquery.NewDocumentFromReader(resp) if err != nil { w.WriteHeader(http.StatusInternalServerError) err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) if err != nil { fmt.Println(err) } i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } var links []string doc.Find(".capa_lista").Each(func(i int, s *goquery.Selection) { link, _ := s.Find("a").Attr("href") links = append(links, link) }) // if no links were indexed, expire the document in cache if len(links) == 0 { i.requester.ExpireDocument(ctx, url) } // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsRedeTorrent(ctx, i, link) }) // Apply post-processors postProcessedTorrents := indexedTorrents for _, processor := range i.postProcessors { postProcessedTorrents = processor(i, r, postProcessedTorrents) } w.Header().Set("Content-Type", "application/json") err = json.NewEncoder(w).Encode(Response{ Results: postProcessedTorrents, Count: len(postProcessedTorrents), }) if err != nil { fmt.Println(err) } } func getTorrentsRedeTorrent(ctx context.Context, i *Indexer, link string) ([]schema.IndexedTorrent, error) { var indexedTorrents []schema.IndexedTorrent doc, err := getDocument(ctx, i, link) if err != nil { return nil, err } article := doc.Find(".conteudo") // title pattern: "Something - optional balbla (dddd) some shit" - extract "Something" and "dddd" titleRe := regexp.MustCompile(`^(.*?)(?: - (.*?))? \((\d{4})\)`) titleP := titleRe.FindStringSubmatch(article.Find("h1").Text()) if len(titleP) < 3 { return nil, fmt.Errorf("could not extract title from %s", link) } title := strings.TrimSpace(titleP[1]) year := strings.TrimSpace(titleP[3]) textContent := article.Find(".apenas_itemprop") date := getPublishedDateFromMeta(doc) magnets := textContent.Find("a[href^=\"magnet\"]") var magnetLinks []string magnets.Each(func(i int, s *goquery.Selection) { magnetLink, _ := s.Attr("href") magnetLinks = append(magnetLinks, magnetLink) }) var audio []schema.Audio var size []string article.Find("div#informacoes > p").Each(func(i int, s *goquery.Selection) { // pattern: // Filme Bicho de Sete Cabeças Torrent // Título Original: Bicho de Sete Cabeças // Lançamento: 2001 // Gêneros: Drama / Nacional // Idioma: Português // Qualidade: 720p / BluRay // Duração: 1h 14 Minutos // Formato: Mp4 // Vídeo: 10 e Áudio: 10 // Legendas: Português // Nota do Imdb: 7.7 // Tamanho: 1.26 GB // we need to manualy parse because the text is not well formatted htmlContent, err := s.Html() if err != nil { fmt.Println(err) return } // remove any \n and \t characters htmlContent = strings.ReplaceAll(htmlContent, "\n", "") htmlContent = strings.ReplaceAll(htmlContent, "\t", "") // split by
tags and render each line brRe := regexp.MustCompile(``) htmlContent = brRe.ReplaceAllString(htmlContent, "
") lines := strings.Split(htmlContent, "
") var text strings.Builder for _, line := range lines { // remove any HTML tags re := regexp.MustCompile(`<[^>]*>`) line = re.ReplaceAllString(line, "") line = strings.TrimSpace(line) text.WriteString(line + "\n") } audio = append(audio, findAudioFromText(text.String())...) y := findYearFromText(text.String(), title) if y != "" { year = y } size = append(size, findSizesFromText(text.String())...) }) // find any link from imdb imdbLink := "" article.Find("a").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") _imdbLink, err := getIMDBLink(link) if err == nil { imdbLink = _imdbLink } }) size = utils.StableUniq(size) var chanIndexedTorrent = make(chan schema.IndexedTorrent) // for each magnet link, create a new indexed torrent for it, magnetLink := range magnetLinks { it := it go func(it int, magnetLink string) { magnet, err := magnet.ParseMagnetUri(magnetLink) if err != nil { fmt.Println(err) } releaseTitle := magnet.DisplayName infoHash := magnet.InfoHash.String() trackers := magnet.Trackers magnetAudio := getAudioFromTitle(releaseTitle, audio) peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) if err != nil { fmt.Println(err) } title := processTitle(title, magnetAudio) // if the number of sizes is equal to the number of magnets, then assign the size to each indexed torrent in order var mySize string if len(size) == len(magnetLinks) { mySize = size[it] } if mySize == "" { go func() { _, _ = i.magnetMetadataAPI.FetchMetadata(ctx, magnetLink) }() } ixt := schema.IndexedTorrent{ Title: releaseTitle, OriginalTitle: title, Details: link, Year: year, IMDB: imdbLink, Audio: magnetAudio, MagnetLink: magnetLink, Date: date, InfoHash: infoHash, Trackers: trackers, LeechCount: peer, SeedCount: seed, Size: mySize, } chanIndexedTorrent <- ixt }(it, magnetLink) } for i := 0; i < len(magnetLinks); i++ { it := <-chanIndexedTorrent indexedTorrents = append(indexedTorrents, it) } return indexedTorrents, nil }