From 782f9d6b3f08217ebfbca7d4603f1077ee09aac9 Mon Sep 17 00:00:00 2001 From: Felipe Marinho Date: Tue, 22 Jul 2025 14:57:05 -0300 Subject: [PATCH] Feat/Rede torrents (#34) * new: feat: add brand new redetorrent.com indexer * chg: refactor: create common package * chg: fix: comandohds formatting * chg: fix: dual audio detection * chg: fix: parsing issues * chg: refactor: remove duplicated code * chg: refactor: move test funcs to common file --- README.md | 1 + api/bludv.go | 14 +- api/comando_torrents.go | 97 +----------- api/comando_torrents_test.go | 144 ------------------ api/comandohds.go | 22 +-- api/common.go | 187 +++++++++++++++++++++++ api/common_test.go | 241 ++++++++++++++++++++++++++++++ api/index.go | 19 +++ api/manual.go | 15 +- api/rede_torrent.go | 280 +++++++++++++++++++++++++++++++++++ api/starck_filmes.go | 34 +++-- api/torrent_dos_filmes.go | 28 +--- main.go | 7 +- 13 files changed, 762 insertions(+), 327 deletions(-) create mode 100644 api/common.go create mode 100644 api/common_test.go create mode 100644 api/rede_torrent.go diff --git a/README.md b/README.md index 792151c..6a6c3d3 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Visit [https://torrent-indexer.darklyn.org/](https://torrent-indexer.darklyn.org - [torrent-dos-filmes](https://torrentdosfilmes.se/) - [starck-filmes](https://www.starckfilmes.online/) - [comandohds](https://comandohds.org/) +- [rede-torrent](https://redetorrent.com/) ## Deploy diff --git a/api/bludv.go b/api/bludv.go index e84b4a6..2c2102a 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -231,19 +231,7 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.In releaseTitle := magnet.DisplayName infoHash := magnet.InfoHash.String() trackers := magnet.Trackers - magnetAudio := []schema.Audio{} - if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { - magnetAudio = append(magnetAudio, audio...) 
- } else if len(audio) > 1 { - // remove portuguese audio, and append to magnetAudio - for _, a := range audio { - if a != schema.AudioPortuguese { - magnetAudio = append(magnetAudio, a) - } - } - } else { - magnetAudio = append(magnetAudio, audio...) - } + magnetAudio := getAudioFromTitle(releaseTitle, audio) peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) if err != nil { diff --git a/api/comando_torrents.go b/api/comando_torrents.go index 3f732bc..1ed455f 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -230,19 +230,7 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed releaseTitle := magnet.DisplayName infoHash := magnet.InfoHash.String() trackers := magnet.Trackers - magnetAudio := []schema.Audio{} - if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { - magnetAudio = append(magnetAudio, audio...) - } else if len(audio) > 1 { - // remove portuguese audio, and append to magnetAudio - for _, a := range audio { - if a != schema.AudioPortuguese { - magnetAudio = append(magnetAudio, a) - } - } - } else { - magnetAudio = append(magnetAudio, audio...) 
- } + magnetAudio := getAudioFromTitle(releaseTitle, audio) peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) if err != nil { @@ -284,19 +272,6 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed return indexedTorrents, nil } -func getIMDBLink(link string) (string, error) { - var imdbLink string - re := regexp.MustCompile(`https://www.imdb.com(/[a-z]{2})?/title/(tt\d+)/?`) - - matches := re.FindStringSubmatch(link) - if len(matches) > 0 { - imdbLink = matches[0] - } else { - return "", fmt.Errorf("no imdb link found") - } - return imdbLink, nil -} - func parseLocalizedDate(datePublished string) (time.Time, error) { re := regexp.MustCompile(`(\d{1,2}) de (\w+) de (\d{4})`) matches := re.FindStringSubmatch(datePublished) @@ -350,56 +325,6 @@ func stableUniq(s []string) []string { return uniqValues } -func findYearFromText(text string, title string) (year string) { - re := regexp.MustCompile(`Lançamento: (.*)`) - yearMatch := re.FindStringSubmatch(text) - if len(yearMatch) > 0 { - year = yearMatch[1] - } - - if year == "" { - re = regexp.MustCompile(`\((\d{4})\)`) - yearMatch := re.FindStringSubmatch(title) - if len(yearMatch) > 0 { - year = yearMatch[1] - } - } - return strings.TrimSpace(year) -} - -func findAudioFromText(text string) []schema.Audio { - var audio []schema.Audio - re := regexp.MustCompile(`(.udio|Idioma):.?(.*)`) - audioMatch := re.FindStringSubmatch(text) - if len(audioMatch) > 0 { - sep := getSeparator(audioMatch[2]) - langs_raw := strings.Split(audioMatch[2], sep) - for _, lang := range langs_raw { - lang = strings.TrimSpace(lang) - a := schema.GetAudioFromString(lang) - if a != nil { - audio = append(audio, *a) - } else { - fmt.Println("unknown language:", lang) - } - } - } - return audio -} - -func findSizesFromText(text string) []string { - var sizes []string - // everything that ends with GB or MB, using ',' or '.' 
as decimal separator - re := regexp.MustCompile(`(\d+[\.,]?\d+) ?(GB|MB)`) - sizesMatch := re.FindAllStringSubmatch(text, -1) - if len(sizesMatch) > 0 { - for _, size := range sizesMatch { - sizes = append(sizes, size[0]) - } - } - return sizes -} - func processTitle(title string, a []schema.Audio) string { // remove ' - Donwload' from title title = strings.Replace(title, " – Download", "", -1) @@ -413,26 +338,6 @@ func processTitle(title string, a []schema.Audio) string { return title } -func appendAudioISO639_2Code(title string, a []schema.Audio) string { - if len(a) > 0 { - audio := []string{} - for _, lang := range a { - audio = append(audio, lang.String()) - } - title = fmt.Sprintf("%s (%s)", title, strings.Join(audio, ", ")) - } - return title -} - -func getSeparator(s string) string { - if strings.Contains(s, "|") { - return "|" - } else if strings.Contains(s, ",") { - return "," - } - return " " -} - func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) { // try to get from redis first docCache, err := i.redis.Get(ctx, link) diff --git a/api/comando_torrents_test.go b/api/comando_torrents_test.go index 11ee6ab..f41ba64 100644 --- a/api/comando_torrents_test.go +++ b/api/comando_torrents_test.go @@ -4,81 +4,8 @@ import ( "reflect" "testing" "time" - - "github.com/felipemarinho97/torrent-indexer/schema" ) -func Test_findAudioFromText(t *testing.T) { - type args struct { - text string - } - tests := []struct { - name string - args args - want []schema.Audio - }{ - { - name: "should return audio in portuguese", - args: args{ - text: "Áudio: Português", - }, - want: []schema.Audio{ - schema.AudioPortuguese, - }, - }, - { - name: "should return audio in portuguese", - args: args{ - text: "Idioma: Português", - }, - want: []schema.Audio{ - schema.AudioPortuguese, - }, - }, - { - name: "should return audio in portuguese", - args: args{ - text: "Audio: Português", - }, - want: []schema.Audio{ - schema.AudioPortuguese, - }, - }, - 
{ - name: "should return audio in portuguese", - args: args{ - text: ` -»INFORMAÇÕES« -Título Traduzido: O Cangaceiro do Futuro -Título Original: O Cangaceiro do Futuro -IMDb: 7,1 -Gênero:Comédia -Lançamento: 2022 -Qualidade: WEB-DL -Áudio: Português -Legenda: S/L -Formato: MKV -Tamanho: 5.77 GB | 9.60 GB -Duração: 30 Min./Ep. -Qualidade de Áudio: 10 -Qualidade de Vídeo: 10 -Servidor Via: Torrent - `, - }, - want: []schema.Audio{ - schema.AudioPortuguese, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := findAudioFromText(tt.args.text); !reflect.DeepEqual(got, tt.want) { - t.Errorf("findAudioFromText() = %v, want %v", got, tt.want) - } - }) - } -} - func Test_parseLocalizedDate(t *testing.T) { type args struct { datePublished string @@ -119,74 +46,3 @@ func Test_parseLocalizedDate(t *testing.T) { }) } } - -func Test_getIMDBLink(t *testing.T) { - type args struct { - link string - } - tests := []struct { - name string - args args - want string - wantErr bool - }{ - { - name: "should return imdb link", - args: args{ - link: "https://www.imdb.com/title/tt1234567", - }, - want: "https://www.imdb.com/title/tt1234567", - wantErr: false, - }, - { - name: "should return imdb link when end with /", - args: args{ - link: "https://www.imdb.com/title/tt1234567/", - }, - want: "https://www.imdb.com/title/tt1234567/", - wantErr: false, - }, - { - name: "should return imdb link when end with /", - args: args{ - link: "https://www.imdb.com/title/tt1234567/", - }, - want: "https://www.imdb.com/title/tt1234567/", - wantErr: false, - }, - { - name: "should return imdb link when it has a language", - args: args{ - link: "https://www.imdb.com/pt/title/tt18722864/", - }, - want: "https://www.imdb.com/pt/title/tt18722864/", - }, - { - name: "should return imdb link when it has a language", - args: args{ - link: "https://www.imdb.com/pt/title/tt34608980/", - }, - want: "https://www.imdb.com/pt/title/tt34608980/", - }, - { - name: "should return 
error when link is invalid", - args: args{ - link: "https://www.google.com", - }, - want: "", - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := getIMDBLink(tt.args.link) - if (err != nil) != tt.wantErr { - t.Errorf("getIMDBLink() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("getIMDBLink() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/api/comandohds.go b/api/comandohds.go index 480914b..2c815fa 100644 --- a/api/comandohds.go +++ b/api/comandohds.go @@ -6,10 +6,10 @@ import ( "fmt" "net/http" "net/url" + "regexp" "slices" "strings" "time" - "regexp" "github.com/PuerkitoBio/goquery" "github.com/hbollon/go-edlib" @@ -146,7 +146,7 @@ func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]sche article := doc.Find("article") title := title_re.ReplaceAllString(article.Find(".main_title > h1").Text(), "") textContent := article.Find("div.content") - date := getPublishedDateTDF(doc) + date := getPublishedDateFromMeta(doc) magnets := textContent.Find("a[href^=\"magnet\"]") var magnetLinks []string magnets.Each(func(i int, s *goquery.Selection) { @@ -205,23 +205,15 @@ func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]sche if err != nil { fmt.Println(err) } - releaseTitle := magnet.DisplayName + releaseTitle := strings.TrimSpace(magnet.DisplayName) infoHash := magnet.InfoHash.String() trackers := magnet.Trackers - magnetAudio := []schema.Audio{} - if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { - magnetAudio = append(magnetAudio, audio...) - } else if len(audio) > 1 { - // remove portuguese audio, and append to magnetAudio - for _, a := range audio { - if a != schema.AudioPortuguese { - magnetAudio = append(magnetAudio, a) - } - } - } else { - magnetAudio = append(magnetAudio, audio...) 
+ for i, tracker := range trackers { + trackers[i] = strings.TrimSpace(tracker) } + magnetAudio := getAudioFromTitle(releaseTitle, audio) + peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) if err != nil { fmt.Println(err) diff --git a/api/common.go b/api/common.go new file mode 100644 index 0000000..3a32dfb --- /dev/null +++ b/api/common.go @@ -0,0 +1,187 @@ +package handler + +import ( + "fmt" + "regexp" + "slices" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/felipemarinho97/torrent-indexer/schema" +) + +func getPublishedDateFromMeta(document *goquery.Document) time.Time { + var date time.Time + // + datePublished := strings.TrimSpace(document.Find("meta[property=\"article:published_time\"]").AttrOr("content", "")) + + if datePublished != "" { + date, _ = time.Parse(time.RFC3339, datePublished) + } + + return date +} + +type datePattern struct { + regex *regexp.Regexp + layout string +} + +var datePatterns = []datePattern{ + {regexp.MustCompile(`\d{4}-\d{2}-\d{2}`), "2006-01-02"}, + {regexp.MustCompile(`\d{2}-\d{2}-\d{4}`), "02-01-2006"}, + {regexp.MustCompile(`\d{2}/\d{2}/\d{4}`), "02/01/2006"}, +} + +// getPublishedDateFromRawString extracts the date from a raw string using predefined patterns. +func getPublishedDateFromRawString(dateStr string) time.Time { + for _, p := range datePatterns { + match := p.regex.FindString(dateStr) + + if match != "" { + date, err := time.Parse(p.layout, match) + if err == nil { + return date.UTC() + } + } + } + + return time.Time{} +} + +// getSeparator returns the separator used in the string. 
+// It checks for common separators like "|", ",", "/", and " e " +func getSeparator(s string) string { + if strings.Contains(s, "|") { + return "|" + } else if strings.Contains(s, ",") { + return "," + } else if strings.Contains(s, "/") { + return "/" + } else if strings.Contains(s, " e ") { + return " e " + } + return " " +} + +// findAudioFromText extracts audio languages from a given text. +// It looks for patterns like "Áudio: Português, Inglês" or "Idioma: Português, Inglês" +func findAudioFromText(text string) []schema.Audio { + var audio []schema.Audio + re := regexp.MustCompile(`(.udio|Idioma):.?(.*)`) + audioMatch := re.FindStringSubmatch(text) + if len(audioMatch) > 0 { + sep := getSeparator(audioMatch[2]) + langs_raw := strings.Split(audioMatch[2], sep) + for _, lang := range langs_raw { + lang = strings.TrimSpace(lang) + a := schema.GetAudioFromString(lang) + if a != nil { + audio = append(audio, *a) + } else { + fmt.Println("unknown language:", lang) + } + } + } + return audio +} + +// findYearFromText extracts the year from a given text. +// It looks for patterns like "Lançamento: 2001" in the title. +func findYearFromText(text string, title string) (year string) { + re := regexp.MustCompile(`Lançamento: (.*)`) + yearMatch := re.FindStringSubmatch(text) + if len(yearMatch) > 0 { + year = yearMatch[1] + } + + if year == "" { + re = regexp.MustCompile(`\((\d{4})\)`) + yearMatch := re.FindStringSubmatch(title) + if len(yearMatch) > 0 { + year = yearMatch[1] + } + } + return strings.TrimSpace(year) +} + +// findSizesFromText extracts sizes from a given text. +// It looks for patterns like "Tamanho: 1.26 GB" or "Tamanho: 700 MB". +func findSizesFromText(text string) []string { + var sizes []string + // everything that ends with GB or MB, using ',' or '.' 
as decimal separator + re := regexp.MustCompile(`(\d+[\.,]?\d+) ?(GB|MB)`) + sizesMatch := re.FindAllStringSubmatch(text, -1) + if len(sizesMatch) > 0 { + for _, size := range sizesMatch { + sizes = append(sizes, size[0]) + } + } + return sizes +} + +// getIMDBLink extracts the IMDB link from a given link. +// It looks for patterns like "https://www.imdb.com/title/tt1234567/". +// Returns an error if no valid IMDB link is found. +func getIMDBLink(link string) (string, error) { + var imdbLink string + re := regexp.MustCompile(`https://www.imdb.com(/[a-z]{2})?/title/(tt\d+)/?`) + + matches := re.FindStringSubmatch(link) + if len(matches) > 0 { + imdbLink = matches[0] + } else { + return "", fmt.Errorf("no imdb link found") + } + return imdbLink, nil +} + +// appendAudioISO639_2Code appends the audio languages to the title in ISO 639-2 code format. +// It formats the title to include the audio languages in parentheses. +// Example: "Movie Title (eng, por)" +func appendAudioISO639_2Code(title string, a []schema.Audio) string { + if len(a) > 0 { + audio := []string{} + for _, lang := range a { + audio = append(audio, lang.String()) + } + title = fmt.Sprintf("%s (%s)", title, strings.Join(audio, ", ")) + } + return title +} + +// getAudioFromTitle extracts audio languages from the release title. +// It checks for common patterns like "nacional", "dual", or "dublado" +func getAudioFromTitle(releaseTitle string, audioFromContent []schema.Audio) []schema.Audio { + magnetAudio := []schema.Audio{} + isNacional := strings.Contains(strings.ToLower(releaseTitle), "nacional") + if isNacional { + magnetAudio = append(magnetAudio, schema.AudioPortuguese) + } + + if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { + magnetAudio = append(magnetAudio, audioFromContent...) 
+ // if Portuguese audio is not in the audio slice, append it + if !slices.Contains(magnetAudio, schema.AudioPortuguese) { + magnetAudio = append(magnetAudio, schema.AudioPortuguese) + } + } else if len(audioFromContent) > 1 { + // remove portuguese audio, and append to magnetAudio + for _, a := range audioFromContent { + if a != schema.AudioPortuguese { + magnetAudio = append(magnetAudio, a) + } + } + } else { + magnetAudio = append(magnetAudio, audioFromContent...) + } + + // order and uniq the audio slice + slices.SortFunc(magnetAudio, func(a, b schema.Audio) int { + return strings.Compare(a.String(), b.String()) + }) + magnetAudio = slices.Compact(magnetAudio) + + return magnetAudio +} diff --git a/api/common_test.go b/api/common_test.go new file mode 100644 index 0000000..3f19cc7 --- /dev/null +++ b/api/common_test.go @@ -0,0 +1,241 @@ +package handler + +import ( + "reflect" + "testing" + "time" + + "github.com/felipemarinho97/torrent-indexer/schema" +) + +func Test_getPublishedDateFromRawString(t *testing.T) { + type args struct { + dateStr string + } + tests := []struct { + name string + args args + want time.Time + }{ + { + name: "should parse date in format 2025-01-01", + args: args{ + dateStr: "2025-01-01", + }, + want: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC), + }, + { + name: "should parse date in format 01-01-2025", + args: args{ + dateStr: "01-01-2025", + }, + want: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC), + }, + { + name: "should parse date in format 01/01/2025", + args: args{ + dateStr: "01/01/2025", + }, + want: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC), + }, + { + name: "should parse date from starck-filmes link", + args: args{ + dateStr: "https://www.starckfilmes.online/catalog/jogos-de-seducao-2025-18-07-2025/", + }, + want: time.Date(2025, 7, 18, 0, 0, 0, 0, time.UTC), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := getPublishedDateFromRawString(tt.args.dateStr); !reflect.DeepEqual(got, tt.want) 
{ + t.Errorf("getPublishedDateFromRawString() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_findAudioFromText(t *testing.T) { + type args struct { + text string + } + tests := []struct { + name string + args args + want []schema.Audio + }{ + { + name: "should return audio in portuguese", + args: args{ + text: "Áudio: Português", + }, + want: []schema.Audio{ + schema.AudioPortuguese, + }, + }, + { + name: "should return audio in portuguese", + args: args{ + text: "Idioma: Português", + }, + want: []schema.Audio{ + schema.AudioPortuguese, + }, + }, + { + name: "should return audio in portuguese", + args: args{ + text: "Audio: Português", + }, + want: []schema.Audio{ + schema.AudioPortuguese, + }, + }, + { + name: "should return audio in portuguese - comando_torrents", + args: args{ + text: ` +»INFORMAÇÕES« +Título Traduzido: O Cangaceiro do Futuro +Título Original: O Cangaceiro do Futuro +IMDb: 7,1 +Gênero:Comédia +Lançamento: 2022 +Qualidade: WEB-DL +Áudio: Português +Legenda: S/L +Formato: MKV +Tamanho: 5.77 GB | 9.60 GB +Duração: 30 Min./Ep. 
+Qualidade de Áudio: 10 +Qualidade de Vídeo: 10 +Servidor Via: Torrent + `, + }, + want: []schema.Audio{ + schema.AudioPortuguese, + }, + }, + { + name: "should return audio in portuguese - rede torrent", + args: args{ + text: ` +Filme Bicho de Sete Cabeças Torrent +Título Original: Bicho de Sete Cabeças +Lançamento: 2001 +Gêneros: Drama / Nacional +Idioma: Português +Qualidade: 720p / BluRay +Duração: 1h 14 Minutos +Formato: Mp4 +Vídeo: 10 e Áudio: 10 +Legendas: Português +Nota do Imdb: 7.7 +Tamanho: 1.26 GB + `, + }, + want: []schema.Audio{ + schema.AudioPortuguese, + }, + }, + { + name: "should return audio in portuguese - rede torrent 2", + args: args{ + text: ` +Filme Branca de Neve e o Caçador Torrent / Assistir Online +Título Original: Snow White and the Huntsman +Lançamento: 2012 +Gêneros: Ação / Aventura / Fantasia +Idioma: Português / Inglês +Duração: 126 Minutos +Formato: Mkv / Mp4 +Vídeo: 10 e Áudio: 10 +Legendas: Sim +Tamanho: 2.69 GB / 1.95 GB / 1.0 GB + `, + }, + want: []schema.Audio{ + schema.AudioPortuguese, + schema.AudioEnglish, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := findAudioFromText(tt.args.text); !reflect.DeepEqual(got, tt.want) { + t.Errorf("findAudioFromText() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_getIMDBLink(t *testing.T) { + type args struct { + link string + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "should return imdb link", + args: args{ + link: "https://www.imdb.com/title/tt1234567", + }, + want: "https://www.imdb.com/title/tt1234567", + wantErr: false, + }, + { + name: "should return imdb link when end with /", + args: args{ + link: "https://www.imdb.com/title/tt1234567/", + }, + want: "https://www.imdb.com/title/tt1234567/", + wantErr: false, + }, + { + name: "should return imdb link when end with /", + args: args{ + link: "https://www.imdb.com/title/tt1234567/", + }, + want: 
"https://www.imdb.com/title/tt1234567/", + wantErr: false, + }, + { + name: "should return imdb link when it has a language", + args: args{ + link: "https://www.imdb.com/pt/title/tt18722864/", + }, + want: "https://www.imdb.com/pt/title/tt18722864/", + }, + { + name: "should return imdb link when it has a language", + args: args{ + link: "https://www.imdb.com/pt/title/tt34608980/", + }, + want: "https://www.imdb.com/pt/title/tt34608980/", + }, + { + name: "should return error when link is invalid", + args: args{ + link: "https://www.google.com", + }, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := getIMDBLink(tt.args.link) + if (err != nil) != tt.wantErr { + t.Errorf("getIMDBLink() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("getIMDBLink() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/api/index.go b/api/index.go index 39f9f4c..dd55a5c 100644 --- a/api/index.go +++ b/api/index.go @@ -22,6 +22,8 @@ type Indexer struct { type IndexerMeta struct { URL string SearchURL string + // pattern for pagination, e.g. 
"page/%s" + PagePattern string } type Response struct { @@ -99,6 +101,17 @@ func HandlerIndex(w http.ResponseWriter, r *http.Request) { }, }, }, + "/indexers/rede_torrent": []map[string]interface{}{ + { + "method": "GET", + "description": "Indexer for rede torrent", + "query_params": map[string]string{ + "q": "search query", + "page": "page number", + "filter_results": "if results with similarity equals to zero should be filtered (true/false)", + }, + }, + }, "/indexers/manual": []map[string]interface{}{ { "method": "POST", @@ -120,6 +133,12 @@ func HandlerIndex(w http.ResponseWriter, r *http.Request) { }, }, }, + "/ui/": []map[string]interface{}{ + { + "method": "GET", + "description": "Show the unified search UI (only work if Meilisearch is enabled)", + }, + }, }, }) if err != nil { diff --git a/api/manual.go b/api/manual.go index 9f25d06..a006a6e 100644 --- a/api/manual.go +++ b/api/manual.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "net/http" - "strings" "time" "github.com/felipemarinho97/torrent-indexer/magnet" @@ -76,19 +75,7 @@ func (i *Indexer) HandlerManualIndexer(w http.ResponseWriter, r *http.Request) { releaseTitle := magnet.DisplayName infoHash := magnet.InfoHash.String() trackers := magnet.Trackers - magnetAudio := []schema.Audio{} - if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { - magnetAudio = append(magnetAudio, audio...) - } else if len(audio) > 1 { - // remove portuguese audio, and append to magnetAudio - for _, a := range audio { - if a != schema.AudioPortuguese { - magnetAudio = append(magnetAudio, a) - } - } - } else { - magnetAudio = append(magnetAudio, audio...) 
- } + magnetAudio := getAudioFromTitle(releaseTitle, audio) peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) if err != nil { diff --git a/api/rede_torrent.go b/api/rede_torrent.go new file mode 100644 index 0000000..474d5cf --- /dev/null +++ b/api/rede_torrent.go @@ -0,0 +1,280 @@ +package handler + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/url" + "regexp" + "slices" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/hbollon/go-edlib" + + "github.com/felipemarinho97/torrent-indexer/magnet" + "github.com/felipemarinho97/torrent-indexer/schema" + goscrape "github.com/felipemarinho97/torrent-indexer/scrape" + "github.com/felipemarinho97/torrent-indexer/utils" +) + +var rede_torrent = IndexerMeta{ + URL: "https://redetorrent.com/", + SearchURL: "index.php?s=", + PagePattern: "%s", +} + +func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Request) { + start := time.Now() + defer func() { + i.metrics.IndexerDuration.WithLabelValues("rede_torrent").Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues("rede_torrent").Inc() + }() + + ctx := r.Context() + // supported query params: q, season, episode, page, filter_results + q := r.URL.Query().Get("q") + page := r.URL.Query().Get("page") + + // URL encode query param + q = url.QueryEscape(q) + url := rede_torrent.URL + if q != "" { + url = fmt.Sprintf("%s%s%s", url, rede_torrent.SearchURL, q) + } else if page != "" { + url = fmt.Sprintf(fmt.Sprintf("%s%s", url, rede_torrent.PagePattern), page) + } + + fmt.Println("URL:>", url) + resp, err := i.requester.GetDocument(ctx, url) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + if err != nil { + fmt.Println(err) + } + i.metrics.IndexerErrors.WithLabelValues("rede_torrent").Inc() + return + } + defer resp.Close() + + doc, err := 
goquery.NewDocumentFromReader(resp) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + err = json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + if err != nil { + fmt.Println(err) + } + + i.metrics.IndexerErrors.WithLabelValues("rede_torrent").Inc() + return + } + + var links []string + doc.Find(".capa_lista").Each(func(i int, s *goquery.Selection) { + link, _ := s.Find("a").Attr("href") + links = append(links, link) + }) + + var itChan = make(chan []schema.IndexedTorrent) + var errChan = make(chan error) + indexedTorrents := []schema.IndexedTorrent{} + for _, link := range links { + go func(link string) { + torrents, err := getTorrentsRedeTorrent(ctx, i, link) + if err != nil { + fmt.Println(err) + errChan <- err + } + itChan <- torrents + }(link) + } + + for i := 0; i < len(links); i++ { + select { + case torrents := <-itChan: + indexedTorrents = append(indexedTorrents, torrents...) + case err := <-errChan: + fmt.Println(err) + } + } + + for i, it := range indexedTorrents { + jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") + qLower := strings.ToLower(q) + splitLength := 2 + indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) + } + + // remove the ones with zero similarity + if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { + indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { + return it.Similarity > 0 + }) + } + + // sort by similarity + slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { + return int((j.Similarity - i.Similarity) * 1000) + }) + + // send to search index + go func() { + _ = i.search.IndexTorrents(indexedTorrents) + }() + + w.Header().Set("Content-Type", "application/json") + err = json.NewEncoder(w).Encode(Response{ + Results: indexedTorrents, + Count: len(indexedTorrents), + }) + if err != nil { + 
fmt.Println(err) + } +} + +func getTorrentsRedeTorrent(ctx context.Context, i *Indexer, link string) ([]schema.IndexedTorrent, error) { + var indexedTorrents []schema.IndexedTorrent + doc, err := getDocument(ctx, i, link) + if err != nil { + return nil, err + } + + article := doc.Find(".conteudo") + // title pattern: "Something - optional balbla (dddd) some shit" - extract "Something" and "dddd" + titleRe := regexp.MustCompile(`^(.*?)(?: - (.*?))? \((\d{4})\)`) + titleP := titleRe.FindStringSubmatch(article.Find("h1").Text()) + if len(titleP) < 3 { + return nil, fmt.Errorf("could not extract title from %s", link) + } + title := strings.TrimSpace(titleP[1]) + year := strings.TrimSpace(titleP[3]) + + textContent := article.Find(".apenas_itemprop") + date := getPublishedDateFromMeta(doc) + magnets := textContent.Find("a[href^=\"magnet\"]") + var magnetLinks []string + magnets.Each(func(i int, s *goquery.Selection) { + magnetLink, _ := s.Attr("href") + magnetLinks = append(magnetLinks, magnetLink) + }) + + var audio []schema.Audio + var size []string + article.Find("div#informacoes > p").Each(func(i int, s *goquery.Selection) { + // pattern: + // Filme Bicho de Sete Cabeças Torrent + // Título Original: Bicho de Sete Cabeças + // Lançamento: 2001 + // Gêneros: Drama / Nacional + // Idioma: Português + // Qualidade: 720p / BluRay + // Duração: 1h 14 Minutos + // Formato: Mp4 + // Vídeo: 10 e Áudio: 10 + // Legendas: Português + // Nota do Imdb: 7.7 + // Tamanho: 1.26 GB + + // we need to manualy parse because the text is not well formatted + htmlContent, err := s.Html() + if err != nil { + fmt.Println(err) + return + } + + // remove any \n and \t characters + htmlContent = strings.ReplaceAll(htmlContent, "\n", "") + htmlContent = strings.ReplaceAll(htmlContent, "\t", "") + + // split by
<br> tags and render each line + brRe := regexp.MustCompile(`<br\s*\/?>`) + htmlContent = brRe.ReplaceAllString(htmlContent, "<br>") + lines := strings.Split(htmlContent, "<br>
") + + var text strings.Builder + for _, line := range lines { + // remove any HTML tags + re := regexp.MustCompile(`<[^>]*>`) + line = re.ReplaceAllString(line, "") + + line = strings.TrimSpace(line) + text.WriteString(line + "\n") + } + + audio = append(audio, findAudioFromText(text.String())...) + y := findYearFromText(text.String(), title) + if y != "" { + year = y + } + size = append(size, findSizesFromText(text.String())...) + }) + + // find any link from imdb + imdbLink := "" + article.Find("a").Each(func(i int, s *goquery.Selection) { + link, _ := s.Attr("href") + _imdbLink, err := getIMDBLink(link) + if err == nil { + imdbLink = _imdbLink + } + }) + + size = stableUniq(size) + + var chanIndexedTorrent = make(chan schema.IndexedTorrent) + + // for each magnet link, create a new indexed torrent + for it, magnetLink := range magnetLinks { + it := it + go func(it int, magnetLink string) { + magnet, err := magnet.ParseMagnetUri(magnetLink) + if err != nil { + fmt.Println(err) + } + releaseTitle := magnet.DisplayName + infoHash := magnet.InfoHash.String() + trackers := magnet.Trackers + magnetAudio := getAudioFromTitle(releaseTitle, audio) + + peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) + if err != nil { + fmt.Println(err) + } + + title := processTitle(title, magnetAudio) + + // if the number of sizes is equal to the number of magnets, then assign the size to each indexed torrent in order + var mySize string + if len(size) == len(magnetLinks) { + mySize = size[it] + } + + ixt := schema.IndexedTorrent{ + Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + OriginalTitle: title, + Details: link, + Year: year, + IMDB: imdbLink, + Audio: magnetAudio, + MagnetLink: magnetLink, + Date: date, + InfoHash: infoHash, + Trackers: trackers, + LeechCount: peer, + SeedCount: seed, + Size: mySize, + } + chanIndexedTorrent <- ixt + }(it, magnetLink) + } + + for i := 0; i < len(magnetLinks); i++ { + it := 
<-chanIndexedTorrent + indexedTorrents = append(indexedTorrents, it) + } + + return indexedTorrents, nil +} diff --git a/api/starck_filmes.go b/api/starck_filmes.go index dca0530..745c9fc 100644 --- a/api/starck_filmes.go +++ b/api/starck_filmes.go @@ -140,6 +140,8 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch return nil, err } + date := getPublishedDateFromRawString(link) + post := doc.Find(".post") capa := post.Find(".capa") title := capa.Find(".post-description > h2").Text() @@ -161,17 +163,16 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch // Duração: 1h 40 min // Gênero: Terror, Suspense, Ficção // Formato: MKV - // Tamanho: 2.45 GB + // Tamanho: 2.45 GB // Qualidade de Video: 10 // Qualidade do Audio: 10 // Idioma: Português | Inglês // Legenda: Português, Inglês, Espanhol var text strings.Builder - s.Find("span").Each(func (i int, span *goquery.Selection) { + s.Find("span").Each(func(i int, span *goquery.Selection) { text.WriteString(span.Text()) text.WriteString(" ") }) - fmt.Println(text.String()) audio = append(audio, findAudioFromText(text.String())...) y := findYearFromText(text.String(), title) if y != "" { @@ -195,22 +196,23 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch if err != nil { fmt.Println(err) } - releaseTitle := magnet.DisplayName + releaseTitle := strings.TrimSpace(magnet.DisplayName) + // url decode the title + releaseTitle, err = url.QueryUnescape(releaseTitle) + if err != nil { + fmt.Println(err) + releaseTitle = strings.TrimSpace(magnet.DisplayName) + } infoHash := magnet.InfoHash.String() trackers := magnet.Trackers - magnetAudio := []schema.Audio{} - if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { - magnetAudio = append(magnetAudio, audio...) 
- } else if len(audio) > 1 { - // remove portuguese audio, and append to magnetAudio - for _, a := range audio { - if a != schema.AudioPortuguese { - magnetAudio = append(magnetAudio, a) - } + for i, tracker := range trackers { + unescapedTracker, err := url.QueryUnescape(tracker) + if err != nil { + fmt.Println(err) } - } else { - magnetAudio = append(magnetAudio, audio...) + trackers[i] = strings.TrimSpace(unescapedTracker) } + magnetAudio := getAudioFromTitle(releaseTitle, audio) peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) if err != nil { @@ -233,7 +235,7 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch IMDB: imdbLink, Audio: magnetAudio, MagnetLink: magnetLink, - Date: time.Now(), + Date: date, InfoHash: infoHash, Trackers: trackers, LeechCount: peer, diff --git a/api/torrent_dos_filmes.go b/api/torrent_dos_filmes.go index 0bbbadd..6fc400a 100644 --- a/api/torrent_dos_filmes.go +++ b/api/torrent_dos_filmes.go @@ -143,7 +143,7 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) ( article := doc.Find("article") title := strings.Replace(article.Find(".title > h1").Text(), " - Download", "", -1) textContent := article.Find("div.content") - date := getPublishedDateTDF(doc) + date := getPublishedDateFromMeta(doc) magnets := textContent.Find("a[href^=\"magnet\"]") var magnetLinks []string magnets.Each(func(i int, s *goquery.Selection) { @@ -206,19 +206,7 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) ( releaseTitle := magnet.DisplayName infoHash := magnet.InfoHash.String() trackers := magnet.Trackers - magnetAudio := []schema.Audio{} - if strings.Contains(strings.ToLower(releaseTitle), "dual") || strings.Contains(strings.ToLower(releaseTitle), "dublado") { - magnetAudio = append(magnetAudio, audio...) 
- } else if len(audio) > 1 { - // remove portuguese audio, and append to magnetAudio - for _, a := range audio { - if a != schema.AudioPortuguese { - magnetAudio = append(magnetAudio, a) - } - } - } else { - magnetAudio = append(magnetAudio, audio...) - } + magnetAudio := getAudioFromTitle(releaseTitle, audio) peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, i.metrics, infoHash, trackers) if err != nil { @@ -259,15 +247,3 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) ( return indexedTorrents, nil } - -func getPublishedDateTDF(document *goquery.Document) time.Time { - var date time.Time - // - datePublished := strings.TrimSpace(document.Find("meta[property=\"article:published_time\"]").AttrOr("content", "")) - - if datePublished != "" { - date, _ = time.Parse(time.RFC3339, datePublished) - } - - return date -} diff --git a/main.go b/main.go index 4fb7675..d31ee4e 100644 --- a/main.go +++ b/main.go @@ -46,11 +46,12 @@ func main() { metricsMux := http.NewServeMux() indexerMux.HandleFunc("/", handler.HandlerIndex) - indexerMux.HandleFunc("/indexers/comando_torrents", indexers.HandlerComandoIndexer) - indexerMux.HandleFunc("/indexers/torrent-dos-filmes", indexers.HandlerTorrentDosFilmesIndexer) indexerMux.HandleFunc("/indexers/bludv", indexers.HandlerBluDVIndexer) + indexerMux.HandleFunc("/indexers/comando_torrents", indexers.HandlerComandoIndexer) indexerMux.HandleFunc("/indexers/comandohds", indexers.HandlerComandoHDsIndexer) + indexerMux.HandleFunc("/indexers/rede_torrent", indexers.HandlerRedeTorrentIndexer) indexerMux.HandleFunc("/indexers/starck-filmes", indexers.HandlerStarckFilmesIndexer) + indexerMux.HandleFunc("/indexers/torrent-dos-filmes", indexers.HandlerTorrentDosFilmesIndexer) indexerMux.HandleFunc("/indexers/manual", indexers.HandlerManualIndexer) indexerMux.HandleFunc("/search", search.SearchTorrentHandler) indexerMux.Handle("/ui/", http.StripPrefix("/ui/", http.FileServer(http.FS(public.UIFiles)))) @@ 
-66,7 +67,7 @@ func main() { port := os.Getenv("PORT") if port == "" { - port = "7006" + port = "7007" } fmt.Printf("Server listening on :%s\n", port)