diff --git a/api/bludv.go b/api/bludv.go index bcb56c0..22495d6 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -6,7 +6,6 @@ import ( "fmt" "net/http" "net/url" - "regexp" "slices" "strings" "time" @@ -187,10 +186,9 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.In imdbLink := "" article.Find("div.content a").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") - re := regexp.MustCompile(`https://www.imdb.com/title/(tt\d+)`) - matches := re.FindStringSubmatch(link) - if len(matches) > 0 { - imdbLink = matches[0] + _imdbLink, err := getIMDBLink(link) + if err == nil { + imdbLink = _imdbLink } }) diff --git a/api/comando_torrents.go b/api/comando_torrents.go index 61746b3..649c878 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -163,19 +163,11 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed // div itemprop="datePublished" datePublished := strings.TrimSpace(article.Find("div[itemprop=\"datePublished\"]").Text()) // pattern: 10 de setembro de 2021 - re := regexp.MustCompile(`(\d{2}) de (\w+) de (\d{4})`) - matches := re.FindStringSubmatch(datePublished) - var date time.Time - if len(matches) > 0 { - day := matches[1] - month := matches[2] - year := matches[3] - datePublished = fmt.Sprintf("%s-%s-%s", year, replacer.Replace(month), day) - date, err = time.Parse("2006-01-02", datePublished) - if err != nil { - return nil, err - } + date, err := parseLocalizedDate(datePublished) + if err != nil { + return nil, err } + magnets := textContent.Find("a[href^=\"magnet\"]") var magnetLinks []string magnets.Each(func(i int, s *goquery.Selection) { @@ -217,10 +209,9 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed imdbLink := "" article.Find("a").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") - re := regexp.MustCompile(`https://www.imdb.com/title/(tt\d+)`) - matches := re.FindStringSubmatch(link) - if len(matches) > 0 { - imdbLink = matches[0] + _imdbLink, err := getIMDBLink(link) + if err == nil { + imdbLink = _imdbLink } }) @@ -293,6 +284,40 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed return indexedTorrents, nil } +func getIMDBLink(link string) (string, error) { + var imdbLink string + re := regexp.MustCompile(`https://www.imdb.com(/[a-z]{2})?/title/(tt\d+)/?`) + + matches := re.FindStringSubmatch(link) + if len(matches) > 0 { + imdbLink = matches[0] + } else { + return "", fmt.Errorf("no imdb link found") + } + return imdbLink, nil +} + +func parseLocalizedDate(datePublished string) (time.Time, error) { + re := regexp.MustCompile(`(\d{1,2}) de (\w+) de (\d{4})`) + matches := re.FindStringSubmatch(datePublished) + if len(matches) > 0 { + day := matches[1] + // append 0 to single digit day + if len(day) == 1 { + day = fmt.Sprintf("0%s", day) + } + month := matches[2] + year := matches[3] + datePublished = fmt.Sprintf("%s-%s-%s", year, replacer.Replace(month), day) + date, err := time.Parse("2006-01-02", datePublished) + if err != nil { + return time.Time{}, err + } + return date, nil + } + return time.Time{}, nil +} + func stableUniq(s []string) []string { var uniq []map[string]interface{} m := make(map[string]map[string]interface{}) diff --git a/api/comando_torrents_test.go b/api/comando_torrents_test.go index f6065b1..11ee6ab 100644 --- a/api/comando_torrents_test.go +++ b/api/comando_torrents_test.go @@ -3,6 +3,7 @@ package handler import ( "reflect" "testing" + "time" "github.com/felipemarinho97/torrent-indexer/schema" ) @@ -77,3 +78,115 @@ Servidor Via: Torrent }) } } + +func Test_parseLocalizedDate(t *testing.T) { + type args struct { + datePublished string + } + tests := []struct { + name string + args args + want time.Time + wantErr bool + }{ + { + name: "should return date", + args: args{ + datePublished: "12 de outubro de 2022", + }, + want: time.Date(2022, 10, 12, 0, 0, 0, 0, time.UTC), + wantErr: false, + }, + { + name: "should return date single digit", + args: args{ + datePublished: "1 de outubro de 2022", + }, + want: time.Date(2022, 10, 1, 0, 0, 0, 0, time.UTC), + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseLocalizedDate(tt.args.datePublished) + if (err != nil) != tt.wantErr { + t.Errorf("parseDate() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseDate() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_getIMDBLink(t *testing.T) { + type args struct { + link string + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "should return imdb link", + args: args{ + link: "https://www.imdb.com/title/tt1234567", + }, + want: "https://www.imdb.com/title/tt1234567", + wantErr: false, + }, + { + name: "should return imdb link when end with /", + args: args{ + link: "https://www.imdb.com/title/tt1234567/", + }, + want: "https://www.imdb.com/title/tt1234567/", + wantErr: false, + }, + { + name: "should return imdb link when end with /", + args: args{ + link: "https://www.imdb.com/title/tt1234567/", + }, + want: "https://www.imdb.com/title/tt1234567/", + wantErr: false, + }, + { + name: "should return imdb link when it has a language", + args: args{ + link: "https://www.imdb.com/pt/title/tt18722864/", + }, + want: "https://www.imdb.com/pt/title/tt18722864/", + }, + { + name: "should return imdb link when it has a language", + args: args{ + link: "https://www.imdb.com/pt/title/tt34608980/", + }, + want: "https://www.imdb.com/pt/title/tt34608980/", + }, + { + name: "should return error when link is invalid", + args: args{ + link: "https://www.google.com", + }, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := getIMDBLink(tt.args.link) + if (err != nil) != tt.wantErr { + t.Errorf("getIMDBLink() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("getIMDBLink() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/api/torrent_dos_filmes.go b/api/torrent_dos_filmes.go index 82f11ef..0bbbadd 100644 --- a/api/torrent_dos_filmes.go +++ b/api/torrent_dos_filmes.go @@ -6,7 +6,6 @@ import ( "fmt" "net/http" "net/url" - "regexp" "slices" "strings" "time" @@ -186,10 +185,9 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) ( imdbLink := "" article.Find("div.content a").Each(func(i int, s *goquery.Selection) { link, _ := s.Attr("href") - re := regexp.MustCompile(`https://www.imdb.com/title/(tt\d+)`) - matches := re.FindStringSubmatch(link) - if len(matches) > 0 { - imdbLink = matches[0] + _imdbLink, err := getIMDBLink(link) + if err == nil { + imdbLink = _imdbLink } }) diff --git a/schema/audio.go b/schema/audio.go index 8d1fe85..33c5f44 100644 --- a/schema/audio.go +++ b/schema/audio.go @@ -35,6 +35,8 @@ const ( AudioHindi = "Hindi" AudioFarsi = "Persa" AudioMalay = "Malaio" + AudioDutch = "Holandês" + AudioDutch2 = "Holandes" ) var AudioList = []Audio{ @@ -66,6 +68,10 @@ var AudioList = []Audio{ AudioThai2, AudioTurkish, AudioHindi, + AudioFarsi, + AudioMalay, + AudioDutch, + AudioDutch2, } func (a Audio) String() string { @@ -139,6 +145,14 @@ func (a Audio) toTag() string { return "tur" case AudioHindi: return "hin" + case AudioFarsi: + return "fas" + case AudioMalay: + return "msa" + case AudioDutch: + return "nld" + case AudioDutch2: + return "nld" default: return "" }