Fix/IMDB and comando-filmes date detection (#26)

* chg: fix: date and imdb detection

* chg: fix: improve IMDB detection

* chg: feat: add more audio strings
This commit is contained in:
2024-12-13 12:45:17 -03:00
committed by GitHub
parent 88d6d506bf
commit dc3cb3be92
5 changed files with 174 additions and 26 deletions

View File

@@ -6,7 +6,6 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"net/url" "net/url"
"regexp"
"slices" "slices"
"strings" "strings"
"time" "time"
@@ -187,10 +186,9 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.In
imdbLink := "" imdbLink := ""
article.Find("div.content a").Each(func(i int, s *goquery.Selection) { article.Find("div.content a").Each(func(i int, s *goquery.Selection) {
link, _ := s.Attr("href") link, _ := s.Attr("href")
re := regexp.MustCompile(`https://www.imdb.com/title/(tt\d+)`) _imdbLink, err := getIMDBLink(link)
matches := re.FindStringSubmatch(link) if err == nil {
if len(matches) > 0 { imdbLink = _imdbLink
imdbLink = matches[0]
} }
}) })

View File

@@ -163,19 +163,11 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed
// div itemprop="datePublished" // div itemprop="datePublished"
datePublished := strings.TrimSpace(article.Find("div[itemprop=\"datePublished\"]").Text()) datePublished := strings.TrimSpace(article.Find("div[itemprop=\"datePublished\"]").Text())
// pattern: 10 de setembro de 2021 // pattern: 10 de setembro de 2021
re := regexp.MustCompile(`(\d{2}) de (\w+) de (\d{4})`) date, err := parseLocalizedDate(datePublished)
matches := re.FindStringSubmatch(datePublished) if err != nil {
var date time.Time return nil, err
if len(matches) > 0 {
day := matches[1]
month := matches[2]
year := matches[3]
datePublished = fmt.Sprintf("%s-%s-%s", year, replacer.Replace(month), day)
date, err = time.Parse("2006-01-02", datePublished)
if err != nil {
return nil, err
}
} }
magnets := textContent.Find("a[href^=\"magnet\"]") magnets := textContent.Find("a[href^=\"magnet\"]")
var magnetLinks []string var magnetLinks []string
magnets.Each(func(i int, s *goquery.Selection) { magnets.Each(func(i int, s *goquery.Selection) {
@@ -217,10 +209,9 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed
imdbLink := "" imdbLink := ""
article.Find("a").Each(func(i int, s *goquery.Selection) { article.Find("a").Each(func(i int, s *goquery.Selection) {
link, _ := s.Attr("href") link, _ := s.Attr("href")
re := regexp.MustCompile(`https://www.imdb.com/title/(tt\d+)`) _imdbLink, err := getIMDBLink(link)
matches := re.FindStringSubmatch(link) if err == nil {
if len(matches) > 0 { imdbLink = _imdbLink
imdbLink = matches[0]
} }
}) })
@@ -293,6 +284,40 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed
return indexedTorrents, nil return indexedTorrents, nil
} }
// imdbLinkPattern matches an IMDB title URL, optionally carrying a two-letter
// language segment (e.g. "/pt") before "/title" and an optional trailing slash.
// The dots in the host are escaped so that only the literal "www.imdb.com"
// matches. It is compiled once at package scope rather than on every call.
var imdbLinkPattern = regexp.MustCompile(`https://www\.imdb\.com(/[a-z]{2})?/title/(tt\d+)/?`)

// getIMDBLink extracts the IMDB title URL contained in link.
//
// It returns the matched portion of link (which may be shorter than the input
// when link carries a query string or surrounding text), or an error when link
// contains no IMDB title URL.
func getIMDBLink(link string) (string, error) {
	// FindString yields "" when nothing matches; a real match can never be
	// empty because the pattern begins with a literal prefix.
	match := imdbLinkPattern.FindString(link)
	if match == "" {
		return "", fmt.Errorf("no imdb link found")
	}
	return match, nil
}
// localizedDatePattern matches dates written as "<day> de <month> de <year>",
// for example "10 de setembro de 2021" or "1 de outubro de 2022". The month is
// matched with \p{L} instead of \w because Go's \w is ASCII-only and would
// reject month names containing non-ASCII letters such as "março". It is
// compiled once at package scope rather than on every call.
var localizedDatePattern = regexp.MustCompile(`(\d{1,2}) de (\p{L}+) de (\d{4})`)

// parseLocalizedDate parses the first "<day> de <month> de <year>" date found
// in datePublished.
//
// When no such date is present it returns the zero time.Time and a nil error.
// When a date is found but cannot be converted (for instance because the month
// name was not translated into a number), it returns the zero time.Time and
// the parsing error.
func parseLocalizedDate(datePublished string) (time.Time, error) {
	matches := localizedDatePattern.FindStringSubmatch(datePublished)
	if matches == nil {
		return time.Time{}, nil
	}

	day, month, year := matches[1], matches[2], matches[3]
	// The "2006-01-02" layout requires a two-digit day, so prepend a zero to
	// single-digit days.
	if len(day) == 1 {
		day = "0" + day
	}

	// NOTE(review): replacer is defined outside this block; it presumably maps
	// month names to their two-digit numbers — confirm it covers every
	// spelling the pattern can now match.
	isoDate := fmt.Sprintf("%s-%s-%s", year, replacer.Replace(month), day)
	date, err := time.Parse("2006-01-02", isoDate)
	if err != nil {
		return time.Time{}, err
	}
	return date, nil
}
func stableUniq(s []string) []string { func stableUniq(s []string) []string {
var uniq []map[string]interface{} var uniq []map[string]interface{}
m := make(map[string]map[string]interface{}) m := make(map[string]map[string]interface{})

View File

@@ -3,6 +3,7 @@ package handler
import ( import (
"reflect" "reflect"
"testing" "testing"
"time"
"github.com/felipemarinho97/torrent-indexer/schema" "github.com/felipemarinho97/torrent-indexer/schema"
) )
@@ -77,3 +78,115 @@ Servidor Via: Torrent
}) })
} }
} }
// Test_parseLocalizedDate checks that parseLocalizedDate converts dates in the
// "<day> de <month> de <year>" form, with both two-digit and single-digit
// days, into the corresponding UTC midnight time.Time.
func Test_parseLocalizedDate(t *testing.T) {
	type args struct {
		datePublished string
	}
	tests := []struct {
		name    string
		args    args
		want    time.Time
		wantErr bool
	}{
		{
			name: "should return date",
			args: args{
				datePublished: "12 de outubro de 2022",
			},
			want:    time.Date(2022, 10, 12, 0, 0, 0, 0, time.UTC),
			wantErr: false,
		},
		{
			name: "should return date single digit",
			args: args{
				datePublished: "1 de outubro de 2022",
			},
			want:    time.Date(2022, 10, 1, 0, 0, 0, 0, time.UTC),
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := parseLocalizedDate(tt.args.datePublished)
			if (err != nil) != tt.wantErr {
				t.Errorf("parseLocalizedDate() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			// Time.Equal compares the instant itself; a structural
			// comparison would also depend on time.Time's internal
			// location representation.
			if !got.Equal(tt.want) {
				t.Errorf("parseLocalizedDate() = %v, want %v", got, tt.want)
			}
		})
	}
}
// Test_getIMDBLink checks that getIMDBLink returns the IMDB title URL for
// plain links, links with a trailing slash and links with a language segment,
// and that it reports an error for a link that is not an IMDB title URL.
//
// Every case has a distinct name so that a failing subtest can be identified
// unambiguously in the test output.
func Test_getIMDBLink(t *testing.T) {
	type args struct {
		link string
	}
	tests := []struct {
		name    string
		args    args
		want    string
		wantErr bool
	}{
		{
			name: "should return imdb link",
			args: args{
				link: "https://www.imdb.com/title/tt1234567",
			},
			want:    "https://www.imdb.com/title/tt1234567",
			wantErr: false,
		},
		{
			name: "should return imdb link when end with /",
			args: args{
				link: "https://www.imdb.com/title/tt1234567/",
			},
			want:    "https://www.imdb.com/title/tt1234567/",
			wantErr: false,
		},
		{
			name: "should return imdb link when it has a language",
			args: args{
				link: "https://www.imdb.com/pt/title/tt18722864/",
			},
			want:    "https://www.imdb.com/pt/title/tt18722864/",
			wantErr: false,
		},
		{
			name: "should return imdb link when it has a language (another title)",
			args: args{
				link: "https://www.imdb.com/pt/title/tt34608980/",
			},
			want:    "https://www.imdb.com/pt/title/tt34608980/",
			wantErr: false,
		},
		{
			name: "should return error when link is invalid",
			args: args{
				link: "https://www.google.com",
			},
			want:    "",
			wantErr: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := getIMDBLink(tt.args.link)
			if (err != nil) != tt.wantErr {
				t.Errorf("getIMDBLink() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if got != tt.want {
				t.Errorf("getIMDBLink() = %v, want %v", got, tt.want)
			}
		})
	}
}

View File

@@ -6,7 +6,6 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"net/url" "net/url"
"regexp"
"slices" "slices"
"strings" "strings"
"time" "time"
@@ -186,10 +185,9 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) (
imdbLink := "" imdbLink := ""
article.Find("div.content a").Each(func(i int, s *goquery.Selection) { article.Find("div.content a").Each(func(i int, s *goquery.Selection) {
link, _ := s.Attr("href") link, _ := s.Attr("href")
re := regexp.MustCompile(`https://www.imdb.com/title/(tt\d+)`) _imdbLink, err := getIMDBLink(link)
matches := re.FindStringSubmatch(link) if err == nil {
if len(matches) > 0 { imdbLink = _imdbLink
imdbLink = matches[0]
} }
}) })

View File

@@ -35,6 +35,8 @@ const (
AudioHindi = "Hindi" AudioHindi = "Hindi"
AudioFarsi = "Persa" AudioFarsi = "Persa"
AudioMalay = "Malaio" AudioMalay = "Malaio"
AudioDutch = "Holandês"
AudioDutch2 = "Holandes"
) )
var AudioList = []Audio{ var AudioList = []Audio{
@@ -66,6 +68,10 @@ var AudioList = []Audio{
AudioThai2, AudioThai2,
AudioTurkish, AudioTurkish,
AudioHindi, AudioHindi,
AudioFarsi,
AudioMalay,
AudioDutch,
AudioDutch2,
} }
func (a Audio) String() string { func (a Audio) String() string {
@@ -139,6 +145,14 @@ func (a Audio) toTag() string {
return "tur" return "tur"
case AudioHindi: case AudioHindi:
return "hin" return "hin"
case AudioFarsi:
return "fas"
case AudioMalay:
return "msa"
case AudioDutch:
return "nld"
case AudioDutch2:
return "nld"
default: default:
return "" return ""
} }