From b1cece27438ed1d2353227d5cd86d575af690bf7 Mon Sep 17 00:00:00 2001 From: Marinho Date: Sun, 10 Mar 2024 12:48:48 +0000 Subject: [PATCH] chg: fix: year detection algorithm --- api/bludv.go | 34 +++++++++++++++++----------------- api/comando_torrents.go | 8 +++++++- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/api/bludv.go b/api/bludv.go index 8382f4d..567241c 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -96,22 +96,7 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]IndexedTo article := doc.Find(".post") title := strings.Replace(article.Find(".title > h1").Text(), " - Download", "", -1) textContent := article.Find("div.content") - // div itemprop="datePublished" - datePublished := strings.TrimSpace(article.Find("div[itemprop=\"datePublished\"]").Text()) - // pattern: 10 de setembro de 2021 - re := regexp.MustCompile(`(\d{2}) de (\w+) de (\d{4})`) - matches := re.FindStringSubmatch(datePublished) - var date time.Time - if len(matches) > 0 { - day := matches[1] - month := matches[2] - year := matches[3] - datePublished = fmt.Sprintf("%s-%s-%s", year, replacer.Replace(month), day) - date, err = time.Parse("2006-01-02", datePublished) - if err != nil { - return nil, err - } - } + date := getPublishedDate(doc) magnets := textContent.Find("a[href^=\"magnet\"]") var magnetLinks []string magnets.Each(func(i int, s *goquery.Selection) { @@ -142,7 +127,10 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]IndexedTo text := s.Text() audio = append(audio, findAudioFromText(text)...) - year = findYearFromText(text, title) + y := findYearFromText(text, title) + if y != "" { + year = y + } size = append(size, findSizesFromText(text)...) 
}) @@ -225,3 +213,15 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]IndexedTo return indexedTorrents, nil } + +func getPublishedDate(document *goquery.Document) time.Time { + var date time.Time + // + datePublished := strings.TrimSpace(document.Find("meta[property=\"article:published_time\"]").AttrOr("content", "")) + + if datePublished != "" { + date, _ = time.Parse(time.RFC3339, datePublished) + } + + return date +} diff --git a/api/comando_torrents.go b/api/comando_torrents.go index 580c81c..a3c5d84 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -159,7 +159,10 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]IndexedTorrent text := s.Text() audio = append(audio, findAudioFromText(text)...) - year = findYearFromText(text, title) + y := findYearFromText(text, title) + if y != "" { + year = y + } size = append(size, findSizesFromText(text)...) }) @@ -283,6 +286,9 @@ func findYearFromText(text string, title string) (year string) { } if year == "" { + fmt.Println("DEBUG: year not found in text, trying to find in title") + fmt.Println("DEBUG: title:", title) + fmt.Println("DEBUG: text:", text) re = regexp.MustCompile(`\((\d{4})\)`) yearMatch := re.FindStringSubmatch(title) if len(yearMatch) > 0 {