Feat/Add post processors + refactor project (#37)
* chg: feat: clean known patterns from title
* chg: refactor: remove duplicated code and improve maintainability
* chg: feat: add audio tagging post-processor
* chg: refactor: add generic ParallelMap function
* chg: refactor: move more functions to common locations
* chg: docs: add func docs
api/bludv.go (80 changed lines)
@@ -6,12 +6,10 @@ import (
 	"fmt"
 	"net/http"
 	"net/url"
-	"slices"
 	"strings"
 	"time"
 
 	"github.com/PuerkitoBio/goquery"
-	"github.com/hbollon/go-edlib"
 
 	"github.com/felipemarinho97/torrent-indexer/magnet"
 	"github.com/felipemarinho97/torrent-indexer/schema"
@@ -20,15 +18,19 @@ import (
 )
 
 var bludv = IndexerMeta{
+	Label:       "bludv",
 	URL:         "https://bludv.xyz/",
 	SearchURL:   "?s=",
+	PagePattern: "page/%s",
 }
 
 func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
 	start := time.Now()
+	metadata := bludv
 
 	defer func() {
-		i.metrics.IndexerDuration.WithLabelValues("bludv").Observe(time.Since(start).Seconds())
-		i.metrics.IndexerRequests.WithLabelValues("bludv").Inc()
+		i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
+		i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
 	}()
 
 	ctx := r.Context()
@@ -38,11 +40,11 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
 
 	// URL encode query param
 	q = url.QueryEscape(q)
-	url := bludv.URL
+	url := metadata.URL
 	if page != "" {
-		url = fmt.Sprintf("%spage/%s", url, page)
+		url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
 	} else {
-		url = fmt.Sprintf("%s%s%s", url, bludv.SearchURL, q)
+		url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
 	}
 
 	fmt.Println("URL:>", url)
@@ -53,7 +55,7 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
 		if err != nil {
 			fmt.Println(err)
 		}
-		i.metrics.IndexerErrors.WithLabelValues("bludv").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 	defer resp.Close()
@@ -66,7 +68,7 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
 			fmt.Println(err)
 		}
 
-		i.metrics.IndexerErrors.WithLabelValues("bludv").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 
@@ -77,57 +79,21 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
 		links = append(links, link)
 	})
 
-	var itChan = make(chan []schema.IndexedTorrent)
-	var errChan = make(chan error)
-	indexedTorrents := []schema.IndexedTorrent{}
-	for _, link := range links {
-		go func(link string) {
-			torrents, err := getTorrentsBluDV(ctx, i, link)
-			if err != nil {
-				fmt.Println(err)
-				errChan <- err
-			}
-			itChan <- torrents
-		}(link)
-	}
-
-	for i := 0; i < len(links); i++ {
-		select {
-		case torrents := <-itChan:
-			indexedTorrents = append(indexedTorrents, torrents...)
-		case err := <-errChan:
-			fmt.Println(err)
-		}
-	}
-
-	for i, it := range indexedTorrents {
-		jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
-		qLower := strings.ToLower(q)
-		splitLength := 2
-		indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
-	}
-
-	// remove the ones with zero similarity
-	if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
-		indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool {
-			return it.Similarity > 0
-		})
-	}
-
-	// sort by similarity
-	slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int {
-		return int((j.Similarity - i.Similarity) * 1000)
-	})
+	// extract each torrent link
+	indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) {
+		return getTorrentsBluDV(ctx, i, link)
+	})
 
-	// send to search index
-	go func() {
-		_ = i.search.IndexTorrents(indexedTorrents)
-	}()
+	// Apply post-processors
+	postProcessedTorrents := indexedTorrents
+	for _, processor := range i.postProcessors {
+		postProcessedTorrents = processor(i, r, postProcessedTorrents)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w).Encode(Response{
-		Results: indexedTorrents,
-		Count:   len(indexedTorrents),
+		Results: postProcessedTorrents,
+		Count:   len(postProcessedTorrents),
 	})
 	if err != nil {
 		fmt.Println(err)
@@ -216,7 +182,7 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.In
 		}
 	})
 
-	size = stableUniq(size)
+	size = utils.StableUniq(size)
 
 	var chanIndexedTorrent = make(chan schema.IndexedTorrent)
 
@@ -247,7 +213,7 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.In
 	}
 
 	ixt := schema.IndexedTorrent{
-		Title:         appendAudioISO639_2Code(releaseTitle, magnetAudio),
+		Title:         releaseTitle,
 		OriginalTitle: title,
 		Details:       link,
 		Year:          year,
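
Note: the handlers now build the page URL in two Sprintf steps — the inner call joins the base URL with PagePattern into a format string, and the outer call fills the page number into that combined template. A minimal standalone sketch of the same composition:

    package main

    import "fmt"

    func main() {
        base, pattern, page := "https://bludv.xyz/", "page/%s", "2"
        // step 1: join base URL and pagination pattern into one format string
        template := fmt.Sprintf("%s%s", base, pattern) // "https://bludv.xyz/page/%s"
        // step 2: substitute the page number into the combined template
        fmt.Println(fmt.Sprintf(template, page)) // https://bludv.xyz/page/2
    }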
@@ -1,15 +1,12 @@
 package handler
 
 import (
-	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
-	"io"
 	"net/http"
 	"net/url"
 	"regexp"
-	"slices"
 	"strings"
 	"time"
 
@@ -18,12 +15,13 @@ import (
 	"github.com/felipemarinho97/torrent-indexer/schema"
 	goscrape "github.com/felipemarinho97/torrent-indexer/scrape"
 	"github.com/felipemarinho97/torrent-indexer/utils"
-	"github.com/hbollon/go-edlib"
 )
 
 var comando = IndexerMeta{
+	Label:       "comando",
 	URL:         "https://comando.la/",
 	SearchURL:   "?s=",
+	PagePattern: "page/%s",
 }
 
 var replacer = strings.NewReplacer(
@@ -43,9 +41,11 @@ var replacer = strings.NewReplacer(
 
 func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) {
 	start := time.Now()
+	metadata := comando
 
 	defer func() {
-		i.metrics.IndexerDuration.WithLabelValues("comando").Observe(time.Since(start).Seconds())
-		i.metrics.IndexerRequests.WithLabelValues("comando").Inc()
+		i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
+		i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
 	}()
 
 	ctx := r.Context()
@@ -55,11 +55,11 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request)
 
 	// URL encode query param
 	q = url.QueryEscape(q)
-	url := comando.URL
+	url := metadata.URL
 	if q != "" {
-		url = fmt.Sprintf("%s%s%s", url, comando.SearchURL, q)
+		url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
 	} else if page != "" {
-		url = fmt.Sprintf("%spage/%s", url, page)
+		url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
 	}
 
 	fmt.Println("URL:>", url)
@@ -70,7 +70,7 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request)
 		if err != nil {
 			fmt.Println(err)
 		}
-		i.metrics.IndexerErrors.WithLabelValues("comando").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 	defer resp.Close()
@@ -82,7 +82,7 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request)
 	if err != nil {
 		fmt.Println(err)
 	}
-	i.metrics.IndexerErrors.WithLabelValues("comando").Inc()
+	i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 	return
 }
 
@@ -93,57 +93,21 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request)
 		links = append(links, link)
 	})
 
-	var itChan = make(chan []schema.IndexedTorrent)
-	var errChan = make(chan error)
-	indexedTorrents := []schema.IndexedTorrent{}
-	for _, link := range links {
-		go func(link string) {
-			torrents, err := getTorrents(ctx, i, link)
-			if err != nil {
-				fmt.Println(err)
-				errChan <- err
-			}
-			itChan <- torrents
-		}(link)
-	}
-
-	for i := 0; i < len(links); i++ {
-		select {
-		case torrents := <-itChan:
-			indexedTorrents = append(indexedTorrents, torrents...)
-		case err := <-errChan:
-			fmt.Println(err)
-		}
-	}
-
-	for i, it := range indexedTorrents {
-		jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
-		qLower := strings.ToLower(q)
-		splitLength := 2
-		indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
-	}
-
-	// remove the ones with zero similarity
-	if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
-		indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool {
-			return it.Similarity > 0
-		})
-	}
-
-	// sort by similarity
-	slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int {
-		return int((j.Similarity - i.Similarity) * 1000)
-	})
+	// extract each torrent link
+	indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) {
+		return getTorrents(ctx, i, link)
+	})
 
-	// send to search index
-	go func() {
-		_ = i.search.IndexTorrents(indexedTorrents)
-	}()
+	// Apply post-processors
+	postProcessedTorrents := indexedTorrents
+	for _, processor := range i.postProcessors {
+		postProcessedTorrents = processor(i, r, postProcessedTorrents)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w).Encode(Response{
-		Results: indexedTorrents,
-		Count:   len(indexedTorrents),
+		Results: postProcessedTorrents,
+		Count:   len(postProcessedTorrents),
 	})
 	if err != nil {
 		fmt.Println(err)
@@ -215,7 +179,7 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed
 		}
 	})
 
-	size = stableUniq(size)
+	size = utils.StableUniq(size)
 
 	var chanIndexedTorrent = make(chan schema.IndexedTorrent)
 
@@ -246,7 +210,7 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed
 	}
 
 	ixt := schema.IndexedTorrent{
-		Title:         appendAudioISO639_2Code(releaseTitle, magnetAudio),
+		Title:         releaseTitle,
 		OriginalTitle: title,
 		Details:       link,
 		Year:          year,
@@ -293,38 +257,6 @@ func parseLocalizedDate(datePublished string) (time.Time, error) {
 	return time.Time{}, nil
 }
 
-func stableUniq(s []string) []string {
-	var uniq []map[string]interface{}
-	m := make(map[string]map[string]interface{})
-	for i, v := range s {
-		m[v] = map[string]interface{}{
-			"v": v,
-			"i": i,
-		}
-	}
-	// to order by index
-	for _, v := range m {
-		uniq = append(uniq, v)
-	}
-
-	// sort by index
-	for i := 0; i < len(uniq); i++ {
-		for j := i + 1; j < len(uniq); j++ {
-			if uniq[i]["i"].(int) > uniq[j]["i"].(int) {
-				uniq[i], uniq[j] = uniq[j], uniq[i]
-			}
-		}
-	}
-
-	// get only values
-	var uniqValues []string
-	for _, v := range uniq {
-		uniqValues = append(uniqValues, v["v"].(string))
-	}
-
-	return uniqValues
-}
-
 func processTitle(title string, a []schema.Audio) string {
 	// remove ' - Donwload' from title
 	title = strings.Replace(title, " – Download", "", -1)
@@ -337,38 +269,3 @@ func processTitle(title string, a []schema.Audio) string {
 
 	return title
 }
-
-func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) {
-	// try to get from redis first
-	docCache, err := i.redis.Get(ctx, link)
-	if err == nil {
-		i.metrics.CacheHits.WithLabelValues("document_body").Inc()
-		fmt.Printf("returning from long-lived cache: %s\n", link)
-		return goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(docCache)))
-	}
-	defer i.metrics.CacheMisses.WithLabelValues("document_body").Inc()
-
-	resp, err := i.requester.GetDocument(ctx, link)
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Close()
-
-	body, err := io.ReadAll(resp)
-	if err != nil {
-		return nil, err
-	}
-
-	// set cache
-	err = i.redis.Set(ctx, link, body)
-	if err != nil {
-		fmt.Println(err)
-	}
-
-	doc, err := goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(body)))
-	if err != nil {
-		return nil, err
-	}
-
-	return doc, nil
-}
@@ -7,12 +7,10 @@ import (
 	"net/http"
 	"net/url"
 	"regexp"
-	"slices"
 	"strings"
 	"time"
 
 	"github.com/PuerkitoBio/goquery"
-	"github.com/hbollon/go-edlib"
 
 	"github.com/felipemarinho97/torrent-indexer/magnet"
 	"github.com/felipemarinho97/torrent-indexer/schema"
@@ -21,17 +19,21 @@ import (
 )
 
 var comandohds = IndexerMeta{
+	Label:       "comandohds",
 	URL:         "https://comandohds.org/",
 	SearchURL:   "?s=",
+	PagePattern: "page/%s",
 }
 
 var title_re = regexp.MustCompile(`^[(Filme)|(Série)\s]+`)
 
 func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Request) {
 	start := time.Now()
+	metadata := comandohds
 
 	defer func() {
-		i.metrics.IndexerDuration.WithLabelValues("comandohds").Observe(time.Since(start).Seconds())
-		i.metrics.IndexerRequests.WithLabelValues("comandohds").Inc()
+		i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
+		i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
 	}()
 
 	ctx := r.Context()
@@ -41,11 +43,11 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques
 
 	// URL encode query param
 	q = url.QueryEscape(q)
-	url := comandohds.URL
+	url := metadata.URL
 	if q != "" {
-		url = fmt.Sprintf("%s%s%s", url, comandohds.SearchURL, q)
+		url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
 	} else if page != "" {
-		url = fmt.Sprintf("%spage/%s", url, page)
+		url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
 	}
 
 	fmt.Println("URL:>", url)
@@ -56,7 +58,7 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques
 		if err != nil {
 			fmt.Println(err)
 		}
-		i.metrics.IndexerErrors.WithLabelValues("comandohds").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 	defer resp.Close()
@@ -69,7 +71,7 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques
 			fmt.Println(err)
 		}
 
-		i.metrics.IndexerErrors.WithLabelValues("comandohds").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 
@@ -79,57 +81,21 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques
 		links = append(links, link)
 	})
 
-	var itChan = make(chan []schema.IndexedTorrent)
-	var errChan = make(chan error)
-	indexedTorrents := []schema.IndexedTorrent{}
-	for _, link := range links {
-		go func(link string) {
-			torrents, err := getTorrentsComandoHDs(ctx, i, link)
-			if err != nil {
-				fmt.Println(err)
-				errChan <- err
-			}
-			itChan <- torrents
-		}(link)
-	}
-
-	for i := 0; i < len(links); i++ {
-		select {
-		case torrents := <-itChan:
-			indexedTorrents = append(indexedTorrents, torrents...)
-		case err := <-errChan:
-			fmt.Println(err)
-		}
-	}
-
-	for i, it := range indexedTorrents {
-		jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
-		qLower := strings.ToLower(q)
-		splitLength := 2
-		indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
-	}
-
-	// remove the ones with zero similarity
-	if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
-		indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool {
-			return it.Similarity > 0
-		})
-	}
-
-	// sort by similarity
-	slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int {
-		return int((j.Similarity - i.Similarity) * 1000)
-	})
+	// extract each torrent link
+	indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) {
+		return getTorrentsComandoHDs(ctx, i, link)
+	})
 
-	// send to search index
-	go func() {
-		_ = i.search.IndexTorrents(indexedTorrents)
-	}()
+	// Apply post-processors
+	postProcessedTorrents := indexedTorrents
+	for _, processor := range i.postProcessors {
+		postProcessedTorrents = processor(i, r, postProcessedTorrents)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w).Encode(Response{
-		Results: indexedTorrents,
-		Count:   len(indexedTorrents),
+		Results: postProcessedTorrents,
+		Count:   len(postProcessedTorrents),
 	})
 	if err != nil {
 		fmt.Println(err)
@@ -193,7 +159,7 @@ func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]sche
 		}
 	})
 
-	size = stableUniq(size)
+	size = utils.StableUniq(size)
 
 	var chanIndexedTorrent = make(chan schema.IndexedTorrent)
 
@@ -228,7 +194,7 @@ func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]sche
 	}
 
 	ixt := schema.IndexedTorrent{
-		Title:         appendAudioISO639_2Code(releaseTitle, magnetAudio),
+		Title:         releaseTitle,
 		OriginalTitle: title,
 		Details:       link,
 		Year:          year,
@@ -1,7 +1,10 @@
 package handler
 
 import (
+	"bytes"
+	"context"
 	"fmt"
+	"io"
 	"regexp"
 	"slices"
 	"strings"
@@ -11,6 +14,43 @@ import (
 	"github.com/felipemarinho97/torrent-indexer/schema"
 )
 
+// getDocument retrieves a document from the cache or makes a request to get it.
+// It first checks the Redis cache for the document body.
+func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) {
+	// try to get from redis first
+	docCache, err := i.redis.Get(ctx, link)
+	if err == nil {
+		i.metrics.CacheHits.WithLabelValues("document_body").Inc()
+		fmt.Printf("returning from long-lived cache: %s\n", link)
+		return goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(docCache)))
+	}
+	defer i.metrics.CacheMisses.WithLabelValues("document_body").Inc()
+
+	resp, err := i.requester.GetDocument(ctx, link)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Close()
+
+	body, err := io.ReadAll(resp)
+	if err != nil {
+		return nil, err
+	}
+
+	// set cache
+	err = i.redis.Set(ctx, link, body)
+	if err != nil {
+		fmt.Println(err)
+	}
+
+	doc, err := goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(body)))
+	if err != nil {
+		return nil, err
+	}
+
+	return doc, nil
+}
+
 func getPublishedDateFromMeta(document *goquery.Document) time.Time {
 	var date time.Time
 	//<meta property="article:published_time" content="2019-08-23T13:20:57+00:00">
api/index.go (19 changed lines)
@@ -17,13 +17,14 @@ type Indexer struct {
 	metrics        *monitoring.Metrics
 	requester      *requester.Requster
 	search         *meilisearch.SearchIndexer
+	postProcessors []PostProcessorFunc
 }
 
 type IndexerMeta struct {
-	URL         string
-	SearchURL   string
-	// pattern for pagination, e.g. "page/%s"
-	PagePattern string
+	Label       string // Label is used for Prometheus metrics and logging. Must be alphanumeric optionally with underscores.
+	URL         string // URL is the base URL of the indexer, e.g. "https://example.com/"
+	SearchURL   string // SearchURL is the base URL for search queries, e.g. "?s="
+	PagePattern string // PagePattern for pagination, e.g. "page/%s"
 }
 
 type Response struct {
@@ -31,12 +32,22 @@ type Response struct {
 	Count int `json:"count"`
 }
 
+type PostProcessorFunc func(*Indexer, *http.Request, []schema.IndexedTorrent) []schema.IndexedTorrent
+
+var GlobalPostProcessors = []PostProcessorFunc{
+	AddSimilarityCheck,   // Jaccard similarity
+	CleanupTitleWebsites, // Remove website names from titles
+	AppendAudioTags,      // Add (brazilian, eng, etc.) audio tags to titles
+	SendToSearchIndexer,  // Send indexed torrents to Meilisearch
+}
+
 func NewIndexers(redis *cache.Redis, metrics *monitoring.Metrics, req *requester.Requster, si *meilisearch.SearchIndexer) *Indexer {
 	return &Indexer{
 		redis:     redis,
 		metrics:   metrics,
 		requester: req,
 		search:    si,
+		postProcessors: GlobalPostProcessors,
 	}
 }
 
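
Note: any function matching PostProcessorFunc can be slotted into GlobalPostProcessors (or a per-indexer slice). A hypothetical sketch of a custom processor — the MinSimilarity name and the 0.1 threshold are illustrative, not part of this change — which would have to run after AddSimilarityCheck so that Similarity is already populated:

    package handler

    import (
        "net/http"

        "github.com/felipemarinho97/torrent-indexer/schema"
    )

    // MinSimilarity is a hypothetical post-processor with the same signature
    // as the built-ins; it drops results below a fixed similarity score.
    func MinSimilarity(_ *Indexer, _ *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent {
        kept := torrents[:0] // filter in place, reusing the backing array
        for _, t := range torrents {
            if t.Similarity > 0.1 { // illustrative threshold
                kept = append(kept, t)
            }
        }
        return kept
    }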
api/post_processors.go (new file, 61 lines)
@@ -0,0 +1,61 @@
+package handler
+
+import (
+	"fmt"
+	"net/http"
+	"slices"
+	"strings"
+
+	"github.com/felipemarinho97/torrent-indexer/schema"
+	"github.com/felipemarinho97/torrent-indexer/utils"
+	"github.com/hbollon/go-edlib"
+)
+
+// CleanupTitleWebsites removes unwanted characters from the title
+func CleanupTitleWebsites(_ *Indexer, _ *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent {
+	for i := range torrents {
+		torrents[i].Title = utils.RemoveKnownWebsites(torrents[i].Title)
+	}
+	return torrents
+}
+
+func AppendAudioTags(_ *Indexer, _ *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent {
+	for i, it := range torrents {
+		torrents[i].Title = appendAudioISO639_2Code(torrents[i].Title, it.Audio)
+	}
+
+	return torrents
+}
+
+// SendToSearchIndexer sends the indexed torrents to the search indexer
+func SendToSearchIndexer(i *Indexer, _ *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent {
+	go func() {
+		_ = i.search.IndexTorrents(torrents)
+	}()
+	return torrents
+}
+
+func AddSimilarityCheck(i *Indexer, r *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent {
+	q := r.URL.Query().Get("q")
+
+	for i, it := range torrents {
+		jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
+		qLower := strings.ToLower(q)
+		splitLength := 2
+		torrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
+	}
+
+	// remove the ones with zero similarity
+	if len(torrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
+		torrents = utils.Filter(torrents, func(it schema.IndexedTorrent) bool {
+			return it.Similarity > 0
+		})
+	}
+
+	// sort by similarity
+	slices.SortFunc(torrents, func(i, j schema.IndexedTorrent) int {
+		return int((j.Similarity - i.Similarity) * 1000)
+	})
+
+	return torrents
+}
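
Note: since each post-processor is a plain function of (indexer, request, results), it can be exercised in isolation. A test-style sketch for AddSimilarityCheck — it only reads the q query parameter, and its *Indexer argument goes unused in the body above, so nil is acceptable here:

    package handler

    import (
        "net/http/httptest"
        "testing"

        "github.com/felipemarinho97/torrent-indexer/schema"
    )

    func TestAddSimilarityCheck(t *testing.T) {
        r := httptest.NewRequest("GET", "/?q=matrix", nil)
        in := []schema.IndexedTorrent{
            {Title: "The.Matrix.1999.1080p", OriginalTitle: "The Matrix"},
            {Title: "Unrelated.Release"},
        }
        out := AddSimilarityCheck(nil, r, in)
        // results come back sorted by descending similarity
        if out[0].OriginalTitle != "The Matrix" {
            t.Fatalf("expected the matching release first, got %q", out[0].Title)
        }
    }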
@@ -7,12 +7,10 @@ import (
 	"net/http"
 	"net/url"
 	"regexp"
-	"slices"
 	"strings"
 	"time"
 
 	"github.com/PuerkitoBio/goquery"
-	"github.com/hbollon/go-edlib"
 
 	"github.com/felipemarinho97/torrent-indexer/magnet"
 	"github.com/felipemarinho97/torrent-indexer/schema"
@@ -21,6 +19,7 @@ import (
 )
 
 var rede_torrent = IndexerMeta{
+	Label:       "rede_torrent",
 	URL:         "https://redetorrent.com/",
 	SearchURL:   "index.php?s=",
 	PagePattern: "%s",
@@ -28,9 +27,11 @@ var rede_torrent = IndexerMeta{
 
 func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Request) {
 	start := time.Now()
+	metadata := rede_torrent
 
 	defer func() {
-		i.metrics.IndexerDuration.WithLabelValues("rede_torrent").Observe(time.Since(start).Seconds())
-		i.metrics.IndexerRequests.WithLabelValues("rede_torrent").Inc()
+		i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
+		i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
 	}()
 
 	ctx := r.Context()
@@ -40,11 +41,11 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque
 
 	// URL encode query param
 	q = url.QueryEscape(q)
-	url := rede_torrent.URL
+	url := metadata.URL
 	if q != "" {
-		url = fmt.Sprintf("%s%s%s", url, rede_torrent.SearchURL, q)
+		url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
 	} else if page != "" {
-		url = fmt.Sprintf(fmt.Sprintf("%s%s", url, rede_torrent.PagePattern), page)
+		url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
 	}
 
 	fmt.Println("URL:>", url)
@@ -55,7 +56,7 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque
 		if err != nil {
 			fmt.Println(err)
 		}
-		i.metrics.IndexerErrors.WithLabelValues("rede_torrent").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 	defer resp.Close()
@@ -68,7 +69,7 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque
 			fmt.Println(err)
 		}
 
-		i.metrics.IndexerErrors.WithLabelValues("rede_torrent").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 
@@ -78,57 +79,21 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque
 		links = append(links, link)
 	})
 
-	var itChan = make(chan []schema.IndexedTorrent)
-	var errChan = make(chan error)
-	indexedTorrents := []schema.IndexedTorrent{}
-	for _, link := range links {
-		go func(link string) {
-			torrents, err := getTorrentsRedeTorrent(ctx, i, link)
-			if err != nil {
-				fmt.Println(err)
-				errChan <- err
-			}
-			itChan <- torrents
-		}(link)
-	}
-
-	for i := 0; i < len(links); i++ {
-		select {
-		case torrents := <-itChan:
-			indexedTorrents = append(indexedTorrents, torrents...)
-		case err := <-errChan:
-			fmt.Println(err)
-		}
-	}
-
-	for i, it := range indexedTorrents {
-		jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
-		qLower := strings.ToLower(q)
-		splitLength := 2
-		indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
-	}
-
-	// remove the ones with zero similarity
-	if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
-		indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool {
-			return it.Similarity > 0
-		})
-	}
-
-	// sort by similarity
-	slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int {
-		return int((j.Similarity - i.Similarity) * 1000)
-	})
+	// extract each torrent link
+	indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) {
+		return getTorrentsRedeTorrent(ctx, i, link)
+	})
 
-	// send to search index
-	go func() {
-		_ = i.search.IndexTorrents(indexedTorrents)
-	}()
+	// Apply post-processors
+	postProcessedTorrents := indexedTorrents
+	for _, processor := range i.postProcessors {
+		postProcessedTorrents = processor(i, r, postProcessedTorrents)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w).Encode(Response{
-		Results: indexedTorrents,
-		Count:   len(indexedTorrents),
+		Results: postProcessedTorrents,
+		Count:   len(postProcessedTorrents),
 	})
 	if err != nil {
 		fmt.Println(err)
@@ -222,7 +187,7 @@ func getTorrentsRedeTorrent(ctx context.Context, i *Indexer, link string) ([]sch
 		}
 	})
 
-	size = stableUniq(size)
+	size = utils.StableUniq(size)
 
 	var chanIndexedTorrent = make(chan schema.IndexedTorrent)
 
@@ -253,7 +218,7 @@ func getTorrentsRedeTorrent(ctx context.Context, i *Indexer, link string) ([]sch
 	}
 
 	ixt := schema.IndexedTorrent{
-		Title:         appendAudioISO639_2Code(releaseTitle, magnetAudio),
+		Title:         releaseTitle,
 		OriginalTitle: title,
 		Details:       link,
 		Year:          year,
@@ -6,12 +6,10 @@ import (
 	"fmt"
 	"net/http"
 	"net/url"
-	"slices"
 	"strings"
 	"time"
 
 	"github.com/PuerkitoBio/goquery"
-	"github.com/hbollon/go-edlib"
 
 	"github.com/felipemarinho97/torrent-indexer/magnet"
 	"github.com/felipemarinho97/torrent-indexer/schema"
@@ -20,15 +18,19 @@ import (
 )
 
 var starck_filmes = IndexerMeta{
+	Label:       "starck_filmes",
 	URL:         "https://www.starckfilmes.online/",
 	SearchURL:   "?s=",
+	PagePattern: "page/%s",
 }
 
 func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Request) {
 	start := time.Now()
+	metadata := starck_filmes
 
 	defer func() {
-		i.metrics.IndexerDuration.WithLabelValues("starck_filmes").Observe(time.Since(start).Seconds())
-		i.metrics.IndexerRequests.WithLabelValues("starck_filmes").Inc()
+		i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
+		i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
 	}()
 
 	ctx := r.Context()
@@ -38,11 +40,11 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
 
 	// URL encode query param
 	q = url.QueryEscape(q)
-	url := starck_filmes.URL
+	url := metadata.URL
 	if q != "" {
-		url = fmt.Sprintf("%s%s%s", url, starck_filmes.SearchURL, q)
+		url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
 	} else if page != "" {
-		url = fmt.Sprintf("%spage/%s", url, page)
+		url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
 	}
 
 	fmt.Println("URL:>", url)
@@ -53,7 +55,7 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
 		if err != nil {
 			fmt.Println(err)
 		}
-		i.metrics.IndexerErrors.WithLabelValues("starck_filmes").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 	defer resp.Close()
@@ -66,7 +68,7 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
 			fmt.Println(err)
 		}
 
-		i.metrics.IndexerErrors.WithLabelValues("starck_filmes").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 
@@ -76,57 +78,21 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
 		links = append(links, link)
 	})
 
-	var itChan = make(chan []schema.IndexedTorrent)
-	var errChan = make(chan error)
-	indexedTorrents := []schema.IndexedTorrent{}
-	for _, link := range links {
-		go func(link string) {
-			torrents, err := getTorrentStarckFilmes(ctx, i, link)
-			if err != nil {
-				fmt.Println(err)
-				errChan <- err
-			}
-			itChan <- torrents
-		}(link)
-	}
-
-	for i := 0; i < len(links); i++ {
-		select {
-		case torrents := <-itChan:
-			indexedTorrents = append(indexedTorrents, torrents...)
-		case err := <-errChan:
-			fmt.Println(err)
-		}
-	}
-
-	for i, it := range indexedTorrents {
-		jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
-		qLower := strings.ToLower(q)
-		splitLength := 2
-		indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
-	}
-
-	// remove the ones with zero similarity
-	if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
-		indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool {
-			return it.Similarity > 0
-		})
-	}
-
-	// sort by similarity
-	slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int {
-		return int((j.Similarity - i.Similarity) * 1000)
-	})
+	// extract each torrent link
+	indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) {
+		return getTorrentStarckFilmes(ctx, i, link)
+	})
 
-	// send to search index
-	go func() {
-		_ = i.search.IndexTorrents(indexedTorrents)
-	}()
+	// Apply post-processors
+	postProcessedTorrents := indexedTorrents
+	for _, processor := range i.postProcessors {
+		postProcessedTorrents = processor(i, r, postProcessedTorrents)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w).Encode(Response{
-		Results: indexedTorrents,
-		Count:   len(indexedTorrents),
+		Results: postProcessedTorrents,
+		Count:   len(postProcessedTorrents),
 	})
 	if err != nil {
 		fmt.Println(err)
@@ -184,7 +150,7 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch
 	// TODO: find any link from imdb
 	imdbLink := ""
 
-	size = stableUniq(size)
+	size = utils.StableUniq(size)
 
 	var chanIndexedTorrent = make(chan schema.IndexedTorrent)
 
@@ -228,7 +194,7 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch
 	}
 
 	ixt := schema.IndexedTorrent{
-		Title:         appendAudioISO639_2Code(releaseTitle, magnetAudio),
+		Title:         releaseTitle,
 		OriginalTitle: title,
 		Details:       link,
 		Year:          year,
@@ -6,12 +6,10 @@ import (
 	"fmt"
 	"net/http"
 	"net/url"
-	"slices"
 	"strings"
 	"time"
 
 	"github.com/PuerkitoBio/goquery"
-	"github.com/hbollon/go-edlib"
 
 	"github.com/felipemarinho97/torrent-indexer/magnet"
 	"github.com/felipemarinho97/torrent-indexer/schema"
@@ -20,15 +18,19 @@ import (
 )
 
 var torrent_dos_filmes = IndexerMeta{
+	Label:       "torrent_dos_filmes",
 	URL:         "https://torrentdosfilmes.se/",
 	SearchURL:   "?s=",
+	PagePattern: "page/%s",
 }
 
 func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.Request) {
 	start := time.Now()
+	metadata := torrent_dos_filmes
 
 	defer func() {
-		i.metrics.IndexerDuration.WithLabelValues("torrent_dos_filmes").Observe(time.Since(start).Seconds())
-		i.metrics.IndexerRequests.WithLabelValues("torrent_dos_filmes").Inc()
+		i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
+		i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
 	}()
 
 	ctx := r.Context()
@@ -38,11 +40,11 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.
 
 	// URL encode query param
 	q = url.QueryEscape(q)
-	url := torrent_dos_filmes.URL
+	url := metadata.URL
 	if q != "" {
-		url = fmt.Sprintf("%s%s%s", url, torrent_dos_filmes.SearchURL, q)
+		url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
 	} else if page != "" {
-		url = fmt.Sprintf("%spage/%s", url, page)
+		url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
 	}
 
 	fmt.Println("URL:>", url)
@@ -53,7 +55,7 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.
 		if err != nil {
 			fmt.Println(err)
 		}
-		i.metrics.IndexerErrors.WithLabelValues("torrent_dos_filmes").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 	defer resp.Close()
@@ -66,7 +68,7 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.
 			fmt.Println(err)
 		}
 
-		i.metrics.IndexerErrors.WithLabelValues("torrent_dos_filmes").Inc()
+		i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
 		return
 	}
 
@@ -76,57 +78,21 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.
 		links = append(links, link)
 	})
 
-	var itChan = make(chan []schema.IndexedTorrent)
-	var errChan = make(chan error)
-	indexedTorrents := []schema.IndexedTorrent{}
-	for _, link := range links {
-		go func(link string) {
-			torrents, err := getTorrentsTorrentDosFilmes(ctx, i, link)
-			if err != nil {
-				fmt.Println(err)
-				errChan <- err
-			}
-			itChan <- torrents
-		}(link)
-	}
-
-	for i := 0; i < len(links); i++ {
-		select {
-		case torrents := <-itChan:
-			indexedTorrents = append(indexedTorrents, torrents...)
-		case err := <-errChan:
-			fmt.Println(err)
-		}
-	}
-
-	for i, it := range indexedTorrents {
-		jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
-		qLower := strings.ToLower(q)
-		splitLength := 2
-		indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
-	}
-
-	// remove the ones with zero similarity
-	if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
-		indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool {
-			return it.Similarity > 0
-		})
-	}
-
-	// sort by similarity
-	slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int {
-		return int((j.Similarity - i.Similarity) * 1000)
-	})
+	// extract each torrent link
+	indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) {
+		return getTorrentsTorrentDosFilmes(ctx, i, link)
+	})
 
-	// send to search index
-	go func() {
-		_ = i.search.IndexTorrents(indexedTorrents)
-	}()
+	// Apply post-processors
+	postProcessedTorrents := indexedTorrents
+	for _, processor := range i.postProcessors {
+		postProcessedTorrents = processor(i, r, postProcessedTorrents)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	err = json.NewEncoder(w).Encode(Response{
-		Results: indexedTorrents,
-		Count:   len(indexedTorrents),
+		Results: postProcessedTorrents,
+		Count:   len(postProcessedTorrents),
 	})
 	if err != nil {
 		fmt.Println(err)
@@ -191,7 +157,7 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) (
 		}
 	})
 
-	size = stableUniq(size)
+	size = utils.StableUniq(size)
 
 	var chanIndexedTorrent = make(chan schema.IndexedTorrent)
 
@@ -222,7 +188,7 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) (
 	}
 
 	ixt := schema.IndexedTorrent{
-		Title:         appendAudioISO639_2Code(releaseTitle, magnetAudio),
+		Title:         releaseTitle,
 		OriginalTitle: title,
 		Details:       link,
 		Year:          year,
@@ -25,6 +25,7 @@ const (
 	AudioMandarin3 = "Chines"
 	AudioRussian   = "Russo"
 	AudioSwedish   = "Sueco"
+	AudioSwedish2  = "Suéco"
 	AudioUkrainian = "Ucraniano"
 	AudioPolish    = "Polaco"
 	AudioPolish2   = "Polonês"
@@ -34,9 +35,32 @@ const (
 	AudioTurkish = "Turco"
 	AudioHindi   = "Hindi"
 	AudioFarsi   = "Persa"
+	AudioFarsi2  = "Farsi"
+	AudioFarsi3  = "Iraniano"
 	AudioMalay   = "Malaio"
 	AudioDutch   = "Holandês"
 	AudioDutch2  = "Holandes"
+	AudioFinnish     = "Finlandês"
+	AudioFinnish2    = "Finlandes"
+	AudioDanish      = "Dinamarquês"
+	AudioDanish2     = "Dinamarques"
+	AudioNorwegian   = "Norueguês"
+	AudioNorwegian2  = "Noruegues"
+	AudioIcelandic   = "Islandês"
+	AudioIcelandic2  = "Islandes"
+	AudioGreek       = "Grego"
+	AudioArabic      = "Árabe"
+	AudioArabic2     = "Arabe"
+	AudioHebrew      = "Hebraico"
+	AudioVietnamese  = "Vietnamita"
+	AudioIndonesian  = "Indonésio"
+	AudioIndonesian2 = "Indonesio"
+	AudioFilipino    = "Filipino"
+	AudioBengali     = "Bengali"
+	AudioTamil       = "Tamil"
+	AudioTelugu      = "Telugu"
+	AudioGujarati    = "Gujarati"
+	AudioMarathi     = "Marathi"
 )
 
 var AudioList = []Audio{
@@ -60,6 +84,7 @@ var AudioList = []Audio{
 	AudioMandarin3,
 	AudioRussian,
 	AudioSwedish,
+	AudioSwedish2,
 	AudioUkrainian,
 	AudioPolish,
 	AudioPolish2,
@@ -69,9 +94,32 @@ var AudioList = []Audio{
 	AudioTurkish,
 	AudioHindi,
 	AudioFarsi,
+	AudioFarsi2,
+	AudioFarsi3,
 	AudioMalay,
 	AudioDutch,
 	AudioDutch2,
+	AudioFinnish,
+	AudioFinnish2,
+	AudioDanish,
+	AudioDanish2,
+	AudioNorwegian,
+	AudioNorwegian2,
+	AudioIcelandic,
+	AudioIcelandic2,
+	AudioGreek,
+	AudioArabic,
+	AudioArabic2,
+	AudioHebrew,
+	AudioVietnamese,
+	AudioIndonesian,
+	AudioIndonesian2,
+	AudioFilipino,
+	AudioBengali,
+	AudioTamil,
+	AudioTelugu,
+	AudioGujarati,
+	AudioMarathi,
 }
 
 func (a Audio) String() string {
@@ -129,6 +177,8 @@ func (a Audio) toTag() string {
 		return "rus"
 	case AudioSwedish:
 		return "swe"
+	case AudioSwedish2:
+		return "swe"
 	case AudioUkrainian:
 		return "ukr"
 	case AudioPolish:
@@ -147,12 +197,58 @@ func (a Audio) toTag() string {
 		return "hin"
 	case AudioFarsi:
 		return "fas"
+	case AudioFarsi2:
+		return "fas"
+	case AudioFarsi3:
+		return "fas"
 	case AudioMalay:
 		return "msa"
 	case AudioDutch:
 		return "nld"
 	case AudioDutch2:
 		return "nld"
+	case AudioFinnish:
+		return "fin"
+	case AudioFinnish2:
+		return "fin"
+	case AudioDanish:
+		return "dan"
+	case AudioDanish2:
+		return "dan"
+	case AudioNorwegian:
+		return "nor"
+	case AudioNorwegian2:
+		return "nor"
+	case AudioIcelandic:
+		return "isl"
+	case AudioIcelandic2:
+		return "isl"
+	case AudioGreek:
+		return "ell"
+	case AudioArabic:
+		return "ara"
+	case AudioArabic2:
+		return "ara"
+	case AudioHebrew:
+		return "heb"
+	case AudioVietnamese:
+		return "vie"
+	case AudioIndonesian:
+		return "ind"
+	case AudioIndonesian2:
+		return "ind"
+	case AudioFilipino:
+		return "fil"
+	case AudioBengali:
+		return "ben"
+	case AudioTamil:
+		return "tam"
+	case AudioTelugu:
+		return "tel"
+	case AudioGujarati:
+		return "guj"
+	case AudioMarathi:
+		return "mar"
 	default:
 		return ""
 	}
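
Note: the accented/unaccented pairs above (e.g. "Dinamarquês"/"Dinamarques") exist because source pages spell language names inconsistently, and every variant collapses to a single ISO 639-2 tag in toTag(). A rough, hypothetical sketch of matching a scraped description against the exported AudioList — the detectAudio helper is illustrative and not part of this diff:

    // detectAudio returns every known language name found in a page description,
    // relying on the accent variants above to catch inconsistent spellings.
    func detectAudio(description string) []schema.Audio {
        var found []schema.Audio
        for _, a := range schema.AudioList {
            // assumes Audio's underlying type is string, as the constants suggest
            if strings.Contains(description, string(a)) {
                found = append(found, a)
            }
        }
        return found
    }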
@@ -1,10 +1,13 @@
 package utils
 
 import (
+	"fmt"
 	"strings"
 
 	"golang.org/x/net/html"
 )
 
+// Filter filters a slice based on a predicate function.
 func Filter[A any](arr []A, f func(A) bool) []A {
 	var res []A
 	res = make([]A, 0)
@@ -16,6 +19,71 @@ func Filter[A any](arr []A, f func(A) bool) []A {
 	return res
 }
 
+// ParallelMap applies a function to each item in the iterable concurrently
+// and returns a slice of results. It can handle errors by passing an error handler function.
+func ParallelMap[T any, R any](iterable []T, mapper func(item T) ([]R, error), errHandler ...func(error)) []R {
+	var itChan = make(chan []R)
+	var errChan = make(chan error)
+	mappedItems := []R{}
+	for _, link := range iterable {
+		go func(link T) {
+			items, err := mapper(link)
+			if err != nil {
+				errChan <- err
+			}
+			itChan <- items
+		}(link)
+	}
+
+	for range iterable {
+		select {
+		case items := <-itChan:
+			mappedItems = append(mappedItems, items...)
+		case err := <-errChan:
+			for _, handler := range errHandler {
+				handler(err)
+			}
+			if len(errHandler) == 0 {
+				fmt.Println(err)
+			}
+		}
+	}
+	return mappedItems
+}
+
+// StableUniq removes duplicates from a slice while maintaining the order of elements.
+func StableUniq(s []string) []string {
+	var uniq []map[string]interface{}
+	m := make(map[string]map[string]interface{})
+	for i, v := range s {
+		m[v] = map[string]interface{}{
+			"v": v,
+			"i": i,
+		}
+	}
+	// to order by index
+	for _, v := range m {
+		uniq = append(uniq, v)
+	}
+
+	// sort by index
+	for i := 0; i < len(uniq); i++ {
+		for j := i + 1; j < len(uniq); j++ {
+			if uniq[i]["i"].(int) > uniq[j]["i"].(int) {
+				uniq[i], uniq[j] = uniq[j], uniq[i]
+			}
+		}
+	}
+
+	// get only values
+	var uniqValues []string
+	for _, v := range uniq {
+		uniqValues = append(uniqValues, v["v"].(string))
+	}
+
+	return uniqValues
+}
+
 func IsValidHTML(input string) bool {
 	r := strings.NewReader(input)
 	_, err := html.Parse(r)
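
Note: ParallelMap fans each input out to its own goroutine and flattens the per-item slices into one result; completion order is not deterministic, so callers needing a stable order must sort afterwards. A minimal usage sketch with an explicit error handler:

    package main

    import (
        "fmt"

        "github.com/felipemarinho97/torrent-indexer/utils"
    )

    func main() {
        // each mapper call may return zero or more results; all slices are flattened
        squares := utils.ParallelMap([]int{1, 2, 3}, func(n int) ([]int, error) {
            return []int{n * n}, nil
        }, func(err error) {
            fmt.Println("mapper failed:", err) // optional handler replaces the default print
        })
        fmt.Println(squares) // e.g. [4 1 9] — order depends on goroutine scheduling
    }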
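Note: StableUniq keys its map on the value, so a repeated entry keeps the index of its last occurrence; "maintaining the order" in the doc comment means the order of those final positions. A quick check:

    fmt.Println(utils.StableUniq([]string{"720p", "1080p", "720p"}))
    // prints [1080p 720p]: the repeated "720p" is ordered by its last index, not its first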
utils/website.go (new file, 97 lines)
@@ -0,0 +1,97 @@
+package utils
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"sync"
+)
+
+var commonTLDs = []string{
+	".com",
+	".net",
+	".org",
+	".info",
+	".biz",
+	".co",
+	".io",
+	".xyz",
+	".me",
+	".tv",
+	".cc",
+	".us",
+	".online",
+	".site",
+	".la",
+	".se",
+	".to",
+}
+
+var commonSubdomains = []string{
+	"", // no prefix
+	"www.",
+}
+
+var commonWebsiteSLDs = []string{
+	"bludv",
+	"torrentdosfilmes",
+	"comando",
+	"comandotorrents",
+	"comandohds",
+	"redetorrent",
+	"torrenting",
+	"baixarfilmesdubladosviatorrent",
+	"hidratorrents",
+	"wolverdonfilmes",
+	"starckfilmes",
+	"rapidotorrents",
+	"sitedetorrents",
+	"vamostorrent",
+	"AZTORRENTS",
+}
+
+var websitePatterns = []string{
+	`\[\s*ACESSE\s+%s\s*\]`,
+	`\[?\s*%s(\s*\])?`,
+}
+
+var regexesOnce sync.Once
+var regexes []*regexp.Regexp
+
+func getRegexes() []*regexp.Regexp {
+	regexesOnce.Do(func() {
+		var websites strings.Builder
+		websites.WriteString("(?i)(")
+		for _, prefix := range commonSubdomains {
+			for _, name := range commonWebsiteSLDs {
+				for _, tld := range commonTLDs {
+					websites.WriteString(fmt.Sprintf("%s%s%s|", prefix, name, tld))
+				}
+			}
+		}
+		// remove the last pipe character
+		websites.WriteString(")")
+
+		websitesStr := websites.String()
+		websitesStr = strings.Replace(websitesStr, "|)", ")", 1)
+
+		for _, pattern := range websitePatterns {
+			regexes = append(regexes, regexp.MustCompile(fmt.Sprintf(pattern, websitesStr)))
+		}
+	})
+	return regexes
+}
+
+// RemoveKnownWebsites removes known website patterns from the title.
+// It uses a set of common prefixes, names, and TLDs to identify and remove
+// website references from the title.
+// It also removes any common patterns like "[ ACESSE bludv.com ]" or
+// "[ bludv.se ]" or "bludv.xyz".
+func RemoveKnownWebsites(title string) string {
+	regexes := getRegexes()
+	for _, re := range regexes {
+		title = re.ReplaceAllString(title, "")
+	}
+	title = strings.TrimSpace(title)
+	return title
+}
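
Note: getRegexes builds one case-insensitive alternation over every subdomain × name × TLD combination and compiles it into the two patterns above exactly once, behind sync.Once. A minimal usage sketch:

    package main

    import (
        "fmt"

        "github.com/felipemarinho97/torrent-indexer/utils"
    )

    func main() {
        // the bracketed "ACESSE" banner matches the first pattern
        fmt.Println(utils.RemoveKnownWebsites("The.Matrix.1999.1080p [ ACESSE bludv.xyz ]"))
        // a bare known domain matches the second pattern
        fmt.Println(utils.RemoveKnownWebsites("comando.la Some.Show.S01E01"))
        // output:
        // The.Matrix.1999.1080p
        // Some.Show.S01E01
    }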