Feat/Add post processors + refactor project (#37)

* chg: feat: clean known patterns from title

* chg: refactor: remove duplicated code, and improve maintainability

* chg: feat: add audio tagging post-processor

* chg: refactor: add generic parallelMap function

* chg: refactor: move more functions to common locations

* chg: docs: add func docs
This commit is contained in:
2025-07-24 01:03:38 -03:00
committed by GitHub
parent 6eba15d52a
commit 455f734c8a
12 changed files with 532 additions and 433 deletions

View File

@@ -6,12 +6,10 @@ import (
"fmt"
"net/http"
"net/url"
"slices"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/hbollon/go-edlib"
"github.com/felipemarinho97/torrent-indexer/magnet"
"github.com/felipemarinho97/torrent-indexer/schema"
@@ -20,15 +18,19 @@ import (
)
var starck_filmes = IndexerMeta{
URL: "https://www.starckfilmes.online/",
SearchURL: "?s=",
Label: "starck_filmes",
URL: "https://www.starckfilmes.online/",
SearchURL: "?s=",
PagePattern: "page/%s",
}
func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Request) {
start := time.Now()
metadata := starck_filmes
defer func() {
i.metrics.IndexerDuration.WithLabelValues("starck_filmes").Observe(time.Since(start).Seconds())
i.metrics.IndexerRequests.WithLabelValues("starck_filmes").Inc()
i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds())
i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc()
}()
ctx := r.Context()
@@ -38,11 +40,11 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
// URL encode query param
q = url.QueryEscape(q)
url := starck_filmes.URL
url := metadata.URL
if q != "" {
url = fmt.Sprintf("%s%s%s", url, starck_filmes.SearchURL, q)
url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q)
} else if page != "" {
url = fmt.Sprintf("%spage/%s", url, page)
url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page)
}
fmt.Println("URL:>", url)
@@ -53,7 +55,7 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
if err != nil {
fmt.Println(err)
}
i.metrics.IndexerErrors.WithLabelValues("starck_filmes").Inc()
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
return
}
defer resp.Close()
@@ -66,7 +68,7 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
fmt.Println(err)
}
i.metrics.IndexerErrors.WithLabelValues("starck_filmes").Inc()
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc()
return
}
@@ -76,57 +78,21 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
links = append(links, link)
})
var itChan = make(chan []schema.IndexedTorrent)
var errChan = make(chan error)
indexedTorrents := []schema.IndexedTorrent{}
for _, link := range links {
go func(link string) {
torrents, err := getTorrentStarckFilmes(ctx, i, link)
if err != nil {
fmt.Println(err)
errChan <- err
}
itChan <- torrents
}(link)
}
for i := 0; i < len(links); i++ {
select {
case torrents := <-itChan:
indexedTorrents = append(indexedTorrents, torrents...)
case err := <-errChan:
fmt.Println(err)
}
}
for i, it := range indexedTorrents {
jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
qLower := strings.ToLower(q)
splitLength := 2
indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
}
// remove the ones with zero similarity
if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool {
return it.Similarity > 0
})
}
// sort by similarity
slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int {
return int((j.Similarity - i.Similarity) * 1000)
// extract each torrent link
indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentStarckFilmes(ctx, i, link)
})
// send to search index
go func() {
_ = i.search.IndexTorrents(indexedTorrents)
}()
// Apply post-processors
postProcessedTorrents := indexedTorrents
for _, processor := range i.postProcessors {
postProcessedTorrents = processor(i, r, postProcessedTorrents)
}
w.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w).Encode(Response{
Results: indexedTorrents,
Count: len(indexedTorrents),
Results: postProcessedTorrents,
Count: len(postProcessedTorrents),
})
if err != nil {
fmt.Println(err)
@@ -184,7 +150,7 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch
// TODO: find any link from imdb
imdbLink := ""
size = stableUniq(size)
size = utils.StableUniq(size)
var chanIndexedTorrent = make(chan schema.IndexedTorrent)
@@ -228,7 +194,7 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch
}
ixt := schema.IndexedTorrent{
Title: appendAudioISO639_2Code(releaseTitle, magnetAudio),
Title: releaseTitle,
OriginalTitle: title,
Details: link,
Year: year,