Feat/Jaccard Similarity (#5)

* new: feat: add sorting and filtering by similarity

* chg: fix: imdb link retrieval

* chg: fix: do not filter when queryis empty
This commit is contained in:
2024-03-10 14:58:15 -03:00
committed by GitHub
parent 322bb34ebb
commit 268ece5650
6 changed files with 69 additions and 4 deletions

View File

@@ -7,13 +7,17 @@ import (
"net/http"
"net/url"
"regexp"
"slices"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/hbollon/go-edlib"
"github.com/felipemarinho97/torrent-indexer/magnet"
"github.com/felipemarinho97/torrent-indexer/schema"
goscrape "github.com/felipemarinho97/torrent-indexer/scrape"
"github.com/felipemarinho97/torrent-indexer/utils"
)
var bludv = IndexerMeta{
@@ -29,7 +33,7 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
}()
ctx := r.Context()
// supported query params: q, season, episode
// supported query params: q, season, episode, filter_results
q := r.URL.Query().Get("q")
// URL encode query param
@@ -87,6 +91,25 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
}
}
for i, it := range indexedTorrents {
jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ")
qLower := strings.ToLower(q)
splitLength := 2
indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength)
}
// remove the ones with zero similarity
if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" {
indexedTorrents = utils.Filter(indexedTorrents, func(it IndexedTorrent) bool {
return it.Similarity > 0
})
}
// sort by similarity
slices.SortFunc(indexedTorrents, func(i, j IndexedTorrent) int {
return int((j.Similarity - i.Similarity) * 1000)
})
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(Response{
Results: indexedTorrents,