5 Commits

Author SHA1 Message Date
b9cf22dc64 chg: fix: tidy up deps 2025-07-30 14:24:04 +00:00
ccb6eddd4c chg: fix: lint issue 2025-07-30 14:21:02 +00:00
db40de9d86 chg: feat: add retry and expiry 2025-07-30 14:17:20 +00:00
9bb98beb61 chg: fix: html detector 2025-07-30 14:16:56 +00:00
3b9f49a9c0 chg: fix: do not cache empty results 2025-07-30 14:16:06 +00:00
11 changed files with 72 additions and 14 deletions

View File

@@ -79,6 +79,11 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
links = append(links, link)
})
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsBluDV(ctx, i, link)

View File

@@ -93,6 +93,11 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request)
links = append(links, link)
})
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrents(ctx, i, link)

View File

@@ -81,6 +81,11 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques
links = append(links, link)
})
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsComandoHDs(ctx, i, link)

View File

@@ -79,6 +79,11 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque
links = append(links, link)
})
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsRedeTorrent(ctx, i, link)

View File

@@ -78,6 +78,11 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
links = append(links, link)
})
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentStarckFilmes(ctx, i, link)

View File

@@ -78,6 +78,11 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.
links = append(links, link)
})
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsTorrentDosFilmes(ctx, i, link)

4
cache/redis.go vendored
View File

@@ -48,3 +48,7 @@ func (r *Redis) Set(ctx context.Context, key string, value []byte) error {
func (r *Redis) SetWithExpiration(ctx context.Context, key string, value []byte, expiration time.Duration) error {
return r.client.Set(ctx, key, value, expiration).Err()
}
func (r *Redis) Del(ctx context.Context, key string) error {
return r.client.Del(ctx, key).Err()
}

2
go.mod
View File

@@ -13,6 +13,7 @@ require (
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.65.0 // indirect
github.com/prometheus/procfs v0.17.0 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/sys v0.34.0 // indirect
google.golang.org/protobuf v1.36.6 // indirect
)
@@ -22,5 +23,4 @@ require (
github.com/hbollon/go-edlib v1.6.0
github.com/prometheus/client_golang v1.22.0
github.com/xhit/go-str2duration/v2 v2.1.0
golang.org/x/net v0.42.0
)

View File

@@ -10,6 +10,7 @@ import (
"net/url"
"strings"
"sync"
"time"
"github.com/felipemarinho97/torrent-indexer/utils"
)
@@ -29,7 +30,9 @@ var (
func NewFlareSolverr(url string, timeoutMilli int) *FlareSolverr {
poolSize := 5
httpClient := &http.Client{}
httpClient := &http.Client{
Timeout: time.Duration(timeoutMilli) * time.Millisecond,
}
sessionPool := make(chan string, poolSize) // Pool size of 5 sessions
f := &FlareSolverr{
@@ -191,7 +194,7 @@ type Response struct {
} `json:"solution"`
}
func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) {
func (f *FlareSolverr) Get(_url string, attempts int) (io.ReadCloser, error) {
// Check if the FlareSolverr instance was initiated
if !f.initiated {
return io.NopCloser(bytes.NewReader([]byte(""))), nil
@@ -205,10 +208,10 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) {
f.sessionPool <- session
}()
body := map[string]string{
body := map[string]interface{}{
"cmd": "request.get",
"url": _url,
"maxTimeout": fmt.Sprintf("%d", f.maxTimeout),
"maxTimeout": f.maxTimeout,
"session": session,
}
jsonBody, err := json.Marshal(body)
@@ -237,7 +240,15 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) {
// Check if the response was successful
if response.Status != "ok" {
return nil, fmt.Errorf("failed to get response: %s", response.Message)
// if is 500 Internal Server Error, recursively call the Get method
if resp.StatusCode == http.StatusInternalServerError && attempts != 0 {
attempts--
fmt.Printf("[FlareSolverr] Internal Server Error for %s, retrying...\n", _url)
return f.Get(_url, attempts) // Retry the request
}
// log the http status code
return nil, fmt.Errorf("failed to get response: %s, statusCode: %s", response.Message, resp.Status)
}
// Check if "Under attack" is in the response
@@ -248,6 +259,7 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) {
// check if the response is valid HTML
if !utils.IsValidHTML(response.Solution.Response) {
fmt.Printf("[FlareSolverr] Invalid HTML response from %s\n", _url)
fmt.Printf("[FlareSolverr] Response: %s\n", response.Solution.Response)
response.Solution.Response = ""
}

View File

@@ -50,7 +50,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser,
resp, err := i.httpClient.Get(url)
if err != nil {
// try request with flare solverr
body, err = i.fs.Get(url)
body, err = i.fs.Get(url, 3)
if err != nil {
return nil, fmt.Errorf("failed to do request for url %s: %w", url, err)
}
@@ -65,7 +65,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser,
}
if hasChallange(bodyByte) {
// try request with flare solverr
body, err = i.fs.Get(url)
body, err = i.fs.Get(url, 3)
if err != nil {
return nil, fmt.Errorf("failed to do request for url %s: %w", url, err)
}
@@ -92,6 +92,11 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser,
return io.NopCloser(bytes.NewReader(bodyByte)), nil
}
func (i *Requster) ExpireDocument(ctx context.Context, url string) error {
key := fmt.Sprintf("%s:%s", cacheKey, url)
return i.c.Del(ctx, key)
}
// hasChallange checks if the body contains a challange by regex matching
func hasChallange(body []byte) bool {
return challangeRegex.Match(body)

View File

@@ -2,9 +2,7 @@ package utils
import (
"fmt"
"strings"
"golang.org/x/net/html"
"regexp"
)
// Filter filters a slice based on a predicate function.
@@ -84,10 +82,19 @@ func StableUniq(s []string) []string {
return uniqValues
}
var (
doctypeRegex = regexp.MustCompile(`(?i)<!DOCTYPE\s+html>`)
htmlTagRegex = regexp.MustCompile(`(?i)<html[\s\S]*?>[\s\S]*?</html>`)
bodyTagRegex = regexp.MustCompile(`(?i)<body[\s\S]*?>[\s\S]*?</body>`)
)
func IsValidHTML(input string) bool {
r := strings.NewReader(input)
_, err := html.Parse(r)
return err == nil
// Check for <!DOCTYPE>, <html>, or <body> tags
if !doctypeRegex.MatchString(input) && !htmlTagRegex.MatchString(input) && !bodyTagRegex.MatchString(input) {
return false
}
return true
}
// FormatBytes formats a byte size into a human-readable string.