From 3b9f49a9c021ff8fda7a3e458e2a4272954c0f2a Mon Sep 17 00:00:00 2001 From: Felipe Marinho Date: Wed, 30 Jul 2025 14:16:06 +0000 Subject: [PATCH 1/5] chg: fix: do not cache empty results --- api/bludv.go | 5 +++++ api/comando_torrents.go | 5 +++++ api/comandohds.go | 5 +++++ api/rede_torrent.go | 5 +++++ api/starck_filmes.go | 5 +++++ api/torrent_dos_filmes.go | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/api/bludv.go b/api/bludv.go index a1ca367..fbb8c75 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -79,6 +79,11 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsBluDV(ctx, i, link) diff --git a/api/comando_torrents.go b/api/comando_torrents.go index cfae586..154b8ad 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -93,6 +93,11 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrents(ctx, i, link) diff --git a/api/comandohds.go b/api/comandohds.go index f20a669..30b3e2d 100644 --- a/api/comandohds.go +++ b/api/comandohds.go @@ -81,6 +81,11 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) 
([]schema.IndexedTorrent, error) { return getTorrentsComandoHDs(ctx, i, link) diff --git a/api/rede_torrent.go b/api/rede_torrent.go index 1225742..0d00af8 100644 --- a/api/rede_torrent.go +++ b/api/rede_torrent.go @@ -79,6 +79,11 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsRedeTorrent(ctx, i, link) diff --git a/api/starck_filmes.go b/api/starck_filmes.go index 9cc5560..b699189 100644 --- a/api/starck_filmes.go +++ b/api/starck_filmes.go @@ -78,6 +78,11 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentStarckFilmes(ctx, i, link) diff --git a/api/torrent_dos_filmes.go b/api/torrent_dos_filmes.go index 1c9d4fc..40c37c9 100644 --- a/api/torrent_dos_filmes.go +++ b/api/torrent_dos_filmes.go @@ -78,6 +78,11 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http. 
links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsTorrentDosFilmes(ctx, i, link) -- 2.49.1 From 9bb98beb6156f02b6cbab2d3ebc371e0afdced65 Mon Sep 17 00:00:00 2001 From: Felipe Marinho Date: Wed, 30 Jul 2025 14:16:56 +0000 Subject: [PATCH 2/5] chg: fix: html detector --- utils/util.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/utils/util.go b/utils/util.go index 23dfadc..96eeb43 100644 --- a/utils/util.go +++ b/utils/util.go @@ -2,9 +2,7 @@ package utils import ( "fmt" - "strings" - - "golang.org/x/net/html" + "regexp" ) // Filter filters a slice based on a predicate function. @@ -84,10 +82,19 @@ func StableUniq(s []string) []string { return uniqValues } +var ( + doctypeRegex = regexp.MustCompile(`(?i)`) + htmlTagRegex = regexp.MustCompile(`(?i)[\s\S]*?`) + bodyTagRegex = regexp.MustCompile(`(?i)[\s\S]*?`) +) + func IsValidHTML(input string) bool { - r := strings.NewReader(input) - _, err := html.Parse(r) - return err == nil + // Check for <!DOCTYPE>, <html>, or <body> tags + if !doctypeRegex.MatchString(input) && !htmlTagRegex.MatchString(input) && !bodyTagRegex.MatchString(input) { + return false + } + + return true } // FormatBytes formats a byte size into a human-readable string. 
-- 2.49.1 From db40de9d8623e8d2fcafc772bdaf2f89412b59b5 Mon Sep 17 00:00:00 2001 From: Felipe Marinho Date: Wed, 30 Jul 2025 14:17:20 +0000 Subject: [PATCH 3/5] chg: feat: add retry and expiry --- cache/redis.go | 4 ++++ requester/flaresolverr.go | 22 +++++++++++++++++----- requester/requester.go | 9 +++++++-- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/cache/redis.go b/cache/redis.go index 62e82dc..9e09b99 100644 --- a/cache/redis.go +++ b/cache/redis.go @@ -48,3 +48,7 @@ func (r *Redis) Set(ctx context.Context, key string, value []byte) error { func (r *Redis) SetWithExpiration(ctx context.Context, key string, value []byte, expiration time.Duration) error { return r.client.Set(ctx, key, value, expiration).Err() } + +func (r *Redis) Del(ctx context.Context, key string) error { + return r.client.Del(ctx, key).Err() +} diff --git a/requester/flaresolverr.go b/requester/flaresolverr.go index 7f063ec..95aedbb 100644 --- a/requester/flaresolverr.go +++ b/requester/flaresolverr.go @@ -10,6 +10,7 @@ import ( "net/url" "strings" "sync" + "time" "github.com/felipemarinho97/torrent-indexer/utils" ) @@ -29,7 +30,9 @@ var ( func NewFlareSolverr(url string, timeoutMilli int) *FlareSolverr { poolSize := 5 - httpClient := &http.Client{} + httpClient := &http.Client{ + Timeout: time.Duration(timeoutMilli) * time.Millisecond, + } sessionPool := make(chan string, poolSize) // Pool size of 5 sessions f := &FlareSolverr{ @@ -191,7 +194,7 @@ type Response struct { } `json:"solution"` } -func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { +func (f *FlareSolverr) Get(_url string, attempts int) (io.ReadCloser, error) { // Check if the FlareSolverr instance was initiated if !f.initiated { return io.NopCloser(bytes.NewReader([]byte(""))), nil @@ -205,10 +208,10 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { f.sessionPool <- session }() - body := map[string]string{ + body := map[string]interface{}{ "cmd": "request.get", "url": _url, - 
"maxTimeout": fmt.Sprintf("%d", f.maxTimeout), + "maxTimeout": f.maxTimeout, "session": session, } jsonBody, err := json.Marshal(body) @@ -237,7 +240,15 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { // Check if the response was successful if response.Status != "ok" { - return nil, fmt.Errorf("failed to get response: %s", response.Message) + // if is 500 Internal Server Error, recursively call the Get method + if resp.StatusCode == http.StatusInternalServerError && attempts != 0 { + attempts-- + fmt.Printf("[FlareSolverr] Internal Server Error for %s, retrying...\n", _url) + return f.Get(_url, attempts) // Retry the request + } + + // log the http status code + return nil, fmt.Errorf("failed to get response: %s, statusCode: %s", response.Message, resp.Status) } // Check if "Under attack" is in the response @@ -248,6 +259,7 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { // check if the response is valid HTML if !utils.IsValidHTML(response.Solution.Response) { fmt.Printf("[FlareSolverr] Invalid HTML response from %s\n", _url) + fmt.Printf("[FlareSolverr] Response: %s\n", response.Solution.Response) response.Solution.Response = "" } diff --git a/requester/requester.go b/requester/requester.go index d2baf03..42a5278 100644 --- a/requester/requester.go +++ b/requester/requester.go @@ -50,7 +50,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser, resp, err := i.httpClient.Get(url) if err != nil { // try request with flare solverr - body, err = i.fs.Get(url) + body, err = i.fs.Get(url, 3) if err != nil { return nil, fmt.Errorf("failed to do request for url %s: %w", url, err) } @@ -65,7 +65,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser, } if hasChallange(bodyByte) { // try request with flare solverr - body, err = i.fs.Get(url) + body, err = i.fs.Get(url, 3) if err != nil { return nil, fmt.Errorf("failed to do request for url %s: %w", url, err) } @@ -92,6 +92,11 
@@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser, return io.NopCloser(bytes.NewReader(bodyByte)), nil } +func (i *Requster) ExpireDocument(ctx context.Context, url string) error { + key := fmt.Sprintf("%s:%s", cacheKey, url) + return i.c.Del(ctx, key) +} + // hasChallange checks if the body contains a challange by regex matching func hasChallange(body []byte) bool { return challangeRegex.Match(body) -- 2.49.1 From ccb6eddd4cc7f38a646bd442f66ef7045528f294 Mon Sep 17 00:00:00 2001 From: Felipe Marinho Date: Wed, 30 Jul 2025 14:21:02 +0000 Subject: [PATCH 4/5] chg: fix: lint issue --- api/bludv.go | 2 +- api/comando_torrents.go | 2 +- api/comandohds.go | 2 +- api/rede_torrent.go | 2 +- api/starck_filmes.go | 2 +- api/torrent_dos_filmes.go | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/api/bludv.go b/api/bludv.go index fbb8c75..37a9633 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -81,7 +81,7 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { // if no links were indexed, expire the document in cache if len(links) == 0 { - i.requester.ExpireDocument(ctx, url) + _ = i.requester.ExpireDocument(ctx, url) } // extract each torrent link diff --git a/api/comando_torrents.go b/api/comando_torrents.go index 154b8ad..822504b 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -95,7 +95,7 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) // if no links were indexed, expire the document in cache if len(links) == 0 { - i.requester.ExpireDocument(ctx, url) + _ = i.requester.ExpireDocument(ctx, url) } // extract each torrent link diff --git a/api/comandohds.go b/api/comandohds.go index 30b3e2d..9943ef0 100644 --- a/api/comandohds.go +++ b/api/comandohds.go @@ -83,7 +83,7 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques // if no links were indexed, expire the document in cache if len(links) == 0 { - 
i.requester.ExpireDocument(ctx, url) + _ = i.requester.ExpireDocument(ctx, url) } // extract each torrent link diff --git a/api/rede_torrent.go b/api/rede_torrent.go index 0d00af8..73a026c 100644 --- a/api/rede_torrent.go +++ b/api/rede_torrent.go @@ -81,7 +81,7 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque // if no links were indexed, expire the document in cache if len(links) == 0 { - i.requester.ExpireDocument(ctx, url) + _ = i.requester.ExpireDocument(ctx, url) } // extract each torrent link diff --git a/api/starck_filmes.go b/api/starck_filmes.go index b699189..dc23a5c 100644 --- a/api/starck_filmes.go +++ b/api/starck_filmes.go @@ -80,7 +80,7 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ // if no links were indexed, expire the document in cache if len(links) == 0 { - i.requester.ExpireDocument(ctx, url) + _ = i.requester.ExpireDocument(ctx, url) } // extract each torrent link diff --git a/api/torrent_dos_filmes.go b/api/torrent_dos_filmes.go index 40c37c9..7b76bce 100644 --- a/api/torrent_dos_filmes.go +++ b/api/torrent_dos_filmes.go @@ -80,7 +80,7 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http. 
// if no links were indexed, expire the document in cache if len(links) == 0 { - i.requester.ExpireDocument(ctx, url) + _ = i.requester.ExpireDocument(ctx, url) } // extract each torrent link -- 2.49.1 From b9cf22dc64c833a15a821d0834f09a1eea8e1ebf Mon Sep 17 00:00:00 2001 From: Felipe Marinho Date: Wed, 30 Jul 2025 14:24:04 +0000 Subject: [PATCH 5/5] chg: fix: tidy up deps --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 298c0cf..2710f7a 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.65.0 // indirect github.com/prometheus/procfs v0.17.0 // indirect + golang.org/x/net v0.42.0 // indirect golang.org/x/sys v0.34.0 // indirect google.golang.org/protobuf v1.36.6 // indirect ) @@ -22,5 +23,4 @@ require ( github.com/hbollon/go-edlib v1.6.0 github.com/prometheus/client_golang v1.22.0 github.com/xhit/go-str2duration/v2 v2.1.0 - golang.org/x/net v0.42.0 ) -- 2.49.1