diff --git a/api/bludv.go b/api/bludv.go index a1ca367..37a9633 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -79,6 +79,11 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + _ = i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsBluDV(ctx, i, link) diff --git a/api/comando_torrents.go b/api/comando_torrents.go index cfae586..822504b 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -93,6 +93,11 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + _ = i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrents(ctx, i, link) diff --git a/api/comandohds.go b/api/comandohds.go index f20a669..9943ef0 100644 --- a/api/comandohds.go +++ b/api/comandohds.go @@ -81,6 +81,11 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + _ = i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsComandoHDs(ctx, i, link) diff --git a/api/rede_torrent.go b/api/rede_torrent.go index 1225742..73a026c 100644 --- a/api/rede_torrent.go +++ b/api/rede_torrent.go @@ -79,6 +79,11 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + _ = i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsRedeTorrent(ctx, i, link) diff --git a/api/starck_filmes.go b/api/starck_filmes.go index 9cc5560..dc23a5c 100644 --- a/api/starck_filmes.go +++ b/api/starck_filmes.go @@ -78,6 +78,11 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + _ = i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentStarckFilmes(ctx, i, link) diff --git a/api/torrent_dos_filmes.go b/api/torrent_dos_filmes.go index 1c9d4fc..7b76bce 100644 --- a/api/torrent_dos_filmes.go +++ b/api/torrent_dos_filmes.go @@ -78,6 +78,11 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http. links = append(links, link) }) + // if no links were indexed, expire the document in cache + if len(links) == 0 { + _ = i.requester.ExpireDocument(ctx, url) + } + // extract each torrent link indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { return getTorrentsTorrentDosFilmes(ctx, i, link) diff --git a/cache/redis.go b/cache/redis.go index 62e82dc..9e09b99 100644 --- a/cache/redis.go +++ b/cache/redis.go @@ -48,3 +48,7 @@ func (r *Redis) Set(ctx context.Context, key string, value []byte) error { func (r *Redis) SetWithExpiration(ctx context.Context, key string, value []byte, expiration time.Duration) error { return r.client.Set(ctx, key, value, expiration).Err() } + +func (r *Redis) Del(ctx context.Context, key string) error { + return r.client.Del(ctx, key).Err() +} diff --git a/go.mod b/go.mod index 298c0cf..2710f7a 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.65.0 // indirect github.com/prometheus/procfs v0.17.0 // indirect + golang.org/x/net v0.42.0 // indirect golang.org/x/sys v0.34.0 // indirect google.golang.org/protobuf v1.36.6 // indirect ) @@ -22,5 +23,4 @@ require ( github.com/hbollon/go-edlib v1.6.0 github.com/prometheus/client_golang v1.22.0 github.com/xhit/go-str2duration/v2 v2.1.0 - golang.org/x/net v0.42.0 ) diff --git a/requester/flaresolverr.go b/requester/flaresolverr.go index 7f063ec..95aedbb 100644 --- a/requester/flaresolverr.go +++ b/requester/flaresolverr.go @@ -10,6 +10,7 @@ import ( "net/url" "strings" "sync" + "time" "github.com/felipemarinho97/torrent-indexer/utils" ) @@ -29,7 +30,9 @@ var ( func NewFlareSolverr(url string, timeoutMilli int) *FlareSolverr { poolSize := 5 - httpClient := &http.Client{} + httpClient := &http.Client{ + Timeout: time.Duration(timeoutMilli) * time.Millisecond, + } sessionPool := make(chan string, poolSize) // Pool size of 5 sessions f := &FlareSolverr{ @@ -191,7 +194,7 @@ type Response struct { } `json:"solution"` } -func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { +func (f *FlareSolverr) Get(_url string, attempts int) (io.ReadCloser, error) { // Check if the FlareSolverr instance was initiated if !f.initiated { return io.NopCloser(bytes.NewReader([]byte(""))), nil @@ -205,10 +208,10 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { f.sessionPool <- session }() - body := map[string]string{ + body := map[string]interface{}{ "cmd": "request.get", "url": _url, - "maxTimeout": fmt.Sprintf("%d", f.maxTimeout), + "maxTimeout": f.maxTimeout, "session": session, } jsonBody, err := json.Marshal(body) @@ -237,7 +240,15 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { // Check if the response was successful if response.Status != "ok" { - return nil, fmt.Errorf("failed to get response: %s", response.Message) + // if is 500 Internal Server Error, recursively call the Get method + if resp.StatusCode == http.StatusInternalServerError && attempts != 0 { + attempts-- + fmt.Printf("[FlareSolverr] Internal Server Error for %s, retrying...\n", _url) + return f.Get(_url, attempts) // Retry the request + } + + // log the http status code + return nil, fmt.Errorf("failed to get response: %s, statusCode: %s", response.Message, resp.Status) } // Check if "Under attack" is in the response @@ -248,6 +259,7 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { // check if the response is valid HTML if !utils.IsValidHTML(response.Solution.Response) { fmt.Printf("[FlareSolverr] Invalid HTML response from %s\n", _url) + fmt.Printf("[FlareSolverr] Response: %s\n", response.Solution.Response) response.Solution.Response = "" } diff --git a/requester/requester.go b/requester/requester.go index d2baf03..42a5278 100644 --- a/requester/requester.go +++ b/requester/requester.go @@ -50,7 +50,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser, resp, err := i.httpClient.Get(url) if err != nil { // try request with flare solverr - body, err = i.fs.Get(url) + body, err = i.fs.Get(url, 3) if err != nil { return nil, fmt.Errorf("failed to do request for url %s: %w", url, err) } @@ -65,7 +65,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser, } if hasChallange(bodyByte) { // try request with flare solverr - body, err = i.fs.Get(url) + body, err = i.fs.Get(url, 3) if err != nil { return nil, fmt.Errorf("failed to do request for url %s: %w", url, err) } @@ -92,6 +92,11 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser, return io.NopCloser(bytes.NewReader(bodyByte)), nil } +func (i *Requster) ExpireDocument(ctx context.Context, url string) error { + key := fmt.Sprintf("%s:%s", cacheKey, url) + return i.c.Del(ctx, key) +} + // hasChallange checks if the body contains a challange by regex matching func hasChallange(body []byte) bool { return challangeRegex.Match(body) diff --git a/utils/util.go b/utils/util.go index 23dfadc..96eeb43 100644 --- a/utils/util.go +++ b/utils/util.go @@ -2,9 +2,7 @@ package utils import ( "fmt" - "strings" - - "golang.org/x/net/html" + "regexp" ) // Filter filters a slice based on a predicate function. @@ -84,10 +82,19 @@ func StableUniq(s []string) []string { return uniqValues } +var ( + doctypeRegex = regexp.MustCompile(`(?i)`) + htmlTagRegex = regexp.MustCompile(`(?i)[\s\S]*?`) + bodyTagRegex = regexp.MustCompile(`(?i)[\s\S]*?`) +) + func IsValidHTML(input string) bool { - r := strings.NewReader(input) - _, err := html.Parse(r) - return err == nil + // Check for , , or tags + if !doctypeRegex.MatchString(input) && !htmlTagRegex.MatchString(input) && !bodyTagRegex.MatchString(input) { + return false + } + + return true } // FormatBytes formats a byte size into a human-readable string.