feat/flaresolverr-resilience #3

Open
darklyn wants to merge 5 commits from feat/flaresolverr-resilience into main
11 changed files with 72 additions and 14 deletions

View File

@@ -79,6 +79,11 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) {
links = append(links, link) links = append(links, link)
}) })
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link // extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsBluDV(ctx, i, link) return getTorrentsBluDV(ctx, i, link)

View File

@@ -93,6 +93,11 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request)
links = append(links, link) links = append(links, link)
}) })
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link // extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrents(ctx, i, link) return getTorrents(ctx, i, link)

View File

@@ -81,6 +81,11 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques
links = append(links, link) links = append(links, link)
}) })
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link // extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsComandoHDs(ctx, i, link) return getTorrentsComandoHDs(ctx, i, link)

View File

@@ -79,6 +79,11 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque
links = append(links, link) links = append(links, link)
}) })
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link // extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsRedeTorrent(ctx, i, link) return getTorrentsRedeTorrent(ctx, i, link)

View File

@@ -78,6 +78,11 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ
links = append(links, link) links = append(links, link)
}) })
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link // extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentStarckFilmes(ctx, i, link) return getTorrentStarckFilmes(ctx, i, link)

View File

@@ -78,6 +78,11 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.
links = append(links, link) links = append(links, link)
}) })
// if no links were indexed, expire the document in cache
if len(links) == 0 {
_ = i.requester.ExpireDocument(ctx, url)
}
// extract each torrent link // extract each torrent link
indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) { indexedTorrents := utils.ParallelFlatMap(links, func(link string) ([]schema.IndexedTorrent, error) {
return getTorrentsTorrentDosFilmes(ctx, i, link) return getTorrentsTorrentDosFilmes(ctx, i, link)

4
cache/redis.go vendored
View File

@@ -48,3 +48,7 @@ func (r *Redis) Set(ctx context.Context, key string, value []byte) error {
func (r *Redis) SetWithExpiration(ctx context.Context, key string, value []byte, expiration time.Duration) error { func (r *Redis) SetWithExpiration(ctx context.Context, key string, value []byte, expiration time.Duration) error {
return r.client.Set(ctx, key, value, expiration).Err() return r.client.Set(ctx, key, value, expiration).Err()
} }
func (r *Redis) Del(ctx context.Context, key string) error {
return r.client.Del(ctx, key).Err()
}

2
go.mod
View File

@@ -13,6 +13,7 @@ require (
github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.65.0 // indirect github.com/prometheus/common v0.65.0 // indirect
github.com/prometheus/procfs v0.17.0 // indirect github.com/prometheus/procfs v0.17.0 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/sys v0.34.0 // indirect golang.org/x/sys v0.34.0 // indirect
google.golang.org/protobuf v1.36.6 // indirect google.golang.org/protobuf v1.36.6 // indirect
) )
@@ -22,5 +23,4 @@ require (
github.com/hbollon/go-edlib v1.6.0 github.com/hbollon/go-edlib v1.6.0
github.com/prometheus/client_golang v1.22.0 github.com/prometheus/client_golang v1.22.0
github.com/xhit/go-str2duration/v2 v2.1.0 github.com/xhit/go-str2duration/v2 v2.1.0
golang.org/x/net v0.42.0
) )

View File

@@ -10,6 +10,7 @@ import (
"net/url" "net/url"
"strings" "strings"
"sync" "sync"
"time"
"github.com/felipemarinho97/torrent-indexer/utils" "github.com/felipemarinho97/torrent-indexer/utils"
) )
@@ -29,7 +30,9 @@ var (
func NewFlareSolverr(url string, timeoutMilli int) *FlareSolverr { func NewFlareSolverr(url string, timeoutMilli int) *FlareSolverr {
poolSize := 5 poolSize := 5
httpClient := &http.Client{} httpClient := &http.Client{
Timeout: time.Duration(timeoutMilli) * time.Millisecond,
}
sessionPool := make(chan string, poolSize) // Pool size of 5 sessions sessionPool := make(chan string, poolSize) // Pool size of 5 sessions
f := &FlareSolverr{ f := &FlareSolverr{
@@ -191,7 +194,7 @@ type Response struct {
} `json:"solution"` } `json:"solution"`
} }
func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) { func (f *FlareSolverr) Get(_url string, attempts int) (io.ReadCloser, error) {
// Check if the FlareSolverr instance was initiated // Check if the FlareSolverr instance was initiated
if !f.initiated { if !f.initiated {
return io.NopCloser(bytes.NewReader([]byte(""))), nil return io.NopCloser(bytes.NewReader([]byte(""))), nil
@@ -205,10 +208,10 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) {
f.sessionPool <- session f.sessionPool <- session
}() }()
body := map[string]string{ body := map[string]interface{}{
"cmd": "request.get", "cmd": "request.get",
"url": _url, "url": _url,
"maxTimeout": fmt.Sprintf("%d", f.maxTimeout), "maxTimeout": f.maxTimeout,
"session": session, "session": session,
} }
jsonBody, err := json.Marshal(body) jsonBody, err := json.Marshal(body)
@@ -237,7 +240,15 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) {
// Check if the response was successful // Check if the response was successful
if response.Status != "ok" { if response.Status != "ok" {
return nil, fmt.Errorf("failed to get response: %s", response.Message) // if is 500 Internal Server Error, recursively call the Get method
if resp.StatusCode == http.StatusInternalServerError && attempts != 0 {
attempts--
fmt.Printf("[FlareSolverr] Internal Server Error for %s, retrying...\n", _url)
return f.Get(_url, attempts) // Retry the request
}
// log the http status code
return nil, fmt.Errorf("failed to get response: %s, statusCode: %s", response.Message, resp.Status)
} }
// Check if "Under attack" is in the response // Check if "Under attack" is in the response
@@ -248,6 +259,7 @@ func (f *FlareSolverr) Get(_url string) (io.ReadCloser, error) {
// check if the response is valid HTML // check if the response is valid HTML
if !utils.IsValidHTML(response.Solution.Response) { if !utils.IsValidHTML(response.Solution.Response) {
fmt.Printf("[FlareSolverr] Invalid HTML response from %s\n", _url) fmt.Printf("[FlareSolverr] Invalid HTML response from %s\n", _url)
fmt.Printf("[FlareSolverr] Response: %s\n", response.Solution.Response)
response.Solution.Response = "" response.Solution.Response = ""
} }

View File

@@ -50,7 +50,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser,
resp, err := i.httpClient.Get(url) resp, err := i.httpClient.Get(url)
if err != nil { if err != nil {
// try request with flare solverr // try request with flare solverr
body, err = i.fs.Get(url) body, err = i.fs.Get(url, 3)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to do request for url %s: %w", url, err) return nil, fmt.Errorf("failed to do request for url %s: %w", url, err)
} }
@@ -65,7 +65,7 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser,
} }
if hasChallange(bodyByte) { if hasChallange(bodyByte) {
// try request with flare solverr // try request with flare solverr
body, err = i.fs.Get(url) body, err = i.fs.Get(url, 3)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to do request for url %s: %w", url, err) return nil, fmt.Errorf("failed to do request for url %s: %w", url, err)
} }
@@ -92,6 +92,11 @@ func (i *Requster) GetDocument(ctx context.Context, url string) (io.ReadCloser,
return io.NopCloser(bytes.NewReader(bodyByte)), nil return io.NopCloser(bytes.NewReader(bodyByte)), nil
} }
func (i *Requster) ExpireDocument(ctx context.Context, url string) error {
key := fmt.Sprintf("%s:%s", cacheKey, url)
return i.c.Del(ctx, key)
}
// hasChallange checks if the body contains a challange by regex matching // hasChallange checks if the body contains a challange by regex matching
func hasChallange(body []byte) bool { func hasChallange(body []byte) bool {
return challangeRegex.Match(body) return challangeRegex.Match(body)

View File

@@ -2,9 +2,7 @@ package utils
import ( import (
"fmt" "fmt"
"strings" "regexp"
"golang.org/x/net/html"
) )
// Filter filters a slice based on a predicate function. // Filter filters a slice based on a predicate function.
@@ -84,10 +82,19 @@ func StableUniq(s []string) []string {
return uniqValues return uniqValues
} }
var (
doctypeRegex = regexp.MustCompile(`(?i)<!DOCTYPE\s+html>`)
htmlTagRegex = regexp.MustCompile(`(?i)<html[\s\S]*?>[\s\S]*?</html>`)
bodyTagRegex = regexp.MustCompile(`(?i)<body[\s\S]*?>[\s\S]*?</body>`)
)
func IsValidHTML(input string) bool { func IsValidHTML(input string) bool {
r := strings.NewReader(input) // Check for <!DOCTYPE>, <html>, or <body> tags
_, err := html.Parse(r) if !doctypeRegex.MatchString(input) && !htmlTagRegex.MatchString(input) && !bodyTagRegex.MatchString(input) {
return err == nil return false
}
return true
} }
// FormatBytes formats a byte size into a human-readable string. // FormatBytes formats a byte size into a human-readable string.