Feat/Add post processors + refactor project (#37)
* chg: feat: clean known patterns from title * chg: refactor: remove duplicated code and improve maintainability * chg: feat: add audio tagging post-processor * chg: refactor: add generic parallelMap function * chg: refactor: move more functions to common locations * chg: docs: add func docs
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
@@ -11,6 +14,43 @@ import (
|
||||
"github.com/felipemarinho97/torrent-indexer/schema"
|
||||
)
|
||||
|
||||
// getDocument retrieves a document from the cache or makes a request to get it.
|
||||
// It first checks the Redis cache for the document body.
|
||||
func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) {
|
||||
// try to get from redis first
|
||||
docCache, err := i.redis.Get(ctx, link)
|
||||
if err == nil {
|
||||
i.metrics.CacheHits.WithLabelValues("document_body").Inc()
|
||||
fmt.Printf("returning from long-lived cache: %s\n", link)
|
||||
return goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(docCache)))
|
||||
}
|
||||
defer i.metrics.CacheMisses.WithLabelValues("document_body").Inc()
|
||||
|
||||
resp, err := i.requester.GetDocument(ctx, link)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Close()
|
||||
|
||||
body, err := io.ReadAll(resp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// set cache
|
||||
err = i.redis.Set(ctx, link, body)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(body)))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
func getPublishedDateFromMeta(document *goquery.Document) time.Time {
|
||||
var date time.Time
|
||||
//<meta property="article:published_time" content="2019-08-23T13:20:57+00:00">
|
||||
|
||||
Reference in New Issue
Block a user