diff --git a/api/bludv.go b/api/bludv.go index 2c2102a..6eb42c7 100644 --- a/api/bludv.go +++ b/api/bludv.go @@ -6,12 +6,10 @@ import ( "fmt" "net/http" "net/url" - "slices" "strings" "time" "github.com/PuerkitoBio/goquery" - "github.com/hbollon/go-edlib" "github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" @@ -20,15 +18,19 @@ import ( ) var bludv = IndexerMeta{ - URL: "https://bludv.xyz/", - SearchURL: "?s=", + Label: "bludv", + URL: "https://bludv.xyz/", + SearchURL: "?s=", + PagePattern: "page/%s", } func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { start := time.Now() + metadata := bludv + defer func() { - i.metrics.IndexerDuration.WithLabelValues("bludv").Observe(time.Since(start).Seconds()) - i.metrics.IndexerRequests.WithLabelValues("bludv").Inc() + i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc() }() ctx := r.Context() @@ -38,11 +40,11 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { // URL encode query param q = url.QueryEscape(q) - url := bludv.URL + url := metadata.URL if page != "" { - url = fmt.Sprintf("%spage/%s", url, page) + url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page) } else { - url = fmt.Sprintf("%s%s%s", url, bludv.SearchURL, q) + url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q) } fmt.Println("URL:>", url) @@ -53,7 +55,7 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { if err != nil { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("bludv").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } defer resp.Close() @@ -66,7 +68,7 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("bludv").Inc() + 
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } @@ -77,57 +79,21 @@ func (i *Indexer) HandlerBluDVIndexer(w http.ResponseWriter, r *http.Request) { links = append(links, link) }) - var itChan = make(chan []schema.IndexedTorrent) - var errChan = make(chan error) - indexedTorrents := []schema.IndexedTorrent{} - for _, link := range links { - go func(link string) { - torrents, err := getTorrentsBluDV(ctx, i, link) - if err != nil { - fmt.Println(err) - errChan <- err - } - itChan <- torrents - }(link) - } - - for i := 0; i < len(links); i++ { - select { - case torrents := <-itChan: - indexedTorrents = append(indexedTorrents, torrents...) - case err := <-errChan: - fmt.Println(err) - } - } - - for i, it := range indexedTorrents { - jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") - qLower := strings.ToLower(q) - splitLength := 2 - indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) - } - - // remove the ones with zero similarity - if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { - indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { - return it.Similarity > 0 - }) - } - - // sort by similarity - slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { - return int((j.Similarity - i.Similarity) * 1000) + // extract each torrent link + indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) { + return getTorrentsBluDV(ctx, i, link) }) - // send to search index - go func() { - _ = i.search.IndexTorrents(indexedTorrents) - }() + // Apply post-processors + postProcessedTorrents := indexedTorrents + for _, processor := range i.postProcessors { + postProcessedTorrents = processor(i, r, postProcessedTorrents) + } w.Header().Set("Content-Type", "application/json") err = json.NewEncoder(w).Encode(Response{ - Results: 
indexedTorrents, - Count: len(indexedTorrents), + Results: postProcessedTorrents, + Count: len(postProcessedTorrents), }) if err != nil { fmt.Println(err) @@ -216,7 +182,7 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.In } }) - size = stableUniq(size) + size = utils.StableUniq(size) var chanIndexedTorrent = make(chan schema.IndexedTorrent) @@ -247,7 +213,7 @@ func getTorrentsBluDV(ctx context.Context, i *Indexer, link string) ([]schema.In } ixt := schema.IndexedTorrent{ - Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + Title: releaseTitle, OriginalTitle: title, Details: link, Year: year, diff --git a/api/comando_torrents.go b/api/comando_torrents.go index 1ed455f..c04ffc2 100644 --- a/api/comando_torrents.go +++ b/api/comando_torrents.go @@ -1,15 +1,12 @@ package handler import ( - "bytes" "context" "encoding/json" "fmt" - "io" "net/http" "net/url" "regexp" - "slices" "strings" "time" @@ -18,12 +15,13 @@ import ( "github.com/felipemarinho97/torrent-indexer/schema" goscrape "github.com/felipemarinho97/torrent-indexer/scrape" "github.com/felipemarinho97/torrent-indexer/utils" - "github.com/hbollon/go-edlib" ) var comando = IndexerMeta{ - URL: "https://comando.la/", - SearchURL: "?s=", + Label: "comando", + URL: "https://comando.la/", + SearchURL: "?s=", + PagePattern: "page/%s", } var replacer = strings.NewReplacer( @@ -43,9 +41,11 @@ var replacer = strings.NewReplacer( func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) { start := time.Now() + metadata := comando + defer func() { - i.metrics.IndexerDuration.WithLabelValues("comando").Observe(time.Since(start).Seconds()) - i.metrics.IndexerRequests.WithLabelValues("comando").Inc() + i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc() }() ctx := r.Context() @@ -55,11 +55,11 @@ func (i *Indexer) HandlerComandoIndexer(w 
http.ResponseWriter, r *http.Request) // URL encode query param q = url.QueryEscape(q) - url := comando.URL + url := metadata.URL if q != "" { - url = fmt.Sprintf("%s%s%s", url, comando.SearchURL, q) + url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q) } else if page != "" { - url = fmt.Sprintf("%spage/%s", url, page) + url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page) } fmt.Println("URL:>", url) @@ -70,7 +70,7 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) if err != nil { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("comando").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } defer resp.Close() @@ -82,7 +82,7 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) if err != nil { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("comando").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } @@ -93,57 +93,21 @@ func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) links = append(links, link) }) - var itChan = make(chan []schema.IndexedTorrent) - var errChan = make(chan error) - indexedTorrents := []schema.IndexedTorrent{} - for _, link := range links { - go func(link string) { - torrents, err := getTorrents(ctx, i, link) - if err != nil { - fmt.Println(err) - errChan <- err - } - itChan <- torrents - }(link) - } - - for i := 0; i < len(links); i++ { - select { - case torrents := <-itChan: - indexedTorrents = append(indexedTorrents, torrents...) 
- case err := <-errChan: - fmt.Println(err) - } - } - - for i, it := range indexedTorrents { - jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") - qLower := strings.ToLower(q) - splitLength := 2 - indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) - } - - // remove the ones with zero similarity - if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { - indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { - return it.Similarity > 0 - }) - } - - // sort by similarity - slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { - return int((j.Similarity - i.Similarity) * 1000) + // extract each torrent link + indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) { + return getTorrents(ctx, i, link) }) - // send to search index - go func() { - _ = i.search.IndexTorrents(indexedTorrents) - }() + // Apply post-processors + postProcessedTorrents := indexedTorrents + for _, processor := range i.postProcessors { + postProcessedTorrents = processor(i, r, postProcessedTorrents) + } w.Header().Set("Content-Type", "application/json") err = json.NewEncoder(w).Encode(Response{ - Results: indexedTorrents, - Count: len(indexedTorrents), + Results: postProcessedTorrents, + Count: len(postProcessedTorrents), }) if err != nil { fmt.Println(err) @@ -215,7 +179,7 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed } }) - size = stableUniq(size) + size = utils.StableUniq(size) var chanIndexedTorrent = make(chan schema.IndexedTorrent) @@ -246,7 +210,7 @@ func getTorrents(ctx context.Context, i *Indexer, link string) ([]schema.Indexed } ixt := schema.IndexedTorrent{ - Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + Title: releaseTitle, OriginalTitle: title, Details: link, Year: year, @@ -293,38 +257,6 @@ func 
parseLocalizedDate(datePublished string) (time.Time, error) { return time.Time{}, nil } -func stableUniq(s []string) []string { - var uniq []map[string]interface{} - m := make(map[string]map[string]interface{}) - for i, v := range s { - m[v] = map[string]interface{}{ - "v": v, - "i": i, - } - } - // to order by index - for _, v := range m { - uniq = append(uniq, v) - } - - // sort by index - for i := 0; i < len(uniq); i++ { - for j := i + 1; j < len(uniq); j++ { - if uniq[i]["i"].(int) > uniq[j]["i"].(int) { - uniq[i], uniq[j] = uniq[j], uniq[i] - } - } - } - - // get only values - var uniqValues []string - for _, v := range uniq { - uniqValues = append(uniqValues, v["v"].(string)) - } - - return uniqValues -} - func processTitle(title string, a []schema.Audio) string { // remove ' - Donwload' from title title = strings.Replace(title, " – Download", "", -1) @@ -337,38 +269,3 @@ func processTitle(title string, a []schema.Audio) string { return title } - -func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) { - // try to get from redis first - docCache, err := i.redis.Get(ctx, link) - if err == nil { - i.metrics.CacheHits.WithLabelValues("document_body").Inc() - fmt.Printf("returning from long-lived cache: %s\n", link) - return goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(docCache))) - } - defer i.metrics.CacheMisses.WithLabelValues("document_body").Inc() - - resp, err := i.requester.GetDocument(ctx, link) - if err != nil { - return nil, err - } - defer resp.Close() - - body, err := io.ReadAll(resp) - if err != nil { - return nil, err - } - - // set cache - err = i.redis.Set(ctx, link, body) - if err != nil { - fmt.Println(err) - } - - doc, err := goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(body))) - if err != nil { - return nil, err - } - - return doc, nil -} diff --git a/api/comandohds.go b/api/comandohds.go index 2c815fa..e88739b 100644 --- a/api/comandohds.go +++ b/api/comandohds.go @@ -7,12 +7,10 
@@ import ( "net/http" "net/url" "regexp" - "slices" "strings" "time" "github.com/PuerkitoBio/goquery" - "github.com/hbollon/go-edlib" "github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" @@ -21,17 +19,21 @@ import ( ) var comandohds = IndexerMeta{ - URL: "https://comandohds.org/", - SearchURL: "?s=", + Label: "comandohds", + URL: "https://comandohds.org/", + SearchURL: "?s=", + PagePattern: "page/%s", } var title_re = regexp.MustCompile(`^[(Filme)|(Série)\s]+`) func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Request) { start := time.Now() + metadata := comandohds + defer func() { - i.metrics.IndexerDuration.WithLabelValues("comandohds").Observe(time.Since(start).Seconds()) - i.metrics.IndexerRequests.WithLabelValues("comandohds").Inc() + i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc() }() ctx := r.Context() @@ -41,11 +43,11 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques // URL encode query param q = url.QueryEscape(q) - url := comandohds.URL + url := metadata.URL if q != "" { - url = fmt.Sprintf("%s%s%s", url, comandohds.SearchURL, q) + url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q) } else if page != "" { - url = fmt.Sprintf("%spage/%s", url, page) + url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page) } fmt.Println("URL:>", url) @@ -56,7 +58,7 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques if err != nil { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("comandohds").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } defer resp.Close() @@ -69,7 +71,7 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("comandohds").Inc() + 
i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } @@ -79,57 +81,21 @@ func (i *Indexer) HandlerComandoHDsIndexer(w http.ResponseWriter, r *http.Reques links = append(links, link) }) - var itChan = make(chan []schema.IndexedTorrent) - var errChan = make(chan error) - indexedTorrents := []schema.IndexedTorrent{} - for _, link := range links { - go func(link string) { - torrents, err := getTorrentsComandoHDs(ctx, i, link) - if err != nil { - fmt.Println(err) - errChan <- err - } - itChan <- torrents - }(link) - } - - for i := 0; i < len(links); i++ { - select { - case torrents := <-itChan: - indexedTorrents = append(indexedTorrents, torrents...) - case err := <-errChan: - fmt.Println(err) - } - } - - for i, it := range indexedTorrents { - jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") - qLower := strings.ToLower(q) - splitLength := 2 - indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) - } - - // remove the ones with zero similarity - if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { - indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { - return it.Similarity > 0 - }) - } - - // sort by similarity - slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { - return int((j.Similarity - i.Similarity) * 1000) + // extract each torrent link + indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) { + return getTorrentsComandoHDs(ctx, i, link) }) - // send to search index - go func() { - _ = i.search.IndexTorrents(indexedTorrents) - }() + // Apply post-processors + postProcessedTorrents := indexedTorrents + for _, processor := range i.postProcessors { + postProcessedTorrents = processor(i, r, postProcessedTorrents) + } w.Header().Set("Content-Type", "application/json") err = json.NewEncoder(w).Encode(Response{ - 
Results: indexedTorrents, - Count: len(indexedTorrents), + Results: postProcessedTorrents, + Count: len(postProcessedTorrents), }) if err != nil { fmt.Println(err) @@ -193,7 +159,7 @@ func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]sche } }) - size = stableUniq(size) + size = utils.StableUniq(size) var chanIndexedTorrent = make(chan schema.IndexedTorrent) @@ -228,7 +194,7 @@ func getTorrentsComandoHDs(ctx context.Context, i *Indexer, link string) ([]sche } ixt := schema.IndexedTorrent{ - Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + Title: releaseTitle, OriginalTitle: title, Details: link, Year: year, diff --git a/api/common.go b/api/common.go index 3a32dfb..e934348 100644 --- a/api/common.go +++ b/api/common.go @@ -1,7 +1,10 @@ package handler import ( + "bytes" + "context" "fmt" + "io" "regexp" "slices" "strings" @@ -11,6 +14,43 @@ import ( "github.com/felipemarinho97/torrent-indexer/schema" ) +// getDocument retrieves a document from the cache or makes a request to get it. +// It first checks the Redis cache for the document body. 
+func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) { + // try to get from redis first + docCache, err := i.redis.Get(ctx, link) + if err == nil { + i.metrics.CacheHits.WithLabelValues("document_body").Inc() + fmt.Printf("returning from long-lived cache: %s\n", link) + return goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(docCache))) + } + defer i.metrics.CacheMisses.WithLabelValues("document_body").Inc() + + resp, err := i.requester.GetDocument(ctx, link) + if err != nil { + return nil, err + } + defer resp.Close() + + body, err := io.ReadAll(resp) + if err != nil { + return nil, err + } + + // set cache + err = i.redis.Set(ctx, link, body) + if err != nil { + fmt.Println(err) + } + + doc, err := goquery.NewDocumentFromReader(io.NopCloser(bytes.NewReader(body))) + if err != nil { + return nil, err + } + + return doc, nil +} + func getPublishedDateFromMeta(document *goquery.Document) time.Time { var date time.Time // diff --git a/api/index.go b/api/index.go index dd55a5c..0964b88 100644 --- a/api/index.go +++ b/api/index.go @@ -13,17 +13,18 @@ import ( ) type Indexer struct { - redis *cache.Redis - metrics *monitoring.Metrics - requester *requester.Requster - search *meilisearch.SearchIndexer + redis *cache.Redis + metrics *monitoring.Metrics + requester *requester.Requster + search *meilisearch.SearchIndexer + postProcessors []PostProcessorFunc } type IndexerMeta struct { - URL string - SearchURL string - // pattern for pagination, e.g. "page/%s" - PagePattern string + Label string // Label is used for Prometheus metrics and logging. Must be alphanumeric optionally with underscores. + URL string // URL is the base URL of the indexer, e.g. "https://example.com/" + SearchURL string // SearchURL is the base URL for search queries, e.g. "?s=" + PagePattern string // PagePattern for pagination, e.g. 
"page/%s" } type Response struct { @@ -31,12 +32,22 @@ type Response struct { Count int `json:"count"` } +type PostProcessorFunc func(*Indexer, *http.Request, []schema.IndexedTorrent) []schema.IndexedTorrent + +var GlobalPostProcessors = []PostProcessorFunc{ + AddSimilarityCheck, // Jaccard similarity + CleanupTitleWebsites, // Remove website names from titles + AppendAudioTags, // Add (brazilian, eng, etc.) audio tags to titles + SendToSearchIndexer, // Send indexed torrents to Meilisearch +} + func NewIndexers(redis *cache.Redis, metrics *monitoring.Metrics, req *requester.Requster, si *meilisearch.SearchIndexer) *Indexer { return &Indexer{ - redis: redis, - metrics: metrics, - requester: req, - search: si, + redis: redis, + metrics: metrics, + requester: req, + search: si, + postProcessors: GlobalPostProcessors, } } diff --git a/api/post_processors.go b/api/post_processors.go new file mode 100644 index 0000000..dd68acc --- /dev/null +++ b/api/post_processors.go @@ -0,0 +1,61 @@ +package handler + +import ( + "fmt" + "net/http" + "slices" + "strings" + + "github.com/felipemarinho97/torrent-indexer/schema" + "github.com/felipemarinho97/torrent-indexer/utils" + "github.com/hbollon/go-edlib" +) + +// CleanupTitleWebsites removes unwanted characters from the title +func CleanupTitleWebsites(_ *Indexer, _ *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent { + for i := range torrents { + torrents[i].Title = utils.RemoveKnownWebsites(torrents[i].Title) + } + return torrents +} + +func AppendAudioTags(_ *Indexer, _ *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent { + for i, it := range torrents { + torrents[i].Title = appendAudioISO639_2Code(torrents[i].Title, it.Audio) + } + + return torrents +} + +// SendToSearchIndexer sends the indexed torrents to the search indexer +func SendToSearchIndexer(i *Indexer, _ *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent { + go func() { + _ = 
i.search.IndexTorrents(torrents) + }() + return torrents +} + +func AddSimilarityCheck(i *Indexer, r *http.Request, torrents []schema.IndexedTorrent) []schema.IndexedTorrent { + q := r.URL.Query().Get("q") + + for i, it := range torrents { + jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") + qLower := strings.ToLower(q) + splitLength := 2 + torrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) + } + + // remove the ones with zero similarity + if len(torrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { + torrents = utils.Filter(torrents, func(it schema.IndexedTorrent) bool { + return it.Similarity > 0 + }) + } + + // sort by similarity + slices.SortFunc(torrents, func(i, j schema.IndexedTorrent) int { + return int((j.Similarity - i.Similarity) * 1000) + }) + + return torrents +} diff --git a/api/rede_torrent.go b/api/rede_torrent.go index 474d5cf..7e4339c 100644 --- a/api/rede_torrent.go +++ b/api/rede_torrent.go @@ -7,12 +7,10 @@ import ( "net/http" "net/url" "regexp" - "slices" "strings" "time" "github.com/PuerkitoBio/goquery" - "github.com/hbollon/go-edlib" "github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" @@ -21,6 +19,7 @@ import ( ) var rede_torrent = IndexerMeta{ + Label: "rede_torrent", URL: "https://redetorrent.com/", SearchURL: "index.php?s=", PagePattern: "%s", @@ -28,9 +27,11 @@ var rede_torrent = IndexerMeta{ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Request) { start := time.Now() + metadata := rede_torrent + defer func() { - i.metrics.IndexerDuration.WithLabelValues("rede_torrent").Observe(time.Since(start).Seconds()) - i.metrics.IndexerRequests.WithLabelValues("rede_torrent").Inc() + i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc() }() 
ctx := r.Context() @@ -40,11 +41,11 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque // URL encode query param q = url.QueryEscape(q) - url := rede_torrent.URL + url := metadata.URL if q != "" { - url = fmt.Sprintf("%s%s%s", url, rede_torrent.SearchURL, q) + url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q) } else if page != "" { - url = fmt.Sprintf(fmt.Sprintf("%s%s", url, rede_torrent.PagePattern), page) + url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page) } fmt.Println("URL:>", url) @@ -55,7 +56,7 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque if err != nil { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("rede_torrent").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } defer resp.Close() @@ -68,7 +69,7 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("rede_torrent").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } @@ -78,57 +79,21 @@ func (i *Indexer) HandlerRedeTorrentIndexer(w http.ResponseWriter, r *http.Reque links = append(links, link) }) - var itChan = make(chan []schema.IndexedTorrent) - var errChan = make(chan error) - indexedTorrents := []schema.IndexedTorrent{} - for _, link := range links { - go func(link string) { - torrents, err := getTorrentsRedeTorrent(ctx, i, link) - if err != nil { - fmt.Println(err) - errChan <- err - } - itChan <- torrents - }(link) - } - - for i := 0; i < len(links); i++ { - select { - case torrents := <-itChan: - indexedTorrents = append(indexedTorrents, torrents...) 
- case err := <-errChan: - fmt.Println(err) - } - } - - for i, it := range indexedTorrents { - jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") - qLower := strings.ToLower(q) - splitLength := 2 - indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) - } - - // remove the ones with zero similarity - if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { - indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { - return it.Similarity > 0 - }) - } - - // sort by similarity - slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { - return int((j.Similarity - i.Similarity) * 1000) + // extract each torrent link + indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) { + return getTorrentsRedeTorrent(ctx, i, link) }) - // send to search index - go func() { - _ = i.search.IndexTorrents(indexedTorrents) - }() + // Apply post-processors + postProcessedTorrents := indexedTorrents + for _, processor := range i.postProcessors { + postProcessedTorrents = processor(i, r, postProcessedTorrents) + } w.Header().Set("Content-Type", "application/json") err = json.NewEncoder(w).Encode(Response{ - Results: indexedTorrents, - Count: len(indexedTorrents), + Results: postProcessedTorrents, + Count: len(postProcessedTorrents), }) if err != nil { fmt.Println(err) @@ -222,7 +187,7 @@ func getTorrentsRedeTorrent(ctx context.Context, i *Indexer, link string) ([]sch } }) - size = stableUniq(size) + size = utils.StableUniq(size) var chanIndexedTorrent = make(chan schema.IndexedTorrent) @@ -253,7 +218,7 @@ func getTorrentsRedeTorrent(ctx context.Context, i *Indexer, link string) ([]sch } ixt := schema.IndexedTorrent{ - Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + Title: releaseTitle, OriginalTitle: title, Details: link, Year: year, diff --git 
a/api/starck_filmes.go b/api/starck_filmes.go index 745c9fc..20e5698 100644 --- a/api/starck_filmes.go +++ b/api/starck_filmes.go @@ -6,12 +6,10 @@ import ( "fmt" "net/http" "net/url" - "slices" "strings" "time" "github.com/PuerkitoBio/goquery" - "github.com/hbollon/go-edlib" "github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" @@ -20,15 +18,19 @@ import ( ) var starck_filmes = IndexerMeta{ - URL: "https://www.starckfilmes.online/", - SearchURL: "?s=", + Label: "starck_filmes", + URL: "https://www.starckfilmes.online/", + SearchURL: "?s=", + PagePattern: "page/%s", } func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Request) { start := time.Now() + metadata := starck_filmes + defer func() { - i.metrics.IndexerDuration.WithLabelValues("starck_filmes").Observe(time.Since(start).Seconds()) - i.metrics.IndexerRequests.WithLabelValues("starck_filmes").Inc() + i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc() }() ctx := r.Context() @@ -38,11 +40,11 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ // URL encode query param q = url.QueryEscape(q) - url := starck_filmes.URL + url := metadata.URL if q != "" { - url = fmt.Sprintf("%s%s%s", url, starck_filmes.SearchURL, q) + url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q) } else if page != "" { - url = fmt.Sprintf("%spage/%s", url, page) + url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page) } fmt.Println("URL:>", url) @@ -53,7 +55,7 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ if err != nil { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("starck_filmes").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } defer resp.Close() @@ -66,7 +68,7 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w 
http.ResponseWriter, r *http.Requ fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("starck_filmes").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } @@ -76,57 +78,21 @@ func (i *Indexer) HandlerStarckFilmesIndexer(w http.ResponseWriter, r *http.Requ links = append(links, link) }) - var itChan = make(chan []schema.IndexedTorrent) - var errChan = make(chan error) - indexedTorrents := []schema.IndexedTorrent{} - for _, link := range links { - go func(link string) { - torrents, err := getTorrentStarckFilmes(ctx, i, link) - if err != nil { - fmt.Println(err) - errChan <- err - } - itChan <- torrents - }(link) - } - - for i := 0; i < len(links); i++ { - select { - case torrents := <-itChan: - indexedTorrents = append(indexedTorrents, torrents...) - case err := <-errChan: - fmt.Println(err) - } - } - - for i, it := range indexedTorrents { - jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") - qLower := strings.ToLower(q) - splitLength := 2 - indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) - } - - // remove the ones with zero similarity - if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { - indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { - return it.Similarity > 0 - }) - } - - // sort by similarity - slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { - return int((j.Similarity - i.Similarity) * 1000) + // extract each torrent link + indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) { + return getTorrentStarckFilmes(ctx, i, link) }) - // send to search index - go func() { - _ = i.search.IndexTorrents(indexedTorrents) - }() + // Apply post-processors + postProcessedTorrents := indexedTorrents + for _, processor := range i.postProcessors { + postProcessedTorrents = processor(i, r, 
postProcessedTorrents) + } w.Header().Set("Content-Type", "application/json") err = json.NewEncoder(w).Encode(Response{ - Results: indexedTorrents, - Count: len(indexedTorrents), + Results: postProcessedTorrents, + Count: len(postProcessedTorrents), }) if err != nil { fmt.Println(err) @@ -184,7 +150,7 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch // TODO: find any link from imdb imdbLink := "" - size = stableUniq(size) + size = utils.StableUniq(size) var chanIndexedTorrent = make(chan schema.IndexedTorrent) @@ -228,7 +194,7 @@ func getTorrentStarckFilmes(ctx context.Context, i *Indexer, link string) ([]sch } ixt := schema.IndexedTorrent{ - Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + Title: releaseTitle, OriginalTitle: title, Details: link, Year: year, diff --git a/api/torrent_dos_filmes.go b/api/torrent_dos_filmes.go index 6fc400a..a0f2f74 100644 --- a/api/torrent_dos_filmes.go +++ b/api/torrent_dos_filmes.go @@ -6,12 +6,10 @@ import ( "fmt" "net/http" "net/url" - "slices" "strings" "time" "github.com/PuerkitoBio/goquery" - "github.com/hbollon/go-edlib" "github.com/felipemarinho97/torrent-indexer/magnet" "github.com/felipemarinho97/torrent-indexer/schema" @@ -20,15 +18,19 @@ import ( ) var torrent_dos_filmes = IndexerMeta{ - URL: "https://torrentdosfilmes.se/", - SearchURL: "?s=", + Label: "torrent_dos_filmes", + URL: "https://torrentdosfilmes.se/", + SearchURL: "?s=", + PagePattern: "page/%s", } func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http.Request) { start := time.Now() + metadata := torrent_dos_filmes + defer func() { - i.metrics.IndexerDuration.WithLabelValues("torrent_dos_filmes").Observe(time.Since(start).Seconds()) - i.metrics.IndexerRequests.WithLabelValues("torrent_dos_filmes").Inc() + i.metrics.IndexerDuration.WithLabelValues(metadata.Label).Observe(time.Since(start).Seconds()) + i.metrics.IndexerRequests.WithLabelValues(metadata.Label).Inc() }() ctx := r.Context() @@ 
-38,11 +40,11 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http. // URL encode query param q = url.QueryEscape(q) - url := torrent_dos_filmes.URL + url := metadata.URL if q != "" { - url = fmt.Sprintf("%s%s%s", url, torrent_dos_filmes.SearchURL, q) + url = fmt.Sprintf("%s%s%s", url, metadata.SearchURL, q) } else if page != "" { - url = fmt.Sprintf("%spage/%s", url, page) + url = fmt.Sprintf(fmt.Sprintf("%s%s", url, metadata.PagePattern), page) } fmt.Println("URL:>", url) @@ -53,7 +55,7 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http. if err != nil { fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("torrent_dos_filmes").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } defer resp.Close() @@ -66,7 +68,7 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http. fmt.Println(err) } - i.metrics.IndexerErrors.WithLabelValues("torrent_dos_filmes").Inc() + i.metrics.IndexerErrors.WithLabelValues(metadata.Label).Inc() return } @@ -76,57 +78,21 @@ func (i *Indexer) HandlerTorrentDosFilmesIndexer(w http.ResponseWriter, r *http. links = append(links, link) }) - var itChan = make(chan []schema.IndexedTorrent) - var errChan = make(chan error) - indexedTorrents := []schema.IndexedTorrent{} - for _, link := range links { - go func(link string) { - torrents, err := getTorrentsTorrentDosFilmes(ctx, i, link) - if err != nil { - fmt.Println(err) - errChan <- err - } - itChan <- torrents - }(link) - } - - for i := 0; i < len(links); i++ { - select { - case torrents := <-itChan: - indexedTorrents = append(indexedTorrents, torrents...) 
- case err := <-errChan: - fmt.Println(err) - } - } - - for i, it := range indexedTorrents { - jLower := strings.ReplaceAll(strings.ToLower(fmt.Sprintf("%s %s", it.Title, it.OriginalTitle)), ".", " ") - qLower := strings.ToLower(q) - splitLength := 2 - indexedTorrents[i].Similarity = edlib.JaccardSimilarity(jLower, qLower, splitLength) - } - - // remove the ones with zero similarity - if len(indexedTorrents) > 20 && r.URL.Query().Get("filter_results") != "" && r.URL.Query().Get("q") != "" { - indexedTorrents = utils.Filter(indexedTorrents, func(it schema.IndexedTorrent) bool { - return it.Similarity > 0 - }) - } - - // sort by similarity - slices.SortFunc(indexedTorrents, func(i, j schema.IndexedTorrent) int { - return int((j.Similarity - i.Similarity) * 1000) + // extract each torrent link + indexedTorrents := utils.ParallelMap(links, func(link string) ([]schema.IndexedTorrent, error) { + return getTorrentsTorrentDosFilmes(ctx, i, link) }) - // send to search index - go func() { - _ = i.search.IndexTorrents(indexedTorrents) - }() + // Apply post-processors + postProcessedTorrents := indexedTorrents + for _, processor := range i.postProcessors { + postProcessedTorrents = processor(i, r, postProcessedTorrents) + } w.Header().Set("Content-Type", "application/json") err = json.NewEncoder(w).Encode(Response{ - Results: indexedTorrents, - Count: len(indexedTorrents), + Results: postProcessedTorrents, + Count: len(postProcessedTorrents), }) if err != nil { fmt.Println(err) @@ -191,7 +157,7 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) ( } }) - size = stableUniq(size) + size = utils.StableUniq(size) var chanIndexedTorrent = make(chan schema.IndexedTorrent) @@ -222,7 +188,7 @@ func getTorrentsTorrentDosFilmes(ctx context.Context, i *Indexer, link string) ( } ixt := schema.IndexedTorrent{ - Title: appendAudioISO639_2Code(releaseTitle, magnetAudio), + Title: releaseTitle, OriginalTitle: title, Details: link, Year: year, diff --git 
a/schema/audio.go b/schema/audio.go index 33c5f44..811c0be 100644 --- a/schema/audio.go +++ b/schema/audio.go @@ -25,6 +25,7 @@ const ( AudioMandarin3 = "Chines" AudioRussian = "Russo" AudioSwedish = "Sueco" + AudioSwedish2 = "Suéco" AudioUkrainian = "Ucraniano" AudioPolish = "Polaco" AudioPolish2 = "Polonês" @@ -34,9 +35,32 @@ const ( AudioTurkish = "Turco" AudioHindi = "Hindi" AudioFarsi = "Persa" + AudioFarsi2 = "Farsi" + AudioFarsi3 = "Iraniano" AudioMalay = "Malaio" AudioDutch = "Holandês" AudioDutch2 = "Holandes" + AudioFinnish = "Finlandês" + AudioFinnish2 = "Finlandes" + AudioDanish = "Dinamarquês" + AudioDanish2 = "Dinamarques" + AudioNorwegian = "Norueguês" + AudioNorwegian2 = "Noruegues" + AudioIcelandic = "Islandês" + AudioIcelandic2 = "Islandes" + AudioGreek = "Grego" + AudioArabic = "Árabe" + AudioArabic2 = "Arabe" + AudioHebrew = "Hebraico" + AudioVietnamese = "Vietnamita" + AudioIndonesian = "Indonésio" + AudioIndonesian2 = "Indonesio" + AudioFilipino = "Filipino" + AudioBengali = "Bengali" + AudioTamil = "Tamil" + AudioTelugu = "Telugu" + AudioGujarati = "Gujarati" + AudioMarathi = "Marathi" ) var AudioList = []Audio{ @@ -60,6 +84,7 @@ var AudioList = []Audio{ AudioMandarin3, AudioRussian, AudioSwedish, + AudioSwedish2, AudioUkrainian, AudioPolish, AudioPolish2, @@ -69,9 +94,32 @@ var AudioList = []Audio{ AudioTurkish, AudioHindi, AudioFarsi, + AudioFarsi2, + AudioFarsi3, AudioMalay, AudioDutch, AudioDutch2, + AudioFinnish, + AudioFinnish2, + AudioDanish, + AudioDanish2, + AudioNorwegian, + AudioNorwegian2, + AudioIcelandic, + AudioIcelandic2, + AudioGreek, + AudioArabic, + AudioArabic2, + AudioHebrew, + AudioVietnamese, + AudioIndonesian, + AudioIndonesian2, + AudioFilipino, + AudioBengali, + AudioTamil, + AudioTelugu, + AudioGujarati, + AudioMarathi, } func (a Audio) String() string { @@ -129,6 +177,8 @@ func (a Audio) toTag() string { return "rus" case AudioSwedish: return "swe" + case AudioSwedish2: + return "swe" case AudioUkrainian: return 
"ukr" case AudioPolish: @@ -147,12 +197,58 @@ func (a Audio) toTag() string { return "hin" case AudioFarsi: return "fas" + case AudioFarsi2: + return "fas" + case AudioFarsi3: + return "fas" case AudioMalay: return "msa" case AudioDutch: return "nld" case AudioDutch2: return "nld" + case AudioFinnish: + return "fin" + case AudioFinnish2: + return "fin" + case AudioDanish: + return "dan" + case AudioDanish2: + return "dan" + case AudioNorwegian: + return "nor" + case AudioNorwegian2: + return "nor" + case AudioIcelandic: + return "isl" + case AudioIcelandic2: + return "isl" + case AudioGreek: + return "ell" + case AudioArabic: + return "ara" + case AudioArabic2: + return "ara" + case AudioHebrew: + return "heb" + case AudioVietnamese: + return "vie" + case AudioIndonesian: + return "ind" + case AudioIndonesian2: + return "ind" + case AudioFilipino: + return "fil" + case AudioBengali: + return "ben" + case AudioTamil: + return "tam" + case AudioTelugu: + return "tel" + case AudioGujarati: + return "guj" + case AudioMarathi: + return "mar" default: return "" } diff --git a/utils/util.go b/utils/util.go index bd435ff..7f645dc 100644 --- a/utils/util.go +++ b/utils/util.go @@ -1,10 +1,13 @@ package utils import ( + "fmt" "strings" + "golang.org/x/net/html" ) +// Filter filters a slice based on a predicate function. func Filter[A any](arr []A, f func(A) bool) []A { var res []A res = make([]A, 0) @@ -16,6 +19,71 @@ func Filter[A any](arr []A, f func(A) bool) []A { return res } +// ParallelMap applies a function to each item in the iterable concurrently +// and returns a slice of results. It can handle errors by passing an error handler function. 
// ParallelMap applies mapper to every item of iterable concurrently and
// returns the concatenation of all successful results.
//
// Errors are passed to each optional errHandler; when no handler is given
// they are printed to stdout. Results from a failed mapper call are dropped.
//
// Each worker sends exactly one message on a buffered channel, so a worker
// can never block after the collector has taken its quota of receives. (The
// previous implementation sent on an error channel and then on the result
// channel for a failed item, permanently leaking that goroutine because the
// collector only performed len(iterable) receives in total.)
func ParallelMap[T any, R any](iterable []T, mapper func(item T) ([]R, error), errHandler ...func(error)) []R {
	type result struct {
		items []R
		err   error
	}

	// Buffered to len(iterable): every worker can always complete its single send.
	results := make(chan result, len(iterable))
	for _, item := range iterable {
		go func(item T) {
			items, err := mapper(item)
			results <- result{items: items, err: err}
		}(item)
	}

	mapped := make([]R, 0, len(iterable))
	for range iterable {
		res := <-results
		if res.err != nil {
			// Handlers run on the collector goroutine, so they need no locking.
			for _, handler := range errHandler {
				handler(res.err)
			}
			if len(errHandler) == 0 {
				fmt.Println(res.err)
			}
			continue
		}
		mapped = append(mapped, res.items...)
	}
	return mapped
}

// StableUniq removes duplicates from a slice while maintaining a stable order.
// As in the original implementation, each surviving value is ordered by the
// index of its LAST occurrence in s (["b","a","b"] yields ["a","b"], not
// ["b","a"]). Returns nil for empty input, matching the original.
func StableUniq(s []string) []string {
	// Record the last index at which each value appears.
	last := make(map[string]int, len(s))
	for i, v := range s {
		last[v] = i
	}

	// Order surviving values by that index. This replaces the previous
	// hand-rolled O(n²) bubble sort over []map[string]interface{}.
	idx := make([]int, 0, len(last))
	for _, i := range last {
		idx = append(idx, i)
	}
	sort.Ints(idx)

	var uniq []string
	for _, i := range idx {
		uniq = append(uniq, s[i])
	}
	return uniq
}

// commonTLDs lists top-level domains seen on known torrent-site hostnames.
var commonTLDs = []string{
	".com",
	".net",
	".org",
	".info",
	".biz",
	".co",
	".io",
	".xyz",
	".me",
	".tv",
	".cc",
	".us",
	".online",
	".site",
	".la",
	".se",
	".to",
}

// commonSubdomains lists hostname prefixes to try for each site name.
var commonSubdomains = []string{
	"", // no prefix
	"www.",
}

// commonWebsiteSLDs lists the second-level domain names of known torrent
// sites whose self-promotion is embedded in release titles.
var commonWebsiteSLDs = []string{
	"bludv",
	"torrentdosfilmes",
	"comando",
	"comandotorrents",
	"comandohds",
	"redetorrent",
	"torrenting",
	"baixarfilmesdubladosviatorrent",
	"hidratorrents",
	"wolverdonfilmes",
	"starckfilmes",
	"rapidotorrents",
	"sitedetorrents",
	"vamostorrent",
	"AZTORRENTS",
}

// websitePatterns are the textual shapes a website reference takes inside a
// title; %s is replaced by the alternation of all known hostnames.
var websitePatterns = []string{
	`\[\s*ACESSE\s+%s\s*\]`,
	`\[?\s*%s(\s*\])?`,
}

var regexesOnce sync.Once
var regexes []*regexp.Regexp

// getRegexes lazily compiles (once) the removal regexes built from every
// subdomain × site-name × TLD combination, case-insensitively.
func getRegexes() []*regexp.Regexp {
	regexesOnce.Do(func() {
		alternatives := make([]string, 0, len(commonSubdomains)*len(commonWebsiteSLDs)*len(commonTLDs))
		for _, prefix := range commonSubdomains {
			for _, name := range commonWebsiteSLDs {
				for _, tld := range commonTLDs {
					// QuoteMeta so the "." in each TLD matches a literal dot
					// only; previously it was a wildcard that could swallow
					// unrelated text (e.g. "bludvXxyz").
					alternatives = append(alternatives, regexp.QuoteMeta(prefix+name+tld))
				}
			}
		}
		websites := "(?i)(" + strings.Join(alternatives, "|") + ")"

		for _, pattern := range websitePatterns {
			regexes = append(regexes, regexp.MustCompile(fmt.Sprintf(pattern, websites)))
		}
	})
	return regexes
}

// RemoveKnownWebsites removes known website patterns from the title.
// It uses a set of common prefixes, names, and TLDs to identify and remove
// website references from the title.
// It also removes any common patterns like "[ ACESSE bludv.com ]" or
// "[ bludv.se ]" or "bludv.xyz".
func RemoveKnownWebsites(title string) string {
	for _, re := range getRegexes() {
		title = re.ReplaceAllString(title, "")
	}
	return strings.TrimSpace(title)
}