From faa721dc1904c66d93f9f51b24b5456cd0482f67 Mon Sep 17 00:00:00 2001 From: Marinho Date: Sat, 23 Sep 2023 17:02:55 +0000 Subject: [PATCH] chg: feat: rewrite to be a torrent indexer --- Dockerfile | 18 ++ README.md | 31 +-- api/b3/prices/[ticker].go | 143 ------------- api/{indexers => }/comando_torrents.go | 187 +++++++++++------ api/index.go | 33 ++- api/statusinvest/companies.go | 39 ---- api/statusinvest/indicators.go | 85 -------- cache/redis.go | 44 ++++ docker-compose.yml | 26 +++ go.mod | 13 +- go.sum | 25 +-- main.go | 9 +- schema/audio.go | 97 +++++++++ scrape/info.go | 75 +++++++ scrape/scrape.go | 275 +++++++++++++++++++++++++ 15 files changed, 711 insertions(+), 389 deletions(-) create mode 100644 Dockerfile delete mode 100644 api/b3/prices/[ticker].go rename api/{indexers => }/comando_torrents.go (58%) delete mode 100644 api/statusinvest/companies.go delete mode 100644 api/statusinvest/indicators.go create mode 100644 cache/redis.go create mode 100644 docker-compose.yml create mode 100644 schema/audio.go create mode 100644 scrape/info.go create mode 100644 scrape/scrape.go diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7cddfc8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM golang:1.19 as builder + +WORKDIR /go/src/app +COPY . . + +RUN go get -d -v ./... +RUN go install -v ./... +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o app . + +FROM alpine:latest + +RUN apk --no-cache add ca-certificates + +WORKDIR /root/ + +COPY --from=builder /go/src/app/app . + +CMD ["/root/app"] diff --git a/README.md b/README.md index 1b0eae1..24f7bf1 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,7 @@ -# vercel-lambdas +# torrent-indexer -This is a collection of lambdas for personal use powered by [vercel](https://vercel.com/). +This is a simple torrent indexer that can be used to index torrents from HTML pages. It is written in Golang and uses Redis as a cache. -## statusinvest +## Supported sites -The path `statusinvest` contains lambdas that fetches the status of a company (fundamentalist analysis) from [statusinvest.com](https://statusinvest.com.br/). - -## b3 - -The path `b3` contains lambdas that fetches stock prices data directly from from [b3.com](https://b3.com.br/). - -- `b3/prices/:symbol` [Test it!](https://vercel-lambdas-felipemarinho.vercel.app/api/b3/prices/IBOV) - - Returns the stock prices for a given symbol. - - Example Response: - ```json - { - "symbol": "IBOV", - "name": "IBOVESPA", - "market": "Indices", - "openingPrice": 112295.87, - "minPrice": 111689.15, - "maxPrice": 113221.54, - "averagePrice": 112702.99, - "currentPrice": 112323.12, - "priceVariation": 0.02, - "indexComponentIndicator": false - } - ``` - - **Obs**: The `currentPrice` field has 15 minutes delay. +- [comando-torrents](https://comando.la/) \ No newline at end of file diff --git a/api/b3/prices/[ticker].go b/api/b3/prices/[ticker].go deleted file mode 100644 index 3c8648d..0000000 --- a/api/b3/prices/[ticker].go +++ /dev/null @@ -1,143 +0,0 @@ -package quotation - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "net/http" - "strings" - - log "github.com/sirupsen/logrus" -) - -type RawResponse struct { - BizSts struct { - Cd string `json:"cd"` - } `json:"BizSts"` - Msg struct { - DtTm string `json:"dtTm"` - } `json:"Msg"` - Trad []struct { - Scty struct { - SctyQtn struct { - OpngPric float64 `json:"opngPric"` - MinPric float64 `json:"minPric"` - MaxPric float64 `json:"maxPric"` - AvrgPric float64 `json:"avrgPric"` - CurPrc float64 `json:"curPrc"` - PrcFlcn float64 `json:"prcFlcn"` - } `json:"SctyQtn"` - Mkt struct { - Nm string `json:"nm"` - } `json:"mkt"` - Symb string `json:"symb"` - Desc string `json:"desc"` - IndxCmpnInd bool `json:"indxCmpnInd"` - } `json:"scty"` - TTLQty int `json:"ttlQty"` - } `json:"Trad"` -} - -type Response struct { - Symbol string `json:"symbol"` - Name string `json:"name"` - Market string `json:"market"` - OpeningPrice float64 `json:"openingPrice"` - MinPrice float64 `json:"minPrice"` - MaxPrice float64 `json:"maxPrice"` - AveragePrice float64 `json:"averagePrice"` - CurrentPrice float64 `json:"currentPrice"` - PriceVariation float64 `json:"priceVariation"` - IndexComponentIndicator bool `json:"indexComponentIndicator"` -} - -type RawErrorResponse struct { - BizSts struct { - Cd string `json:"cd"` - Desc string `json:"desc"` - } `json:"BizSts"` - Msg struct { - DtTm string `json:"dtTm"` - } `json:"Msg"` -} - -type Error struct { - Message string `json:"message"` -} - -func HandlerListCompanies(w http.ResponseWriter, r *http.Request) { - ticker := strings.Split(r.URL.Path, "/")[4] - log.Info("Getting quotation info for ticker: " + ticker) - if ticker == "" { - w.WriteHeader(http.StatusBadRequest) - w.Write([]byte("Ticker is required")) - return - } - - client := http.Client{} - res, err := client.Get(fmt.Sprintf("https://cotacao.b3.com.br/mds/api/v1/instrumentQuotation/%s", ticker)) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - defer res.Body.Close() - w.Header().Set("Content-Type", "application/json") - // add 1min cache header - w.Header().Set("Cache-Control", "max-age=60, public") - - out, err := ioutil.ReadAll(res.Body) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - var raw RawResponse - err = json.Unmarshal(out, &raw) - if err != nil || raw.BizSts.Cd != "OK" { - var errorResponse RawErrorResponse - err = json.Unmarshal(out, &errorResponse) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - // 404 - w.WriteHeader(http.StatusNotFound) - formatedError := Error{Message: errorResponse.BizSts.Desc} - err := json.NewEncoder(w).Encode(formatedError) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - return - } - - var response []Response - for _, trad := range raw.Trad { - response = append(response, Response{ - Symbol: trad.Scty.Symb, - Name: trad.Scty.Desc, - Market: trad.Scty.Mkt.Nm, - OpeningPrice: trad.Scty.SctyQtn.OpngPric, - MinPrice: trad.Scty.SctyQtn.MinPric, - MaxPrice: trad.Scty.SctyQtn.MaxPric, - AveragePrice: trad.Scty.SctyQtn.AvrgPric, - CurrentPrice: trad.Scty.SctyQtn.CurPrc, - PriceVariation: trad.Scty.SctyQtn.PrcFlcn, - IndexComponentIndicator: trad.Scty.IndxCmpnInd, - }) - } - - err = json.NewEncoder(w).Encode(response[0]) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - return -} diff --git a/api/indexers/comando_torrents.go b/api/comando_torrents.go similarity index 58% rename from api/indexers/comando_torrents.go rename to api/comando_torrents.go index 4587644..668afc0 100644 --- a/api/indexers/comando_torrents.go +++ b/api/comando_torrents.go @@ -1,8 +1,11 @@ -package indexers +package handler import ( + "bytes" + "context" "encoding/json" "fmt" + "io/ioutil" "net/http" "net/url" "regexp" @@ -10,6 +13,8 @@ import ( "time" "github.com/PuerkitoBio/goquery" + "github.com/felipemarinho97/torrent-indexer/schema" + goscrape "github.com/felipemarinho97/torrent-indexer/scrape" ) const ( @@ -17,14 +22,6 @@ const ( queryFilter = "?s=" ) -type Audio string - -const ( - AudioPortuguese = "Português" - AudioEnglish = "Inglês" - AudioSpanish = "Espanhol" -) - var replacer = strings.NewReplacer( "janeiro", "01", "fevereiro", "02", @@ -41,28 +38,30 @@ var replacer = strings.NewReplacer( ) type IndexedTorrent struct { - Title string `json:"title"` - OriginalTitle string `json:"original_title"` - Details string `json:"details"` - Year string `json:"year"` - Audio []Audio `json:"audio"` - MagnetLink string `json:"magnet_link"` - Date time.Time `json:"date"` - InfoHash string `json:"info_hash"` + Title string `json:"title"` + OriginalTitle string `json:"original_title"` + Details string `json:"details"` + Year string `json:"year"` + Audio []schema.Audio `json:"audio"` + MagnetLink string `json:"magnet_link"` + Date time.Time `json:"date"` + InfoHash string `json:"info_hash"` + Trackers []string `json:"trackers"` + LeechCount int `json:"leech_count"` + SeedCount int `json:"seed_count"` } -func HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) { +func (i *Indexer) HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() // supported query params: q, season, episode q := r.URL.Query().Get("q") - if q == "" { - w.WriteHeader(http.StatusBadRequest) - json.NewEncoder(w).Encode(map[string]string{"error": "param q is required"}) - return - } // URL encode query param q = url.QueryEscape(q) - url := URL + queryFilter + q + url := URL + if q != "" { + url = fmt.Sprintf("%s%s%s", URL, queryFilter, q) + } fmt.Println("URL:>", url) resp, err := http.Get(url) @@ -92,7 +91,7 @@ func HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) { var indexedTorrents []IndexedTorrent for _, link := range links { go func(link string) { - torrents, err := getTorrents(link) + torrents, err := getTorrents(ctx, i, link) if err != nil { fmt.Println(err) errChan <- err @@ -114,15 +113,9 @@ func HandlerComandoIndexer(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode(indexedTorrents) } -func getTorrents(link string) ([]IndexedTorrent, error) { +func getTorrents(ctx context.Context, i *Indexer, link string) ([]IndexedTorrent, error) { var indexedTorrents []IndexedTorrent - resp, err := http.Get(link) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - doc, err := goquery.NewDocumentFromReader(resp.Body) + doc, err := getDocument(ctx, i, link) if err != nil { return nil, err } @@ -153,7 +146,7 @@ func getTorrents(link string) ([]IndexedTorrent, error) { magnetLinks = append(magnetLinks, magnetLink) }) - var audio []Audio + var audio []schema.Audio var year string article.Find("div.entry-content > p").Each(func(i int, s *goquery.Selection) { // pattern: @@ -165,6 +158,7 @@ func getTorrents(link string) ([]IndexedTorrent, error) { // Formato: MKV // Qualidade: WEB-DL // Áudio: Português | Inglês + // Idioma: Português | Inglês // Legenda: Português // Tamanho: – // Qualidade de Áudio: 10 @@ -173,18 +167,19 @@ func getTorrents(link string) ([]IndexedTorrent, error) { // Servidor: Torrent text := s.Text() - re := regexp.MustCompile(`Áudio: (.*)`) + //re := regexp.MustCompile(`Áudio: (.*)`) + re := regexp.MustCompile(`(Áudio|Idioma): (.*)`) audioMatch := re.FindStringSubmatch(text) if len(audioMatch) > 0 { - langs_raw := strings.Split(audioMatch[1], "|") + sep := getSeparator(audioMatch[2]) + langs_raw := strings.Split(audioMatch[2], sep) for _, lang := range langs_raw { lang = strings.TrimSpace(lang) - if lang == "Português" { - audio = append(audio, AudioPortuguese) - } else if lang == "Inglês" { - audio = append(audio, AudioEnglish) - } else if lang == "Espanhol" { - audio = append(audio, AudioSpanish) + a := schema.GetAudioFromString(lang) + if a != nil { + audio = append(audio, *a) + } else { + fmt.Println("unknown language:", lang) } } } @@ -208,25 +203,30 @@ func getTorrents(link string) ([]IndexedTorrent, error) { // for each magnet link, create a new indexed torrent for _, magnetLink := range magnetLinks { releaseTitle := extractReleaseName(magnetLink) - magnetAudio := []Audio{} + magnetAudio := []schema.Audio{} if strings.Contains(strings.ToLower(releaseTitle), "dual") { - magnetAudio = append(magnetAudio, AudioPortuguese) magnetAudio = append(magnetAudio, audio...) - } else { - // filter portuguese audio from list - for _, lang := range audio { - if lang != AudioPortuguese { - magnetAudio = append(magnetAudio, lang) + } else if len(audio) > 1 { + // remove portuguese audio, and append to magnetAudio + for _, a := range audio { + if a != schema.AudioPortuguese { + magnetAudio = append(magnetAudio, a) } } + } else { + magnetAudio = append(magnetAudio, audio...) } - - // remove duplicates - magnetAudio = removeDuplicates(magnetAudio) // decode url encoded title releaseTitle, _ = url.QueryUnescape(releaseTitle) infoHash := extractInfoHash(magnetLink) + trackers := extractTrackers(magnetLink) + peer, seed, err := goscrape.GetLeechsAndSeeds(ctx, i.redis, infoHash, trackers) + if err != nil { + fmt.Println(err) + } + + title := processTitle(title, magnetAudio) indexedTorrents = append(indexedTorrents, IndexedTorrent{ Title: releaseTitle, @@ -237,12 +237,75 @@ func getTorrents(link string) ([]IndexedTorrent, error) { MagnetLink: magnetLink, Date: date, InfoHash: infoHash, + Trackers: trackers, + LeechCount: peer, + SeedCount: seed, }) } return indexedTorrents, nil } +func processTitle(title string, a []schema.Audio) string { + // remove ' - Donwload' from title + title = strings.Replace(title, " - Download", "", -1) + + // remove 'comando.la' from title + title = strings.Replace(title, "comando.la", "", -1) + + // add audio ISO 639-2 code to title between () + if len(a) > 0 { + audio := []string{} + for _, lang := range a { + audio = append(audio, lang.String()) + } + title = fmt.Sprintf("%s (%s)", title, strings.Join(audio, ", ")) + } + + return title +} + +func getSeparator(s string) string { + if strings.Contains(s, "|") { + return "|" + } else if strings.Contains(s, ",") { + return "," + } + return " " +} + +func getDocument(ctx context.Context, i *Indexer, link string) (*goquery.Document, error) { + // try to get from redis first + docCache, err := i.redis.Get(ctx, link) + if err == nil { + return goquery.NewDocumentFromReader(ioutil.NopCloser(bytes.NewReader(docCache))) + } + + resp, err := http.Get(link) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + // set cache + err = i.redis.Set(ctx, link, body) + if err != nil { + fmt.Println(err) + } + + doc, err := goquery.NewDocumentFromReader(ioutil.NopCloser(bytes.NewReader(body))) + if err != nil { + return nil, err + } + + return doc, nil +} + func extractReleaseName(magnetLink string) string { re := regexp.MustCompile(`dn=(.*?)&`) matches := re.FindStringSubmatch(magnetLink) @@ -261,16 +324,14 @@ func extractInfoHash(magnetLink string) string { return "" } -func removeDuplicates(elements []Audio) []Audio { - encountered := map[Audio]bool{} - result := []Audio{} - - for _, element := range elements { - if !encountered[element] { - encountered[element] = true - result = append(result, element) - } +func extractTrackers(magnetLink string) []string { + re := regexp.MustCompile(`tr=(.*?)&`) + matches := re.FindAllStringSubmatch(magnetLink, -1) + var trackers []string + for _, match := range matches { + // url decode + tracker, _ := url.QueryUnescape(match[1]) + trackers = append(trackers, tracker) } - - return result + return trackers } diff --git a/api/index.go b/api/index.go index 406f677..a7920a0 100644 --- a/api/index.go +++ b/api/index.go @@ -1,16 +1,37 @@ package handler import ( - "fmt" + "encoding/json" "net/http" "time" + + "github.com/felipemarinho97/torrent-indexer/cache" ) +type Indexer struct { + redis *cache.Redis +} + +func NewIndexers(redis *cache.Redis) *Indexer { + return &Indexer{ + redis: redis, + } +} + func HandlerIndex(w http.ResponseWriter, r *http.Request) { currentTime := time.Now().Format(time.RFC850) - fmt.Fprintf(w, currentTime) - w.Header().Set("Content-Type", "text/html") - fmt.Fprintf(w, ` - Github OAuth2 => https://github.com/xjh22222228/github-oauth2 - `) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(map[string]interface{}{ + "time": currentTime, + "endpoints": map[string]interface{}{ + "/indexers/comando_torrents": map[string]interface{}{ + "method": "GET", + "description": "Indexer for comando torrents", + "query_params": map[string]string{ + "q": "search query", + }, + }, + }, + }) } diff --git a/api/statusinvest/companies.go b/api/statusinvest/companies.go deleted file mode 100644 index 1ac9a9f..0000000 --- a/api/statusinvest/companies.go +++ /dev/null @@ -1,39 +0,0 @@ -package statusinvest - -import ( - "io/ioutil" - "net/http" -) - -func HandlerListCompanies(w http.ResponseWriter, r *http.Request) { - client := http.Client{} - req, err := http.NewRequest("GET", "https://statusinvest.com.br/acao/companiesnavigation?page=1&size=500", nil) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36") - req.Header.Set("Accept", "application/json, text/plain, */*") - - resp, err := client.Do(req) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - defer resp.Body.Close() - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(resp.StatusCode) - - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - w.Write(body) -} diff --git a/api/statusinvest/indicators.go b/api/statusinvest/indicators.go deleted file mode 100644 index 891ffd2..0000000 --- a/api/statusinvest/indicators.go +++ /dev/null @@ -1,85 +0,0 @@ -package statusinvest - -import ( - "encoding/json" - "io/ioutil" - "net/http" - "net/url" - "strings" -) - -type Response struct { - Success bool `json:"success"` - Data map[string][]map[string]interface{} `json:"data"` -} - -type ParsedResponse map[string]interface{} - -func HandlerIndicators(w http.ResponseWriter, r *http.Request) { - if r.Method != "GET" { - w.WriteHeader(http.StatusMethodNotAllowed) - w.Write([]byte("Method not allowed")) - return - } - time := r.URL.Query().Get("time") - if time == "" { - time = "7" - } - ticker := r.URL.Query().Get("ticker") - if ticker == "" { - w.WriteHeader(http.StatusBadRequest) - w.Write([]byte("Ticker is required")) - return - } - - data := url.Values{ - "codes[]": strings.Split(ticker, ","), - "time": {time}, - "byQuarter": {"false"}, - "futureData": {"false"}, - } - - resp, err := http.PostForm("https://statusinvest.com.br/acao/indicatorhistoricallist", data) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - defer resp.Body.Close() - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(resp.StatusCode) - - out, err := ioutil.ReadAll(resp.Body) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - var indicators Response - err = json.Unmarshal(out, &indicators) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - parsedResponse := ParsedResponse{} - - d := indicators.Data[ticker] - - for _, v := range d { - v := v - parsedResponse[v["key"].(string)] = v - } - - out, err = json.Marshal(parsedResponse) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - w.Write([]byte(err.Error())) - return - } - - w.Write(out) -} diff --git a/cache/redis.go b/cache/redis.go new file mode 100644 index 0000000..fc02e67 --- /dev/null +++ b/cache/redis.go @@ -0,0 +1,44 @@ +package cache + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/redis/go-redis/v9" +) + +var ( + DefaultExpiration = 24 * time.Hour * 180 // 180 days + IndexerComandoTorrents = "indexer:comando_torrents" +) + +type Redis struct { + client *redis.Client +} + +func NewRedis() *Redis { + redisHost := os.Getenv("REDIS_HOST") + if redisHost == "" { + redisHost = "localhost" + } + return &Redis{ + client: redis.NewClient(&redis.Options{ + Addr: fmt.Sprintf("%s:6379", redisHost), + Password: "", + }), + } +} + +func (r *Redis) Get(ctx context.Context, key string) ([]byte, error) { + return r.client.Get(ctx, key).Bytes() +} + +func (r *Redis) Set(ctx context.Context, key string, value []byte) error { + return r.client.Set(ctx, key, value, DefaultExpiration).Err() +} + +func (r *Redis) SetWithExpiration(ctx context.Context, key string, value []byte, expiration time.Duration) error { + return r.client.Set(ctx, key, value, expiration).Err() +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ca63243 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,26 @@ +version: '3.7' + +services: + torrent-indexer: + image: + build: + context: . + dockerfile: Dockerfile + container_name: torrent-indexer + restart: unless-stopped + ports: + - 8080:7006 + networks: + - indexer + environment: + - REDIS_HOST=redis + + redis: + image: redis:alpine + container_name: redis + restart: unless-stopped + networks: + - indexer + +networks: + indexer: diff --git a/go.mod b/go.mod index e527426..48a4c82 100644 --- a/go.mod +++ b/go.mod @@ -1,15 +1,14 @@ -module github.com/felipemarinho97/vercel-lambdas +module github.com/felipemarinho97/torrent-indexer -go 1.17 +go 1.18 -require github.com/sirupsen/logrus v1.9.0 +require github.com/redis/go-redis/v9 v9.2.0 require ( github.com/andybalholm/cascadia v1.3.1 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect golang.org/x/net v0.7.0 // indirect ) -require ( - github.com/PuerkitoBio/goquery v1.8.1 - golang.org/x/sys v0.5.0 // indirect -) +require github.com/PuerkitoBio/goquery v1.8.1 diff --git a/go.sum b/go.sum index 1630e45..e650659 100644 --- a/go.sum +++ b/go.sum @@ -2,16 +2,16 @@ github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAc github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= -github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/redis/go-redis/v9 v9.2.0 h1:zwMdX0A4eVzse46YN18QhuDiM4uf3JmkOB4VZrdt5uI= +github.com/redis/go-redis/v9 v9.2.0/go.mod h1:hdY0cQFCN4fnSYT6TkisLufl/4W5UIXyv0b/CLO2V2M= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= @@ -29,9 +29,7 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -45,6 +43,3 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index b03ee7d..7c3662f 100644 --- a/main.go +++ b/main.go @@ -3,14 +3,15 @@ package main import ( "net/http" - handler "github.com/felipemarinho97/vercel-lambdas/api" - "github.com/felipemarinho97/vercel-lambdas/api/indexers" - "github.com/felipemarinho97/vercel-lambdas/api/statusinvest" + handler "github.com/felipemarinho97/torrent-indexer/api" + "github.com/felipemarinho97/torrent-indexer/cache" ) func main() { + redis := cache.NewRedis() + indexers := handler.NewIndexers(redis) + http.HandleFunc("/", handler.HandlerIndex) - http.HandleFunc("/statusinvest/companies", statusinvest.HandlerListCompanies) http.HandleFunc("/indexers/comando_torrents", indexers.HandlerComandoIndexer) err := http.ListenAndServe(":7006", nil) diff --git a/schema/audio.go b/schema/audio.go new file mode 100644 index 0000000..0605171 --- /dev/null +++ b/schema/audio.go @@ -0,0 +1,97 @@ +package schema + +type Audio string + +const ( + AudioPortuguese = "Português" + AudioEnglish = "Inglês" + AudioSpanish = "Espanhol" + AudioFrench = "Francês" + AudioGerman = "Alemão" + AudioItalian = "Italiano" + AudioJapanese = "Japonês" + AudioKorean = "Coreano" + AudioMandarin = "Mandarim" + AudioMandarin2 = "Chinês" + AudioRussian = "Russo" + AudioSwedish = "Sueco" + AudioUkrainian = "Ucraniano" + AudioPolish = "Polaco" + AudioPolish2 = "Polonês" + AudioThai = "Tailandês" + AudioTurkish = "Turco" +) + +var AudioList = []Audio{ + AudioPortuguese, + AudioEnglish, + AudioSpanish, + AudioFrench, + AudioGerman, + AudioItalian, + AudioJapanese, + AudioKorean, + AudioMandarin, + AudioMandarin2, + AudioRussian, + AudioSwedish, + AudioUkrainian, + AudioPolish, + AudioPolish2, + AudioThai, + AudioTurkish, +} + +func (a Audio) String() string { + return a.toISO639_2() +} + +func GetAudioFromString(s string) *Audio { + for _, a := range AudioList { + if string(a) == s { + return &a + } + } + return nil +} + +func (a Audio) toISO639_2() string { + switch a { + case AudioPortuguese: + return "por" + case AudioEnglish: + return "eng" + case AudioSpanish: + return "spa" + case AudioFrench: + return "fra" + case AudioGerman: + return "deu" + case AudioItalian: + return "ita" + case AudioJapanese: + return "jpn" + case AudioKorean: + return "kor" + case AudioMandarin: + return "chi" + case AudioMandarin2: + return "chi" + case AudioRussian: + return "rus" + case AudioSwedish: + return "swe" + case AudioUkrainian: + return "ukr" + case AudioPolish: + return "pol" + case AudioPolish2: + return "pol" + case AudioThai: + return "tha" + case AudioTurkish: + return "tur" + default: + return "" + } +} diff --git a/scrape/info.go b/scrape/info.go new file mode 100644 index 0000000..48f65cd --- /dev/null +++ b/scrape/info.go @@ -0,0 +1,75 @@ +package goscrape + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/felipemarinho97/torrent-indexer/cache" +) + +func getPeersFromCache(ctx context.Context, r *cache.Redis, infoHash string) (int, int, error) { + // get peers and seeds from redis first + peersCache, err := r.Get(ctx, infoHash) + if err == nil { + var peers map[string]int + err = json.Unmarshal(peersCache, &peers) + if err != nil { + return 0, 0, err + } + return peers["leech"], peers["seed"], nil + } + return 0, 0, err +} + +func setPeersToCache(ctx context.Context, r *cache.Redis, infoHash string, peer, seed int) error { + peers := map[string]int{ + "leech": peer, + "seed": seed, + } + peersJSON, err := json.Marshal(peers) + if err != nil { + return err + } + err = r.SetWithExpiration(ctx, infoHash, peersJSON, 24*time.Hour) + if err != nil { + return err + } + return nil +} + +func GetLeechsAndSeeds(ctx context.Context, r *cache.Redis, infoHash string, trackers []string) (int, int, error) { + var leech, seed int + leech, seed, err := getPeersFromCache(ctx, r, infoHash) + if err != nil { + fmt.Println("unable to get peers from cache for infohash:", infoHash) + } else { + fmt.Println("get from cache> leech:", leech, "seed:", seed) + return leech, seed, nil + } + + for _, tracker := range trackers { + // get peers and seeds from redis first + scraper, err := New(tracker) + if err != nil { + fmt.Println(err) + continue + } + + scraper.SetTimeout(500 * time.Millisecond) + + // get peers and seeds from redis first + res, err := scraper.Scrape([]byte(infoHash)) + if err != nil { + fmt.Println(err) + continue + } + + leech += int(res[0].Leechers) + seed += int(res[0].Seeders) + setPeersToCache(ctx, r, infoHash, leech, seed) + return leech, seed, nil + } + return leech, seed, nil +} diff --git a/scrape/scrape.go b/scrape/scrape.go new file mode 100644 index 0000000..377f630 --- /dev/null +++ b/scrape/scrape.go @@ -0,0 +1,275 @@ +package goscrape + +/* + This library partially implement BEP-0015. + See http://www.bittorrent.org/beps/bep_0015.html +*/ + +import ( + "bytes" + "encoding/binary" + "encoding/hex" + "errors" + "math/rand" + "net" + "net/url" + "sync" + "time" +) + +const ( + pid uint64 = 0x41727101980 // protocol id magic constant + actionConnect uint32 = 0 + actionAnnounce uint32 = 1 + actionScrap uint32 = 2 + actionError uint32 = 3 + defaultTimeout = time.Second * 15 +) + +var ( + // ErrUnsupportedScheme is returned if the URL scheme is unsupported + ErrUnsupportedScheme = errors.New("unsupported scrape scheme") + // ErrTooManyInfohash is returned if more than 74 infohash are given + ErrTooManyInfohash = errors.New("cannot lookup more than 74 infohash at once") + // ErrRequest is returned if a write was not done completely + ErrRequest = errors.New("udp packet was not entirely written") + // ErrResponse is returned if the tracker sent an invalid response + ErrResponse = errors.New("invalid response received from tracker") + // ErrInvalidAction is returned if the tracker answered with an invalid action + ErrInvalidAction = errors.New("invalid action") + // ErrInvalidTransactionID is returned if the tracker answered with an invalid transaction id + ErrInvalidTransactionID = errors.New("invalid transaction id received") + // ErrRemote is returned if a remote error occured + ErrRemote = errors.New("service unavailable") + // ErrRetryLimit is returned when the maximum number of retries is exceeded + ErrRetryLimit = errors.New("maximum number of retries exceeded") +) + +// ScrapeResult represents one result returned by the Scrape method +type ScrapeResult struct { + Infohash []byte + Seeders uint32 + Leechers uint32 + Completed uint32 +} + +// TorrentScrape represents the internal structure of goscrape +type Goscrape struct { + sync.Mutex + url string + conn net.Conn + connectionID uint64 + session time.Time + retries int + timeout time.Duration +} + +func init() { + rand.Seed(time.Now().UTC().UnixNano()) +} + +// New creates a new instance of goscrape for the given torrent tracker +func New(rawurl string) (*Goscrape, error) { + u, err := url.Parse(rawurl) + if err != nil { + return nil, err + } + + if u.Scheme != "udp" { + return nil, ErrUnsupportedScheme + } + + return &Goscrape{ + url: u.Host, + retries: 3, + timeout: defaultTimeout, + }, nil +} + +// SetRetryLimit sets the maximum number of attempts to do before giving up +func (g *Goscrape) SetRetryLimit(retries int) { + g.retries = retries +} + +// SetTimeout configure the time to wait for a tracker to answer a query +func (g *Goscrape) SetTimeout(timeout time.Duration) { + g.timeout = timeout +} + +func (g *Goscrape) transactionID() uint32 { + return uint32(rand.Int31()) +} + +func (g *Goscrape) connect() (net.Conn, uint64, error) { + var err error + + g.Lock() + defer g.Unlock() + + if time.Since(g.session) > time.Minute { + // Get a new transaction ID + tid := g.transactionID() + + // Prepare our outgoing UDP packet + buf := make([]byte, 16) + binary.BigEndian.PutUint64(buf[0:], pid) // magic constant + binary.BigEndian.PutUint32(buf[8:], 0) // action connect + binary.BigEndian.PutUint32(buf[12:], tid) // transaction id + + g.conn, err = net.DialTimeout("udp", g.url, g.timeout) + if err != nil { + return nil, 0, err + } + + var n, retries int + + for { + retries++ + + // Set a write deadline + g.conn.SetWriteDeadline(time.Now().Add(g.timeout)) + + n, err = g.conn.Write(buf) + if err != nil { + return nil, 0, err + } + if n != len(buf) { + return nil, 0, ErrRequest + } + + // Set a read deadline + g.conn.SetReadDeadline(time.Now().Add(g.timeout)) + + // Reuse our buffer to read the response + n, err = g.conn.Read(buf) + if err, ok := err.(net.Error); ok && err.Timeout() { + if retries > g.retries { + return nil, 0, ErrRetryLimit + } + continue + } else if err != nil { + return nil, 0, err + } + break + } + + if n != len(buf) { + return nil, 0, ErrResponse + } + + if action := binary.BigEndian.Uint32(buf[0:]); action != actionConnect { + return nil, 0, ErrInvalidAction + } + if tid := binary.BigEndian.Uint32(buf[4:]); tid != tid { + return nil, 0, ErrInvalidTransactionID + } + + g.connectionID = binary.BigEndian.Uint64(buf[8:]) + g.session = time.Now() + } + return g.conn, g.connectionID, nil +} + +// Scrape will scrape the given list of infohash and return a ScrapeResult struct +func (g *Goscrape) Scrape(infohash ...[]byte) ([]*ScrapeResult, error) { + + if len(infohash) > 74 { + return nil, ErrTooManyInfohash + } + + conn, connectionid, err := g.connect() + if err != nil { + return nil, err + } + + // Get a new transaction ID + tid := g.transactionID() + + // Prepare our outgoing UDP packet + buf := make([]byte, 16+(len(infohash)*20)) + binary.BigEndian.PutUint64(buf[0:], connectionid) // connection id + binary.BigEndian.PutUint32(buf[8:], 2) // action scrape + binary.BigEndian.PutUint32(buf[12:], tid) // transaction id + + // Pack all the infohash together + src := bytes.Join(infohash, []byte("")) + + // Create our temporary hex-decoded buffer + dst := make([]byte, hex.DecodedLen(len(src))) + + _, err = hex.Decode(dst, src) + if err != nil { + return nil, err + } + + // Copy the binary representation of the infohash + // to the packet buffer + copy(buf[16:], dst) + + response := make([]byte, 8+(12*len(infohash))) + + var n, retries int + for { + retries++ + + // Set a write deadline + conn.SetWriteDeadline(time.Now().Add(g.timeout)) + + // Send the packet to the tracker + n, err = conn.Write(buf) + if err != nil { + return nil, err + } + + if n != len(buf) { + return nil, ErrRequest + } + + // Set a read deadline + conn.SetReadDeadline(time.Now().Add(g.timeout)) + + n, err = conn.Read(response) + if err, ok := err.(net.Error); ok && err.Timeout() { + if retries > g.retries { + return nil, ErrRetryLimit + } + continue + } else if err != nil { + return nil, err + } + break + } + + // Check expected packet size + if n < 8+(12*len(infohash)) { + return nil, ErrResponse + } + + action := binary.BigEndian.Uint32(response[0:]) + + if transactionid := binary.BigEndian.Uint32(response[4:]); transactionid != tid { + return nil, ErrInvalidTransactionID + } + + if action == actionError { + return nil, ErrRemote + } + if action != actionScrap { + return nil, ErrInvalidAction + } + + r := make([]*ScrapeResult, len(infohash)) + + offset := 8 + for i := 0; i < len(infohash); i++ { + r[i] = &ScrapeResult{ + Infohash: infohash[i], + Seeders: binary.BigEndian.Uint32(response[offset:]), + Completed: binary.BigEndian.Uint32(response[offset+4:]), + Leechers: binary.BigEndian.Uint32(response[offset+8:]), + } + offset += 12 + } + + return r, nil +}