chg: feat: rewrite to be a torrent indexer

2023-09-23 17:02:55 +00:00
parent c68df1ab37
commit faa721dc19
15 changed files with 711 additions and 389 deletions

scrape/info.go (new file, +75)

@@ -0,0 +1,75 @@
package goscrape

import (
    "context"
    "encoding/json"
    "fmt"
    "time"

    "github.com/felipemarinho97/torrent-indexer/cache"
)

// getPeersFromCache returns the cached leecher and seeder counts for an infohash.
func getPeersFromCache(ctx context.Context, r *cache.Redis, infoHash string) (int, int, error) {
    // get peers and seeds from redis first
    peersCache, err := r.Get(ctx, infoHash)
    if err == nil {
        var peers map[string]int
        err = json.Unmarshal(peersCache, &peers)
        if err != nil {
            return 0, 0, err
        }
        return peers["leech"], peers["seed"], nil
    }
    return 0, 0, err
}

// setPeersToCache caches the leecher and seeder counts for an infohash with a 24-hour TTL.
func setPeersToCache(ctx context.Context, r *cache.Redis, infoHash string, peer, seed int) error {
    peers := map[string]int{
        "leech": peer,
        "seed":  seed,
    }
    peersJSON, err := json.Marshal(peers)
    if err != nil {
        return err
    }
    return r.SetWithExpiration(ctx, infoHash, peersJSON, 24*time.Hour)
}

// GetLeechsAndSeeds returns the leecher and seeder counts for an infohash,
// serving from cache when possible and falling back to scraping the given trackers.
func GetLeechsAndSeeds(ctx context.Context, r *cache.Redis, infoHash string, trackers []string) (int, int, error) {
    var leech, seed int
    leech, seed, err := getPeersFromCache(ctx, r, infoHash)
    if err != nil {
        fmt.Println("unable to get peers from cache for infohash:", infoHash)
    } else {
        fmt.Println("get from cache> leech:", leech, "seed:", seed)
        return leech, seed, nil
    }
    for _, tracker := range trackers {
        // create a scraper for this tracker
        scraper, err := New(tracker)
        if err != nil {
            fmt.Println(err)
            continue
        }
        scraper.SetTimeout(500 * time.Millisecond)
        // scrape the tracker for this infohash
        res, err := scraper.Scrape([]byte(infoHash))
        if err != nil {
            fmt.Println(err)
            continue
        }
        leech += int(res[0].Leechers)
        seed += int(res[0].Seeders)
        // cache the result; a failed cache write is not fatal
        if err := setPeersToCache(ctx, r, infoHash, leech, seed); err != nil {
            fmt.Println("unable to cache peers for infohash:", infoHash)
        }
        return leech, seed, nil
    }
    return leech, seed, nil
}
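
For orientation, a minimal sketch of how an indexer handler might call this API from outside the package. The cache.NewRedis constructor name and the tracker URLs are illustrative assumptions, not taken from this diff; note the infohash is passed hex-encoded, since Scrape hex-decodes it before sending.

// Sketch only: cache.NewRedis() is an assumed constructor name.
redis := cache.NewRedis()
trackers := []string{
    "udp://tracker.opentrackr.org:1337/announce",
    "udp://open.stealth.si:80/announce",
}
leech, seed, err := goscrape.GetLeechsAndSeeds(context.Background(), redis,
    "08ada5a7a6183aae1e09d831df6748d566095a10", trackers)
if err != nil {
    log.Fatal(err)
}
fmt.Println("leechers:", leech, "seeders:", seed)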

scrape/scrape.go (new file, +275)

@@ -0,0 +1,275 @@
package goscrape

/*
This library partially implements BEP 15 (UDP tracker protocol).
See http://www.bittorrent.org/beps/bep_0015.html
*/

import (
    "bytes"
    "encoding/binary"
    "encoding/hex"
    "errors"
    "math/rand"
    "net"
    "net/url"
    "sync"
    "time"
)

const (
    pid            uint64 = 0x41727101980 // protocol id magic constant
    actionConnect  uint32 = 0
    actionAnnounce uint32 = 1
    actionScrap    uint32 = 2
    actionError    uint32 = 3
    defaultTimeout        = time.Second * 15
)
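
// For reference, the BEP 15 packet layouts this file depends on, summarized
// from the spec linked above (all integers big-endian, sizes in bytes):
//
//   connect request:  protocol_id(8) action(4) transaction_id(4)   = 16 bytes
//   connect response: action(4) transaction_id(4) connection_id(8) = 16 bytes
//   scrape request:   connection_id(8) action(4) transaction_id(4), then n 20-byte infohashes
//   scrape response:  action(4) transaction_id(4), then n * (seeders(4) completed(4) leechers(4))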
var (
    // ErrUnsupportedScheme is returned if the URL scheme is unsupported
    ErrUnsupportedScheme = errors.New("unsupported scrape scheme")
    // ErrTooManyInfohash is returned if more than 74 infohashes are given
    ErrTooManyInfohash = errors.New("cannot lookup more than 74 infohash at once")
    // ErrRequest is returned if a write was not done completely
    ErrRequest = errors.New("udp packet was not entirely written")
    // ErrResponse is returned if the tracker sent an invalid response
    ErrResponse = errors.New("invalid response received from tracker")
    // ErrInvalidAction is returned if the tracker answered with an invalid action
    ErrInvalidAction = errors.New("invalid action")
    // ErrInvalidTransactionID is returned if the tracker answered with an invalid transaction id
    ErrInvalidTransactionID = errors.New("invalid transaction id received")
    // ErrRemote is returned if a remote error occurred
    ErrRemote = errors.New("service unavailable")
    // ErrRetryLimit is returned when the maximum number of retries is exceeded
    ErrRetryLimit = errors.New("maximum number of retries exceeded")
)

// ScrapeResult represents one result returned by the Scrape method
type ScrapeResult struct {
    Infohash  []byte
    Seeders   uint32
    Leechers  uint32
    Completed uint32
}

// Goscrape represents a scraper session against a single tracker
type Goscrape struct {
    sync.Mutex
    url          string
    conn         net.Conn
    connectionID uint64
    session      time.Time
    retries      int
    timeout      time.Duration
}

func init() {
    rand.Seed(time.Now().UTC().UnixNano())
}
// New creates a new instance of goscrape for the given torrent tracker
func New(rawurl string) (*Goscrape, error) {
    u, err := url.Parse(rawurl)
    if err != nil {
        return nil, err
    }
    if u.Scheme != "udp" {
        return nil, ErrUnsupportedScheme
    }
    return &Goscrape{
        url:     u.Host,
        retries: 3,
        timeout: defaultTimeout,
    }, nil
}

// SetRetryLimit sets the maximum number of attempts to make before giving up
func (g *Goscrape) SetRetryLimit(retries int) {
    g.retries = retries
}

// SetTimeout configures how long to wait for a tracker to answer a query
func (g *Goscrape) SetTimeout(timeout time.Duration) {
    g.timeout = timeout
}

func (g *Goscrape) transactionID() uint32 {
    return uint32(rand.Int31())
}
func (g *Goscrape) connect() (net.Conn, uint64, error) {
    var err error
    g.Lock()
    defer g.Unlock()
    // Per BEP 15, a connection ID may be reused for up to a minute;
    // reconnect once the current session is older than that.
    if time.Since(g.session) > time.Minute {
        // Get a new transaction ID
        tid := g.transactionID()
        // Prepare our outgoing UDP packet
        buf := make([]byte, 16)
        binary.BigEndian.PutUint64(buf[0:], pid)           // magic constant
        binary.BigEndian.PutUint32(buf[8:], actionConnect) // action connect
        binary.BigEndian.PutUint32(buf[12:], tid)          // transaction id
        g.conn, err = net.DialTimeout("udp", g.url, g.timeout)
        if err != nil {
            return nil, 0, err
        }
        var n, retries int
        for {
            retries++
            // Set a write deadline
            g.conn.SetWriteDeadline(time.Now().Add(g.timeout))
            n, err = g.conn.Write(buf)
            if err != nil {
                return nil, 0, err
            }
            if n != len(buf) {
                return nil, 0, ErrRequest
            }
            // Set a read deadline
            g.conn.SetReadDeadline(time.Now().Add(g.timeout))
            // Reuse our buffer to read the response
            n, err = g.conn.Read(buf)
            if err, ok := err.(net.Error); ok && err.Timeout() {
                if retries > g.retries {
                    return nil, 0, ErrRetryLimit
                }
                continue
            } else if err != nil {
                return nil, 0, err
            }
            break
        }
        if n != len(buf) {
            return nil, 0, ErrResponse
        }
        if action := binary.BigEndian.Uint32(buf[0:]); action != actionConnect {
            return nil, 0, ErrInvalidAction
        }
        // Verify the tracker echoed the transaction ID we sent
        if rtid := binary.BigEndian.Uint32(buf[4:]); rtid != tid {
            return nil, 0, ErrInvalidTransactionID
        }
        g.connectionID = binary.BigEndian.Uint64(buf[8:])
        g.session = time.Now()
    }
    return g.conn, g.connectionID, nil
}
// Scrape will scrape the given list of infohashes and return a list of ScrapeResult
func (g *Goscrape) Scrape(infohash ...[]byte) ([]*ScrapeResult, error) {
    if len(infohash) > 74 {
        return nil, ErrTooManyInfohash
    }
    conn, connectionid, err := g.connect()
    if err != nil {
        return nil, err
    }
    // Get a new transaction ID
    tid := g.transactionID()
    // Prepare our outgoing UDP packet
    buf := make([]byte, 16+(len(infohash)*20))
    binary.BigEndian.PutUint64(buf[0:], connectionid) // connection id
    binary.BigEndian.PutUint32(buf[8:], actionScrap)  // action scrape
    binary.BigEndian.PutUint32(buf[12:], tid)         // transaction id
    // Pack all the infohashes together
    src := bytes.Join(infohash, []byte(""))
    // Create our temporary hex-decoded buffer
    dst := make([]byte, hex.DecodedLen(len(src)))
    _, err = hex.Decode(dst, src)
    if err != nil {
        return nil, err
    }
    // Copy the binary representation of the infohashes
    // to the packet buffer
    copy(buf[16:], dst)
    response := make([]byte, 8+(12*len(infohash)))
    var n, retries int
    for {
        retries++
        // Set a write deadline
        conn.SetWriteDeadline(time.Now().Add(g.timeout))
        // Send the packet to the tracker
        n, err = conn.Write(buf)
        if err != nil {
            return nil, err
        }
        if n != len(buf) {
            return nil, ErrRequest
        }
        // Set a read deadline
        conn.SetReadDeadline(time.Now().Add(g.timeout))
        n, err = conn.Read(response)
        if err, ok := err.(net.Error); ok && err.Timeout() {
            if retries > g.retries {
                return nil, ErrRetryLimit
            }
            continue
        } else if err != nil {
            return nil, err
        }
        break
    }
    // Check expected packet size
    if n < 8+(12*len(infohash)) {
        return nil, ErrResponse
    }
    action := binary.BigEndian.Uint32(response[0:])
    if transactionid := binary.BigEndian.Uint32(response[4:]); transactionid != tid {
        return nil, ErrInvalidTransactionID
    }
    if action == actionError {
        return nil, ErrRemote
    }
    if action != actionScrap {
        return nil, ErrInvalidAction
    }
    // Each result is 12 bytes: seeders, completed, leechers (big-endian uint32s)
    r := make([]*ScrapeResult, len(infohash))
    offset := 8
    for i := 0; i < len(infohash); i++ {
        r[i] = &ScrapeResult{
            Infohash:  infohash[i],
            Seeders:   binary.BigEndian.Uint32(response[offset:]),
            Completed: binary.BigEndian.Uint32(response[offset+4:]),
            Leechers:  binary.BigEndian.Uint32(response[offset+8:]),
        }
        offset += 12
    }
    return r, nil
}
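
A short usage sketch of the scraper on its own, as called from a consuming package; the tracker URL and infohash are illustrative, not taken from this diff.

s, err := goscrape.New("udp://tracker.opentrackr.org:1337/announce")
if err != nil {
    log.Fatal(err)
}
s.SetTimeout(2 * time.Second)
// Infohashes are passed hex-encoded, up to 74 per call.
res, err := s.Scrape([]byte("08ada5a7a6183aae1e09d831df6748d566095a10"))
if err != nil {
    log.Fatal(err)
}
for _, r := range res {
    fmt.Printf("%s: seeders=%d leechers=%d completed=%d\n",
        r.Infohash, r.Seeders, r.Leechers, r.Completed)
}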