2017-05-10 19:29:35 +02:00
package scraperService
import (
2017-06-16 01:13:09 +02:00
"fmt"
2017-05-31 04:21:57 +02:00
"net"
"net/url"
"time"
2017-05-17 07:58:40 +02:00
"github.com/NyaaPantsu/nyaa/config"
"github.com/NyaaPantsu/nyaa/db"
"github.com/NyaaPantsu/nyaa/model"
"github.com/NyaaPantsu/nyaa/util/log"
2017-05-10 19:29:35 +02:00
)
const (
	// MTU is the size in bytes of the buffer used to receive a single packet
	// (yes, this is the ipv6 mtu).
	MTU = 1500

	// ScrapesPerPacket is the max number of scrapes per packet.
	ScrapesPerPacket = 74
)
2017-05-10 19:29:35 +02:00
// bittorrent scraper
type Scraper struct {
2017-05-11 13:40:50 +02:00
done chan int
sendQueue chan * SendEvent
recvQueue chan * RecvEvent
errQueue chan error
trackers map [ string ] * Bucket
ticker * time . Ticker
cleanup * time . Ticker
interval time . Duration
PacketsPerSecond uint
2017-05-10 19:29:35 +02:00
}
func New ( conf * config . ScraperConfig ) ( sc * Scraper , err error ) {
sc = & Scraper {
done : make ( chan int ) ,
2017-05-11 00:10:20 +02:00
sendQueue : make ( chan * SendEvent , 1024 ) ,
recvQueue : make ( chan * RecvEvent , 1024 ) ,
2017-05-10 19:29:35 +02:00
errQueue : make ( chan error ) ,
trackers : make ( map [ string ] * Bucket ) ,
2017-05-11 21:06:47 +02:00
ticker : time . NewTicker ( time . Second * 10 ) ,
2017-05-10 19:29:35 +02:00
interval : time . Second * time . Duration ( conf . IntervalSeconds ) ,
2017-05-11 21:06:47 +02:00
cleanup : time . NewTicker ( time . Minute ) ,
2017-05-11 13:40:50 +02:00
}
if sc . PacketsPerSecond == 0 {
sc . PacketsPerSecond = 10
2017-05-10 19:29:35 +02:00
}
2017-05-11 13:40:50 +02:00
2017-05-10 19:29:35 +02:00
for idx := range conf . Trackers {
err = sc . AddTracker ( & conf . Trackers [ idx ] )
if err != nil {
break
}
}
return
}
// AddTracker resolves the tracker described by conf.URL to a UDP address and
// registers a new bucket for it, keyed by the address string.
//
// It returns the first error hit while parsing the URL, looking up the host's
// IPs, or looking up the port. Nothing is registered on error.
func (sc *Scraper) AddTracker(conf *config.ScrapeConfig) (err error) {
	parsed, err := url.Parse(conf.URL)
	if err != nil {
		return err
	}

	resolved, err := net.LookupIP(parsed.Hostname())
	if err != nil {
		return err
	}

	// TODO: use more than 1 ip ?
	target := &net.UDPAddr{IP: resolved[0]}
	if target.Port, err = net.LookupPort("udp", parsed.Port()); err != nil {
		return err
	}

	sc.trackers[target.String()] = NewBucket(target)
	return nil
}
// Close stops both timers, closes the send, receive and error queues, and
// then signals completion on the done channel.
//
// The done channel is unbuffered, so Close blocks until a Wait call receives
// the signal. The returned error is always nil.
//
// NOTE(review): runRecv and Scrape send on the queues closed here; if they are
// still running when Close is called, those sends will panic — confirm the
// callers' shutdown order.
func (sc *Scraper) Close() (err error) {
	// Stop the timers first so no further ticks are delivered to Run. The
	// cleanup ticker was previously never stopped.
	sc.ticker.Stop()
	sc.cleanup.Stop()

	close(sc.sendQueue)
	close(sc.recvQueue)
	close(sc.errQueue)

	sc.done <- 1
	return nil
}
// runRecv reads packets from pc forever. Each successfully read packet is
// pushed onto recvQueue as a RecvEvent; each read error is pushed onto
// errQueue. The loop never returns.
func (sc *Scraper) runRecv(pc net.PacketConn) {
	for {
		// Use a fresh buffer for every read: the slice handed to the queue
		// must not be overwritten by the next packet.
		packet := make([]byte, MTU)
		size, sender, err := pc.ReadFrom(packet)
		if err != nil {
			sc.errQueue <- err
			continue
		}
		log.Debugf("got %d from %s", size, sender)
		sc.recvQueue <- &RecvEvent{
			From: sender,
			Data: packet[:size],
		}
	}
}
// runSend writes every event taken from sendQueue to pc and returns once the
// queue has been closed and drained.
//
// A failed write is logged and the loop carries on with the next event, so a
// single unreachable tracker does not stop the sender.
func (sc *Scraper) runSend(pc net.PacketConn) {
	for ev := range sc.sendQueue {
		log.Debugf("write %d to %s", len(ev.Data), ev.To)
		if _, err := pc.WriteTo(ev.Data, ev.To); err != nil {
			log.Warnf("failed to write to %s: %s", ev.To, err)
		}
	}
}
func ( sc * Scraper ) RunWorker ( pc net . PacketConn ) ( err error ) {
go sc . runRecv ( pc )
go sc . runSend ( pc )
for {
var bucket * Bucket
ev , ok := <- sc . recvQueue
if ! ok {
break
}
tid , err := ev . TID ( )
2017-06-14 12:10:03 +02:00
if err != nil {
log . Warnf ( "failed: %s" , err )
break
}
2017-05-10 19:29:35 +02:00
action , err := ev . Action ( )
2017-06-14 12:10:03 +02:00
if err != nil {
log . Warnf ( "failed: %s" , err )
break
}
2017-05-10 19:29:35 +02:00
log . Debugf ( "transaction = %d action = %d" , tid , action )
2017-06-14 12:10:03 +02:00
bucket , ok = sc . trackers [ ev . From . String ( ) ]
if ! ok || bucket == nil {
log . Warnf ( "bucket not found for %s" , ev . From )
break
2017-05-10 19:29:35 +02:00
}
2017-06-14 12:10:03 +02:00
bucket . VisitTransaction ( tid , func ( t * Transaction ) {
if t == nil {
log . Warnf ( "no transaction %d" , tid )
return
}
if t . GotData ( ev . Data ) {
err := t . Sync ( )
if err != nil {
log . Warnf ( "failed to sync swarm: %s" , err )
}
t . Done ( )
log . Debugf ( "transaction %d done" , tid )
} else {
sc . sendQueue <- t . SendEvent ( ev . From )
}
} )
2017-05-10 19:29:35 +02:00
}
return
}
func ( sc * Scraper ) Run ( ) {
for {
2017-05-11 13:40:50 +02:00
select {
case <- sc . ticker . C :
sc . Scrape ( sc . PacketsPerSecond )
break
case <- sc . cleanup . C :
sc . removeStale ( )
break
}
}
}
func ( sc * Scraper ) removeStale ( ) {
for k := range sc . trackers {
sc . trackers [ k ] . ForEachTransaction ( func ( tid uint32 , t * Transaction ) {
if t == nil || t . IsTimedOut ( ) {
sc . trackers [ k ] . Forget ( tid )
}
} )
2017-05-10 19:29:35 +02:00
}
}
2017-05-11 13:40:50 +02:00
func ( sc * Scraper ) Scrape ( packets uint ) {
2017-05-11 00:06:21 +02:00
now := time . Now ( ) . Add ( 0 - sc . interval )
2017-05-11 15:40:33 +02:00
// only scrape torretns uploaded within 90 days
oldest := now . Add ( 0 - ( time . Hour * 24 * 90 ) )
2017-06-16 01:13:09 +02:00
query := fmt . Sprintf (
"SELECT * FROM (" +
2017-06-26 14:41:38 +02:00
// previously scraped torrents that will be scraped again:
"SELECT %[1]s.torrent_id, torrent_hash FROM %[1]s, %[2]s WHERE " +
"date > ? AND " +
"%[1]s.torrent_id = %[2]s.torrent_id AND " +
2017-06-26 14:44:26 +02:00
"$[2]s.last_scrape < ?" +
2017-06-26 14:41:38 +02:00
// torrents that weren't scraped before:
" UNION " +
"SELECT torrent_id, torrent_hash FROM %[1]s WHERE " +
"date > ? AND " +
"torrent_id NOT IN (SELECT torrent_id FROM %[2]s)" +
") AS x ORDER BY torrent_id DESC LIMIT ?" ,
2017-06-16 01:13:09 +02:00
config . Conf . Models . TorrentsTableName , config . Conf . Models . ScrapeTableName )
rows , err := db . ORM . Raw ( query , oldest , now , oldest , packets * ScrapesPerPacket ) . Rows ( )
2017-05-10 19:29:35 +02:00
if err == nil {
2017-05-11 00:06:21 +02:00
counter := 0
2017-05-11 13:40:50 +02:00
var scrape [ ScrapesPerPacket ] model . Torrent
2017-05-11 00:06:21 +02:00
for rows . Next ( ) {
2017-05-11 13:40:50 +02:00
idx := counter % ScrapesPerPacket
2017-05-11 00:06:21 +02:00
rows . Scan ( & scrape [ idx ] . ID , & scrape [ idx ] . Hash )
counter ++
2017-05-11 21:06:47 +02:00
if counter % ScrapesPerPacket == 0 {
2017-05-10 19:29:35 +02:00
for _ , b := range sc . trackers {
2017-05-11 00:06:21 +02:00
t := b . NewTransaction ( scrape [ : ] )
2017-05-10 19:29:35 +02:00
sc . sendQueue <- t . SendEvent ( b . Addr )
}
}
}
2017-05-11 21:06:47 +02:00
idx := counter % ScrapesPerPacket
if idx > 0 {
for _ , b := range sc . trackers {
t := b . NewTransaction ( scrape [ : idx ] )
sc . sendQueue <- t . SendEvent ( b . Addr )
}
}
log . Infof ( "scrape %d" , counter )
2017-05-11 00:06:21 +02:00
rows . Close ( )
2017-05-10 19:29:35 +02:00
} else {
log . Warnf ( "failed to select torrents for scrape: %s" , err )
}
}
// Wait blocks until a value is received on the done channel, which Close
// sends as its last step.
func (sc *Scraper) Wait() {
	<-sc.done
}