Albirew/nyaa-pantsu
Archivé
1
0
Bifurcation 0

Merge pull request #440 from ElegantMonkey/dev

Add filesize fetcher mode (need SOMEONE for testing)
Cette révision appartient à :
PantsuDev 2017-05-14 15:08:23 +10:00 révisé par GitHub
révision 6548619b90
10 fichiers modifiés avec 399 ajouts et 6 suppressions

Voir le fichier

@ -29,13 +29,16 @@ type Config struct {
Cache CacheConfig `json:"cache"`
// search config
Search SearchConfig `json:"search"`
// internationalization config
I18n I18nConfig `json:"i18n"`
// optional i2p configuration
I2P *I2PConfig `json:"i2p"`
// filesize fetcher config
FilesizeFetcher FilesizeFetcherConfig `json:"filesize_fetcher"`
// internationalization config
I18n I18nConfig `json:"i18n"`
}
var Defaults = Config{"localhost", 9999, "sqlite3", "./nyaa.db?cache_size=50", "default", DefaultScraperConfig, DefaultCacheConfig, DefaultSearchConfig, DefaultI18nConfig, nil}
var Defaults = Config{"localhost", 9999, "sqlite3", "./nyaa.db?cache_size=50", "default", DefaultScraperConfig, DefaultCacheConfig, DefaultSearchConfig, nil, DefaultFilesizeFetcherConfig, DefaultI18nConfig}
var allowedDatabaseTypes = map[string]bool{
"sqlite3": true,
@ -59,6 +62,7 @@ func New() *Config {
config.DBLogMode = Defaults.DBLogMode
config.Scrape = Defaults.Scrape
config.Cache = Defaults.Cache
config.FilesizeFetcher = Defaults.FilesizeFetcher
config.I18n = Defaults.I18n
return &config
}

16
config/filesizeFetcher.go Fichier normal
Voir le fichier

@ -0,0 +1,16 @@
package config
// FilesizeFetcherConfig holds the settings for the filesize-fetcher
// daemon mode, which downloads torrent metainfo to fill in missing
// filesize data.
type FilesizeFetcherConfig struct {
	// QueueSize is the maximum number of concurrent fetch operations
	// (also the buffer size of the results channel).
	QueueSize int `json:"queue_size"`
	// Timeout, in seconds, before a single metainfo fetch is abandoned.
	Timeout int `json:"timeout"`
	// MaxDays limits fetching to torrents whose date is within the last
	// MaxDays days.
	MaxDays int `json:"max_days"`
	// WakeUpInterval, in seconds, between periodic refills of the queue.
	WakeUpInterval int `json:"wake_up_interval"`
}
// DefaultFilesizeFetcherConfig is the filesize-fetcher configuration
// used when no explicit configuration is supplied.
var DefaultFilesizeFetcherConfig = FilesizeFetcherConfig{
	QueueSize: 10,
	Timeout:   120, // 2 min
	MaxDays:   90,
	WakeUpInterval: 300, // 5 min
}

Voir le fichier

@ -58,7 +58,7 @@ func GormInit(conf *config.Config, logger Logger) (*gorm.DB, error) {
if db.Error != nil {
return db, db.Error
}
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{})
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{}, &model.File{})
if db.Error != nil {
return db, db.Error
}

18
main.go
Voir le fichier

@ -14,6 +14,7 @@ import (
"github.com/ewhal/nyaa/network"
"github.com/ewhal/nyaa/router"
"github.com/ewhal/nyaa/service/scraper"
"github.com/ewhal/nyaa/service/torrent/filesizeFetcher"
"github.com/ewhal/nyaa/util/languages"
"github.com/ewhal/nyaa/util/log"
"github.com/ewhal/nyaa/util/search"
@ -82,11 +83,24 @@ func RunScraper(conf *config.Config) {
scraper.Wait()
}
// RunFilesizeFetcher runs the database filesize fetcher main loop
func RunFilesizeFetcher(conf *config.Config) {
	fetcher, err := filesizeFetcher.New(&conf.FilesizeFetcher)
	if err != nil {
		log.Fatalf("failed to start fetcher, %s", err)
		// NOTE(review): this return is unreachable if log.Fatalf exits the
		// process (as stdlib log.Fatalf does) — confirm util/log semantics.
		return
	}

	// Register the fetcher so shutdown signals close it cleanly.
	signals.RegisterCloser(fetcher)
	fetcher.RunAsync()
	// Block until the fetcher's goroutines have all finished.
	fetcher.Wait()
}
func main() {
conf := config.New()
processFlags := conf.BindFlags()
defaults := flag.Bool("print-defaults", false, "print the default configuration file on stdout")
mode := flag.String("mode", "webapp", "which mode to run daemon in, either webapp or scraper")
mode := flag.String("mode", "webapp", "which mode to run daemon in, either webapp, scraper or filesize_fetcher")
flag.Float64Var(&conf.Cache.Size, "c", config.DefaultCacheSize, "size of the search cache in MB")
flag.Parse()
@ -133,6 +147,8 @@ func main() {
RunScraper(conf)
} else if *mode == "webapp" {
RunServer(conf)
} else if *mode == "filesize_fetcher" {
RunFilesizeFetcher(conf)
} else {
log.Fatalf("invalid runtime mode: %s", *mode)
}

16
model/file.go Fichier normal
Voir le fichier

@ -0,0 +1,16 @@
package model
// File is a single file entry belonging to a torrent's file list.
type File struct {
	ID        uint   `gorm:"column:file_id;primary_key"`
	TorrentID uint   `gorm:"column:torrent_id"`
	// Path of the file inside the torrent.
	Path string `gorm:"column:path"`
	// Filesize as reported by the torrent metainfo — presumably bytes;
	// confirm against the fetcher that populates it.
	Filesize int64 `gorm:"column:filesize"`
	// Torrent is the owning torrent record (gorm association).
	Torrent *Torrent `gorm:"AssociationForeignKey:TorrentID;ForeignKey:torrent_id"`
}
// Size returns the approximate number of bytes of memory this struct
// occupies: four 8-byte words (ID, TorrentID, Filesize, Torrent pointer),
// the 16-byte string header for Path, plus the Path bytes themselves.
//
// The previous formula, (1 + len(f.Path) + 2) * 8, counted every byte of
// the path as 8 bytes, overcounting string storage eightfold.
func (f File) Size() int {
	return 4*8 + 16 + len(f.Path)
}

Voir le fichier

@ -44,6 +44,7 @@ type Torrent struct {
Leechers uint32 `gorm:"column:leechers"`
Completed uint32 `gorm:"column:completed"`
LastScrape time.Time `gorm:"column:last_scrape"`
FileList []File `gorm:"ForeignKey:torrent_id"`
}
// Returns the total size of memory recursively allocated for this struct
@ -88,6 +89,11 @@ type CommentJSON struct {
Date time.Time `json:"date"`
}
// FileJSON is the JSON representation of a single file inside a
// torrent's file list.
type FileJSON struct {
	Path   string `json:"path"`
	Length int64  `json:"length"`
}
type TorrentJSON struct {
ID string `json:"id"`
Name string `json:"name"`
@ -110,6 +116,7 @@ type TorrentJSON struct {
Leechers uint32 `json:"leechers"`
Completed uint32 `json:"completed"`
LastScrape time.Time `json:"last_scrape"`
FileList []File `json:"file_list"`
}
// ToJSON converts a model.Torrent to its equivalent JSON structure
@ -155,6 +162,7 @@ func (t *Torrent) ToJSON() TorrentJSON {
Seeders: t.Seeders,
Completed: t.Completed,
LastScrape: t.LastScrape,
FileList: t.FileList,
}
return res

Voir le fichier

@ -5,7 +5,7 @@ type WhereParams struct {
Params []interface{}
}
func CreateWhereParams(conditions string, params ...string) WhereParams {
func CreateWhereParams(conditions string, params ...interface{}) WhereParams {
whereParams := WhereParams{
Conditions: conditions,
Params: make([]interface{}, len(params)),

Voir le fichier

@ -0,0 +1,228 @@
package filesizeFetcher;
import (
"github.com/anacrolix/torrent"
"github.com/anacrolix/torrent/metainfo"
"github.com/ewhal/nyaa/config"
"github.com/ewhal/nyaa/db"
"github.com/ewhal/nyaa/model"
"github.com/ewhal/nyaa/util/log"
serviceBase "github.com/ewhal/nyaa/service"
torrentService "github.com/ewhal/nyaa/service/torrent"
"sync"
"time"
)
// FilesizeFetcher downloads torrent metainfo via a torrent client to
// fill in missing filesize data for database torrent entries.
type FilesizeFetcher struct {
	torrentClient *torrent.Client
	results       chan Result // finished operations deliver their Result here
	queueSize     int         // maximum number of concurrent fetch operations
	timeout       int         // per-operation timeout, in seconds
	maxDays       int         // only torrents with date within this many days are fetched
	done          chan int    // signals the run loop to stop
	// queue holds the in-flight operations; guarded by queueMutex.
	queue      []*FetchOperation
	queueMutex sync.Mutex
	// failedOperations records torrent IDs whose fetch failed, so they
	// are not retried.
	failedOperations map[uint]struct{}
	wakeUp           *time.Ticker // periodic trigger to refill the queue
	wg               sync.WaitGroup
}
// New creates a FilesizeFetcher from the given configuration. It returns
// a non-nil error (and a nil fetcher) if the underlying torrent client
// cannot be created.
func New(fetcherConfig *config.FilesizeFetcherConfig) (*FilesizeFetcher, error) {
	client, err := torrent.NewClient(nil)
	if err != nil {
		// Don't hand back a half-initialized fetcher with a nil client;
		// the original returned both, relying on callers to check err.
		return nil, err
	}

	fetcher := &FilesizeFetcher{
		torrentClient:    client,
		results:          make(chan Result, fetcherConfig.QueueSize),
		queueSize:        fetcherConfig.QueueSize,
		timeout:          fetcherConfig.Timeout,
		maxDays:          fetcherConfig.MaxDays,
		done:             make(chan int, 1),
		failedOperations: make(map[uint]struct{}),
		wakeUp:           time.NewTicker(time.Second * time.Duration(fetcherConfig.WakeUpInterval)),
	}
	return fetcher, nil
}
// isFetchingOrFailed reports whether the given torrent is already queued
// for fetching, or has previously failed and is therefore blacklisted.
func (fetcher *FilesizeFetcher) isFetchingOrFailed(t model.Torrent) bool {
	// Guard the queue like addToQueue/removeFromQueue do; the original
	// read it unlocked, which races if this is ever called outside the
	// run goroutine (e.g. concurrently with Close).
	fetcher.queueMutex.Lock()
	defer fetcher.queueMutex.Unlock()

	for _, op := range fetcher.queue {
		if op.torrent.ID == t.ID {
			return true
		}
	}

	_, ok := fetcher.failedOperations[t.ID]
	return ok
}
// addToQueue appends op to the fetch queue, refusing when the queue is
// already at capacity. It reports whether the operation was enqueued.
func (fetcher *FilesizeFetcher) addToQueue(op *FetchOperation) bool {
	fetcher.queueMutex.Lock()
	defer fetcher.queueMutex.Unlock()

	if len(fetcher.queue) >= fetcher.queueSize {
		return false
	}
	fetcher.queue = append(fetcher.queue, op)
	return true
}
// removeFromQueue deletes op from the fetch queue, reporting whether it
// was present.
func (fetcher *FilesizeFetcher) removeFromQueue(op *FetchOperation) bool {
	fetcher.queueMutex.Lock()
	defer fetcher.queueMutex.Unlock()

	for idx, queued := range fetcher.queue {
		if queued != op {
			continue
		}
		fetcher.queue = append(fetcher.queue[:idx], fetcher.queue[idx+1:]...)
		return true
	}
	return false
}
// updateFileList creates a database File row for every file in the
// torrent's metainfo that is not already present in dbEntry.FileList.
// It stops and returns the first database error encountered.
func updateFileList(dbEntry model.Torrent, info *metainfo.Info) error {
	log.Infof("TID %d has %d files.", dbEntry.ID, len(info.Files))

	// Index the paths we already know about so each lookup is O(1).
	known := make(map[string]struct{}, len(dbEntry.FileList))
	for _, existing := range dbEntry.FileList {
		known[existing.Path] = struct{}{}
	}

	for _, file := range info.Files {
		path := file.DisplayPath(info)
		if _, ok := known[path]; ok {
			continue
		}

		log.Infof("Adding file %s to filelist of TID %d", path, dbEntry.ID)
		dbFile := model.File{
			TorrentID: dbEntry.ID,
			Path:      path,
			Filesize:  file.Length,
		}
		if err := db.ORM.Create(&dbFile).Error; err != nil {
			return err
		}
	}

	return nil
}
// gotResult processes the outcome of one fetch operation: on success it
// writes the new filesize (and file list) to the database; on any
// failure — fetch error, zero length, or database error — the torrent ID
// is added to failedOperations so it is never retried. Finally the
// operation is removed from the queue.
func (fetcher *FilesizeFetcher) gotResult(r Result) {
	updatedSuccessfully := false
	if r.err != nil {
		log.Infof("Failed to get torrent filesize (TID: %d), err %v", r.operation.torrent.ID, r.err)
	} else if r.info.TotalLength() == 0 {
		// A reported total length of zero is treated as a failure.
		log.Infof("Got length 0 for torrent TID: %d. Possible bug?", r.operation.torrent.ID)
	} else {
		log.Infof("Got length %d for torrent TID: %d. Updating.", r.info.TotalLength(), r.operation.torrent.ID)
		r.operation.torrent.Filesize = r.info.TotalLength()
		_, err := torrentService.UpdateTorrent(r.operation.torrent)
		if err != nil {
			log.Infof("Failed to update torrent TID: %d with new filesize", r.operation.torrent.ID)
		} else {
			updatedSuccessfully = true
		}

		// Also update the File list with FileInfo, I guess.
		// Note: attempted even when UpdateTorrent failed; a file-list
		// failure alone does not mark the operation as failed.
		err = updateFileList(r.operation.torrent, r.info)
		if err != nil {
			log.Infof("Failed to update file list of TID %d", r.operation.torrent.ID)
		}
	}

	if !updatedSuccessfully {
		fetcher.failedOperations[r.operation.torrent.ID] = struct{}{}
	}
	fetcher.removeFromQueue(r.operation)
}
// fillQueue tops up the fetch queue with torrents that have a missing or
// zero filesize, skipping ones already queued or previously failed, and
// starts a goroutine for each newly queued operation.
func (fetcher *FilesizeFetcher) fillQueue() {
	if fetcher.queueSize-len(fetcher.queue) <= 0 {
		return
	}

	cutoff := time.Now().Add(-time.Hour * time.Duration(24*fetcher.maxDays))
	params := serviceBase.CreateWhereParams("(filesize IS NULL OR filesize = 0) AND date > ?", cutoff)
	// Request up to queueSize + len(failed) rows so some fresh torrents
	// are returned even when failed ones come back first.
	limit := fetcher.queueSize + len(fetcher.failedOperations)
	dbTorrents, count, err := torrentService.GetTorrents(params, limit, 0)
	if err != nil {
		log.Infof("Failed to get torrents for filesize updating")
		return
	}
	if count == 0 {
		log.Infof("No torrents for filesize update")
		return
	}

	for _, dbTorrent := range dbTorrents {
		if fetcher.isFetchingOrFailed(dbTorrent) {
			continue
		}

		log.Infof("Added TID %d for filesize fetching", dbTorrent.ID)
		op := NewFetchOperation(fetcher, dbTorrent)
		if !fetcher.addToQueue(op) {
			// Queue is full; stop trying.
			break
		}
		fetcher.wg.Add(1)
		go op.Start(fetcher.results)
	}
}
// run is the fetcher's main loop: it fills the queue immediately, then
// reacts to finished operations (refilling as slots free up) and to the
// periodic wake-up ticker, until a value arrives on the done channel.
func (fetcher *FilesizeFetcher) run() {
	defer fetcher.wg.Done()

	fetcher.fillQueue()
	for {
		select {
		case <-fetcher.done:
			return
		case res := <-fetcher.results:
			fetcher.gotResult(res)
			fetcher.fillQueue()
		case <-fetcher.wakeUp.C:
			fetcher.fillQueue()
		}
	}
}
// RunAsync starts the fetcher's main loop in its own goroutine; use
// Wait to block until it has finished.
func (fetcher *FilesizeFetcher) RunAsync() {
	fetcher.wg.Add(1)
	go fetcher.run()
}
// Close signals every in-flight fetch operation and the main loop to
// stop, then blocks until all of the fetcher's goroutines have finished.
// It always returns nil.
func (fetcher *FilesizeFetcher) Close() error {
	fetcher.queueMutex.Lock()
	// Send the done event to every Operation (their done channels are
	// buffered, so this never blocks).
	for _, op := range fetcher.queue {
		op.done <- 1
	}
	// Release the lock BEFORE waiting: the run goroutine may be inside
	// gotResult -> removeFromQueue, which needs this same mutex. The
	// original held the lock (via defer) across wg.Wait, deadlocking in
	// that case.
	fetcher.queueMutex.Unlock()

	fetcher.done <- 1
	fetcher.wg.Wait()
	return nil
}
// Wait blocks until the main loop and every fetch operation goroutine
// have completed.
func (fetcher *FilesizeFetcher) Wait() {
	fetcher.wg.Wait()
}

Voir le fichier

@ -0,0 +1,60 @@
package filesizeFetcher;
import (
"github.com/anacrolix/torrent/metainfo"
"github.com/ewhal/nyaa/config"
"github.com/ewhal/nyaa/model"
"github.com/ewhal/nyaa/util"
"errors"
"time"
"strings"
)
// FetchOperation is a single in-flight attempt to download the metainfo
// of one torrent.
type FetchOperation struct {
	fetcher *FilesizeFetcher
	torrent model.Torrent // copy of the database entry being fetched
	done    chan int      // receives a value to cancel the operation early
}
// Result is the outcome of a FetchOperation: either err is non-nil, or
// info holds the torrent's downloaded metainfo.
type Result struct {
	operation *FetchOperation
	err       error
	info      *metainfo.Info
}
// NewFetchOperation builds a FetchOperation for the given torrent entry.
// The done channel is buffered so that cancelling it never blocks.
func NewFetchOperation(fetcher *FilesizeFetcher, dbEntry model.Torrent) (op *FetchOperation) {
	return &FetchOperation{
		fetcher: fetcher,
		torrent: dbEntry,
		done:    make(chan int, 1),
	}
}
// Start resolves the torrent's metainfo through the fetcher's torrent
// client and sends exactly one Result on out, unless cancelled via the
// op.done channel (in which case nothing is sent). Should be started
// from a goroutine somewhere; it decrements the fetcher's WaitGroup on
// return.
func (op *FetchOperation) Start(out chan Result) {
	defer op.fetcher.wg.Done()

	magnet := util.InfoHashToMagnet(strings.TrimSpace(op.torrent.Hash), op.torrent.Name, config.Trackers...)
	downloadingTorrent, err := op.fetcher.torrentClient.AddMagnet(magnet)
	if err != nil {
		out <- Result{op, err, nil}
		return
	}

	// Use a one-shot Timer instead of the original Ticker: only a single
	// firing is needed, and the Ticker was never stopped, leaking a
	// runtime timer for every operation.
	timeout := time.NewTimer(time.Second * time.Duration(op.fetcher.timeout))
	defer timeout.Stop()

	select {
	case <-downloadingTorrent.GotInfo():
		downloadingTorrent.Drop()
		out <- Result{op, nil, downloadingTorrent.Info()}
	case <-timeout.C:
		downloadingTorrent.Drop()
		out <- Result{op, errors.New("Timeout"), nil}
	case <-op.done:
		downloadingTorrent.Drop()
	}
}

Voir le fichier

@ -0,0 +1,45 @@
package filesizeFetcher;
import (
"testing"
"github.com/anacrolix/torrent"
"github.com/ewhal/nyaa/model"
)
// TestInvalidHash checks that starting a fetch for an obviously invalid
// info hash fails immediately with a magnet-parsing error rather than
// waiting for the timeout.
func TestInvalidHash(t *testing.T) {
	client, err := torrent.NewClient(nil)
	if err != nil {
		t.Skipf("Failed to create client, with err %v. Skipping.", err)
	}

	fetcher := &FilesizeFetcher{
		timeout:       5,
		torrentClient: client,
		results:       make(chan Result, 1),
	}
	dbEntry := model.Torrent{
		Hash: "INVALID",
		Name: "Invalid",
	}

	op := NewFetchOperation(fetcher, dbEntry)
	fetcher.wg.Add(1)
	// Run synchronously; an invalid magnet must fail in AddMagnet and
	// deliver a Result without blocking.
	op.Start(fetcher.results)

	var res Result
	select {
	case res = <-fetcher.results:
	default:
		t.Fatal("No result in channel, should have one")
	}

	if res.err == nil {
		t.Fatal("Got no error, should have got invalid magnet")
	}
	// The original merely logged this expectation; assert it instead.
	if res.err.Error() == "Timeout" {
		t.Fatalf("Got timeout error, expected an invalid-magnet error")
	}
	t.Logf("Got error %s, shouldn't be timeout", res.err)
}