Albirew/nyaa-pantsu
Albirew
/
nyaa-pantsu
Archivé
1
0
Bifurcation 0

Move scrape results to separate table (#980)

Cette révision appartient à :
sfan5 2017-06-16 01:13:09 +02:00 révisé par GitHub
Parent 93bc6e9cdb
révision abcdfd426b
9 fichiers modifiés avec 64 ajouts et 39 suppressions

Voir le fichier

@ -181,12 +181,7 @@ models:
files_table_name: files
# NotificationTableName : Name of notifications table in DB
notifications_table_name: notifications
# ActivitiesTableName : Name of activities log table in DB
activities_table_name: activities
# for sukebei:
# LastOldTorrentID = 2303945
# TorrentsTableName = "sukebei_torrents"
# ReportsTableName = "sukebei_torrent_reports"
# CommentsTableName = "sukebei_comments"
# UploadsOldTableName = "sukebei_user_uploads_old"
# FilesTableName = "sukebei_files"
# ScrapeTableName : Name of scrape table in DB
scrape_table_name: scrape

Voir le fichier

@ -18,3 +18,5 @@ models:
files_table_name: sukebei_files
# NotificationTableName : Name of notifications table in DB
notifications_table_name: sukebei_notifications
# ScrapeTableName : Name of scrape table in DB
scrape_table_name: sukebei_scrape

Voir le fichier

@ -181,6 +181,7 @@ type ModelsConfig struct {
FilesTableName string `yaml:"files_table_name,omitempty"`
NotificationsTableName string `yaml:"notifications_table_name,omitempty"`
ActivityTableName string `yaml:"activities_table_name,omitempty"`
ScrapeTableName string `yaml:"scrape_table_name,omitempty"`
}
// SearchConfig : Config struct for search

Voir le fichier

@ -29,7 +29,7 @@ const queryDeleteUserFollowing = "DeleteUserFollowing"
const torrentSelectColumnsFull = `torrent_id, torrent_name, torrent_hash, category, sub_category, status, date, uploader, downloads, stardom, description, website_link, deleted_at, seeders, leechers, completed, last_scrape`
// scanTorrentColumnsFull scans one row selected with torrentSelectColumnsFull
// into t, placing the tracker statistics (seeders/leechers/completed/last
// scrape time) into the nested t.Scrape model.
// NOTE(review): the rows.Scan error is silently discarded — a failed scan
// leaves t partially populated; consider returning the error to the caller.
// NOTE(review): t.Scrape is a pointer; callers must ensure it is non-nil
// before this runs, otherwise the &t.Scrape.Seeders dereference panics.
func scanTorrentColumnsFull(rows *sql.Rows, t *model.Torrent) {
	rows.Scan(&t.ID, &t.Name, &t.Hash, &t.Category, &t.SubCategory, &t.Status, &t.Date, &t.UploaderID, &t.Downloads, &t.Stardom, &t.Description, &t.WebsiteLink, &t.DeletedAt, &t.Scrape.Seeders, &t.Scrape.Leechers, &t.Scrape.Completed, &t.Scrape.LastScrape)
}
const commentSelectColumnsFull = `comment_id, torrent_id, user_id, content, created_at, updated_at, deleted_at`

Voir le fichier

@ -86,7 +86,7 @@ func GormInit(conf *config.Config, logger Logger) (*gorm.DB, error) {
if db.Error != nil {
return db, db.Error
}
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{})
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{}, &model.Scrape{})
if db.Error != nil {
return db, db.Error
}

Voir le fichier

@ -67,12 +67,17 @@ type Torrent struct {
OldUploader string `gorm:"-"` // ???????
OldComments []OldComment `gorm:"ForeignKey:torrent_id"`
Comments []Comment `gorm:"ForeignKey:torrent_id"`
Scrape *Scrape `gorm:"AssociationForeignKey:ID;ForeignKey:torrent_id"`
FileList []File `gorm:"ForeignKey:torrent_id"`
}
// Scrape model : per-torrent tracker statistics, kept in a separate table
// with one row per torrent (keyed by torrent_id) instead of inline columns
// on the torrents table.
type Scrape struct{
// TorrentID is the owning torrent's ID and the table's primary key.
TorrentID uint `gorm:"column:torrent_id;primary_key"`
// Seeders/Leechers/Completed are the counts from the last tracker scrape.
Seeders uint32 `gorm:"column:seeders"`
Leechers uint32 `gorm:"column:leechers"`
Completed uint32 `gorm:"column:completed"`
// LastScrape records when these counts were last refreshed.
LastScrape time.Time `gorm:"column:last_scrape"`
// NOTE(review): this FileList field looks like diff context from the
// Torrent model captured by mistake — a file-list association on the
// scrape-stats table seems unintended; confirm against the repository.
FileList []File `gorm:"ForeignKey:torrent_id"`
}
// Size : Returns the shallow in-memory size of this struct in bytes (reflect's Type.Size does not follow pointers, slices or strings)
@ -80,14 +85,17 @@ type Torrent struct {
// Size reports the size in bytes of the Torrent struct header itself, as
// given by reflect.Type.Size; heap data reachable through its fields is
// not included in the count.
func (t Torrent) Size() (s int) {
	return int(reflect.TypeOf(t).Size())
}
// TableName : Return the name of torrents table.
// gorm calls this to resolve which table backs the Torrent model; the name
// comes from runtime config so the sukebei deployment can override it.
func (t Torrent) TableName() string {
return config.Conf.Models.TorrentsTableName
}
// TableName : Return the name of scrape table (from runtime config, so the
// sukebei deployment can point the Scrape model at its own table).
func (t Scrape) TableName() string {
return config.Conf.Models.ScrapeTableName
}
// Identifier : Return the identifier of a torrent
func (t *Torrent) Identifier() string {
return "torrent_" + strconv.Itoa(int(t.ID))
@ -98,7 +106,7 @@ func (t Torrent) IsNormal() bool {
return t.Status == TorrentStatusNormal
}
// IsRemake : Return if a torrent status is remake
func (t Torrent) IsRemake() bool {
return t.Status == TorrentStatusRemake
}
@ -262,7 +270,7 @@ func (t *TorrentJSON) ToTorrent() Torrent {
UploaderID: t.UploaderID,
Downloads: t.Downloads,
//Stardom: t.Stardom,
Filesize: t.Filesize,
Filesize: t.Filesize,
//Description: t.Description,
//WebsiteLink: t.WebsiteLink,
//Trackers: t.Trackers,
@ -271,11 +279,9 @@ func (t *TorrentJSON) ToTorrent() Torrent {
//OldUploader: t.OldUploader,
//OldComments: TODO
// Comments: TODO
Seeders: t.Seeders,
Leechers: t.Leechers,
Completed: t.Completed,
LastScrape: time.Now(), // Not stored in ES, counts won't show without value
Language: t.Language,
// LastScrape not stored in ES, counts won't show without a value however
Scrape: &Scrape{Seeders: t.Seeders, Leechers: t.Leechers, Completed: t.Completed, LastScrape: time.Now()},
Language: t.Language,
//FileList: TODO
}
return torrent
@ -338,6 +344,10 @@ func (t *Torrent) ToJSON() TorrentJSON {
} else if t.ID > config.Conf.Models.LastOldTorrentID && len(config.Conf.Torrents.StorageLink) > 0 {
torrentlink = fmt.Sprintf(config.Conf.Torrents.StorageLink, t.Hash)
}
scrape := Scrape{}
if t.Scrape != nil {
scrape = *t.Scrape
}
res := TorrentJSON{
ID: t.ID,
Name: t.Name,
@ -357,10 +367,10 @@ func (t *Torrent) ToJSON() TorrentJSON {
Language: t.Language,
Magnet: template.URL(magnet),
TorrentLink: util.Safe(torrentlink),
Leechers: t.Leechers,
Seeders: t.Seeders,
Completed: t.Completed,
LastScrape: t.LastScrape,
Leechers: scrape.Leechers,
Seeders: scrape.Seeders,
Completed: scrape.Completed,
LastScrape: scrape.LastScrape,
FileList: fileListJSON,
}

Voir le fichier

@ -1,6 +1,7 @@
package scraperService
import (
"fmt"
"net"
"net/url"
"time"
@ -188,7 +189,26 @@ func (sc *Scraper) Scrape(packets uint) {
now := time.Now().Add(0 - sc.interval)
// only scrape torrents uploaded within 90 days
oldest := now.Add(0 - (time.Hour * 24 * 90))
rows, err := db.ORM.Raw("SELECT torrent_id, torrent_hash FROM "+config.Conf.Models.TorrentsTableName+" WHERE ( last_scrape IS NULL OR last_scrape < ? ) AND date > ? ORDER BY torrent_id DESC LIMIT ?", now, oldest, packets*ScrapesPerPacket).Rows()
query := fmt.Sprintf(
"SELECT * FROM ("+
// previously scraped torrents that will be scraped again:
"SELECT %[1]s.torrent_id, torrent_hash FROM %[1]s, %[2]s WHERE "+
"date > ? AND "+
"%[1]s.torrent_id = %[2]s.torrent_id AND "+
"last_scrape < ?"+
// torrents that weren't scraped before:
" UNION "+
"SELECT torrent_id, torrent_hash FROM %[1]s WHERE "+
"date > ? AND "+
"torrent_id NOT IN (SELECT torrent_id FROM %[2]s)"+
") ORDER BY torrent_id DESC LIMIT ?",
config.Conf.Models.TorrentsTableName, config.Conf.Models.ScrapeTableName)
rows, err := db.ORM.Raw(query, oldest, now, oldest, packets*ScrapesPerPacket).Rows()
if err == nil {
counter := 0
var scrape [ScrapesPerPacket]model.Torrent
@ -212,7 +232,6 @@ func (sc *Scraper) Scrape(packets uint) {
}
log.Infof("scrape %d", counter)
rows.Close()
} else {
log.Warnf("failed to select torrents for scrape: %s", err)
}

Voir le fichier

@ -44,19 +44,21 @@ func (t *Transaction) handleScrapeReply(data []byte) {
data = data[8:]
now := time.Now()
for idx := range t.swarms {
t.swarms[idx].Seeders = binary.BigEndian.Uint32(data)
t.swarms[idx].Scrape = &model.Scrape{}
t.swarms[idx].Scrape.Seeders = binary.BigEndian.Uint32(data)
data = data[4:]
t.swarms[idx].Completed = binary.BigEndian.Uint32(data)
t.swarms[idx].Scrape.Completed = binary.BigEndian.Uint32(data)
data = data[4:]
t.swarms[idx].Leechers = binary.BigEndian.Uint32(data)
t.swarms[idx].Scrape.Leechers = binary.BigEndian.Uint32(data)
data = data[4:]
t.swarms[idx].LastScrape = now
t.swarms[idx].Scrape.LastScrape = now
idx++
}
}
var pgQuery = "UPDATE " + config.Conf.Models.TorrentsTableName + " SET seeders = $1 , leechers = $2 , completed = $3 , last_scrape = $4 WHERE torrent_id = $5"
var sqliteQuery = "UPDATE " + config.Conf.Models.TorrentsTableName + " SET seeders = ? , leechers = ? , completed = ? , last_scrape = ? WHERE torrent_id = ?"
var pgQuery = "REPLACE INTO " + config.Conf.Models.ScrapeTableName + " (torrent_id, seeders, leechers, completed, last_scrape) VALUES ($1, $2, $3, $4, $5)"
var sqliteQuery = "REPLACE INTO " + config.Conf.Models.ScrapeTableName + " (torrent_id, seeders, leechers, completed, last_scrape) VALUES (?, ?, ?, ?, ?)"
// Sync syncs models with database
func (t *Transaction) Sync() (err error) {
@ -68,7 +70,7 @@ func (t *Transaction) Sync() (err error) {
err = e
if err == nil {
for idx := range t.swarms {
_, err = tx.Exec(q, t.swarms[idx].Seeders, t.swarms[idx].Leechers, t.swarms[idx].Completed, t.swarms[idx].LastScrape, t.swarms[idx].ID)
_, err = tx.Exec(q, t.swarms[idx].ID, t.swarms[idx].Scrape.Seeders, t.swarms[idx].Scrape.Leechers, t.swarms[idx].Scrape.Completed, t.swarms[idx].Scrape.LastScrape)
}
tx.Commit()
}

Voir le fichier

@ -35,7 +35,7 @@ func GetTorrentByID(id string) (torrent model.Torrent, err error) {
return
}
tmp := db.ORM.Where("torrent_id = ?", id).Preload("Comments")
tmp := db.ORM.Where("torrent_id = ?", id).Preload("Scrape").Preload("Comments")
if idInt > int64(config.Conf.Models.LastOldTorrentID) {
tmp = tmp.Preload("FileList")
}
@ -142,10 +142,6 @@ func getTorrentsOrderBy(parameters *serviceBase.WhereParams, orderBy string, lim
if conditions != "" {
dbQuery = dbQuery + " WHERE " + conditions
}
/* This makes all queries take roughly the same amount of time (lots)...
if strings.Contains(conditions, "torrent_name") && offset > 0 {
dbQuery = "WITH t AS (SELECT * FROM torrents WHERE " + conditions + ") SELECT * FROM t"
}*/
if orderBy == "" { // default OrderBy
orderBy = "torrent_id DESC"
@ -154,7 +150,7 @@ func getTorrentsOrderBy(parameters *serviceBase.WhereParams, orderBy string, lim
if limit != 0 || offset != 0 { // if limits provided
dbQuery = dbQuery + " LIMIT " + strconv.Itoa(limit) + " OFFSET " + strconv.Itoa(offset)
}
dbQ := db.ORM
dbQ := db.ORM.Preload("Scrape")
if withUser {
dbQ = dbQ.Preload("Uploader")
}