Move scrape results to separate table (#980)
Cette révision appartient à :
Parent
93bc6e9cdb
révision
abcdfd426b
9 fichiers modifiés avec 64 ajouts et 39 suppressions
|
@ -181,12 +181,7 @@ models:
|
|||
files_table_name: files
|
||||
# NotificationTableName : Name of notifications table in DB
|
||||
notifications_table_name: notifications
|
||||
# ActivitiesTableName : Name of activitis log table in DB
|
||||
# ActivitiesTableName : Name of activities log table in DB
|
||||
activities_table_name: activities
|
||||
# for sukebei:
|
||||
# LastOldTorrentID = 2303945
|
||||
# TorrentsTableName = "sukebei_torrents"
|
||||
# ReportsTableName = "sukebei_torrent_reports"
|
||||
# CommentsTableName = "sukebei_comments"
|
||||
# UploadsOldTableName = "sukebei_user_uploads_old"
|
||||
# FilesTableName = "sukebei_files"
|
||||
# ScrapeTableName : Name of scrape table in DB
|
||||
scrape_table_name: scrape
|
||||
|
|
|
@ -18,3 +18,5 @@ models:
|
|||
files_table_name: sukebei_files
|
||||
# NotificationTableName : Name of notifications table in DB
|
||||
notifications_table_name: sukebei_notifications
|
||||
# ScrapeTableName : Name of scrape table in DB
|
||||
scrape_table_name: sukebei_scrape
|
||||
|
|
|
@ -181,6 +181,7 @@ type ModelsConfig struct {
|
|||
FilesTableName string `yaml:"files_table_name,omitempty"`
|
||||
NotificationsTableName string `yaml:"notifications_table_name,omitempty"`
|
||||
ActivityTableName string `yaml:"activities_table_name,omitempty"`
|
||||
ScrapeTableName string `yaml:"scrape_table_name,omitempty"`
|
||||
}
|
||||
|
||||
// SearchConfig : Config struct for search
|
||||
|
|
|
@ -29,7 +29,7 @@ const queryDeleteUserFollowing = "DeleteUserFollowing"
|
|||
const torrentSelectColumnsFull = `torrent_id, torrent_name, torrent_hash, category, sub_category, status, date, uploader, downloads, stardom, description, website_link, deleted_at, seeders, leechers, completed, last_scrape`
|
||||
|
||||
func scanTorrentColumnsFull(rows *sql.Rows, t *model.Torrent) {
|
||||
rows.Scan(&t.ID, &t.Name, &t.Hash, &t.Category, &t.SubCategory, &t.Status, &t.Date, &t.UploaderID, &t.Downloads, &t.Stardom, &t.Description, &t.WebsiteLink, &t.DeletedAt, &t.Seeders, &t.Leechers, &t.Completed, &t.LastScrape)
|
||||
rows.Scan(&t.ID, &t.Name, &t.Hash, &t.Category, &t.SubCategory, &t.Status, &t.Date, &t.UploaderID, &t.Downloads, &t.Stardom, &t.Description, &t.WebsiteLink, &t.DeletedAt, &t.Scrape.Seeders, &t.Scrape.Leechers, &t.Scrape.Completed, &t.Scrape.LastScrape)
|
||||
}
|
||||
|
||||
const commentSelectColumnsFull = `comment_id, torrent_id, user_id, content, created_at, updated_at, deleted_at`
|
||||
|
|
|
@ -86,7 +86,7 @@ func GormInit(conf *config.Config, logger Logger) (*gorm.DB, error) {
|
|||
if db.Error != nil {
|
||||
return db, db.Error
|
||||
}
|
||||
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{})
|
||||
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{}, &model.Scrape{})
|
||||
if db.Error != nil {
|
||||
return db, db.Error
|
||||
}
|
||||
|
|
|
@ -67,12 +67,17 @@ type Torrent struct {
|
|||
OldUploader string `gorm:"-"` // ???????
|
||||
OldComments []OldComment `gorm:"ForeignKey:torrent_id"`
|
||||
Comments []Comment `gorm:"ForeignKey:torrent_id"`
|
||||
Scrape *Scrape `gorm:"AssociationForeignKey:ID;ForeignKey:torrent_id"`
|
||||
FileList []File `gorm:"ForeignKey:torrent_id"`
|
||||
}
|
||||
|
||||
// Scrape model
|
||||
type Scrape struct{
|
||||
TorrentID uint `gorm:"column:torrent_id;primary_key"`
|
||||
Seeders uint32 `gorm:"column:seeders"`
|
||||
Leechers uint32 `gorm:"column:leechers"`
|
||||
Completed uint32 `gorm:"column:completed"`
|
||||
LastScrape time.Time `gorm:"column:last_scrape"`
|
||||
FileList []File `gorm:"ForeignKey:torrent_id"`
|
||||
}
|
||||
|
||||
// Size : Returns the total size of memory recursively allocated for this struct
|
||||
|
@ -80,14 +85,17 @@ type Torrent struct {
|
|||
func (t Torrent) Size() (s int) {
|
||||
s = int(reflect.TypeOf(t).Size())
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
// TableName : Return the name of torrents table
|
||||
func (t Torrent) TableName() string {
|
||||
return config.Conf.Models.TorrentsTableName
|
||||
}
|
||||
|
||||
func (t Scrape) TableName() string {
|
||||
return config.Conf.Models.ScrapeTableName
|
||||
}
|
||||
|
||||
|
||||
// Identifier : Return the identifier of a torrent
|
||||
func (t *Torrent) Identifier() string {
|
||||
return "torrent_" + strconv.Itoa(int(t.ID))
|
||||
|
@ -98,7 +106,7 @@ func (t Torrent) IsNormal() bool {
|
|||
return t.Status == TorrentStatusNormal
|
||||
}
|
||||
|
||||
// IsRemake : Return if a torrent status is normal
|
||||
// IsRemake : Return if a torrent status is remake
|
||||
func (t Torrent) IsRemake() bool {
|
||||
return t.Status == TorrentStatusRemake
|
||||
}
|
||||
|
@ -262,7 +270,7 @@ func (t *TorrentJSON) ToTorrent() Torrent {
|
|||
UploaderID: t.UploaderID,
|
||||
Downloads: t.Downloads,
|
||||
//Stardom: t.Stardom,
|
||||
Filesize: t.Filesize,
|
||||
Filesize: t.Filesize,
|
||||
//Description: t.Description,
|
||||
//WebsiteLink: t.WebsiteLink,
|
||||
//Trackers: t.Trackers,
|
||||
|
@ -271,11 +279,9 @@ func (t *TorrentJSON) ToTorrent() Torrent {
|
|||
//OldUploader: t.OldUploader,
|
||||
//OldComments: TODO
|
||||
// Comments: TODO
|
||||
Seeders: t.Seeders,
|
||||
Leechers: t.Leechers,
|
||||
Completed: t.Completed,
|
||||
LastScrape: time.Now(), // Not stored in ES, counts won't show without value
|
||||
Language: t.Language,
|
||||
// LastScrape not stored in ES, counts won't show without a value however
|
||||
Scrape: &Scrape{Seeders: t.Seeders, Leechers: t.Leechers, Completed: t.Completed, LastScrape: time.Now()},
|
||||
Language: t.Language,
|
||||
//FileList: TODO
|
||||
}
|
||||
return torrent
|
||||
|
@ -338,6 +344,10 @@ func (t *Torrent) ToJSON() TorrentJSON {
|
|||
} else if t.ID > config.Conf.Models.LastOldTorrentID && len(config.Conf.Torrents.StorageLink) > 0 {
|
||||
torrentlink = fmt.Sprintf(config.Conf.Torrents.StorageLink, t.Hash)
|
||||
}
|
||||
scrape := Scrape{}
|
||||
if t.Scrape != nil {
|
||||
scrape = *t.Scrape
|
||||
}
|
||||
res := TorrentJSON{
|
||||
ID: t.ID,
|
||||
Name: t.Name,
|
||||
|
@ -357,10 +367,10 @@ func (t *Torrent) ToJSON() TorrentJSON {
|
|||
Language: t.Language,
|
||||
Magnet: template.URL(magnet),
|
||||
TorrentLink: util.Safe(torrentlink),
|
||||
Leechers: t.Leechers,
|
||||
Seeders: t.Seeders,
|
||||
Completed: t.Completed,
|
||||
LastScrape: t.LastScrape,
|
||||
Leechers: scrape.Leechers,
|
||||
Seeders: scrape.Seeders,
|
||||
Completed: scrape.Completed,
|
||||
LastScrape: scrape.LastScrape,
|
||||
FileList: fileListJSON,
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package scraperService
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
"time"
|
||||
|
@ -188,7 +189,26 @@ func (sc *Scraper) Scrape(packets uint) {
|
|||
now := time.Now().Add(0 - sc.interval)
|
||||
// only scrape torrents uploaded within 90 days
|
||||
oldest := now.Add(0 - (time.Hour * 24 * 90))
|
||||
rows, err := db.ORM.Raw("SELECT torrent_id, torrent_hash FROM "+config.Conf.Models.TorrentsTableName+" WHERE ( last_scrape IS NULL OR last_scrape < ? ) AND date > ? ORDER BY torrent_id DESC LIMIT ?", now, oldest, packets*ScrapesPerPacket).Rows()
|
||||
|
||||
query := fmt.Sprintf(
|
||||
"SELECT * FROM ("+
|
||||
|
||||
// previously scraped torrents that will be scraped again:
|
||||
"SELECT %[1]s.torrent_id, torrent_hash FROM %[1]s, %[2]s WHERE "+
|
||||
"date > ? AND "+
|
||||
"%[1]s.torrent_id = %[2]s.torrent_id AND "+
|
||||
"last_scrape < ?"+
|
||||
|
||||
// torrents that weren't scraped before:
|
||||
" UNION "+
|
||||
"SELECT torrent_id, torrent_hash FROM %[1]s WHERE "+
|
||||
"date > ? AND "+
|
||||
"torrent_id NOT IN (SELECT torrent_id FROM %[2]s)"+
|
||||
|
||||
") ORDER BY torrent_id DESC LIMIT ?",
|
||||
config.Conf.Models.TorrentsTableName, config.Conf.Models.ScrapeTableName)
|
||||
rows, err := db.ORM.Raw(query, oldest, now, oldest, packets*ScrapesPerPacket).Rows()
|
||||
|
||||
if err == nil {
|
||||
counter := 0
|
||||
var scrape [ScrapesPerPacket]model.Torrent
|
||||
|
@ -212,7 +232,6 @@ func (sc *Scraper) Scrape(packets uint) {
|
|||
}
|
||||
log.Infof("scrape %d", counter)
|
||||
rows.Close()
|
||||
|
||||
} else {
|
||||
log.Warnf("failed to select torrents for scrape: %s", err)
|
||||
}
|
||||
|
|
|
@ -44,19 +44,21 @@ func (t *Transaction) handleScrapeReply(data []byte) {
|
|||
data = data[8:]
|
||||
now := time.Now()
|
||||
for idx := range t.swarms {
|
||||
t.swarms[idx].Seeders = binary.BigEndian.Uint32(data)
|
||||
t.swarms[idx].Scrape = &model.Scrape{}
|
||||
t.swarms[idx].Scrape.Seeders = binary.BigEndian.Uint32(data)
|
||||
data = data[4:]
|
||||
t.swarms[idx].Completed = binary.BigEndian.Uint32(data)
|
||||
t.swarms[idx].Scrape.Completed = binary.BigEndian.Uint32(data)
|
||||
data = data[4:]
|
||||
t.swarms[idx].Leechers = binary.BigEndian.Uint32(data)
|
||||
t.swarms[idx].Scrape.Leechers = binary.BigEndian.Uint32(data)
|
||||
data = data[4:]
|
||||
t.swarms[idx].LastScrape = now
|
||||
t.swarms[idx].Scrape.LastScrape = now
|
||||
idx++
|
||||
}
|
||||
}
|
||||
|
||||
var pgQuery = "UPDATE " + config.Conf.Models.TorrentsTableName + " SET seeders = $1 , leechers = $2 , completed = $3 , last_scrape = $4 WHERE torrent_id = $5"
|
||||
var sqliteQuery = "UPDATE " + config.Conf.Models.TorrentsTableName + " SET seeders = ? , leechers = ? , completed = ? , last_scrape = ? WHERE torrent_id = ?"
|
||||
|
||||
var pgQuery = "REPLACE INTO " + config.Conf.Models.ScrapeTableName + " (torrent_id, seeders, leechers, completed, last_scrape) VALUES ($1, $2, $3, $4, $5)"
|
||||
var sqliteQuery = "REPLACE INTO " + config.Conf.Models.ScrapeTableName + " (torrent_id, seeders, leechers, completed, last_scrape) VALUES (?, ?, ?, ?, ?)"
|
||||
|
||||
// Sync syncs models with database
|
||||
func (t *Transaction) Sync() (err error) {
|
||||
|
@ -68,7 +70,7 @@ func (t *Transaction) Sync() (err error) {
|
|||
err = e
|
||||
if err == nil {
|
||||
for idx := range t.swarms {
|
||||
_, err = tx.Exec(q, t.swarms[idx].Seeders, t.swarms[idx].Leechers, t.swarms[idx].Completed, t.swarms[idx].LastScrape, t.swarms[idx].ID)
|
||||
_, err = tx.Exec(q, t.swarms[idx].ID, t.swarms[idx].Scrape.Seeders, t.swarms[idx].Scrape.Leechers, t.swarms[idx].Scrape.Completed, t.swarms[idx].Scrape.LastScrape)
|
||||
}
|
||||
tx.Commit()
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ func GetTorrentByID(id string) (torrent model.Torrent, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
tmp := db.ORM.Where("torrent_id = ?", id).Preload("Comments")
|
||||
tmp := db.ORM.Where("torrent_id = ?", id).Preload("Scrape").Preload("Comments")
|
||||
if idInt > int64(config.Conf.Models.LastOldTorrentID) {
|
||||
tmp = tmp.Preload("FileList")
|
||||
}
|
||||
|
@ -142,10 +142,6 @@ func getTorrentsOrderBy(parameters *serviceBase.WhereParams, orderBy string, lim
|
|||
if conditions != "" {
|
||||
dbQuery = dbQuery + " WHERE " + conditions
|
||||
}
|
||||
/* This makes all queries take roughly the same amount of time (lots)...
|
||||
if strings.Contains(conditions, "torrent_name") && offset > 0 {
|
||||
dbQuery = "WITH t AS (SELECT * FROM torrents WHERE " + conditions + ") SELECT * FROM t"
|
||||
}*/
|
||||
|
||||
if orderBy == "" { // default OrderBy
|
||||
orderBy = "torrent_id DESC"
|
||||
|
@ -154,7 +150,7 @@ func getTorrentsOrderBy(parameters *serviceBase.WhereParams, orderBy string, lim
|
|||
if limit != 0 || offset != 0 { // if limits provided
|
||||
dbQuery = dbQuery + " LIMIT " + strconv.Itoa(limit) + " OFFSET " + strconv.Itoa(offset)
|
||||
}
|
||||
dbQ := db.ORM
|
||||
dbQ := db.ORM.Preload("Scrape")
|
||||
if withUser {
|
||||
dbQ = dbQ.Preload("Uploader")
|
||||
}
|
||||
|
|
Référencer dans un nouveau ticket