Move scrape results to separate table (#980)
Cette révision appartient à :
Parent
93bc6e9cdb
révision
abcdfd426b
|
@ -181,12 +181,7 @@ models:
|
||||||
files_table_name: files
|
files_table_name: files
|
||||||
# NotificationTableName : Name of notifications table in DB
|
# NotificationTableName : Name of notifications table in DB
|
||||||
notifications_table_name: notifications
|
notifications_table_name: notifications
|
||||||
# ActivitiesTableName : Name of activitis log table in DB
|
# ActivitiesTableName : Name of activities log table in DB
|
||||||
activities_table_name: activities
|
activities_table_name: activities
|
||||||
# for sukebei:
|
# ScrapeTableName : Name of scrape table in DB
|
||||||
# LastOldTorrentID = 2303945
|
scrape_table_name: scrape
|
||||||
# TorrentsTableName = "sukebei_torrents"
|
|
||||||
# ReportsTableName = "sukebei_torrent_reports"
|
|
||||||
# CommentsTableName = "sukebei_comments"
|
|
||||||
# UploadsOldTableName = "sukebei_user_uploads_old"
|
|
||||||
# FilesTableName = "sukebei_files"
|
|
||||||
|
|
|
@ -18,3 +18,5 @@ models:
|
||||||
files_table_name: sukebei_files
|
files_table_name: sukebei_files
|
||||||
# NotificationTableName : Name of notifications table in DB
|
# NotificationTableName : Name of notifications table in DB
|
||||||
notifications_table_name: sukebei_notifications
|
notifications_table_name: sukebei_notifications
|
||||||
|
# ScrapeTableName : Name of scrape table in DB
|
||||||
|
scrape_table_name: sukebei_scrape
|
||||||
|
|
|
@ -181,6 +181,7 @@ type ModelsConfig struct {
|
||||||
FilesTableName string `yaml:"files_table_name,omitempty"`
|
FilesTableName string `yaml:"files_table_name,omitempty"`
|
||||||
NotificationsTableName string `yaml:"notifications_table_name,omitempty"`
|
NotificationsTableName string `yaml:"notifications_table_name,omitempty"`
|
||||||
ActivityTableName string `yaml:"activities_table_name,omitempty"`
|
ActivityTableName string `yaml:"activities_table_name,omitempty"`
|
||||||
|
ScrapeTableName string `yaml:"scrape_table_name,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SearchConfig : Config struct for search
|
// SearchConfig : Config struct for search
|
||||||
|
|
|
@ -29,7 +29,7 @@ const queryDeleteUserFollowing = "DeleteUserFollowing"
|
||||||
const torrentSelectColumnsFull = `torrent_id, torrent_name, torrent_hash, category, sub_category, status, date, uploader, downloads, stardom, description, website_link, deleted_at, seeders, leechers, completed, last_scrape`
|
const torrentSelectColumnsFull = `torrent_id, torrent_name, torrent_hash, category, sub_category, status, date, uploader, downloads, stardom, description, website_link, deleted_at, seeders, leechers, completed, last_scrape`
|
||||||
|
|
||||||
func scanTorrentColumnsFull(rows *sql.Rows, t *model.Torrent) {
|
func scanTorrentColumnsFull(rows *sql.Rows, t *model.Torrent) {
|
||||||
rows.Scan(&t.ID, &t.Name, &t.Hash, &t.Category, &t.SubCategory, &t.Status, &t.Date, &t.UploaderID, &t.Downloads, &t.Stardom, &t.Description, &t.WebsiteLink, &t.DeletedAt, &t.Seeders, &t.Leechers, &t.Completed, &t.LastScrape)
|
rows.Scan(&t.ID, &t.Name, &t.Hash, &t.Category, &t.SubCategory, &t.Status, &t.Date, &t.UploaderID, &t.Downloads, &t.Stardom, &t.Description, &t.WebsiteLink, &t.DeletedAt, &t.Scrape.Seeders, &t.Scrape.Leechers, &t.Scrape.Completed, &t.Scrape.LastScrape)
|
||||||
}
|
}
|
||||||
|
|
||||||
const commentSelectColumnsFull = `comment_id, torrent_id, user_id, content, created_at, updated_at, deleted_at`
|
const commentSelectColumnsFull = `comment_id, torrent_id, user_id, content, created_at, updated_at, deleted_at`
|
||||||
|
|
|
@ -86,7 +86,7 @@ func GormInit(conf *config.Config, logger Logger) (*gorm.DB, error) {
|
||||||
if db.Error != nil {
|
if db.Error != nil {
|
||||||
return db, db.Error
|
return db, db.Error
|
||||||
}
|
}
|
||||||
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{})
|
db.AutoMigrate(&model.Torrent{}, &model.TorrentReport{}, &model.Scrape{})
|
||||||
if db.Error != nil {
|
if db.Error != nil {
|
||||||
return db, db.Error
|
return db, db.Error
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,12 +67,17 @@ type Torrent struct {
|
||||||
OldUploader string `gorm:"-"` // ???????
|
OldUploader string `gorm:"-"` // ???????
|
||||||
OldComments []OldComment `gorm:"ForeignKey:torrent_id"`
|
OldComments []OldComment `gorm:"ForeignKey:torrent_id"`
|
||||||
Comments []Comment `gorm:"ForeignKey:torrent_id"`
|
Comments []Comment `gorm:"ForeignKey:torrent_id"`
|
||||||
|
Scrape *Scrape `gorm:"AssociationForeignKey:ID;ForeignKey:torrent_id"`
|
||||||
|
FileList []File `gorm:"ForeignKey:torrent_id"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scrape model
|
||||||
|
type Scrape struct{
|
||||||
|
TorrentID uint `gorm:"column:torrent_id;primary_key"`
|
||||||
Seeders uint32 `gorm:"column:seeders"`
|
Seeders uint32 `gorm:"column:seeders"`
|
||||||
Leechers uint32 `gorm:"column:leechers"`
|
Leechers uint32 `gorm:"column:leechers"`
|
||||||
Completed uint32 `gorm:"column:completed"`
|
Completed uint32 `gorm:"column:completed"`
|
||||||
LastScrape time.Time `gorm:"column:last_scrape"`
|
LastScrape time.Time `gorm:"column:last_scrape"`
|
||||||
FileList []File `gorm:"ForeignKey:torrent_id"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Size : Returns the total size of memory recursively allocated for this struct
|
// Size : Returns the total size of memory recursively allocated for this struct
|
||||||
|
@ -80,14 +85,17 @@ type Torrent struct {
|
||||||
func (t Torrent) Size() (s int) {
|
func (t Torrent) Size() (s int) {
|
||||||
s = int(reflect.TypeOf(t).Size())
|
s = int(reflect.TypeOf(t).Size())
|
||||||
return
|
return
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TableName : Return the name of torrents table
|
|
||||||
func (t Torrent) TableName() string {
|
func (t Torrent) TableName() string {
|
||||||
return config.Conf.Models.TorrentsTableName
|
return config.Conf.Models.TorrentsTableName
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t Scrape) TableName() string {
|
||||||
|
return config.Conf.Models.ScrapeTableName
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Identifier : Return the identifier of a torrent
|
// Identifier : Return the identifier of a torrent
|
||||||
func (t *Torrent) Identifier() string {
|
func (t *Torrent) Identifier() string {
|
||||||
return "torrent_" + strconv.Itoa(int(t.ID))
|
return "torrent_" + strconv.Itoa(int(t.ID))
|
||||||
|
@ -98,7 +106,7 @@ func (t Torrent) IsNormal() bool {
|
||||||
return t.Status == TorrentStatusNormal
|
return t.Status == TorrentStatusNormal
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsRemake : Return if a torrent status is normal
|
// IsRemake : Return if a torrent status is remake
|
||||||
func (t Torrent) IsRemake() bool {
|
func (t Torrent) IsRemake() bool {
|
||||||
return t.Status == TorrentStatusRemake
|
return t.Status == TorrentStatusRemake
|
||||||
}
|
}
|
||||||
|
@ -262,7 +270,7 @@ func (t *TorrentJSON) ToTorrent() Torrent {
|
||||||
UploaderID: t.UploaderID,
|
UploaderID: t.UploaderID,
|
||||||
Downloads: t.Downloads,
|
Downloads: t.Downloads,
|
||||||
//Stardom: t.Stardom,
|
//Stardom: t.Stardom,
|
||||||
Filesize: t.Filesize,
|
Filesize: t.Filesize,
|
||||||
//Description: t.Description,
|
//Description: t.Description,
|
||||||
//WebsiteLink: t.WebsiteLink,
|
//WebsiteLink: t.WebsiteLink,
|
||||||
//Trackers: t.Trackers,
|
//Trackers: t.Trackers,
|
||||||
|
@ -271,11 +279,9 @@ func (t *TorrentJSON) ToTorrent() Torrent {
|
||||||
//OldUploader: t.OldUploader,
|
//OldUploader: t.OldUploader,
|
||||||
//OldComments: TODO
|
//OldComments: TODO
|
||||||
// Comments: TODO
|
// Comments: TODO
|
||||||
Seeders: t.Seeders,
|
// LastScrape not stored in ES, counts won't show without a value however
|
||||||
Leechers: t.Leechers,
|
Scrape: &Scrape{Seeders: t.Seeders, Leechers: t.Leechers, Completed: t.Completed, LastScrape: time.Now()},
|
||||||
Completed: t.Completed,
|
Language: t.Language,
|
||||||
LastScrape: time.Now(), // Not stored in ES, counts won't show without value
|
|
||||||
Language: t.Language,
|
|
||||||
//FileList: TODO
|
//FileList: TODO
|
||||||
}
|
}
|
||||||
return torrent
|
return torrent
|
||||||
|
@ -338,6 +344,10 @@ func (t *Torrent) ToJSON() TorrentJSON {
|
||||||
} else if t.ID > config.Conf.Models.LastOldTorrentID && len(config.Conf.Torrents.StorageLink) > 0 {
|
} else if t.ID > config.Conf.Models.LastOldTorrentID && len(config.Conf.Torrents.StorageLink) > 0 {
|
||||||
torrentlink = fmt.Sprintf(config.Conf.Torrents.StorageLink, t.Hash)
|
torrentlink = fmt.Sprintf(config.Conf.Torrents.StorageLink, t.Hash)
|
||||||
}
|
}
|
||||||
|
scrape := Scrape{}
|
||||||
|
if t.Scrape != nil {
|
||||||
|
scrape = *t.Scrape
|
||||||
|
}
|
||||||
res := TorrentJSON{
|
res := TorrentJSON{
|
||||||
ID: t.ID,
|
ID: t.ID,
|
||||||
Name: t.Name,
|
Name: t.Name,
|
||||||
|
@ -357,10 +367,10 @@ func (t *Torrent) ToJSON() TorrentJSON {
|
||||||
Language: t.Language,
|
Language: t.Language,
|
||||||
Magnet: template.URL(magnet),
|
Magnet: template.URL(magnet),
|
||||||
TorrentLink: util.Safe(torrentlink),
|
TorrentLink: util.Safe(torrentlink),
|
||||||
Leechers: t.Leechers,
|
Leechers: scrape.Leechers,
|
||||||
Seeders: t.Seeders,
|
Seeders: scrape.Seeders,
|
||||||
Completed: t.Completed,
|
Completed: scrape.Completed,
|
||||||
LastScrape: t.LastScrape,
|
LastScrape: scrape.LastScrape,
|
||||||
FileList: fileListJSON,
|
FileList: fileListJSON,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package scraperService
|
package scraperService
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"net/url"
|
"net/url"
|
||||||
"time"
|
"time"
|
||||||
|
@ -188,7 +189,26 @@ func (sc *Scraper) Scrape(packets uint) {
|
||||||
now := time.Now().Add(0 - sc.interval)
|
now := time.Now().Add(0 - sc.interval)
|
||||||
// only scrape torretns uploaded within 90 days
|
// only scrape torrents uploaded within 90 days
|
||||||
oldest := now.Add(0 - (time.Hour * 24 * 90))
|
oldest := now.Add(0 - (time.Hour * 24 * 90))
|
||||||
rows, err := db.ORM.Raw("SELECT torrent_id, torrent_hash FROM "+config.Conf.Models.TorrentsTableName+" WHERE ( last_scrape IS NULL OR last_scrape < ? ) AND date > ? ORDER BY torrent_id DESC LIMIT ?", now, oldest, packets*ScrapesPerPacket).Rows()
|
|
||||||
|
query := fmt.Sprintf(
|
||||||
|
"SELECT * FROM ("+
|
||||||
|
|
||||||
|
// previously scraped torrents that will be scraped again:
|
||||||
|
"SELECT %[1]s.torrent_id, torrent_hash FROM %[1]s, %[2]s WHERE "+
|
||||||
|
"date > ? AND "+
|
||||||
|
"%[1]s.torrent_id = %[2]s.torrent_id AND "+
|
||||||
|
"last_scrape < ?"+
|
||||||
|
|
||||||
|
// torrents that weren't scraped before:
|
||||||
|
" UNION "+
|
||||||
|
"SELECT torrent_id, torrent_hash FROM %[1]s WHERE "+
|
||||||
|
"date > ? AND "+
|
||||||
|
"torrent_id NOT IN (SELECT torrent_id FROM %[2]s)"+
|
||||||
|
|
||||||
|
") ORDER BY torrent_id DESC LIMIT ?",
|
||||||
|
config.Conf.Models.TorrentsTableName, config.Conf.Models.ScrapeTableName)
|
||||||
|
rows, err := db.ORM.Raw(query, oldest, now, oldest, packets*ScrapesPerPacket).Rows()
|
||||||
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
counter := 0
|
counter := 0
|
||||||
var scrape [ScrapesPerPacket]model.Torrent
|
var scrape [ScrapesPerPacket]model.Torrent
|
||||||
|
@ -212,7 +232,6 @@ func (sc *Scraper) Scrape(packets uint) {
|
||||||
}
|
}
|
||||||
log.Infof("scrape %d", counter)
|
log.Infof("scrape %d", counter)
|
||||||
rows.Close()
|
rows.Close()
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("failed to select torrents for scrape: %s", err)
|
log.Warnf("failed to select torrents for scrape: %s", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,19 +44,21 @@ func (t *Transaction) handleScrapeReply(data []byte) {
|
||||||
data = data[8:]
|
data = data[8:]
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
for idx := range t.swarms {
|
for idx := range t.swarms {
|
||||||
t.swarms[idx].Seeders = binary.BigEndian.Uint32(data)
|
t.swarms[idx].Scrape = &model.Scrape{}
|
||||||
|
t.swarms[idx].Scrape.Seeders = binary.BigEndian.Uint32(data)
|
||||||
data = data[4:]
|
data = data[4:]
|
||||||
t.swarms[idx].Completed = binary.BigEndian.Uint32(data)
|
t.swarms[idx].Scrape.Completed = binary.BigEndian.Uint32(data)
|
||||||
data = data[4:]
|
data = data[4:]
|
||||||
t.swarms[idx].Leechers = binary.BigEndian.Uint32(data)
|
t.swarms[idx].Scrape.Leechers = binary.BigEndian.Uint32(data)
|
||||||
data = data[4:]
|
data = data[4:]
|
||||||
t.swarms[idx].LastScrape = now
|
t.swarms[idx].Scrape.LastScrape = now
|
||||||
idx++
|
idx++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var pgQuery = "UPDATE " + config.Conf.Models.TorrentsTableName + " SET seeders = $1 , leechers = $2 , completed = $3 , last_scrape = $4 WHERE torrent_id = $5"
|
|
||||||
var sqliteQuery = "UPDATE " + config.Conf.Models.TorrentsTableName + " SET seeders = ? , leechers = ? , completed = ? , last_scrape = ? WHERE torrent_id = ?"
|
// pgQuery upserts one row of scrape results, keyed by torrent_id, using
// PostgreSQL-style ($n) placeholders. PostgreSQL has no REPLACE INTO statement,
// so INSERT ... ON CONFLICT DO UPDATE is used instead; this requires
// PostgreSQL 9.5+ and a unique or primary-key constraint on torrent_id.
// Placeholder order: torrent_id, seeders, leechers, completed, last_scrape.
var pgQuery = "INSERT INTO " + config.Conf.Models.ScrapeTableName + " (torrent_id, seeders, leechers, completed, last_scrape) VALUES ($1, $2, $3, $4, $5) ON CONFLICT (torrent_id) DO UPDATE SET seeders = EXCLUDED.seeders, leechers = EXCLUDED.leechers, completed = EXCLUDED.completed, last_scrape = EXCLUDED.last_scrape"
|
||||||
|
// sqliteQuery writes one row of scrape results into the scrape table using
// "?" placeholders. REPLACE INTO inserts the row, replacing any existing row
// that conflicts on a unique/primary key.
// Placeholder order: torrent_id, seeders, leechers, completed, last_scrape.
// NOTE(review): presumably selected for SQLite connections — confirm in Sync.
var sqliteQuery = "REPLACE INTO " + config.Conf.Models.ScrapeTableName + " (torrent_id, seeders, leechers, completed, last_scrape) VALUES (?, ?, ?, ?, ?)"
|
||||||
|
|
||||||
// Sync syncs models with database
|
// Sync syncs models with database
|
||||||
func (t *Transaction) Sync() (err error) {
|
func (t *Transaction) Sync() (err error) {
|
||||||
|
@ -68,7 +70,7 @@ func (t *Transaction) Sync() (err error) {
|
||||||
err = e
|
err = e
|
||||||
if err == nil {
|
if err == nil {
|
||||||
for idx := range t.swarms {
|
for idx := range t.swarms {
|
||||||
_, err = tx.Exec(q, t.swarms[idx].Seeders, t.swarms[idx].Leechers, t.swarms[idx].Completed, t.swarms[idx].LastScrape, t.swarms[idx].ID)
|
_, err = tx.Exec(q, t.swarms[idx].ID, t.swarms[idx].Scrape.Seeders, t.swarms[idx].Scrape.Leechers, t.swarms[idx].Scrape.Completed, t.swarms[idx].Scrape.LastScrape)
|
||||||
}
|
}
|
||||||
tx.Commit()
|
tx.Commit()
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,7 +35,7 @@ func GetTorrentByID(id string) (torrent model.Torrent, err error) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp := db.ORM.Where("torrent_id = ?", id).Preload("Comments")
|
tmp := db.ORM.Where("torrent_id = ?", id).Preload("Scrape").Preload("Comments")
|
||||||
if idInt > int64(config.Conf.Models.LastOldTorrentID) {
|
if idInt > int64(config.Conf.Models.LastOldTorrentID) {
|
||||||
tmp = tmp.Preload("FileList")
|
tmp = tmp.Preload("FileList")
|
||||||
}
|
}
|
||||||
|
@ -142,10 +142,6 @@ func getTorrentsOrderBy(parameters *serviceBase.WhereParams, orderBy string, lim
|
||||||
if conditions != "" {
|
if conditions != "" {
|
||||||
dbQuery = dbQuery + " WHERE " + conditions
|
dbQuery = dbQuery + " WHERE " + conditions
|
||||||
}
|
}
|
||||||
/* This makes all queries take roughly the same amount of time (lots)...
|
|
||||||
if strings.Contains(conditions, "torrent_name") && offset > 0 {
|
|
||||||
dbQuery = "WITH t AS (SELECT * FROM torrents WHERE " + conditions + ") SELECT * FROM t"
|
|
||||||
}*/
|
|
||||||
|
|
||||||
if orderBy == "" { // default OrderBy
|
if orderBy == "" { // default OrderBy
|
||||||
orderBy = "torrent_id DESC"
|
orderBy = "torrent_id DESC"
|
||||||
|
@ -154,7 +150,7 @@ func getTorrentsOrderBy(parameters *serviceBase.WhereParams, orderBy string, lim
|
||||||
if limit != 0 || offset != 0 { // if limits provided
|
if limit != 0 || offset != 0 { // if limits provided
|
||||||
dbQuery = dbQuery + " LIMIT " + strconv.Itoa(limit) + " OFFSET " + strconv.Itoa(offset)
|
dbQuery = dbQuery + " LIMIT " + strconv.Itoa(limit) + " OFFSET " + strconv.Itoa(offset)
|
||||||
}
|
}
|
||||||
dbQ := db.ORM
|
dbQ := db.ORM.Preload("Scrape")
|
||||||
if withUser {
|
if withUser {
|
||||||
dbQ = dbQ.Preload("Uploader")
|
dbQ = dbQ.Preload("Uploader")
|
||||||
}
|
}
|
||||||
|
|
Référencer dans un nouveau ticket