2017-05-05 16:39:15 +02:00
package search
2017-05-06 12:41:48 +02:00
import (
2017-05-05 16:39:15 +02:00
"strconv"
"strings"
2017-05-08 15:50:18 +02:00
"unicode"
"unicode/utf8"
2017-05-30 00:28:21 +02:00
2017-07-05 17:06:24 +02:00
"time"
2017-05-17 07:58:40 +02:00
"github.com/NyaaPantsu/nyaa/config"
2017-06-29 13:15:23 +02:00
"github.com/NyaaPantsu/nyaa/models"
"github.com/NyaaPantsu/nyaa/models/torrents"
2017-07-05 17:06:24 +02:00
"github.com/NyaaPantsu/nyaa/utils/cache"
2017-07-02 16:54:55 +02:00
"github.com/NyaaPantsu/nyaa/utils/log"
"github.com/NyaaPantsu/nyaa/utils/search/structs"
2017-06-28 13:42:38 +02:00
"github.com/gin-gonic/gin"
2017-05-09 13:31:58 +02:00
)
2017-05-11 15:24:20 +02:00
// Package-level search settings; both are assigned by Configure.
var (
	// searchOperator is the SQL pattern operator, including its placeholder,
	// appended after a column name when building LIKE-style conditions.
	searchOperator string
	// useTSQuery selects plainto_tsquery matching for ASCII search words
	// instead of the pattern operator.
	useTSQuery bool
)
2017-05-11 15:24:20 +02:00
2017-05-26 12:12:52 +02:00
// Configure : initialize search
2017-05-11 15:24:20 +02:00
func Configure ( conf * config . SearchConfig ) ( err error ) {
2017-05-12 19:38:08 +02:00
useTSQuery = false
2017-05-12 17:41:26 +02:00
// Postgres needs ILIKE for case-insensitivity
2017-07-02 16:54:55 +02:00
if models . ORM . Dialect ( ) . GetName ( ) == "postgres" {
2017-05-11 15:24:20 +02:00
searchOperator = "ILIKE ?"
2017-05-12 19:38:08 +02:00
//useTSQuery = true
// !!DISABLED!! because this makes search a lot stricter
// (only matches at word borders)
2017-05-12 17:41:26 +02:00
} else {
searchOperator = "LIKE ?"
2017-05-11 15:24:20 +02:00
}
return
}
2017-05-26 12:12:52 +02:00
// stringIsASCII reports whether input consists solely of ASCII characters.
// The empty string counts as ASCII.
func stringIsASCII(input string) bool {
	// Every non-ASCII rune (and every invalid byte) is encoded using at least
	// one byte >= utf8.RuneSelf, so scanning bytes is enough.
	for i := 0; i < len(input); i++ {
		if input[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
2017-07-20 20:21:57 +02:00
// ByQueryNoUser : search torrents according to request without user
func ByQueryNoUser ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
search , tor , count , err = ByQuery ( c , pagenum , true , false , false , false )
2017-05-20 13:45:15 +02:00
return
}
2017-07-05 13:33:12 +02:00
// ByQueryWithUser : search torrents according to request with user
func ByQueryWithUser ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
2017-07-20 20:21:57 +02:00
search , tor , count , err = ByQuery ( c , pagenum , true , true , false , false )
2017-05-09 17:07:42 +02:00
return
}
2017-07-05 13:33:12 +02:00
// ByQueryNoCount : search torrents according to request without user and count
func ByQueryNoCount ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , err error ) {
2017-07-20 20:21:57 +02:00
search , tor , _ , err = ByQuery ( c , pagenum , false , false , false , false )
2017-05-09 17:07:42 +02:00
return
}
2017-07-05 13:33:12 +02:00
// ByQueryDeleted : search deleted torrents according to request with user and count
func ByQueryDeleted ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
2017-07-20 20:21:57 +02:00
search , tor , count , err = ByQuery ( c , pagenum , true , true , true , false )
2017-06-21 03:58:54 +02:00
return
}
2017-07-05 13:33:12 +02:00
// ByQueryNoHidden : search torrents and filter those hidden
func ByQueryNoHidden ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
2017-07-20 20:21:57 +02:00
search , tor , count , err = ByQuery ( c , pagenum , true , false , false , true )
2017-05-25 02:19:05 +02:00
return
}
2017-05-26 01:48:14 +02:00
// TODO Clean this up
2017-07-20 20:21:57 +02:00
// Some fields are postgres specific (countAll, withUser)
2017-05-26 01:48:14 +02:00
// elasticsearch always provide a count to how many hits
2017-07-20 20:21:57 +02:00
// ES doesn't store users
2017-05-26 01:48:14 +02:00
// deleted is unused because es doesn't index deleted torrents
2017-07-20 20:21:57 +02:00
func ByQuery ( c * gin . Context , pagenum int , countAll bool , withUser bool , deleted bool , hidden bool ) ( structs . TorrentParam , [ ] models . Torrent , int , error ) {
2017-07-05 19:41:16 +02:00
var err error
2017-07-14 16:14:38 +02:00
if models . ElasticSearchClient != nil && ! deleted {
2017-07-02 16:54:55 +02:00
var torrentParam structs . TorrentParam
2017-06-28 13:42:38 +02:00
torrentParam . FromRequest ( c )
2017-06-18 01:39:11 +02:00
torrentParam . Offset = uint32 ( pagenum )
2017-06-21 03:58:54 +02:00
torrentParam . Hidden = hidden
2017-07-05 14:29:31 +02:00
torrentParam . Full = withUser
2017-07-05 19:41:16 +02:00
if found , ok := cache . C . Get ( torrentParam . Identifier ( ) ) ; ok {
2017-07-05 19:50:44 +02:00
torrentCache := found . ( * structs . TorrentCache )
return torrentParam , torrentCache . Torrents , torrentCache . Count , nil
2017-07-05 19:41:16 +02:00
}
totalHits , tor , err := torrentParam . Find ( models . ElasticSearchClient )
2017-07-05 19:50:44 +02:00
cache . C . Set ( torrentParam . Identifier ( ) , & structs . TorrentCache { tor , int ( totalHits ) } , 5 * time . Minute )
2017-05-26 01:48:14 +02:00
// Convert back to non-json torrents
2017-07-05 19:41:16 +02:00
return torrentParam , tor , int ( totalHits ) , err
2017-05-26 01:48:14 +02:00
}
2017-05-30 00:28:21 +02:00
log . Errorf ( "Unable to create elasticsearch client: %s" , err )
log . Errorf ( "Falling back to postgresql query" )
2017-07-05 13:33:12 +02:00
return byQueryPostgres ( c , pagenum , countAll , withUser , deleted , hidden )
2017-05-26 01:48:14 +02:00
}
2017-07-05 13:33:12 +02:00
func byQueryPostgres ( c * gin . Context , pagenum int , countAll bool , withUser bool , deleted bool , hidden bool ) (
2017-07-02 16:54:55 +02:00
search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ,
2017-05-10 11:03:49 +02:00
) {
2017-07-02 16:54:55 +02:00
search . FromRequest ( c )
search . Offset = uint32 ( pagenum )
2017-06-21 03:58:54 +02:00
search . Hidden = hidden
2017-07-15 01:44:09 +02:00
search . Deleted = deleted
2017-07-06 23:54:21 +02:00
search . Full = withUser
2017-05-09 13:31:58 +02:00
2017-07-07 00:18:11 +02:00
orderBy := search . Sort . ToDBField ( )
if search . Sort == structs . Date {
search . NotNull = search . Sort . ToDBField ( ) + " IS NOT NULL"
}
2017-07-05 17:06:24 +02:00
if found , ok := cache . C . Get ( search . Identifier ( ) ) ; ok {
2017-07-05 19:50:44 +02:00
torrentCache := found . ( * structs . TorrentCache )
tor = torrentCache . Torrents
count = torrentCache . Count
2017-07-05 17:06:24 +02:00
return
}
Consistency, formatting, error checking, cleanup, and a couple bug fixes (#245)
* Checkpoint: it builds
The config, db, model, network, os, and public packages have had some
fixes to glaringly obvious flaws, dead code removed, and stylistic
changes.
* Style changes and old code removal in router
Router needs a lot of work done to its (lack of) error handling.
* Dead code removal and style changes
Now up to util/email/email.go. After I'm finished with the initial sweep
I'll go back and fix error handling and security issues. Then I'll fix
the broken API. Then I'll go through to add documentation and fix code
visibility.
* Finish dead code removal and style changes
Vendored libraries not touched. Everything still needs security fixes
and documentation. There's also one case of broken functionality.
* Fix accidental find-and-replace
* Style, error checking, saftey, bug fix changes
* Redo error checking erased during merge
* Re-add merge-erased fix. Make Safe safe.
2017-05-10 04:34:40 +02:00
orderBy += " "
2017-05-09 13:31:58 +02:00
2017-07-02 16:54:55 +02:00
switch search . Order {
case true :
Consistency, formatting, error checking, cleanup, and a couple bug fixes (#245)
* Checkpoint: it builds
The config, db, model, network, os, and public packages have had some
fixes to glaringly obvious flaws, dead code removed, and stylistic
changes.
* Style changes and old code removal in router
Router needs a lot of work done to its (lack of) error handling.
* Dead code removal and style changes
Now up to util/email/email.go. After I'm finished with the initial sweep
I'll go back and fix error handling and security issues. Then I'll fix
the broken API. Then I'll go through to add documentation and fix code
visibility.
* Finish dead code removal and style changes
Vendored libraries not touched. Everything still needs security fixes
and documentation. There's also one case of broken functionality.
* Fix accidental find-and-replace
* Style, error checking, saftey, bug fix changes
* Redo error checking erased during merge
* Re-add merge-erased fix. Make Safe safe.
2017-05-10 04:34:40 +02:00
orderBy += "asc"
2017-07-02 16:54:55 +02:00
if models . ORM . Dialect ( ) . GetName ( ) == "postgres" {
2017-05-17 23:41:18 +02:00
orderBy += " NULLS FIRST"
2017-05-24 09:11:13 +02:00
}
2017-07-02 16:54:55 +02:00
case false :
Consistency, formatting, error checking, cleanup, and a couple bug fixes (#245)
* Checkpoint: it builds
The config, db, model, network, os, and public packages have had some
fixes to glaringly obvious flaws, dead code removed, and stylistic
changes.
* Style changes and old code removal in router
Router needs a lot of work done to its (lack of) error handling.
* Dead code removal and style changes
Now up to util/email/email.go. After I'm finished with the initial sweep
I'll go back and fix error handling and security issues. Then I'll fix
the broken API. Then I'll go through to add documentation and fix code
visibility.
* Finish dead code removal and style changes
Vendored libraries not touched. Everything still needs security fixes
and documentation. There's also one case of broken functionality.
* Fix accidental find-and-replace
* Style, error checking, saftey, bug fix changes
* Redo error checking erased during merge
* Re-add merge-erased fix. Make Safe safe.
2017-05-10 04:34:40 +02:00
orderBy += "desc"
2017-07-02 16:54:55 +02:00
if models . ORM . Dialect ( ) . GetName ( ) == "postgres" {
2017-05-17 23:41:18 +02:00
orderBy += " NULLS LAST"
2017-05-24 09:11:13 +02:00
}
}
2017-05-17 23:41:18 +02:00
2017-07-02 16:54:55 +02:00
parameters := structs . WhereParams {
2017-05-11 14:22:49 +02:00
Params : make ( [ ] interface { } , 0 , 64 ) ,
}
conditions := make ( [ ] string , 0 , 64 )
2017-06-18 00:30:12 +02:00
if len ( search . Category ) > 0 {
conditionsOr := make ( [ ] string , len ( search . Category ) )
for key , val := range search . Category {
2017-07-14 19:08:02 +02:00
if val . Main > 0 {
conditionsOr [ key ] = "(category = ?"
parameters . Params = append ( parameters . Params , val . Main )
if val . Sub > 0 {
conditionsOr [ key ] += " AND sub_category = ?"
parameters . Params = append ( parameters . Params , val . Sub )
}
conditionsOr [ key ] += ")"
}
2017-06-18 00:30:12 +02:00
}
conditions = append ( conditions , strings . Join ( conditionsOr , " OR " ) )
2017-05-11 14:22:49 +02:00
}
2017-07-07 14:06:51 +02:00
if len ( search . Languages ) > 0 {
langs := ""
for key , val := range search . Languages {
langs += val . Code
if key + 1 < len ( search . Languages ) {
langs += ","
}
}
conditions = append ( conditions , "language " + searchOperator )
parameters . Params = append ( parameters . Params , "%" + langs + "%" )
}
2017-06-18 00:30:12 +02:00
2017-07-21 02:13:41 +02:00
if c . Query ( "userID" ) != "" {
if search . UserID > 0 {
conditions = append ( conditions , "uploader = ?" )
parameters . Params = append ( parameters . Params , search . UserID )
if search . Hidden {
conditions = append ( conditions , "hidden = ?" )
parameters . Params = append ( parameters . Params , false )
}
} else if search . UserID == 0 {
conditions = append ( conditions , "(uploader = ? OR hidden = ?)" )
parameters . Params = append ( parameters . Params , search . UserID )
parameters . Params = append ( parameters . Params , true )
2017-07-20 20:21:57 +02:00
}
2017-06-21 03:58:54 +02:00
}
2017-05-30 14:12:42 +02:00
if search . FromID != 0 {
2017-05-30 00:28:21 +02:00
conditions = append ( conditions , "torrent_id > ?" )
2017-05-30 14:12:42 +02:00
parameters . Params = append ( parameters . Params , search . FromID )
2017-05-30 00:28:21 +02:00
}
2017-06-02 16:10:31 +02:00
if search . FromDate != "" {
conditions = append ( conditions , "date >= ?" )
parameters . Params = append ( parameters . Params , search . FromDate )
}
if search . ToDate != "" {
conditions = append ( conditions , "date <= ?" )
parameters . Params = append ( parameters . Params , search . ToDate )
}
2017-05-11 14:22:49 +02:00
if search . Status != 0 {
2017-07-02 16:54:55 +02:00
if search . Status == structs . FilterRemakes {
2017-05-11 21:46:23 +02:00
conditions = append ( conditions , "status <> ?" )
2017-05-11 14:22:49 +02:00
} else {
conditions = append ( conditions , "status >= ?" )
2017-05-11 13:40:50 +02:00
}
2017-05-11 14:22:49 +02:00
parameters . Params = append ( parameters . Params , strconv . Itoa ( int ( search . Status ) + 1 ) )
}
if len ( search . NotNull ) > 0 {
conditions = append ( conditions , search . NotNull )
}
2017-06-13 13:31:11 +02:00
if search . MinSize > 0 {
conditions = append ( conditions , "filesize >= ?" )
parameters . Params = append ( parameters . Params , uint64 ( search . MinSize ) )
}
if search . MaxSize > 0 {
conditions = append ( conditions , "filesize <= ?" )
parameters . Params = append ( parameters . Params , uint64 ( search . MaxSize ) )
}
2017-05-09 13:31:58 +02:00
2017-07-02 16:54:55 +02:00
querySplit := strings . Fields ( search . NameLike )
for _ , word := range querySplit {
2017-05-11 14:22:49 +02:00
firstRune , _ := utf8 . DecodeRuneInString ( word )
if len ( word ) == 1 && unicode . IsPunct ( firstRune ) {
// some queries have a single punctuation character
// which causes a full scan instead of using the index
// and yields no meaningful results.
// due to len() == 1 we're just looking at 1-byte/ascii
// punctuation characters.
continue
2017-05-08 15:50:18 +02:00
}
2017-05-09 14:31:47 +02:00
2017-05-26 12:12:52 +02:00
if useTSQuery && stringIsASCII ( word ) {
2017-05-12 17:41:26 +02:00
conditions = append ( conditions , "torrent_name @@ plainto_tsquery(?)" )
parameters . Params = append ( parameters . Params , word )
} else {
// TODO: possible to make this faster?
conditions = append ( conditions , "torrent_name " + searchOperator )
parameters . Params = append ( parameters . Params , "%" + word + "%" )
}
2017-05-11 14:22:49 +02:00
}
parameters . Conditions = strings . Join ( conditions [ : ] , " AND " )
2017-05-11 15:24:20 +02:00
2017-05-11 14:22:49 +02:00
log . Infof ( "SQL query is :: %s\n" , parameters . Conditions )
2017-05-05 16:39:15 +02:00
2017-06-25 15:26:46 +02:00
if deleted {
2017-07-02 16:54:55 +02:00
tor , count , err = torrents . FindDeleted ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-06-25 15:26:46 +02:00
} else if countAll && ! withUser {
2017-07-02 16:54:55 +02:00
tor , count , err = torrents . FindOrderBy ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-07-20 20:21:57 +02:00
} else if countAll && withUser {
2017-07-02 16:54:55 +02:00
tor , count , err = torrents . FindWithUserOrderBy ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-06-25 15:26:46 +02:00
} else {
2017-07-02 16:54:55 +02:00
tor , err = torrents . FindOrderByNoCount ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-06-25 15:26:46 +02:00
}
2017-07-05 19:50:44 +02:00
cache . C . Set ( search . Identifier ( ) , & structs . TorrentCache { tor , count } , 5 * time . Minute )
2017-05-09 13:31:58 +02:00
return
2017-05-12 17:41:26 +02:00
}
2017-07-20 20:21:57 +02:00
// AuthorizedQuery runs ByQuery with count and user data enabled. The hidden
// flag passed to ByQuery is the negation of authorized: unauthorized callers
// search with hidden filtering on, authorized callers search every torrent.
func AuthorizedQuery(c *gin.Context, pagenum int, authorized bool) (structs.TorrentParam, []models.Torrent, int, error) {
	return ByQuery(c, pagenum, true, true, false, !authorized)
}