2017-05-05 16:39:15 +02:00
package search
2017-05-06 12:41:48 +02:00
import (
2017-05-05 16:39:15 +02:00
"strconv"
"strings"
2017-05-08 15:50:18 +02:00
"unicode"
"unicode/utf8"
2017-05-30 00:28:21 +02:00
2017-05-17 07:58:40 +02:00
"github.com/NyaaPantsu/nyaa/config"
2017-06-29 13:15:23 +02:00
"github.com/NyaaPantsu/nyaa/models"
"github.com/NyaaPantsu/nyaa/models/torrents"
2017-07-02 16:54:55 +02:00
"github.com/NyaaPantsu/nyaa/utils/log"
"github.com/NyaaPantsu/nyaa/utils/search/structs"
2017-06-28 13:42:38 +02:00
"github.com/gin-gonic/gin"
2017-05-09 13:31:58 +02:00
)
2017-05-11 15:24:20 +02:00
// Package-level search settings, assigned by Configure.
var (
	// searchOperator is the SQL comparison fragment (operator plus placeholder)
	// appended to a column name for substring matching, e.g. "LIKE ?".
	searchOperator string

	// useTSQuery selects the plainto_tsquery() full-text match for ASCII words
	// instead of the LIKE/ILIKE substring match.
	useTSQuery bool
)
2017-05-11 15:24:20 +02:00
2017-05-26 12:12:52 +02:00
// Configure : initialize search
2017-05-11 15:24:20 +02:00
func Configure ( conf * config . SearchConfig ) ( err error ) {
2017-05-12 19:38:08 +02:00
useTSQuery = false
2017-05-12 17:41:26 +02:00
// Postgres needs ILIKE for case-insensitivity
2017-07-02 16:54:55 +02:00
if models . ORM . Dialect ( ) . GetName ( ) == "postgres" {
2017-05-11 15:24:20 +02:00
searchOperator = "ILIKE ?"
2017-05-12 19:38:08 +02:00
//useTSQuery = true
// !!DISABLED!! because this makes search a lot stricter
// (only matches at word borders)
2017-05-12 17:41:26 +02:00
} else {
searchOperator = "LIKE ?"
2017-05-11 15:24:20 +02:00
}
return
}
2017-05-26 12:12:52 +02:00
// stringIsASCII reports whether input consists solely of 7-bit ASCII
// characters. The empty string counts as ASCII.
func stringIsASCII(input string) bool {
	// Every byte of a multi-byte (or malformed) UTF-8 sequence has its high
	// bit set, so a byte-wise scan gives the same answer as decoding runes.
	for i := 0; i < len(input); i++ {
		if input[i] >= 0x80 {
			return false
		}
	}
	return true
}
2017-05-26 12:12:52 +02:00
// SearchByQuery : search torrents according to request without user
2017-07-02 16:54:55 +02:00
func SearchByQuery ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
2017-06-28 13:42:38 +02:00
search , tor , count , err = searchByQuery ( c , pagenum , true , false , false , false )
2017-05-20 13:45:15 +02:00
return
}
2017-05-26 12:12:52 +02:00
// SearchByQueryWithUser : search torrents according to request with user
2017-07-02 16:54:55 +02:00
func SearchByQueryWithUser ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
2017-06-28 13:42:38 +02:00
search , tor , count , err = searchByQuery ( c , pagenum , true , true , false , false )
2017-05-09 17:07:42 +02:00
return
}
2017-05-26 12:12:52 +02:00
// SearchByQueryNoCount : search torrents according to request without user and count
2017-07-02 16:54:55 +02:00
func SearchByQueryNoCount ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , err error ) {
2017-06-28 13:42:38 +02:00
search , tor , _ , err = searchByQuery ( c , pagenum , false , false , false , false )
2017-05-09 17:07:42 +02:00
return
}
2017-05-26 12:12:52 +02:00
// SearchByQueryDeleted : search deleted torrents according to request with user and count
2017-07-02 16:54:55 +02:00
func SearchByQueryDeleted ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
2017-06-28 13:42:38 +02:00
search , tor , count , err = searchByQuery ( c , pagenum , true , true , true , false )
2017-06-21 03:58:54 +02:00
return
}
// SearchByQueryNoHidden : search torrents and filter those hidden
2017-07-02 16:54:55 +02:00
func SearchByQueryNoHidden ( c * gin . Context , pagenum int ) ( search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ) {
2017-06-28 13:42:38 +02:00
search , tor , count , err = searchByQuery ( c , pagenum , true , false , false , true )
2017-05-25 02:19:05 +02:00
return
}
2017-05-26 01:48:14 +02:00
// TODO Clean this up
// FIXME Some fields are not used by elasticsearch (pagenum, countAll, deleted, withUser)
// pagenum is extracted from request in .FromRequest()
// elasticsearch always provide a count to how many hits
// deleted is unused because es doesn't index deleted torrents
2017-06-28 13:42:38 +02:00
func searchByQuery ( c * gin . Context , pagenum int , countAll bool , withUser bool , deleted bool , hidden bool ) (
2017-07-02 16:54:55 +02:00
search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ,
2017-05-26 01:48:14 +02:00
) {
2017-07-02 16:54:55 +02:00
if models . ElasticSearchClient != nil {
var torrentParam structs . TorrentParam
2017-06-28 13:42:38 +02:00
torrentParam . FromRequest ( c )
2017-06-18 01:39:11 +02:00
torrentParam . Offset = uint32 ( pagenum )
2017-06-21 03:58:54 +02:00
torrentParam . Hidden = hidden
2017-07-02 16:54:55 +02:00
totalHits , torrents , err := torrentParam . Find ( models . ElasticSearchClient )
2017-05-26 01:48:14 +02:00
// Convert back to non-json torrents
2017-07-02 16:54:55 +02:00
return torrentParam , torrents , int ( totalHits ) , err
2017-05-26 01:48:14 +02:00
}
2017-05-30 00:28:21 +02:00
log . Errorf ( "Unable to create elasticsearch client: %s" , err )
log . Errorf ( "Falling back to postgresql query" )
2017-06-28 13:42:38 +02:00
return searchByQueryPostgres ( c , pagenum , countAll , withUser , deleted , hidden )
2017-05-26 01:48:14 +02:00
}
2017-06-28 13:42:38 +02:00
func searchByQueryPostgres ( c * gin . Context , pagenum int , countAll bool , withUser bool , deleted bool , hidden bool ) (
2017-07-02 16:54:55 +02:00
search structs . TorrentParam , tor [ ] models . Torrent , count int , err error ,
2017-05-10 11:03:49 +02:00
) {
2017-07-02 16:54:55 +02:00
search . FromRequest ( c )
2017-05-09 13:31:58 +02:00
2017-07-02 16:54:55 +02:00
search . Offset = uint32 ( pagenum )
2017-06-21 03:58:54 +02:00
search . Hidden = hidden
2017-05-09 13:31:58 +02:00
2017-06-13 13:31:11 +02:00
orderBy := search . Sort . ToDBField ( )
2017-07-02 16:54:55 +02:00
if search . Sort == structs . Date {
2017-06-13 13:31:11 +02:00
search . NotNull = search . Sort . ToDBField ( ) + " IS NOT NULL"
2017-05-05 16:39:15 +02:00
}
2017-05-08 16:39:41 +02:00
Consistency, formatting, error checking, cleanup, and a couple bug fixes (#245)
* Checkpoint: it builds
The config, db, model, network, os, and public packages have had some
fixes to glaringly obvious flaws, dead code removed, and stylistic
changes.
* Style changes and old code removal in router
Router needs a lot of work done to its (lack of) error handling.
* Dead code removal and style changes
Now up to util/email/email.go. After I'm finished with the initial sweep
I'll go back and fix error handling and security issues. Then I'll fix
the broken API. Then I'll go through to add documentation and fix code
visibility.
* Finish dead code removal and style changes
Vendored libraries not touched. Everything still needs security fixes
and documentation. There's also one case of broken functionality.
* Fix accidental find-and-replace
* Style, error checking, saftey, bug fix changes
* Redo error checking erased during merge
* Re-add merge-erased fix. Make Safe safe.
2017-05-10 04:34:40 +02:00
orderBy += " "
2017-05-09 13:31:58 +02:00
2017-07-02 16:54:55 +02:00
switch search . Order {
case true :
Consistency, formatting, error checking, cleanup, and a couple bug fixes (#245)
* Checkpoint: it builds
The config, db, model, network, os, and public packages have had some
fixes to glaringly obvious flaws, dead code removed, and stylistic
changes.
* Style changes and old code removal in router
Router needs a lot of work done to its (lack of) error handling.
* Dead code removal and style changes
Now up to util/email/email.go. After I'm finished with the initial sweep
I'll go back and fix error handling and security issues. Then I'll fix
the broken API. Then I'll go through to add documentation and fix code
visibility.
* Finish dead code removal and style changes
Vendored libraries not touched. Everything still needs security fixes
and documentation. There's also one case of broken functionality.
* Fix accidental find-and-replace
* Style, error checking, saftey, bug fix changes
* Redo error checking erased during merge
* Re-add merge-erased fix. Make Safe safe.
2017-05-10 04:34:40 +02:00
orderBy += "asc"
2017-07-02 16:54:55 +02:00
if models . ORM . Dialect ( ) . GetName ( ) == "postgres" {
2017-05-17 23:41:18 +02:00
orderBy += " NULLS FIRST"
2017-05-24 09:11:13 +02:00
}
2017-07-02 16:54:55 +02:00
case false :
Consistency, formatting, error checking, cleanup, and a couple bug fixes (#245)
* Checkpoint: it builds
The config, db, model, network, os, and public packages have had some
fixes to glaringly obvious flaws, dead code removed, and stylistic
changes.
* Style changes and old code removal in router
Router needs a lot of work done to its (lack of) error handling.
* Dead code removal and style changes
Now up to util/email/email.go. After I'm finished with the initial sweep
I'll go back and fix error handling and security issues. Then I'll fix
the broken API. Then I'll go through to add documentation and fix code
visibility.
* Finish dead code removal and style changes
Vendored libraries not touched. Everything still needs security fixes
and documentation. There's also one case of broken functionality.
* Fix accidental find-and-replace
* Style, error checking, saftey, bug fix changes
* Redo error checking erased during merge
* Re-add merge-erased fix. Make Safe safe.
2017-05-10 04:34:40 +02:00
orderBy += "desc"
2017-07-02 16:54:55 +02:00
if models . ORM . Dialect ( ) . GetName ( ) == "postgres" {
2017-05-17 23:41:18 +02:00
orderBy += " NULLS LAST"
2017-05-24 09:11:13 +02:00
}
}
2017-05-17 23:41:18 +02:00
2017-07-02 16:54:55 +02:00
parameters := structs . WhereParams {
2017-05-11 14:22:49 +02:00
Params : make ( [ ] interface { } , 0 , 64 ) ,
}
conditions := make ( [ ] string , 0 , 64 )
2017-06-18 00:30:12 +02:00
if len ( search . Category ) > 0 {
conditionsOr := make ( [ ] string , len ( search . Category ) )
for key , val := range search . Category {
conditionsOr [ key ] = "(category = ? AND sub_category = ?)"
parameters . Params = append ( parameters . Params , val . Main )
parameters . Params = append ( parameters . Params , val . Sub )
}
conditions = append ( conditions , strings . Join ( conditionsOr , " OR " ) )
2017-05-11 14:22:49 +02:00
}
2017-06-18 00:30:12 +02:00
2017-05-11 14:22:49 +02:00
if search . UserID != 0 {
conditions = append ( conditions , "uploader = ?" )
parameters . Params = append ( parameters . Params , search . UserID )
}
2017-06-21 03:58:54 +02:00
if search . Hidden {
conditions = append ( conditions , "hidden = ?" )
parameters . Params = append ( parameters . Params , false )
}
2017-05-30 14:12:42 +02:00
if search . FromID != 0 {
2017-05-30 00:28:21 +02:00
conditions = append ( conditions , "torrent_id > ?" )
2017-05-30 14:12:42 +02:00
parameters . Params = append ( parameters . Params , search . FromID )
2017-05-30 00:28:21 +02:00
}
2017-06-02 16:10:31 +02:00
if search . FromDate != "" {
conditions = append ( conditions , "date >= ?" )
parameters . Params = append ( parameters . Params , search . FromDate )
}
if search . ToDate != "" {
conditions = append ( conditions , "date <= ?" )
parameters . Params = append ( parameters . Params , search . ToDate )
}
2017-05-11 14:22:49 +02:00
if search . Status != 0 {
2017-07-02 16:54:55 +02:00
if search . Status == structs . FilterRemakes {
2017-05-11 21:46:23 +02:00
conditions = append ( conditions , "status <> ?" )
2017-05-11 14:22:49 +02:00
} else {
conditions = append ( conditions , "status >= ?" )
2017-05-11 13:40:50 +02:00
}
2017-05-11 14:22:49 +02:00
parameters . Params = append ( parameters . Params , strconv . Itoa ( int ( search . Status ) + 1 ) )
}
if len ( search . NotNull ) > 0 {
conditions = append ( conditions , search . NotNull )
}
2017-06-12 01:14:26 +02:00
if search . Language != "" {
conditions = append ( conditions , "language " + searchOperator )
parameters . Params = append ( parameters . Params , "%" + search . Language + "%" )
}
2017-06-13 13:31:11 +02:00
if search . MinSize > 0 {
conditions = append ( conditions , "filesize >= ?" )
parameters . Params = append ( parameters . Params , uint64 ( search . MinSize ) )
}
if search . MaxSize > 0 {
conditions = append ( conditions , "filesize <= ?" )
parameters . Params = append ( parameters . Params , uint64 ( search . MaxSize ) )
}
2017-05-09 13:31:58 +02:00
2017-07-02 16:54:55 +02:00
querySplit := strings . Fields ( search . NameLike )
for _ , word := range querySplit {
2017-05-11 14:22:49 +02:00
firstRune , _ := utf8 . DecodeRuneInString ( word )
if len ( word ) == 1 && unicode . IsPunct ( firstRune ) {
// some queries have a single punctuation character
// which causes a full scan instead of using the index
// and yields no meaningful results.
// due to len() == 1 we're just looking at 1-byte/ascii
// punctuation characters.
continue
2017-05-08 15:50:18 +02:00
}
2017-05-09 14:31:47 +02:00
2017-05-26 12:12:52 +02:00
if useTSQuery && stringIsASCII ( word ) {
2017-05-12 17:41:26 +02:00
conditions = append ( conditions , "torrent_name @@ plainto_tsquery(?)" )
parameters . Params = append ( parameters . Params , word )
} else {
// TODO: possible to make this faster?
conditions = append ( conditions , "torrent_name " + searchOperator )
parameters . Params = append ( parameters . Params , "%" + word + "%" )
}
2017-05-11 14:22:49 +02:00
}
parameters . Conditions = strings . Join ( conditions [ : ] , " AND " )
2017-05-11 15:24:20 +02:00
2017-05-11 14:22:49 +02:00
log . Infof ( "SQL query is :: %s\n" , parameters . Conditions )
2017-05-05 16:39:15 +02:00
2017-06-25 15:26:46 +02:00
if deleted {
2017-07-02 16:54:55 +02:00
tor , count , err = torrents . FindDeleted ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-06-25 15:26:46 +02:00
} else if countAll && ! withUser {
2017-07-02 16:54:55 +02:00
tor , count , err = torrents . FindOrderBy ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-06-25 15:26:46 +02:00
} else if withUser {
2017-07-02 16:54:55 +02:00
tor , count , err = torrents . FindWithUserOrderBy ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-06-25 15:26:46 +02:00
} else {
2017-07-02 16:54:55 +02:00
tor , err = torrents . FindOrderByNoCount ( & parameters , orderBy , int ( search . Max ) , int ( search . Max * ( search . Offset - 1 ) ) )
2017-06-25 15:26:46 +02:00
}
2017-05-09 13:31:58 +02:00
return
2017-05-12 17:41:26 +02:00
}