From 692d3c1b2c139bb4b675e5c837dba546a3d55610 Mon Sep 17 00:00:00 2001 From: inorichi Date: Tue, 10 Nov 2015 19:31:18 +0100 Subject: [PATCH] Initial chapter number recognition (needs improvement). Remove an old class. --- .../kanade/mangafeed/data/models/Chapter.java | 7 +- .../mangafeed/data/tables/ChaptersTable.java | 4 + .../mangafeed/util/ChapterRecognition.java | 102 ++++++++++++++++++ .../kanade/mangafeed/util/DummyDataUtil.java | 52 --------- .../mangafeed/ChapterRecognitionTest.java | 87 +++++++++++++++ 5 files changed, 198 insertions(+), 54 deletions(-) create mode 100644 app/src/main/java/eu/kanade/mangafeed/util/ChapterRecognition.java delete mode 100644 app/src/main/java/eu/kanade/mangafeed/util/DummyDataUtil.java create mode 100644 app/src/test/java/eu/kanade/mangafeed/ChapterRecognitionTest.java diff --git a/app/src/main/java/eu/kanade/mangafeed/data/models/Chapter.java b/app/src/main/java/eu/kanade/mangafeed/data/models/Chapter.java index 034990c3a..2f43cd208 100644 --- a/app/src/main/java/eu/kanade/mangafeed/data/models/Chapter.java +++ b/app/src/main/java/eu/kanade/mangafeed/data/models/Chapter.java @@ -32,6 +32,9 @@ public class Chapter { @StorIOSQLiteColumn(name = ChaptersTable.COLUMN_DATE_UPLOAD) public long date_upload; + @StorIOSQLiteColumn(name = ChaptersTable.COLUMN_CHAPTER_NUMBER) + public float chapter_number; + public int downloaded; public static final int UNKNOWN = 0; @@ -58,7 +61,7 @@ public class Chapter { } public static Chapter newChapter() { - Chapter c = new Chapter(); - return c; + return new Chapter(); } + } diff --git a/app/src/main/java/eu/kanade/mangafeed/data/tables/ChaptersTable.java b/app/src/main/java/eu/kanade/mangafeed/data/tables/ChaptersTable.java index 394c27360..8dc9d5179 100644 --- a/app/src/main/java/eu/kanade/mangafeed/data/tables/ChaptersTable.java +++ b/app/src/main/java/eu/kanade/mangafeed/data/tables/ChaptersTable.java @@ -31,6 +31,9 @@ public class ChaptersTable { @NonNull public static final String COLUMN_LAST_PAGE_READ = "last_page_read"; + @NonNull + public static final String COLUMN_CHAPTER_NUMBER = "chapter_number"; + @NonNull public static String getCreateTableQuery() { return "CREATE TABLE " + TABLE + "(" @@ -40,6 +43,7 @@ public class ChaptersTable { + COLUMN_NAME + " TEXT NOT NULL, " + COLUMN_READ + " BOOLEAN NOT NULL, " + COLUMN_LAST_PAGE_READ + " INT NOT NULL, " + + COLUMN_CHAPTER_NUMBER + " FLOAT NOT NULL, " + COLUMN_DATE_FETCH + " LONG NOT NULL, " + COLUMN_DATE_UPLOAD + " LONG NOT NULL, " + "FOREIGN KEY(" + COLUMN_MANGA_ID + ") REFERENCES " + MangasTable.TABLE + "(" + MangasTable.COLUMN_ID + ") " diff --git a/app/src/main/java/eu/kanade/mangafeed/util/ChapterRecognition.java b/app/src/main/java/eu/kanade/mangafeed/util/ChapterRecognition.java new file mode 100644 index 000000000..ecc7c468f --- /dev/null +++ b/app/src/main/java/eu/kanade/mangafeed/util/ChapterRecognition.java @@ -0,0 +1,102 @@ +package eu.kanade.mangafeed.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import eu.kanade.mangafeed.data.models.Chapter; +import eu.kanade.mangafeed.data.models.Manga; + +public class ChapterRecognition { + + private static Pattern p1 = Pattern.compile("ch.?(\\d+[\\.,]?\\d*)"); + private static Pattern p2 = Pattern.compile("(\\d+[\\.,]?\\d*)"); + + public static void parseChapterNumber(Chapter chapter, Manga manga) { + if (chapter.chapter_number != 0) + return; + + // Remove spaces and convert to lower case + String name = replaceIrrelevantCharacters(chapter.name); + Matcher matcher; + + // Safest option, the chapter has a token prepended + matcher = p1.matcher(name); + if (matcher.find()) { + chapter.chapter_number = Float.parseFloat(matcher.group(1)); + return; + } + + // If there's only one number, use it + matcher = p2.matcher(name); + List occurences = getAllOccurrences(matcher); + if (occurences.size() == 1) { + chapter.chapter_number = occurences.get(0); + return; + } + + // Try to remove the manga name from the chapter, and try again + String mangaName = replaceIrrelevantCharacters(manga.title); + String nameWithoutManga = difference(mangaName, name); + if (!nameWithoutManga.isEmpty()) { + matcher = p2.matcher(nameWithoutManga); + occurences = getAllOccurrences(matcher); + if (occurences.size() == 1) { + chapter.chapter_number = occurences.get(0); + return; + } + } + + // TODO more checks (maybe levenshtein?) + + } + + public static List getAllOccurrences(Matcher matcher) { + List occurences = new ArrayList<>(); + while (matcher.find()) { + try { + float value = Float.parseFloat(matcher.group()); + if (!occurences.contains(value)) + occurences.add(value); + } catch (NumberFormatException e) { /* Do nothing */ } + } + return occurences; + } + + public static String replaceIrrelevantCharacters(String str) { + return str.replaceAll("\\s+", "").toLowerCase(); + } + + public static String difference(String str1, String str2) { + if (str1 == null) { + return str2; + } + if (str2 == null) { + return str1; + } + int at = indexOfDifference(str1, str2); + if (at == -1) { + return ""; + } + return str2.substring(at); + } + public static int indexOfDifference(String str1, String str2) { + if (str1 == str2) { + return -1; + } + if (str1 == null || str2 == null) { + return 0; + } + int i; + for (i = 0; i < str1.length() && i < str2.length(); ++i) { + if (str1.charAt(i) != str2.charAt(i)) { + break; + } + } + if (i < str2.length() || i < str1.length()) { + return i; + } + return -1; + } +} diff --git a/app/src/main/java/eu/kanade/mangafeed/util/DummyDataUtil.java b/app/src/main/java/eu/kanade/mangafeed/util/DummyDataUtil.java deleted file mode 100644 index eb9f77d22..000000000 --- a/app/src/main/java/eu/kanade/mangafeed/util/DummyDataUtil.java +++ /dev/null @@ -1,52 +0,0 @@ -package eu.kanade.mangafeed.util; - -import java.util.ArrayList; -import java.util.List; - -import eu.kanade.mangafeed.data.models.Chapter; -import eu.kanade.mangafeed.data.models.Manga; - -/** - * Created by len on 8/10/15. - */ -public class DummyDataUtil { - - public static List createDummyManga() { - ArrayList mangas = new ArrayList<>(); - mangas.add(createDummyManga("One Piece")); - mangas.add(createDummyManga("Berserk")); - mangas.add(createDummyManga("Horimiya")); - mangas.add(createDummyManga("Übel Blatt")); - - return mangas; - } - - private static Manga createDummyManga(String title) { - Manga m = new Manga(); - m.title = title; - m.url="http://example.com"; - m.artist="Eiichiro Oda"; - m.author="Eiichiro Oda"; - m.description="..."; - m.genre="Action, Drama"; - m.status="Ongoing"; - m.thumbnail_url="http://example.com/pic.png"; - return m; - } - - public static List createDummyChapters() { - List chapters = new ArrayList<>(); - Chapter c; - - for (int i = 1; i < 50; i++) { - c = new Chapter(); - c.manga_id = 1L; - c.name = "Chapter " + i; - c.url = "http://example.com/1"; - chapters.add(c); - } - - return chapters; - } - -} diff --git a/app/src/test/java/eu/kanade/mangafeed/ChapterRecognitionTest.java b/app/src/test/java/eu/kanade/mangafeed/ChapterRecognitionTest.java new file mode 100644 index 000000000..165f8072d --- /dev/null +++ b/app/src/test/java/eu/kanade/mangafeed/ChapterRecognitionTest.java @@ -0,0 +1,87 @@ +package eu.kanade.mangafeed; + +import org.junit.Before; +import org.junit.Test; + +import eu.kanade.mangafeed.data.models.Chapter; +import eu.kanade.mangafeed.data.models.Manga; +import eu.kanade.mangafeed.util.ChapterRecognition; + +import static org.hamcrest.Matchers.is; +import static org.junit.Assert.assertThat; + +public class ChapterRecognitionTest { + + Manga randomManga; + + private Chapter createChapter(String title) { + Chapter chapter = new Chapter(); + chapter.name = title; + return chapter; + } + + @Before + public void setUp() { + randomManga = new Manga(); + randomManga.title = "Something"; + } + + @Test + public void testWithOneDigit() { + Chapter c = createChapter("Ch.3: Self-proclaimed Genius"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number, is(3f)); + } + + @Test + public void testWithVolumeBefore() { + Chapter c = createChapter("Vol.1 Ch.4: Misrepresentation"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number, is(4f)); + } + + @Test + public void testWithVolumeAndVersionNumber() { + Chapter c = createChapter("Vol.1 Ch.3 (v2) Read Online"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number, is(3f)); + } + + @Test + public void testWithVolumeAndNumberInTitle() { + Chapter c = createChapter("Vol.15 Ch.90: Here Blooms the Daylily, Part 4"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number, is(90f)); + } + + @Test + public void testWithVolumeAndSpecialChapter() { + Chapter c = createChapter("Vol.10 Ch.42.5: Homecoming (Beginning)"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number, is(42.5f)); + } + + @Test + public void testWithJustANumber() { + Chapter c = createChapter("Homecoming (Beginning) 42"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number, is(42f)); + } + + @Test + public void testWithJustASpecialChapter() { + Chapter c = createChapter("Homecoming (Beginning) 42.5"); + ChapterRecognition.parseChapterNumber(c, randomManga); + assertThat(c.chapter_number, is(42.5f)); + } + + @Test + public void testWithNumberinMangaTitle() { + Chapter c = createChapter("3x3 Eyes 96"); + Manga m = new Manga(); + m.title = "3x3 Eyes"; + ChapterRecognition.parseChapterNumber(c, m); + assertThat(c.chapter_number, is(96f)); + } + +}