Skip to content

Commit ad97f08

Browse files
authored
[YouTube] Fix parsing short relative date formats (English only) (#1068)
1 parent d294ccb commit ad97f08

File tree

5 files changed

+195
-27
lines changed

5 files changed

+195
-27
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/localization/TimeAgoParser.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ private boolean textualDateMatches(final String textualDate, final String agoPhr
9393
final String escapedSeparator = patternsHolder.wordSeparator().equals(" ")
9494
// Equivalent to \h (available from JDK 8): treat any horizontal whitespace like a normal space
9595
// (non-breaking space, thin space, etc.)
96-
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"
96+
// Also treat digits as separators, so that strings with no space between the number and the unit (like "2wk") can be parsed
97+
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\d]"
9798
: Pattern.quote(patternsHolder.wordSeparator());
9899

99100
// (^|separator)pattern($|separator)
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package org.schabi.newpipe.extractor.utils;
2+
3+
import org.junit.jupiter.api.BeforeAll;
4+
import org.junit.jupiter.api.Test;
5+
import org.schabi.newpipe.extractor.exceptions.ParsingException;
6+
import org.schabi.newpipe.extractor.localization.Localization;
7+
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
8+
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
9+
10+
import java.time.OffsetDateTime;
11+
import java.time.ZoneOffset;
12+
import java.time.temporal.ChronoUnit;
13+
14+
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
import static org.junit.jupiter.api.Assertions.assertTrue;
16+
17+
public class TimeagoTest {
18+
private static TimeAgoParser parser;
19+
private static OffsetDateTime now;
20+
21+
@BeforeAll
22+
public static void setUp() {
23+
parser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
24+
now = OffsetDateTime.now(ZoneOffset.UTC);
25+
}
26+
27+
@Test
28+
void parseTimeago() throws ParsingException {
29+
assertTimeWithin1s(
30+
now.minus(1, ChronoUnit.SECONDS),
31+
parser.parse("1 second ago").offsetDateTime()
32+
);
33+
assertTimeWithin1s(
34+
now.minus(12, ChronoUnit.SECONDS),
35+
parser.parse("12 second ago").offsetDateTime()
36+
);
37+
assertTimeWithin1s(
38+
now.minus(1, ChronoUnit.MINUTES),
39+
parser.parse("1 minute ago").offsetDateTime()
40+
);
41+
assertTimeWithin1s(
42+
now.minus(23, ChronoUnit.MINUTES),
43+
parser.parse("23 minutes ago").offsetDateTime()
44+
);
45+
assertTimeWithin1s(
46+
now.minus(1, ChronoUnit.HOURS),
47+
parser.parse("1 hour ago").offsetDateTime()
48+
);
49+
assertTimeWithin1s(
50+
now.minus(8, ChronoUnit.HOURS),
51+
parser.parse("8 hours ago").offsetDateTime()
52+
);
53+
assertEquals(
54+
now.minus(1, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS),
55+
parser.parse("1 day ago").offsetDateTime()
56+
);
57+
assertEquals(
58+
now.minus(3, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS),
59+
parser.parse("3 days ago").offsetDateTime()
60+
);
61+
assertEquals(
62+
now.minus(1, ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS),
63+
parser.parse("1 week ago").offsetDateTime()
64+
);
65+
assertEquals(
66+
now.minus(3, ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS),
67+
parser.parse("3 weeks ago").offsetDateTime()
68+
);
69+
assertEquals(
70+
now.minus(1, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS),
71+
parser.parse("1 month ago").offsetDateTime()
72+
);
73+
assertEquals(
74+
now.minus(3, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS),
75+
parser.parse("3 months ago").offsetDateTime()
76+
);
77+
assertEquals(
78+
now.minus(1, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS),
79+
parser.parse("1 year ago").offsetDateTime()
80+
);
81+
assertEquals(
82+
now.minus(3, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS),
83+
parser.parse("3 years ago").offsetDateTime()
84+
);
85+
}
86+
87+
@Test
88+
void parseTimeagoShort() throws ParsingException {
89+
final TimeAgoParser parser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
90+
final OffsetDateTime now = OffsetDateTime.now(ZoneOffset.UTC);
91+
92+
assertTimeWithin1s(
93+
now.minus(1, ChronoUnit.SECONDS),
94+
parser.parse("1 sec ago").offsetDateTime()
95+
);
96+
assertTimeWithin1s(
97+
now.minus(12, ChronoUnit.SECONDS),
98+
parser.parse("12 sec ago").offsetDateTime()
99+
);
100+
assertTimeWithin1s(
101+
now.minus(1, ChronoUnit.MINUTES),
102+
parser.parse("1 min ago").offsetDateTime()
103+
);
104+
assertTimeWithin1s(
105+
now.minus(23, ChronoUnit.MINUTES),
106+
parser.parse("23 min ago").offsetDateTime()
107+
);
108+
assertTimeWithin1s(
109+
now.minus(1, ChronoUnit.HOURS),
110+
parser.parse("1 hr ago").offsetDateTime()
111+
);
112+
assertTimeWithin1s(
113+
now.minus(8, ChronoUnit.HOURS),
114+
parser.parse("8 hr ago").offsetDateTime()
115+
);
116+
assertEquals(
117+
now.minus(1, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS),
118+
parser.parse("1 day ago").offsetDateTime()
119+
);
120+
assertEquals(
121+
now.minus(3, ChronoUnit.DAYS).truncatedTo(ChronoUnit.HOURS),
122+
parser.parse("3 days ago").offsetDateTime()
123+
);
124+
assertEquals(
125+
now.minus(1, ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS),
126+
parser.parse("1 wk ago").offsetDateTime()
127+
);
128+
assertEquals(
129+
now.minus(3, ChronoUnit.WEEKS).truncatedTo(ChronoUnit.HOURS),
130+
parser.parse("3 wk ago").offsetDateTime()
131+
);
132+
assertEquals(
133+
now.minus(1, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS),
134+
parser.parse("1 mo ago").offsetDateTime()
135+
);
136+
assertEquals(
137+
now.minus(3, ChronoUnit.MONTHS).truncatedTo(ChronoUnit.HOURS),
138+
parser.parse("3 mo ago").offsetDateTime()
139+
);
140+
assertEquals(
141+
now.minus(1, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS),
142+
parser.parse("1 yr ago").offsetDateTime()
143+
);
144+
assertEquals(
145+
now.minus(3, ChronoUnit.YEARS).minusDays(1).truncatedTo(ChronoUnit.HOURS),
146+
parser.parse("3 yr ago").offsetDateTime()
147+
);
148+
}
149+
150+
void assertTimeWithin1s(final OffsetDateTime expected, final OffsetDateTime actual) {
151+
final long delta = Math.abs(expected.toEpochSecond() - actual.toEpochSecond());
152+
assertTrue(delta <= 1, String.format("Expected: %s\nActual: %s", expected, actual));
153+
}
154+
}

timeago-parser/raw/unique_patterns.json

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -415,62 +415,75 @@
415415
"word_separator": " ",
416416
"seconds": [
417417
"second",
418-
"seconds"
418+
"seconds",
419+
"sec"
419420
],
420421
"minutes": [
421422
"minute",
422-
"minutes"
423+
"minutes",
424+
"min"
423425
],
424426
"hours": [
425427
"hour",
426-
"hours"
428+
"hours",
429+
"h"
427430
],
428431
"days": [
429432
"day",
430-
"days"
433+
"days",
434+
"d"
431435
],
432436
"weeks": [
433437
"week",
434-
"weeks"
438+
"weeks",
439+
"w"
435440
],
436441
"months": [
437442
"month",
438-
"months"
443+
"months",
444+
"mo"
439445
],
440446
"years": [
441447
"year",
442-
"years"
448+
"years",
449+
"y"
443450
]
444451
},
445452
"en-GB": {
446453
"word_separator": " ",
447454
"seconds": [
448455
"second",
449-
"seconds"
456+
"seconds",
457+
"sec"
450458
],
451459
"minutes": [
452460
"minute",
453-
"minutes"
461+
"minutes",
462+
"min"
454463
],
455464
"hours": [
456465
"hour",
457-
"hours"
466+
"hours",
467+
"hr"
458468
],
459469
"days": [
460470
"day",
461471
"days"
462472
],
463473
"weeks": [
464474
"week",
465-
"weeks"
475+
"weeks",
476+
"wk"
466477
],
467478
"months": [
468479
"month",
469-
"months"
480+
"months",
481+
"mo"
470482
],
471483
"years": [
472484
"year",
473-
"years"
485+
"years",
486+
"yr"
474487
]
475488
},
476489
"es": {

timeago-parser/src/main/java/org/schabi/newpipe/extractor/timeago/patterns/en.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
public class en extends PatternsHolder {
1010
private static final String WORD_SEPARATOR = " ";
1111
private static final String[]
12-
SECONDS /**/ = {"second", "seconds"},
13-
MINUTES /**/ = {"minute", "minutes"},
14-
HOURS /**/ = {"hour", "hours"},
15-
DAYS /**/ = {"day", "days"},
16-
WEEKS /**/ = {"week", "weeks"},
17-
MONTHS /**/ = {"month", "months"},
18-
YEARS /**/ = {"year", "years"};
12+
SECONDS /**/ = {"second", "seconds", "sec"},
13+
MINUTES /**/ = {"minute", "minutes", "min"},
14+
HOURS /**/ = {"hour", "hours", "h"},
15+
DAYS /**/ = {"day", "days", "d"},
16+
WEEKS /**/ = {"week", "weeks", "w"},
17+
MONTHS /**/ = {"month", "months", "mo"},
18+
YEARS /**/ = {"year", "years", "y"};
1919

2020
private static final en INSTANCE = new en();
2121

timeago-parser/src/main/java/org/schabi/newpipe/extractor/timeago/patterns/en_GB.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
public class en_GB extends PatternsHolder {
1010
private static final String WORD_SEPARATOR = " ";
1111
private static final String[]
12-
SECONDS /**/ = {"second", "seconds"},
13-
MINUTES /**/ = {"minute", "minutes"},
14-
HOURS /**/ = {"hour", "hours"},
12+
SECONDS /**/ = {"second", "seconds", "sec"},
13+
MINUTES /**/ = {"minute", "minutes", "min"},
14+
HOURS /**/ = {"hour", "hours", "hr"},
1515
DAYS /**/ = {"day", "days"},
16-
WEEKS /**/ = {"week", "weeks"},
17-
MONTHS /**/ = {"month", "months"},
18-
YEARS /**/ = {"year", "years"};
16+
WEEKS /**/ = {"week", "weeks", "wk"},
17+
MONTHS /**/ = {"month", "months", "mo"},
18+
YEARS /**/ = {"year", "years", "yr"};
1919

2020
private static final en_GB INSTANCE = new en_GB();
2121

0 commit comments

Comments
 (0)