Skip to content

Commit 02d0cdd

Browse files
committed
Pull request #8: AG-4306 Performance
- performance test - regex precompilation - nsstring - single-pass loops at important points - logic fixes - json serialization Squashed commit of the following: commit 539618e Author: mizzick <[email protected]> Date: Fri Oct 16 14:37:00 2020 +0200 performance: json encoder tests commit 81195cf Author: mizzick <[email protected]> Date: Fri Oct 16 14:01:02 2020 +0200 fix tests commit 5ec1bd8 Merge: 2e97cb0 d96452b Author: mizzick <[email protected]> Date: Fri Oct 16 13:58:13 2020 +0200 Merge remote-tracking branch 'origin/master' into feature/AG-4306 commit 2e97cb0 Author: mizzick <[email protected]> Date: Thu Oct 15 01:14:13 2020 +0200 performance: json encoder commit 3720c95 Author: mizzick <[email protected]> Date: Thu Oct 15 01:11:44 2020 +0200 performance: json encoder commit 21c7ee0 Author: mizzick <[email protected]> Date: Thu Oct 15 00:57:10 2020 +0200 performance: json encoder commit d568cb5 Author: mizzick <[email protected]> Date: Wed Oct 14 23:40:13 2020 +0200 performance: json encoder commit 7bab5e3 Author: Andrey Meshkov <[email protected]> Date: Wed Oct 14 20:36:12 2020 +0300 Fixed BlockedEntryFactoryTests commit 2a1e8d1 Author: Andrey Meshkov <[email protected]> Date: Wed Oct 14 20:33:12 2020 +0300 Avoid unnecessary calls of validateRegExp commit aea47a4 Author: Andrey Meshkov <[email protected]> Date: Wed Oct 14 20:09:37 2020 +0300 Fix uBO CSS conversion commit 9ed79d7 Author: Andrey Meshkov <[email protected]> Date: Wed Oct 14 20:07:20 2020 +0300 Speed up RuleConverter commit b920b59 Merge: dee9044 bc01f78 Author: Andrey Meshkov <[email protected]> Date: Wed Oct 14 19:17:34 2020 +0300 merge createRegex commit dee9044 Author: Andrey Meshkov <[email protected]> Date: Wed Oct 14 19:14:30 2020 +0300 Speed up createRegexText commit bc01f78 Author: mizzick <[email protected]> Date: Wed Oct 14 18:13:54 2020 +0200 performance: simple regex commit 02d70ee Author: mizzick <[email protected]> Date: Wed Oct 14 17:10:09 2020 +0200 performance: json 
serialization commit 251682c Author: mizzick <[email protected]> Date: Wed Oct 14 16:59:27 2020 +0200 performance: search pseudos commit be74132 Author: mizzick <[email protected]> Date: Wed Oct 14 16:46:12 2020 +0200 performance: nsstring commit 00c8b4b Author: mizzick <[email protected]> Date: Wed Oct 14 00:55:24 2020 +0200 performance: nsstring simple regex commit bffa069 Author: mizzick <[email protected]> Date: Wed Oct 14 00:08:45 2020 +0200 performance: nsstring ruleconverter commit 3777b35 Author: mizzick <[email protected]> Date: Tue Oct 13 22:55:58 2020 +0200 performance: nsstring ... and 41 more commits
1 parent d96452b commit 02d0cdd

30 files changed

+36765
-836
lines changed

.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Package.resolved

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"object": {
3+
"pins": [
4+
{
5+
"package": "Punycode",
6+
"repositoryURL": "https://github.com/gumob/PunycodeSwift.git",
7+
"state": {
8+
"branch": null,
9+
"revision": "4356ec54e073741449640d3d50a1fd24fd1e1b8b",
10+
"version": "2.1.0"
11+
}
12+
}
13+
]
14+
},
15+
"version": 1
16+
}

Package.swift

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ let package = Package(
1818
dependencies: [
1919
// Dependencies declare other packages that this package depends on.
2020
// .package(url: /* package url */, from: "1.0.0"),
21+
.package(url: "https://github.com/gumob/PunycodeSwift.git", from: "2.0.0"),
2122
],
2223
targets: [
2324
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
@@ -27,7 +28,7 @@ let package = Package(
2728
dependencies: ["ContentBlockerConverter"]),
2829
.target(
2930
name: "ContentBlockerConverter",
30-
dependencies: []),
31+
dependencies: ["Punycode"]),
3132
.testTarget(
3233
name: "ContentBlockerConverterTests",
3334
dependencies: ["ContentBlockerConverter"]),

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -66,5 +66,5 @@ Push a new tag in `v*.*.*` format, then provided github action is intended to bu
6666
- Scriptlet rules (#%#//scriptlet)
6767
- Scriptlet rules exceptions
6868

69-
### Third-party libraries
70-
- Punycode library (https://github.com/gumob/PunycodeSwift/tree/master/Source)
69+
### Third-party dependencies
70+
- Punycode (https://github.com/gumob/PunycodeSwift.git)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
import Foundation
2+
3+
/**
4+
* Blocker entries JSON encoder
5+
*/
6+
class BlockerEntryEncoder {

    /**
     * Encodes array of blocker entries
     *
     * - Parameter entries: entries to serialize; they are emitted in array order
     * - Returns: JSON array text, "[]" when `entries` is empty
     */
    func encode(entries: [BlockerEntry]) -> String {
        var result = "[";

        for (index, entry) in entries.enumerated() {
            if index > 0 {
                result.append(",");
            }

            result.append(encodeEntry(entry: entry));
        }

        result.append("]");

        return result;
    }

    /**
     * Encodes a single entry as {"trigger":{...},"action":{...}}
     */
    private func encodeEntry(entry: BlockerEntry) -> String {
        let action = encodeAction(action: entry.action);
        let trigger = encodeTrigger(trigger: entry.trigger);

        var result = "{\"trigger\":";
        result.append(trigger);
        result.append(",\"action\":");
        result.append(action);
        result.append("}");
        return result;
    }

    /**
     * Encodes the action object.
     * "type" is always written (unescaped, as before); the remaining members
     * are written only when they are non-nil, and their values are escaped.
     */
    private func encodeAction(action: BlockerEntry.Action) -> String {
        var result = "{\"type\":\"";
        result.append(action.type);
        result.append("\"");

        appendStringMember(key: "selector", value: action.selector, to: &result);
        appendStringMember(key: "css", value: action.css, to: &result);
        appendStringMember(key: "script", value: action.script, to: &result);
        appendStringMember(key: "scriptlet", value: action.scriptlet, to: &result);
        appendStringMember(key: "scriptletParam", value: action.scriptletParam, to: &result);

        result.append("}");

        return result;
    }

    /**
     * Encodes the trigger object.
     * "url-filter" is always written first; every other member is written only
     * when it is non-nil and is always preceded by a comma.
     */
    private func encodeTrigger(trigger: BlockerEntry.Trigger) -> String {
        var result = "{\"url-filter\":\"";
        // NOTE(review): force unwrap kept from the previous implementation;
        // presumably every trigger is created with a url-filter - confirm.
        result.append(escapeString(value: trigger.urlFilter!));
        result.append("\"");

        // The separating comma used to be missing here, which produced
        // invalid JSON for every trigger that had a shortcut.
        appendStringMember(key: "url-shortcut", value: trigger.shortcut, to: &result);

        if let caseSensitive = trigger.caseSensitive {
            // Written as a bare JSON boolean, not as a quoted string.
            result.append(",\"url-filter-is-case-sensitive\":");
            result.append(caseSensitive ? "true" : "false");
        }

        appendStringMember(key: "regex", value: trigger.regex?.pattern, to: &result);

        // Load and resource types are written without escaping, domains are escaped.
        appendArrayMember(key: "load-type", values: trigger.loadType, escape: false, to: &result);
        appendArrayMember(key: "resource-type", values: trigger.resourceType, escape: false, to: &result);
        appendArrayMember(key: "if-domain", values: trigger.ifDomain, escape: true, to: &result);
        appendArrayMember(key: "unless-domain", values: trigger.unlessDomain, escape: true, to: &result);

        result.append("}");

        return result;
    }

    /**
     * Appends `,"key":"escaped value"` to `result`; does nothing when `value` is nil.
     * `key` is written as is, so it must not contain characters that need escaping.
     */
    private func appendStringMember(key: String, value: String?, to result: inout String) {
        guard let value = value else {
            return;
        }

        result.append(",\"");
        result.append(key);
        result.append("\":\"");
        result.append(escapeString(value: value));
        result.append("\"");
    }

    /**
     * Appends `,"key":[...]` to `result`; does nothing when `values` is nil.
     */
    private func appendArrayMember(key: String, values: [String]?, escape: Bool, to result: inout String) {
        guard let values = values else {
            return;
        }

        result.append(",\"");
        result.append(key);
        result.append("\":");
        result.append(encodeStringArray(arr: values, escape: escape));
    }

    /**
     * Encodes array of strings as a JSON array
     *
     * - Parameter arr: values to write
     * - Parameter escape: when true every value goes through `escapeString`,
     *   otherwise values are written as is
     */
    private func encodeStringArray(arr: [String], escape: Bool = false) -> String {
        var result = "[";

        for (index, item) in arr.enumerated() {
            if index > 0 {
                result.append(",");
            }

            result.append("\"");
            result.append(escape ? escapeString(value: item) : item);
            result.append("\"");
        }

        result.append("]");

        return result;
    }

    /**
     * Escapes specials in string value
     *
     * Backslash, double quote and control characters below U+0020 are replaced
     * with their JSON escape sequences; everything else is copied unchanged.
     * Runs of unchanged scalars are appended in one piece instead of one by one.
     *
     * - Parameter value: raw string
     * - Returns: text that can be placed between double quotes in JSON
     */
    func escapeString(value: String) -> String {
        var result = "";

        let scalars = value.unicodeScalars
        let end = scalars.endIndex
        // Start of the pending run of scalars that need no escaping
        var start = scalars.startIndex
        var idx = start

        while idx < end {
            let escaped: String
            let c = scalars[idx]
            switch c {
            case "\\": escaped = "\\\\"
            case "\"": escaped = "\\\""
            case "\n": escaped = "\\n"
            case "\r": escaped = "\\r"
            case "\t": escaped = "\\t"
            case "\u{8}": escaped = "\\b"
            case "\u{C}": escaped = "\\f"
            case "\0"..<"\u{10}":
                // One hex digit, padded to four
                escaped = "\\u000\(String(c.value, radix: 16, uppercase: true))"
            case "\u{10}"..<" ":
                // Two hex digits, padded to four
                escaped = "\\u00\(String(c.value, radix: 16, uppercase: true))"
            default:
                idx = scalars.index(after: idx)
                continue
            }

            // Flush the unescaped run that precedes this scalar
            if idx != start {
                result.append(String(scalars[start..<idx]));
            }
            result.append(escaped);

            idx = scalars.index(after: idx)
            start = idx
        }

        // Flush the tail
        if start != end {
            result.append(String(scalars[start..<end]));
        }

        return result;
    }
}

Sources/ContentBlockerConverter/Compiler/BlockerEntryFactory.swift

+38-18
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ class BlockerEntryFactory {
4444
"com.vn", "hk", "org.au", "tk", "lv", "live", "to", "mobi", "gov.cn", "sh"
4545
];
4646

47+
/**
48+
* Precompiled validate regexps
49+
*/
50+
private static let VALIDATE_REGEXP_DIGITS = try! NSRegularExpression(pattern: "\\{[0-9,]+\\}", options: [.caseInsensitive]);
51+
private static let VALIDATE_REGEXP_OR = try! NSRegularExpression(pattern: #"[^\\]+\|+\S*"#, options: [.caseInsensitive]);
52+
private static let VALIDATE_REGEXP_LOOKAHEAD = try! NSRegularExpression(pattern: "\\(\\?!.*\\)", options: [.caseInsensitive]);
53+
private static let VALIDATE_REGEXP_METACHARS = try! NSRegularExpression(pattern: #"[^\\]\\[bdfnrstvw]"#, options: [.caseInsensitive]);
54+
4755
let advancedBlockingEnabled: Bool;
4856
let errorsCounter: ErrorsCounter;
4957

@@ -86,8 +94,7 @@ class BlockerEntryFactory {
8694
throw ConversionError.unsupportedRule(message: "CSP rules are not supported");
8795
}
8896

89-
let urlFilter = createUrlFilterString(rule: rule);
90-
try validateRegExp(regExp: urlFilter);
97+
let urlFilter = try createUrlFilterString(rule: rule);
9198

9299
var trigger = BlockerEntry.Trigger(urlFilter: urlFilter);
93100
var action = BlockerEntry.Action(type: "block");
@@ -159,7 +166,7 @@ class BlockerEntryFactory {
159166
return result;
160167
}
161168

162-
private func createUrlFilterString(rule: NetworkRule) -> String {
169+
private func createUrlFilterString(rule: NetworkRule) throws -> String {
163170
let isWebSocket = rule.isWebSocket;
164171

165172
// Use a single standard regex for rules that are supposed to match every URL
@@ -179,6 +186,16 @@ class BlockerEntryFactory {
179186
return BlockerEntryFactory.URL_FILTER_WS_ANY_URL + ".*" + urlRegExpSource!;
180187
}
181188

189+
// Safari doesn't support non-ASCII characters in regular expressions
190+
if (!urlRegExpSource!.isASCII()) {
191+
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support non-ASCII characters in regular expressions");
192+
}
193+
194+
// Regex that we generate for basic non-regex rules are okay
195+
// But if this is a regex rule, we can't be sure
196+
if rule.isRegexRule() {
197+
try validateRegExp(urlRegExp: urlRegExpSource!);
198+
}
182199
return urlRegExpSource!;
183200
};
184201

@@ -335,7 +352,7 @@ class BlockerEntryFactory {
335352

336353
return result;
337354
};
338-
355+
339356
/**
340357
* Safari doesn't support some regular expressions
341358
* Supported expressions:
@@ -348,30 +365,33 @@ class BlockerEntryFactory {
348365
* * - Matches the preceding character zero or more times.
349366
* ? - Matches the preceding character zero or one time.
350367
*/
351-
private func validateRegExp(regExp: String) throws -> Void {
368+
private func validateRegExp(urlRegExp: String) throws -> Void {
352369
// Safari doesn't support {digit} in regular expressions
353-
if (regExp.isMatch(regex: "\\{[0-9,]+\\}")) {
354-
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support '{digit}' in regular expressions");
370+
if (urlRegExp.contains("{")) {
371+
if (SimpleRegex.isMatch(regex: BlockerEntryFactory.VALIDATE_REGEXP_DIGITS, target: urlRegExp)) {
372+
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support '{digit}' in regular expressions");
373+
}
355374
}
356375

357376
// Safari doesn't support | in regular expressions
358-
if (regExp.isMatch(regex: "[^\\\\]+\\|+\\S*")) {
359-
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support '|' in regular expressions");
360-
}
361-
362-
// Safari doesn't support non-ASCII characters in regular expressions
363-
if (regExp.isMatch(regex: "[^\\x00-\\x7F]")) {
364-
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support non-ASCII characters in regular expressions");
377+
if (urlRegExp.contains("|")) {
378+
if (SimpleRegex.isMatch(regex: BlockerEntryFactory.VALIDATE_REGEXP_OR, target: urlRegExp)) {
379+
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support '|' in regular expressions");
380+
}
365381
}
366382

367383
// Safari doesn't support negative lookahead (?!...) in regular expressions
368-
if (regExp.isMatch(regex: "\\(\\?!.*\\)")) {
369-
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support negative lookahead in regular expressions");
384+
if (urlRegExp.contains("(?!")) {
385+
if (SimpleRegex.isMatch(regex: BlockerEntryFactory.VALIDATE_REGEXP_LOOKAHEAD, target: urlRegExp)) {
386+
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support negative lookahead in regular expressions");
387+
}
370388
}
371389

372390
// Safari doesn't support metacharacters in regular expressions
373-
if (regExp.isMatch(regex: #"[^\\]\\[bBdDfnrsStvwW]"#)) {
374-
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support metacharacters in regular expressions");
391+
if (urlRegExp.contains("\\")) {
392+
if (SimpleRegex.isMatch(regex: BlockerEntryFactory.VALIDATE_REGEXP_METACHARS, target: urlRegExp)) {
393+
throw ConversionError.unsupportedRegExp(message: "Safari doesn't support metacharacters in regular expressions");
394+
}
375395
}
376396
};
377397

0 commit comments

Comments
 (0)