Skip to content

Commit 705cef2

Browse files
committed
Revert "experiment: avoid string creation from char[] for tags and attribute names"
This reverts commit d8f44bc.
1 parent d8f44bc, commit 705cef2

4 files changed

Lines changed: 8 additions & 605 deletions

File tree

src/main/java/org/htmlunit/cyberneko/HTMLScanner.java

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@
2828
import org.htmlunit.cyberneko.HTMLElements.Element;
2929
import org.htmlunit.cyberneko.io.PlaybackInputStream;
3030
import org.htmlunit.cyberneko.util.MiniStack;
31-
import org.htmlunit.cyberneko.util.StringCache;
3231
import org.htmlunit.cyberneko.xerces.util.EncodingTranslator;
3332
import org.htmlunit.cyberneko.xerces.util.NamespaceSupport;
3433
import org.htmlunit.cyberneko.xerces.util.StandardEncodingTranslator;
@@ -524,8 +523,6 @@ public class HTMLScanner implements XMLDocumentSource, XMLLocator, HTMLComponent
524523
/** Reusable parser for numeric character references (&#x...; and &#...;) */
525524
private final HTMLUnicodeEntitiesParser fUnicodeEntitiesParser = new HTMLUnicodeEntitiesParser();
526525

527-
final StringCache fStringCache = new StringCache();
528-
529526
final HTMLConfiguration htmlConfiguration_;
530527

531528
/**
@@ -1352,7 +1349,7 @@ else if (NAMES_LOWERCASE == mode && !Character.isLowerCase(c)) {
13521349
}
13531350

13541351
final int length = fCurrentEntity.offset_ - offset;
1355-
final String name = length > 0 ? fStringCache.get(fCurrentEntity.buffer_, offset, length) : null;
1352+
final String name = length > 0 ? new String(fCurrentEntity.buffer_, offset, length) : null;
13561353
if (DEBUG_BUFFER) {
13571354
fCurrentEntity.debugBufferIfNeeded(")scanName: ", " -> \"" + name + '"');
13581355
}
@@ -1422,7 +1419,7 @@ else if (NAMES_LOWERCASE == fNamesElems && !Character.isLowerCase(c)) {
14221419
}
14231420

14241421
final int length = fCurrentEntity.offset_ - offset;
1425-
final String name = length > 0 ? fStringCache.get(fCurrentEntity.buffer_, offset, length) : null;
1422+
final String name = length > 0 ? new String(fCurrentEntity.buffer_, offset, length) : null;
14261423
if (DEBUG_BUFFER) {
14271424
fCurrentEntity.debugBufferIfNeeded(")scanName: ", " -> \"" + name + '"');
14281425
}
@@ -1840,7 +1837,7 @@ int read() throws IOException {
18401837
* @return the read string (length may be smaller if EOF is encountered)
18411838
* @throws IOException in case of io problems
18421839
*/
1843-
String nextContent(final StringCache strCache, final int len) throws IOException {
1840+
String nextContent(final int len) throws IOException {
18441841
final int originalOffset = offset_;
18451842
final int originalColumnNumber = getColumnNumber();
18461843
final int originalCharacterOffset = getCharacterOffset();
@@ -1867,7 +1864,7 @@ String nextContent(final StringCache strCache, final int len) throws IOException
18671864
columnNumber_ = originalColumnNumber;
18681865
characterOffset_ = originalCharacterOffset;
18691866

1870-
return strCache.get(buff, 0, nbRead);
1867+
return new String(buff, 0, nbRead);
18711868
}
18721869

18731870
// Reads a single character, preserving the old buffer content
@@ -2422,7 +2419,7 @@ private void scanUntilEndTag(final String tagNameWithLeadingSlash) throws IOExce
24222419
break;
24232420
}
24242421
if (c == '<') {
2425-
final String next = fCurrentEntity.nextContent(fStringCache, lengthToScan) + " ";
2422+
final String next = fCurrentEntity.nextContent(lengthToScan) + " ";
24262423
if (next.length() >= lengthToScan
24272424
&& tagNameWithLeadingSlash.equalsIgnoreCase(
24282425
next.substring(0, tagNameWithLeadingSlash.length()))
@@ -3639,7 +3636,7 @@ public int scan(final boolean complete) throws IOException {
36393636
state = ScanScriptState.ESCAPED;
36403637
}
36413638
else if (c == '<') {
3642-
final String next = fCurrentEntity.nextContent(fStringCache, 8) + " ";
3639+
final String next = fCurrentEntity.nextContent(8) + " ";
36433640
if (next.length() >= 8 && "/script".equalsIgnoreCase(next.substring(0, 7))
36443641
&& ('>' == next.charAt(7) || Character.isWhitespace(next.charAt(7)))) {
36453642
fCurrentEntity.rewind();
@@ -3658,7 +3655,7 @@ else if (fScanScriptContent.endsWith("--!")) {
36583655
}
36593656
}
36603657
else if (c == '<') {
3661-
final String next = fCurrentEntity.nextContent(fStringCache, 8) + " ";
3658+
final String next = fCurrentEntity.nextContent(8) + " ";
36623659
if (next.length() >= 8 && "/script".equalsIgnoreCase(next.substring(0, 7))
36633660
&& ('>' == next.charAt(7) || Character.isWhitespace(next.charAt(7)))) {
36643661
fCurrentEntity.rewind();

src/main/java/org/htmlunit/cyberneko/util/StringCache.java

Lines changed: 0 additions & 135 deletions
This file was deleted.

src/test/java/org/htmlunit/cyberneko/HTMLScannerTest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ class MyContentScanner extends HTMLScanner.ContentScanner {
209209
@Override
210210
protected int scanComment() throws IOException {
211211
// bug was here: calling nextContent() at the end of the buffer/input
212-
fCurrentEntity.nextContent(fStringCache, 30);
212+
fCurrentEntity.nextContent(30);
213213
return super.scanComment();
214214
}
215215
}

0 commit comments

Comments
 (0)