Skip to content

Commit 8f83479

Browse files
committed
Add StreamInfo to chunks
1 parent dfe0dcb commit 8f83479

6 files changed

Lines changed: 54 additions & 13 deletions

File tree

wcag-validation/src/main/java/org/verapdf/gf/model/factory/chunks/ChunkFactory.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ public final class ChunkFactory {
3636

3737
public static List<IChunk> chunksFromTokens(Integer pageNumber, COSKey objectKey, List<Object> rawTokens,
3838
GraphicsState inheritedGraphicState, ResourceHandler resourceHandler,
39-
COSKey parentObjectKey, Long markedContent) {
39+
COSKey parentObjectKey, Long markedContent, String xObjectName) {
4040
List<COSBase> arguments = new ArrayList<>();
4141
ChunkParser parser = new ChunkParser(pageNumber, objectKey, inheritedGraphicState, resourceHandler,
42-
parentObjectKey, markedContent);
42+
parentObjectKey, markedContent, xObjectName);
4343
for (int operatorIndex = 0; operatorIndex < rawTokens.size(); operatorIndex++) {
4444
Object rawToken = rawTokens.get(operatorIndex);
4545
if (rawToken instanceof COSBase) {

wcag-validation/src/main/java/org/verapdf/gf/model/factory/chunks/ChunkParser.java

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.verapdf.wcag.algorithms.entities.geometry.Vertex;
4747
import org.verapdf.wcag.algorithms.semanticalgorithms.containers.StaticContainers;
4848
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.NodeUtils;
49+
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.StreamInfo;
4950
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.TextChunkUtils;
5051

5152
import java.io.*;
@@ -80,16 +81,19 @@ public class ChunkParser {
8081
private final LineArtContainer lineArtContainer;
8182
private final COSKey parentObjectKey;
8283
private final Long parentMarkedContent;
84+
private final String xObjectName;
8385

8486
public ChunkParser(Integer pageNumber, COSKey objectKey, GraphicsState inheritedGraphicState,
85-
ResourceHandler resourceHandler, COSKey parentObjectKey, Long markedContent) {
87+
ResourceHandler resourceHandler, COSKey parentObjectKey, Long markedContent,
88+
String xObjectName) {
8689
this.pageNumber = pageNumber;
8790
lineArtContainer = new LineArtContainer(objectKey);
8891
this.objectKey = objectKey;
8992
this.graphicsState = inheritedGraphicState.clone();
9093
this.resourceHandler = resourceHandler;
9194
this.parentObjectKey = parentObjectKey;
9295
this.parentMarkedContent = markedContent;
96+
this.xObjectName = xObjectName;
9397
}
9498

9599
public List<IChunk> getArtifacts() {
@@ -371,7 +375,9 @@ public void parseChunk(Operator rawOperator, List<COSBase> arguments, int operat
371375
break;
372376
}
373377
ImageChunk imageChunk = new ImageChunk(parseImageBoundingBox());
374-
imageChunk.getOperatorIndexes().add(operatorIndex);
378+
if (StaticContainers.isDataLoader()) {
379+
imageChunk.getStreamInfos().add(new StreamInfo(operatorIndex, xObjectName));
380+
}
375381
putChunk(getMarkedContent(), imageChunk);
376382
break;
377383
}
@@ -510,11 +516,14 @@ public void parseChunk(Operator rawOperator, List<COSBase> arguments, int operat
510516
if (!processLayers()) {
511517
break;
512518
}
513-
PDXObject xObject = resourceHandler.getXObject(getLastCOSName(arguments));
519+
COSName xObjectName = getLastCOSName(arguments);
520+
PDXObject xObject = resourceHandler.getXObject(xObjectName);
514521
if (xObject != null) {
515522
if (ASAtom.IMAGE.equals(xObject.getType())) {
516523
ImageChunk imageChunk = new ImageChunk(parseImageBoundingBox());
517-
imageChunk.getOperatorIndexes().add(operatorIndex);
524+
if (StaticContainers.isDataLoader()) {
525+
imageChunk.getStreamInfos().add(new StreamInfo(operatorIndex, this.xObjectName));
526+
}
518527
putChunk(getMarkedContent(), imageChunk);
519528
} else if (ASAtom.FORM.equals(xObject.getType())) {
520529
Long markedContent = getMarkedContent();
@@ -526,7 +535,7 @@ public void parseChunk(Operator rawOperator, List<COSBase> arguments, int operat
526535
GraphicsState xFormGraphicsState = graphicsState.clone();
527536
xFormGraphicsState.getCTM().concatenate(new Matrix(((PDXForm) xObject).getMatrix()));
528537
GFSAXForm xForm = new GFSAXForm((PDXForm) xObject, resourceHandler, xFormGraphicsState, pageNumber,
529-
key, markedContent);
538+
key, markedContent, xObjectName.getName().getValue());
530539
artifacts.addAll(xForm.getArtifacts());
531540
}
532541
}
@@ -898,7 +907,9 @@ private TextChunk createTextChunk(List<COSBase> arguments, String operatorType,
898907
font.getFontDescriptor().getItalicAngle(), TextChunksHelper.calculateTextBaseLine(textRenderingMatrixAfter),
899908
graphicsState.getFillColor(), textRenderingMatrixAfter.getRotationDegree());
900909
textChunk.adjustSymbolEndsToBoundingBox(textPieces.getSymbolEnds());
901-
textChunk.getOperatorIndexes().add(operatorIndex);
910+
if (StaticContainers.isDataLoader()) {
911+
textChunk.getStreamInfos().addAll(textPieces.getStreamInfos(operatorIndex, xObjectName));
912+
}
902913
return textChunk;
903914
}
904915
return null;

wcag-validation/src/main/java/org/verapdf/gf/model/factory/chunks/TextPieces.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
*/
2121
package org.verapdf.gf.model.factory.chunks;
2222

23+
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.StreamInfo;
24+
2325
import java.util.*;
2426

2527
/**
@@ -29,9 +31,12 @@ public class TextPieces {
2931

3032
private final SortedSet<TextPiece> textPieces = new TreeSet<>(new TextPieceComparator());
3133
private double currentX = 0;
34+
private int currentIndex = 0;
3235

3336
public void add(TextPiece textPiece) {
3437
textPieces.add(textPiece);
38+
textPiece.startIndex = currentIndex;
39+
currentIndex += textPiece.value.length();
3540
currentX = textPiece.endX;
3641
}
3742

@@ -96,11 +101,34 @@ public void addSpaces(double threshold) {
96101
}
97102
textPieces.addAll(spaces);
98103
}
104+
105+
public List<StreamInfo> getStreamInfos(int operatorIndex, String xObjectName) {
106+
List<StreamInfo> streamInfos = new ArrayList<>();
107+
StreamInfo previousStreamInfo = null;
108+
for (TextPiece textPiece : textPieces) {
109+
if (textPiece.startIndex == null) {
110+
streamInfos.add(new StreamInfo(-1, null, 0,
111+
textPiece.value.length()));
112+
previousStreamInfo = null;
113+
} else {
114+
if (previousStreamInfo != null && previousStreamInfo.getEndIndex() == textPiece.startIndex) {
115+
previousStreamInfo.setEndIndex(textPiece.startIndex + textPiece.value.length());
116+
} else {
117+
StreamInfo currentStreamInfo = new StreamInfo(operatorIndex, xObjectName, textPiece.startIndex,
118+
textPiece.startIndex + textPiece.value.length(), currentIndex, null);
119+
streamInfos.add(currentStreamInfo);
120+
previousStreamInfo = currentStreamInfo;
121+
}
122+
}
123+
}
124+
return streamInfos;
125+
}
99126

100127
public static class TextPiece {
101128
private final String value;
102129
private final double startX;
103130
private final double endX;
131+
private Integer startIndex;
104132

105133
public TextPiece(String value, double startX, double endX) {
106134
this.value = value;

wcag-validation/src/main/java/org/verapdf/gf/model/impl/sa/GFSAContentStream.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,19 @@ public class GFSAContentStream {
5252
private final org.verapdf.pd.PDContentStream contentStream;
5353
private final Long markedContent;
5454
private final COSKey parentObjectKey;
55+
private final String xObjectName;
5556

5657
public GFSAContentStream(org.verapdf.pd.PDContentStream contentStream, GraphicsState inheritedGraphicsState,
5758
ResourceHandler resourceHandler, Integer pageNumber, COSKey objectKey,
58-
COSKey parentObjectKey, Long markedContent) {
59+
COSKey parentObjectKey, Long markedContent, String xObjectName) {
5960
this.pageNumber = pageNumber;
6061
this.objectKey = objectKey;
6162
this.contentStream = contentStream;
6263
this.resourceHandler = resourceHandler;
6364
this.inheritedGraphicsState = inheritedGraphicsState;
6465
this.markedContent = markedContent;
6566
this.parentObjectKey = parentObjectKey;
67+
this.xObjectName = xObjectName;
6668
}
6769

6870
public List<IChunk> getArtifacts() {
@@ -86,7 +88,7 @@ protected void parseChunks() {
8688
streamParser.parseTokens();
8789
this.artifacts = ChunkFactory.chunksFromTokens(pageNumber, objectKey,
8890
streamParser.getTokens(), inheritedGraphicsState, resourceHandler,
89-
parentObjectKey, markedContent);
91+
parentObjectKey, markedContent, xObjectName);
9092
}
9193
}
9294
}

wcag-validation/src/main/java/org/verapdf/gf/model/impl/sa/GFSAPage.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ private void parseContentStream() {
157157
GraphicsState graphicsState = new GraphicsState(resourceHandler);
158158
graphicsState.setCTM(createCurrentTransformationMatrix(pdPage));
159159
pdContentStream = new GFSAContentStream(pdPage.getContent(), graphicsState, resourceHandler,
160-
pdPage.getPageNumber(), pdPage.getObject().getKey(), null, null);
160+
pdPage.getPageNumber(), pdPage.getObject().getKey(), null, null, null);
161161
}
162162
this.contentStream = pdContentStream;
163163
}

wcag-validation/src/main/java/org/verapdf/gf/model/impl/sa/GFSAXForm.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ public class GFSAXForm {
3636

3737
public GFSAXForm(org.verapdf.pd.images.PDXForm xForm, ResourceHandler resourceHandler,
3838
GraphicsState inheritedGraphicsState, Integer pageNumber, COSKey parentObjectKey,
39-
Long markedContent) {
39+
Long markedContent, String xObjectName) {
4040
this.contentStream = new GFSAContentStream(xForm, inheritedGraphicsState,
4141
resourceHandler.getExtendedResources(xForm.getResources()), pageNumber, xForm.getObject().getKey(),
42-
parentObjectKey, markedContent);
42+
parentObjectKey, markedContent, xObjectName);
4343
}
4444

4545
public List<IChunk> getArtifacts() {

0 commit comments

Comments
 (0)