Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/UglyToad.PdfPig.Core/ReadHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@ public static class ReadHelper
/// </summary>
public const byte AsciiCarriageReturn = 13;

/// <summary>
/// The tab '\t' character.
/// </summary>
public const byte AsciiTab = 9;

private static readonly HashSet<int> EndOfNameCharacters =
[
' ',
AsciiCarriageReturn,
AsciiLineFeed,
9,
AsciiTab,
'>',
'<',
'[',
Expand Down
11 changes: 11 additions & 0 deletions src/UglyToad.PdfPig.Core/StreamInputBytes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,17 @@ public bool IsAtEnd()
/// <inheritdoc />
public void Seek(long position)
{
var current = CurrentOffset;
if (position == current)
{
return;
}
else if (peekByte.HasValue && position == current + 1)
{
MoveNext();
return;
}

isAtEnd = false;
peekByte = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public sealed class Type1ArrayTokenizer : ITokenizer
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;

private static readonly string[] Space = [" "];
private static readonly char[] Space = [' '];

/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
Expand Down
5 changes: 5 additions & 0 deletions src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
{
int offset = 0;

while (inputBytes.Peek() is { } b && ReadHelper.IsWhitespace(b))
{
inputBytes.MoveNext();
}

while (inputBytes.MoveNext())
{
if (inputBytes.CurrentByte == (byte)ClearToMark[offset])
Expand Down
65 changes: 43 additions & 22 deletions src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using Core;
Expand Down Expand Up @@ -41,35 +42,43 @@ private Type1Token ReadNextToken()
do
{
skip = false;
while (bytes.MoveNext())
while (bytes.Peek() is { } b)
{
var b = bytes.CurrentByte;
var c = (char)b;

switch (c)
{
case '%':
bytes.MoveNext();
comments.Add(ReadComment());
break;
case '(':
bytes.MoveNext();
return ReadString();
case ')':
throw new InvalidOperationException("Encountered an end of string ')' outside of string.");
case '[':
bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.StartArray);
case ']':
bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.EndArray);
case '{':
bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.StartProc);
case '}':
bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.EndProc);
case '/':
{
var name = ReadLiteral();
bytes.MoveNext();
TryReadLiteral(out var name);
Debug.Assert(name != null);
return new Type1Token(name, Type1Token.TokenType.Literal);
}
case '<':
{
bytes.MoveNext();
var following = bytes.Peek();
if (following == '<')
{
Expand All @@ -81,6 +90,7 @@ private Type1Token ReadNextToken()
}
case '>':
{
bytes.MoveNext();
var following = bytes.Peek();
if (following == '>')
{
Expand All @@ -94,23 +104,24 @@ private Type1Token ReadNextToken()
{
if (ReadHelper.IsWhitespace(b))
{
bytes.MoveNext();
skip = true;
break;
}

if (b == 0)
{
bytes.MoveNext();
skip = true;
break;
}

if (TryReadNumber(c, out var number))
if (TryReadNumber(out var number))
{
return number;
}

var name = ReadLiteral(c);
if (name == null)
if (!TryReadLiteral(out var name))
{
throw new InvalidOperationException($"The binary portion of the type 1 font was invalid at position {bytes.CurrentOffset}.");
}
Expand Down Expand Up @@ -197,12 +208,21 @@ char GetNext()
return null;
}

private bool TryReadNumber(char c, out Type1Token numberToken)
private bool TryReadNumber(out Type1Token numberToken)
{
char GetNext()
{
bytes.MoveNext();
return (char)bytes.CurrentByte;
return (char)(bytes.Peek() ?? 0);
}

char c = (char)(bytes.Peek() ?? 0);

if (!((c >= '0' && c <= '9') || c is '+' or '-'))
{
// Early exit: the first character is neither a digit nor a '+'/'-' sign, so this cannot be a number.
numberToken = null;
return false;
}

numberToken = null;
Expand Down Expand Up @@ -251,8 +271,6 @@ char GetNext()
else
{
// integer
bytes.Seek(bytes.CurrentOffset - 1);

numberToken = new Type1Token(sb.ToString(), Type1Token.TokenType.Integer);
return true;
}
Expand Down Expand Up @@ -309,7 +327,6 @@ char GetNext()
}
}

bytes.Seek(bytes.CurrentOffset - 1);
if (radix != null)
{
var number = Convert.ToInt32(sb.ToString(), int.Parse(radix.ToString(), CultureInfo.InvariantCulture));
Expand All @@ -323,14 +340,9 @@ char GetNext()
return true;
}

private string ReadLiteral(char? previousCharacter = null)
private bool TryReadLiteral(out string? value)
{
literalBuffer.Clear();
if (previousCharacter.HasValue)
{
literalBuffer.Append(previousCharacter);
}

do
{
var b = bytes.Peek();
Expand All @@ -350,8 +362,16 @@ private string ReadLiteral(char? previousCharacter = null)
literalBuffer.Append(c);
} while (bytes.MoveNext());

var literal = literalBuffer.ToString();
return literal.Length == 0 ? null : literal;
if (literalBuffer.Length > 0)
{
value = literalBuffer.ToString();
return true;
}
else
{
value = null;
return false;
}
}

private string ReadComment()
Expand All @@ -375,9 +395,10 @@ private string ReadComment()
private Type1DataToken ReadCharString(int length)
{
// Skip preceding space.
bytes.MoveNext();
// TODO: may be wrong
// bytes.MoveNext();
if (bytes.Peek() is { } ws && ReadHelper.IsWhitespace(ws))
{
bytes.MoveNext();
}

byte[] data = new byte[length];
for (int i = 0; i < length; i++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,10 @@ public void OnlyParsesNumberPart()
Assert.True(result);
Assert.Equal(135.6654, AssertNumericToken(token).Data);

Assert.Equal('/', (char)input.Bytes.CurrentByte);
if (tokenizer.ReadsNextByte)
Assert.Equal('/', (char)input.Bytes.CurrentByte);
else
Assert.Equal('4', (char)input.Bytes.CurrentByte);
}

[Fact]
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ internal sealed class ArrayTokenizer : ITokenizer
{
private readonly bool usePdfDocEncoding;

public bool ReadsNextByte { get; } = false;
public bool ReadsNextByte => false;

public ArrayTokenizer(bool usePdfDocEncoding)
{
Expand Down
7 changes: 4 additions & 3 deletions src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

internal sealed class CommentTokenizer : ITokenizer
{
public bool ReadsNextByte { get; } = true;
public bool ReadsNextByte => false;

public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
Expand All @@ -17,10 +17,11 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
return false;
}

using var builder = new ValueStringBuilder();
using var builder = new ValueStringBuilder(stackalloc char[32]);

while (inputBytes.MoveNext() && !ReadHelper.IsEndOfLine(inputBytes.CurrentByte))
while (inputBytes.Peek() is { } c && !ReadHelper.IsEndOfLine(c))
{
inputBytes.MoveNext();
builder.Append((char) inputBytes.CurrentByte);
}

Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ internal class DictionaryTokenizer : ITokenizer
private readonly IReadOnlyList<NameToken> requiredKeys;
private readonly bool useLenientParsing;

public bool ReadsNextByte { get; } = false;
public bool ReadsNextByte => false;

/// <summary>
/// Create a new <see cref="DictionaryTokenizer"/>.
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
public sealed class EndOfLineTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;
public bool ReadsNextByte => false;

/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
Expand Down
2 changes: 1 addition & 1 deletion src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

internal sealed class HexTokenizer : ITokenizer
{
public bool ReadsNextByte { get; } = false;
public bool ReadsNextByte => false;

public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
Expand Down
17 changes: 9 additions & 8 deletions src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@

internal sealed class NameTokenizer : ITokenizer
{
#if NET
static NameTokenizer()
{
#if NET
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
#endif
}
#endif

public bool ReadsNextByte { get; } = true;
public bool ReadsNextByte => false;

public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
Expand All @@ -35,10 +35,8 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
int postEscapeRead = 0;
Span<char> escapedChars = stackalloc char[2];

while (inputBytes.MoveNext())
while (inputBytes.Peek() is { } b)
{
var b = inputBytes.CurrentByte;

if (b == '#')
{
escapeActive = true;
Expand All @@ -52,8 +50,9 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok

if (postEscapeRead == 2)
{
int high = escapedChars[0] <= '9' ? escapedChars[0] - '0' : char.ToUpper(escapedChars[0]) - 'A' + 10;
int low = escapedChars[1] <= '9' ? escapedChars[1] - '0' : char.ToUpper(escapedChars[1]) - 'A' + 10;
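// Both characters have already been validated as hex digits, so plain ASCII arithmetic is safe:
// for 'A'-'F' and 'a'-'f' the low nibble is 1-6, and adding 9 yields 10-15 without a case conversion.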
int high = escapedChars[0] <= '9' ? escapedChars[0] - '0' : ((escapedChars[0] & 0xF) + 9);
int low = escapedChars[1] <= '9' ? escapedChars[1] - '0' : ((escapedChars[1] & 0xF) + 9);

byte characterToWrite = (byte)(high * 16 + low);

Expand Down Expand Up @@ -100,6 +99,8 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
{
bytes.Write(b);
}

inputBytes.MoveNext();
}

#if NET8_0_OR_GREATER
Expand Down
Loading
Loading