Skip to content

Commit

Permalink
Added "Smash I" canonicizer and associated unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBerdik committed Feb 23, 2019
1 parent 8de43ce commit 1b982d5
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 0 deletions.
89 changes: 89 additions & 0 deletions src/com/jgaap/canonicizers/SmashI.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* JGAAP -- a graphical program for stylometric authorship attribution
* Copyright (C) 2009,2011 by Patrick Juola
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.jgaap.canonicizers;

import com.jgaap.generics.CanonicizationException;
import com.jgaap.generics.Canonicizer;

/**
 * Canonicizer for smashing all instances in which "I" is used as a
 * word.
 * <p>
 * A capital "I" is treated as a word only when each side of it is either
 * whitespace (as defined by {@link Character#isWhitespace(char)}) or the
 * start/end of the text. An "I" that touches any other character, such as
 * in "IPad", "I'm" or "I,", is left unchanged.
 *
 * @author David
 * @since 8.0.0
 */
public class SmashI extends Canonicizer {

    @Override
    public String displayName() {
        return "Smash I";
    }

    @Override
    public String tooltipText() {
        return "Converts all uses of \"I\" to lowercase.";
    }

    @Override
    public String longDescription() {
        return "Converts all uses of \"I\" as a word to lowercase.";
    }

    @Override
    public boolean showInGUI() {
        return true;
    }

    /**
     * Lowercases every stand-alone capital "I" in the given text.
     * <p>
     * The array is modified in place and the same array instance is
     * returned; no copy is made.
     *
     * @param procText the characters to canonicize; must not be {@code null}
     * @return {@code procText} itself, with each stand-alone 'I' replaced by 'i'
     * @throws CanonicizationException declared by the overridden signature;
     *         this implementation never throws it
     */
    @Override
    public char[] process(char[] procText) throws CanonicizationException {
        final int lastIndex = procText.length - 1;

        for (int x = 0; x <= lastIndex; x++) {
            // Only a capital I is a candidate for smashing.
            if (procText[x] != 'I') {
                continue;
            }

            // The text boundaries count as delimiters, so an I at the very
            // start or very end only needs whitespace on its inner side.
            // Explicit index checks are used instead of catching
            // ArrayIndexOutOfBoundsException for flow control.
            boolean delimitedOnLeft =
                    x == 0 || Character.isWhitespace(procText[x - 1]);
            boolean delimitedOnRight =
                    x == lastIndex || Character.isWhitespace(procText[x + 1]);

            if (delimitedOnLeft && delimitedOnRight) {
                procText[x] = 'i';
            }
        }
        return procText;
    }

}
64 changes: 64 additions & 0 deletions unittests/com/jgaap/canonicizers/SmashITest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* JGAAP -- a graphical program for stylometric authorship attribution
* Copyright (C) 2009,2011 by Patrick Juola
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.jgaap.canonicizers;

import static org.junit.Assert.assertTrue;

import java.util.Arrays;

import org.junit.Test;

import com.jgaap.generics.CanonicizationException;

/**
 * Unit test for the Smash I canonicizer.
 * <p>
 * Each case feeds a string through {@link SmashI#process(char[])} and
 * compares the result with a fully spelled-out expected string, so that a
 * failure shows both values.
 *
 * @author David
 * @since 8.0.0
 */
public class SmashITest {
    @Test
    public void testProcess() throws CanonicizationException {
        SmashI smashI = new SmashI();

        // Test 1 - I on ends ("IPad" must stay untouched because its I is
        // followed by a letter).
        assertSmashed(smashI, "I on ends",
                "I don't care if IPad is supposed to be spelled with a lowercase I",
                "i don't care if IPad is supposed to be spelled with a lowercase i");

        // Test 2 - I in middle surrounded by spaces
        assertSmashed(smashI, "I surrounded by spaces",
                "Sometimes I cannot think of creative things to write for unit tests.",
                "Sometimes i cannot think of creative things to write for unit tests.");

        // Test 3 - I in middle surrounded by tabs. The tabs are written as
        // explicit escapes and the expected text is a literal, so the case
        // does not depend on invisible whitespace or on a hard-coded index.
        assertSmashed(smashI, "I surrounded by tabs",
                "Sometimes\tI\tcannot think of creative things to write for unit tests.",
                "Sometimes\ti\tcannot think of creative things to write for unit tests.");

        // Test 4 - Bunch of I's next to each other with varying case; none
        // is delimited by whitespace, so the text must come back unchanged.
        assertSmashed(smashI, "run of adjacent I's",
                "iIiIiiIIiiiiiiIIiiIiIiIIiiIIiiIiiIiiIIiiiIiiiIiI",
                "iIiIiiIIiiiiiiIIiiIiIiIIiiIIiiIiiIiiIIiiiIiiiIiI");

        // Test 5 - A lone I is bounded by both ends of the text.
        assertSmashed(smashI, "lone I", "I", "i");

        // Test 6 - Empty input is returned as-is.
        assertSmashed(smashI, "empty input", "", "");
    }

    /**
     * Runs {@code input} through the canonicizer and asserts that the result
     * equals {@code expected}, reporting both strings on failure.
     */
    private static void assertSmashed(SmashI smashI, String label, String input, String expected)
            throws CanonicizationException {
        char[] actual = smashI.process(input.toCharArray());
        assertTrue(label + ": expected \"" + expected + "\" but was \"" + new String(actual) + "\"",
                Arrays.equals(expected.toCharArray(), actual));
    }
}

0 comments on commit 1b982d5

Please sign in to comment.