Skip to content

Commit

Permalink
Added the filter for noise words as per A5 requirements.
Browse files Browse the repository at this point in the history
  • Loading branch information
Nellak2017 committed Mar 9, 2024
1 parent bd6b74d commit b020a61
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 4 deletions.
6 changes: 3 additions & 3 deletions src/components/pages/App.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import React, { useState } from 'react'
import { KWIC, display, pipe } from '../../utils/helpers'
import { display, pipe, KWICv2 } from '../../utils/helpers'
import '../../styles/globals.css'

function App() {
Expand All @@ -9,8 +9,8 @@ function App() {
// Input handler: store the textarea value with every disallowed character removed.
const handleInputChange = e => setInputText(e.target.value.replace(/[^a-zA-Z\n ]/g, '')) // Keep only a..z, A..Z, spaces, and newlines; everything else is stripped
// Clear the input text
const handleResetInput = () => setInputText('')
// Clear the output text
const handleResetOutput = () => setOutputText('')
// KWIC handler: trim the input, split it on newlines, drop blank lines, run the pipeline,
// and pass the pipeline's `.result` through display before storing it as the output text.
// This commit view shows both versions of the line: the first calls KWIC, the second calls KWICv2.
const handleKWIC = () => setOutputText(pipe(display)(KWIC(inputText.trim().split('\n').filter(line => line.trim() !== '')).result))
const handleKWIC = () => setOutputText(pipe(display)(KWICv2(inputText.trim().split('\n').filter(line => line.trim() !== '')).result))

return (
<div className="flex justify-center items-center h-screen bg-blue-100 overflow-y-auto p-16">
<div className="flex flex-col space-y-4">
Expand Down
16 changes: 15 additions & 1 deletion src/utils/helpers.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// ---- Constants
// Noise words that must not start a KWIC output line, stored as a lookup table
// (lowercase word -> true) so membership is a single property read.
// The table has a null prototype so inherited keys such as "constructor" can never
// be mistaken for noise words, and it is frozen because it is a shared constant.
export const NOISE_WORDS = Object.freeze(Object.assign(Object.create(null), { "a": true, "an": true, "the": true, "and": true, "or": true, "of": true, "to": true, "be": true, "is": true, "in": true, "out": true, "by": true, "as": true, "at": true, "off": true }))
// ---- Predicates
// A letter is a string of exactly one character in a..z or A..Z.
// Non-strings, the empty string, and longer strings are all rejected.
export const isLetter = char => {
  if (typeof char !== 'string') return false
  if (char.length !== 1) return false
  return /^[a-zA-Z]/.test(char)
}
// A word is a non-empty string in which every character passes isLetter.
export const isWord = word => {
  if (typeof word !== 'string' || word.length < 1) return false
  return [...word].every(isLetter)
}
Expand Down Expand Up @@ -36,14 +39,25 @@ export const processInput = lines => isValidLines(lines)
// Normalises the lines carried by linesResult.result:
//   1. collapse every run of whitespace into a single space,
//   2. drop exact duplicates (first occurrence wins),
//   3. drop lines that are empty once trimmed.
// The surviving lines are handed to mapResult together with the incoming result object.
export const convertLines = linesResult => {
  const collapsed = linesResult.result.map(line => line.replace(/\s+/g, ' '))
  const unique = [...new Set(collapsed)]
  const nonBlank = unique.filter(line => line.trim() !== '')
  return mapResult(linesResult, nonBlank)
}
// Calls allCircularShifts on each line and concatenates the outputs into one flat list
// (one level of flattening), then hands that list to mapResult.
export const allCircularShiftsAllLines = linesResult => {
  const shifts = linesResult.result.flatMap(line => allCircularShifts(line))
  return mapResult(linesResult, shifts)
}
// Flattens linesResult.result by one level, passes the flat list through orderedSet,
// and hands the outcome to mapResult.
export const sortLines = linesResult => {
  const flatLines = linesResult.result.flat()
  const ordered = orderedSet(flatLines)
  return mapResult(linesResult, ordered)
}
// Removes every line whose first word is a noise word, compared case-insensitively.
//   linesResult - result object whose `result` property is the array of lines to filter
//   noiseWords  - lookup table whose own, truthy keys are the lowercase noise words
//                 (defaults to NOISE_WORDS)
// Returns whatever mapResult produces for (linesResult, keptLines).
export const filterNoiseWords = (linesResult, noiseWords = NOISE_WORDS) => {
  // Only own keys count: a bare `noiseWords[word]` lookup would also match inherited
  // members such as "constructor" when the table is a plain object literal.
  const isNoiseWord = word => Object.prototype.hasOwnProperty.call(noiseWords, word) && Boolean(noiseWords[word])
  // Split on any whitespace run so a tab or a double space after the first word still isolates it.
  const firstWord = line => line.trim().split(/\s+/)[0].toLowerCase()
  return mapResult(linesResult, linesResult.result.filter(line => !isNoiseWord(firstWord(line))))
}

// ---- Display function
// Renders a list of lines as one string, one list entry per output line.
export const display = list => {
  const lineBreak = '\n'
  return list.join(lineBreak)
}

// ---- KWIC Pipeline as per instructions
// ---- KWIC Pipeline as per instructions (Version 1)
export const KWIC = lines => {
  // Stages are handed to pipe in exactly this order.
  const stages = [
    processInput, // validates the raw lines and wraps them in a result object
    convertLines, // collapses whitespace runs, removes duplicate lines, drops blank lines
    allCircularShiftsAllLines, // expands every line into all of its circular shifts (one flat list)
    sortLines, // flattens and passes the lines through orderedSet (de-duplicate + sort)
  ]
  return pipe(...stages)(lines)
}

// ---- KWIC Pipeline as per instructions (Version 2, A5)
export const KWICv2 = lines => {
  // Same stages as Version 1, with the noise-word filter appended as the last stage.
  const stages = [
    processInput, // validates the raw lines and wraps them in a result object
    convertLines, // collapses whitespace runs, removes duplicate lines, drops blank lines
    allCircularShiftsAllLines, // expands every line into all of its circular shifts (one flat list)
    sortLines, // flattens and passes the lines through orderedSet (de-duplicate + sort)
    filterNoiseWords, // drops lines whose first word (any letter case) is one of: a, an, the, and, or, of, to, be, is, in, out, by, as, at, off
  ]
  return pipe(...stages)(lines)
}
46 changes: 46 additions & 0 deletions src/utils/helpers.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
sortLines,
display,
KWIC,
filterNoiseWords,
} from './helpers.js'

describe('orderedSet', () => {
Expand Down Expand Up @@ -456,4 +457,49 @@ describe('KWIC', () => {
expect(result).toEqual(testCase.expected)
})
})
})

describe('filterNoiseWords', () => {
  // Each case runs `input` through filterNoiseWords with the default noise-word table
  // and expects exactly `expectedOutput` back, in the original order.
  const testCases = [
    {
      // Every line starts with a capitalised noise word, so nothing survives.
      input: [
        "The quick brown fox",
        "A lazy dog jumps",
        "An apple a day keeps the doctor away",
        "And now for something completely different"
      ],
      expectedOutput: [
      ]
    },
    {
      // Only the line that starts with a non-noise word is kept.
      input: [
        "In the beginning was the word",
        "word was with God",
        "And the word was God"
      ],
      expectedOutput: [
        "word was with God",
      ]
    },
    {
      // Noise words that are not the first word of a line must not cause removal.
      input: [
        "tale of two cities",
        "not to be, that is the question",
        "frying pan and into the fire"
      ],
      expectedOutput: [
        "tale of two cities",
        "not to be, that is the question",
        "frying pan and into the fire"
      ]
    },
    {
      // The prefix check is case-insensitive and ignores leading whitespace.
      input: [
        "the quick brown fox",
        "oF mice and men",
        "  an indented line",
        "keep this line"
      ],
      expectedOutput: [
        "keep this line"
      ]
    }
  ]

  // %# injects the case index so each generated test gets a distinct title.
  test.each(testCases)('case %#: keeps only lines whose first word is not a noise word', ({ input, expectedOutput }) => {
    // Act
    const result = filterNoiseWords({ result: input, error: '' })

    // Assert
    expect(result.result).toEqual(expectedOutput)
  })
})

0 comments on commit b020a61

Please sign in to comment.