public abstract class BaseTokenStreamTestCase extends LuceneTestCase
When writing unit tests for analysis components, it's highly recommended
to use the helper methods here (especially in conjunction with MockAnalyzer
or
MockTokenizer
), as they contain many assertions and checks to
catch bugs.
MockAnalyzer
,
MockTokenizer
Modifier and Type | Class and Description |
---|---|
static interface |
BaseTokenStreamTestCase.CheckClearAttributesAttribute
Attribute that records if it was cleared or not.
|
static class |
BaseTokenStreamTestCase.CheckClearAttributesAttributeImpl
Attribute that records if it was cleared or not.
|
LuceneTestCase.AwaitsFix, LuceneTestCase.BadApple, LuceneTestCase.Monster, LuceneTestCase.Nightly, LuceneTestCase.Slow, LuceneTestCase.SuppressCodecs, LuceneTestCase.SuppressFileSystems, LuceneTestCase.SuppressFsync, LuceneTestCase.SuppressReproduceLine, LuceneTestCase.SuppressSysoutChecks, LuceneTestCase.SuppressTempFileChecks, LuceneTestCase.ThrowingRunnable, LuceneTestCase.Weekly
assertsAreEnabled, classRules, DEFAULT_LINE_DOCS_FILE, INFOSTREAM, JENKINS_LARGE_LINE_DOCS_FILE, LEAVE_TEMPORARY, MAYBE_CACHE_POLICY, RANDOM_MULTIPLIER, ruleChain, suiteFailureMarker, SYSPROP_AWAITSFIX, SYSPROP_BADAPPLES, SYSPROP_FAILFAST, SYSPROP_MAXFAILURES, SYSPROP_MONSTER, SYSPROP_NIGHTLY, SYSPROP_SLOW, SYSPROP_WEEKLY, TEST_ASSERTS_ENABLED, TEST_AWAITSFIX, TEST_BADAPPLES, TEST_CODEC, TEST_DIRECTORY, TEST_DOCVALUESFORMAT, TEST_LINE_DOCS_FILE, TEST_MONSTER, TEST_NIGHTLY, TEST_POSTINGSFORMAT, TEST_SLOW, TEST_THROTTLING, TEST_WEEKLY, VERBOSE
Constructor and Description |
---|
BaseTokenStreamTestCase() |
Modifier and Type | Method and Description |
---|---|
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
int[] posIncrements) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
int[] startOffsets,
int[] endOffsets) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
int[] startOffsets,
int[] endOffsets,
int[] posIncrements) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths,
boolean graphOffsetsAreCorrect) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths,
boolean graphOffsetsAreCorrect,
byte[][] payloads) |
static void |
assertAnalyzesTo(Analyzer a,
String input,
String[] output,
String[] types) |
static void |
assertAnalyzesToPositions(Analyzer a,
String input,
String[] output,
int[] posIncrements,
int[] posLengths) |
static void |
assertAnalyzesToPositions(Analyzer a,
String input,
String[] output,
String[] types,
int[] posIncrements,
int[] posLengths) |
static void |
assertGraphStrings(Analyzer analyzer,
String text,
String... expectedStrings)
Enumerates all accepted strings in the token graph created by the analyzer on the provided text, and then
asserts that it's equal to the expected strings.
|
static void |
assertGraphStrings(TokenStream tokenStream,
String... expectedStrings)
Enumerates all accepted strings in the token graph created by the already initialized
TokenStream . |
static void |
assertStreamHasNumberOfTokens(TokenStream ts,
int expectedCount)
Asserts that the given stream has expected number of tokens.
|
static void |
assertTokenStreamContents(TokenStream ts,
String[] output) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] posIncrements) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
int[] posIncrements) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
int[] posIncrements,
int[] posLengths,
Integer finalOffset) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
int[] posIncrements,
Integer finalOffset) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
Integer finalOffset) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths,
Integer finalOffset) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths,
Integer finalOffset,
boolean graphOffsetsAreCorrect) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths,
Integer finalOffset,
boolean[] keywordAtts,
boolean graphOffsetsAreCorrect) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths,
Integer finalOffset,
Integer finalPosInc,
boolean[] keywordAtts,
boolean graphOffsetsAreCorrect,
byte[][] payloads) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
int[] posLengths,
Integer finalOffset,
Integer finalPosInc,
boolean[] keywordAtts,
boolean graphOffsetsAreCorrect,
byte[][] payloads,
int[] flags) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
int[] startOffsets,
int[] endOffsets,
String[] types,
int[] posIncrements,
Integer finalOffset) |
static void |
assertTokenStreamContents(TokenStream ts,
String[] output,
String[] types) |
static void |
checkAnalysisConsistency(Random random,
Analyzer a,
boolean useCharFilter,
String text) |
static void |
checkAnalysisConsistency(Random random,
Analyzer a,
boolean useCharFilter,
String text,
boolean graphOffsetsAreCorrect) |
static void |
checkOneTerm(Analyzer a,
String input,
String expected) |
static void |
checkRandomData(Random random,
Analyzer a,
int iterations)
utility method for blasting tokenstreams with data to make sure they don't do anything crazy
|
static void |
checkRandomData(Random random,
Analyzer a,
int iterations,
boolean simple)
utility method for blasting tokenstreams with data to make sure they don't do anything crazy
|
static void |
checkRandomData(Random random,
Analyzer a,
int iterations,
int maxWordLength)
utility method for blasting tokenstreams with data to make sure they don't do anything crazy
|
static void |
checkRandomData(Random random,
Analyzer a,
int iterations,
int maxWordLength,
boolean simple) |
static void |
checkRandomData(Random random,
Analyzer a,
int iterations,
int maxWordLength,
boolean simple,
boolean graphOffsetsAreCorrect) |
static void |
checkResetException(Analyzer a,
String input) |
static String |
escape(String s) |
static Set<String> |
getGraphStrings(Analyzer analyzer,
String text)
Returns all paths accepted by the token stream graph produced by analyzing text with the provided analyzer.
|
static Set<String> |
getGraphStrings(TokenStream tokenStream)
Returns all paths accepted by the token stream graph produced by the already initialized
TokenStream . |
protected static MockTokenizer |
keywordMockTokenizer(Reader input) |
protected static MockTokenizer |
keywordMockTokenizer(String input) |
static AttributeFactory |
newAttributeFactory()
Returns a random AttributeFactory impl
|
static AttributeFactory |
newAttributeFactory(Random random)
Returns a random AttributeFactory impl
|
protected String |
toDot(Analyzer a,
String inputText) |
protected void |
toDotFile(Analyzer a,
String inputText,
String localFileName) |
static String |
toString(Analyzer analyzer,
String text)
Returns a
String summary of the tokens this analyzer produces on this text |
protected static MockTokenizer |
whitespaceMockTokenizer(Reader input) |
protected static MockTokenizer |
whitespaceMockTokenizer(String input) |
addVirusChecker, assertDeletedDocsEquals, assertDocsAndPositionsEnumEquals, assertDocsEnumEquals, assertDocsSkippingEquals, assertDocValuesEquals, assertDocValuesEquals, assertFieldInfosEquals, assertNormsEquals, assertPointsEquals, assertPositionsSkippingEquals, assertReaderEquals, assertReaderStatisticsEquals, assertStoredFieldEquals, assertStoredFieldsEquals, assertTermsEnumEquals, assertTermsEquals, assertTermsEquals, assertTermsStatisticsEquals, assertTermStatsEquals, assertTermVectorsEquals, asSet, assumeFalse, assumeNoException, assumeTrue, assumeWorkingMMapOnWindows, atLeast, atLeast, callStackContains, callStackContains, callStackContainsAnyOf, closeAfterSuite, closeAfterTest, collate, createTempDir, createTempDir, createTempFile, createTempFile, dumpArray, dumpIterator, ensureSaneIWCOnNightly, expectThrows, expectThrows, expectThrows, expectThrowsAnyOf, expectThrowsAnyOf, getBaseTempDirForTestClass, getDataInputStream, getDataPath, getOnlyLeafReader, getTestClass, getTestName, hasWorkingMMapOnWindows, isTestThread, localeForLanguageTag, maybeChangeLiveIndexWriterConfig, maybeWrapReader, newAlcoholicMergePolicy, newAlcoholicMergePolicy, newDirectory, newDirectory, newDirectory, newDirectory, newDirectory, newField, newField, newFSDirectory, newFSDirectory, newIndexWriterConfig, newIndexWriterConfig, newIndexWriterConfig, newIOContext, newIOContext, newLogMergePolicy, newLogMergePolicy, newLogMergePolicy, newLogMergePolicy, newLogMergePolicy, newMaybeVirusCheckingDirectory, newMaybeVirusCheckingFSDirectory, newMergePolicy, newMergePolicy, newMergePolicy, newMockDirectory, newMockDirectory, newMockDirectory, newMockFSDirectory, newMockFSDirectory, newSearcher, newSearcher, newSearcher, newStringField, newStringField, newStringField, newStringField, newTextField, newTextField, newTieredMergePolicy, newTieredMergePolicy, overrideDefaultQueryCache, overrideTestDefaultQueryCache, random, randomLocale, randomTimeZone, rarely, rarely, replaceMaxFailureRule, 
resetDefaultQueryCache, restoreCPUCoreCount, restoreIndexWriterMaxDocs, restoreSpins, runWithRestrictedPermissions, setIndexWriterMaxDocs, setUp, setupCPUCoreCount, setupSpins, slowFileExists, tearDown, usually, usually, wrapReader
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts, boolean graphOffsetsAreCorrect, byte[][] payloads, int[] flags) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, boolean[] keywordAtts, boolean graphOffsetsAreCorrect) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts, boolean graphOffsetsAreCorrect, byte[][] payloads) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset, boolean graphOffsetsAreCorrect) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, Integer finalOffset) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, Integer finalOffset) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, String[] types) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] posIncrements) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, Integer finalOffset) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, Integer finalOffset) throws IOException
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements, int[] posLengths, Integer finalOffset) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, boolean graphOffsetsAreCorrect) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, String[] types, int[] posIncrements, int[] posLengths, boolean graphOffsetsAreCorrect, byte[][] payloads) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException
public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, int[] posIncrements, int[] posLengths) throws IOException
public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, String[] types, int[] posIncrements, int[] posLengths) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets) throws IOException
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] startOffsets, int[] endOffsets, int[] posIncrements) throws IOException
public static void checkResetException(Analyzer a, String input) throws IOException
public static void checkOneTerm(Analyzer a, String input, String expected) throws IOException
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException
public static void checkRandomData(Random random, Analyzer a, int iterations, boolean simple) throws IOException
simple
- true if only ascii strings will be used (try to avoid)
public static void assertStreamHasNumberOfTokens(TokenStream ts, int expectedCount) throws IOException
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple) throws IOException
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean graphOffsetsAreCorrect) throws IOException
public static String escape(String s)
public static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text) throws IOException
public static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text, boolean graphOffsetsAreCorrect) throws IOException
protected String toDot(Analyzer a, String inputText) throws IOException
protected void toDotFile(Analyzer a, String inputText, String localFileName) throws IOException
protected static MockTokenizer whitespaceMockTokenizer(Reader input) throws IOException
protected static MockTokenizer whitespaceMockTokenizer(String input) throws IOException
protected static MockTokenizer keywordMockTokenizer(Reader input) throws IOException
protected static MockTokenizer keywordMockTokenizer(String input) throws IOException
public static AttributeFactory newAttributeFactory(Random random)
public static AttributeFactory newAttributeFactory()
public static void assertGraphStrings(Analyzer analyzer, String text, String... expectedStrings) throws IOException
TokenStreamToAutomaton
to create an automaton. Asserts the finite strings of the automaton are all
and only the given valid strings.analyzer
- analyzer containing the SynonymFilter under test.text
- text to be analyzed.expectedStrings
- all expected finite strings.
public static void assertGraphStrings(TokenStream tokenStream, String... expectedStrings) throws IOException
TokenStream
.
public static Set<String> getGraphStrings(Analyzer analyzer, String text) throws IOException
CharTermAttribute
values are concatenated, and separated with space.
public static Set<String> getGraphStrings(TokenStream tokenStream) throws IOException
TokenStream
.
public static String toString(Analyzer analyzer, String text) throws IOException
String
summary of the tokens this analyzer produces on this text
Copyright © 2000-2021 Apache Software Foundation. All Rights Reserved.