public class StringUtil extends Object
A collection of useful String operations that can be used for matcher development.
  • Field Details

    • tokenMap

      private static HashMap<String,List<String>> tokenMap
    • myFormat

      private static String myFormat

    • ENGLISH_STOPWORDS

      private static final Set<String> ENGLISH_STOPWORDS
      A set of English stopwords.
  • Constructor Details

    • StringUtil

      public StringUtil()
  • Method Details

    • tokenize

      public static List<String> tokenize(String text)
      Makes tokens out of a String.
      Parameters:
      text - String to be tokenized.
      Returns:
      A list of tokens.
    • tokenizeToString

      public static String tokenizeToString(String text)
    • containsMostlyNumbers

      public static boolean containsMostlyNumbers(String term)
    • getProcessedString

      public static String getProcessedString(String text)
    • getTokensWithoutStopword

      public static List<String> getTokensWithoutStopword(String text)
    • removeStopwords

      public static List<String> removeStopwords(List<String> tokens)
    • removeStopwords

      public static List<String> removeStopwords(List<String> tokens, Set<String> stopwords)
    • editDistance

      public static int editDistance(String a, String b, boolean cased)
    • editDistanceNormalised

      public static double editDistanceNormalised(String a, String b)
    • isSuffix

      public static boolean isSuffix(String s1, String s2)
    • isPrefix

      public static boolean isPrefix(String s1, String s2)
    • damerauLevenshtein

      public static int damerauLevenshtein(String compOne, String compTwo)
    • damerauLevenshteinNormalised

      public static double damerauLevenshteinNormalised(String a, String b)
    • exactLength

      public static String exactLength(String in, int length)
    • getNormalised

      private static double getNormalised(double editDistance, double maxLength)
    • getMaxLength

      private static double getMaxLength(String a, String b)