java.lang.Object
de.uni_mannheim.informatik.dws.melt.matching_jena_matchers.util.StringProcessing

public class StringProcessing extends Object
  • Field Details

    • CAMEL_CASE

      private static final Pattern CAMEL_CASE
    • NON_ALPHA

      private static final Pattern NON_ALPHA
    • ENGLISH_GENITIVE_S

      private static final Pattern ENGLISH_GENITIVE_S
    • MULTIPLE_UNDERSCORES

      private static final Pattern MULTIPLE_UNDERSCORES
    • MULTIPLE_WHITESPACE

      private static final Pattern MULTIPLE_WHITESPACE
  • Constructor Details

    • StringProcessing

      public StringProcessing()
  • Method Details

    • normalize

      public static List<String> normalize(String stringToBeNormalized)
      Normalizes a string. Recognizes camelCase.
      Parameters:
      stringToBeNormalized - The String that shall be normalized.
      Returns:
      Bag of words.
    • normalizeAndRemoveStopwords

      public static List<String> normalizeAndRemoveStopwords(String stringToBeNormalized)
      Normalizes a string and removes all (English) stopwords. Recognizes camelCase.
      Parameters:
      stringToBeNormalized - The String that shall be normalized.
      Returns:
      Bag of words.
    • normalizeToStringArray

      private static String[] normalizeToStringArray(String stringToBeNormalized)
    • containsMostlyNumbers

      public static boolean containsMostlyNumbers(String term)
    • normalizeOnlyCamelCaseAndUnderscore

      public static String normalizeOnlyCamelCaseAndUnderscore(String stringToBeNormalized)
    • normalizeOnlyCamelCaseUnderscoreAndHyphen

      public static String normalizeOnlyCamelCaseUnderscoreAndHyphen(String stringToBeNormalized)