1 package org.bitbucket.jrsofty.parser.logging.util;
2
3 import java.util.ArrayList;
4 import java.util.regex.Matcher;
5 import java.util.regex.Pattern;
6
7
8
9
10
11
12
/**
 * Regular-expression based helpers that split a string into an array of tokens.
 *
 * <p>The class keeps no instance state; the only shared objects are pre-compiled
 * {@link Pattern} constants. Every method treats a {@code null} input value as
 * "nothing to tokenize" and returns an empty array instead of throwing.
 */
public class Tokenizer {

    /** Matches one maximal run of non-whitespace characters (captured in group 1). */
    private static final Pattern WHITESPACE_TOKEN = Pattern.compile("(\\S+)");

    /**
     * Matches either a double-quoted section (content without the quotes in group 1)
     * or a maximal run of non-whitespace characters (group 2). The quoted alternative
     * is listed first so it wins whenever a match attempt starts on a quote that has
     * a closing partner.
     */
    private static final Pattern QUOTED_OR_WHITESPACE_TOKEN =
            Pattern.compile("\"([^\"]*)\"|(\\S+)");

    /**
     * Splits the value on whitespace.
     *
     * <p>Each maximal run of non-whitespace characters becomes one token; leading,
     * trailing and repeated whitespace never produce empty tokens.
     *
     * @param value the text to tokenize; may be {@code null}
     * @return the tokens in order of appearance; an empty array when {@code value}
     *         is {@code null} or contains no non-whitespace characters
     */
    public String[] simpleWhiteSpaceTokenizer(final String value) {
        if (value == null) {
            return new String[0];
        }

        final ArrayList<String> tokenList = new ArrayList<String>();
        final Matcher regexMatcher = WHITESPACE_TOKEN.matcher(value);
        while (regexMatcher.find()) {
            tokenList.add(regexMatcher.group(1));
        }
        return tokenList.toArray(new String[tokenList.size()]);
    }

    /**
     * Splits the value on whitespace while keeping double-quoted sections together.
     *
     * <p>A section of the form {@code "..."} becomes a single token with the
     * surrounding quotes removed, even if it contains whitespace; an empty pair of
     * quotes yields an empty-string token. Everything else is split into runs of
     * non-whitespace characters. A quote that has no closing partner, or that
     * appears in the middle of a non-whitespace run, is not treated as a delimiter
     * and stays part of the surrounding token.
     *
     * @param value the text to tokenize; may be {@code null}
     * @return the tokens in order of appearance; an empty array when {@code value}
     *         is {@code null} or contains no tokens
     */
    public String[] quotedWhiteSpaceTokenizer(final String value) {
        // Same null contract as simpleWhiteSpaceTokenizer; previously this threw a
        // NullPointerException from Pattern.matcher(null).
        if (value == null) {
            return new String[0];
        }

        final ArrayList<String> tokenList = new ArrayList<String>();
        final Matcher regexMatcher = QUOTED_OR_WHITESPACE_TOKEN.matcher(value);
        while (regexMatcher.find()) {
            // Group 1 is only set when the quoted alternative matched.
            final String quoted = regexMatcher.group(1);
            tokenList.add(quoted != null ? quoted : regexMatcher.group(2));
        }
        return tokenList.toArray(new String[tokenList.size()]);
    }

    /**
     * Tokenizes the value with a caller-supplied regular expression.
     *
     * <p>The pattern is applied repeatedly over the value. For every match, each
     * capturing group that participated in the match contributes one token, in
     * group order. Text matched outside of capturing groups is discarded, so a
     * pattern without any capturing group produces no tokens.
     *
     * <p>The pattern is compiled before the value is inspected, so an unusable
     * pattern is always reported, even when {@code value} is {@code null}.
     *
     * @param regexPattern the regular expression to apply; must not be {@code null}
     * @param value the text to tokenize; may be {@code null}
     * @return the captured tokens in order of appearance; an empty array when
     *         {@code value} is {@code null} or nothing was captured
     * @throws NullPointerException if {@code regexPattern} is {@code null}
     * @throws java.util.regex.PatternSyntaxException if {@code regexPattern} is not
     *         a valid regular expression
     */
    public String[] tokenizeWithPattern(final String regexPattern, final String value) {
        final Pattern pattern = Pattern.compile(regexPattern);

        // Same null contract as the other tokenizers; previously this threw a
        // NullPointerException from Pattern.matcher(null).
        if (value == null) {
            return new String[0];
        }

        final ArrayList<String> tokenList = new ArrayList<String>();
        final Matcher regexMatcher = pattern.matcher(value);
        // The number of capturing groups is a property of the pattern, not of the
        // individual match, so it is read once.
        final int groupCount = regexMatcher.groupCount();
        while (regexMatcher.find()) {
            for (int i = 1; i <= groupCount; i++) {
                final String captured = regexMatcher.group(i);
                // A group inside an alternative that did not match reports null.
                if (captured != null) {
                    tokenList.add(captured);
                }
            }
        }
        return tokenList.toArray(new String[tokenList.size()]);
    }
}