1 /*
2 * ====================================================================
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 * ====================================================================
20 *
21 * This software consists of voluntary contributions made by many
22 * individuals on behalf of the Apache Software Foundation. For more
23 * information on the Apache Software Foundation, please see
24 * <http://www.apache.org/>.
25 *
26 */
27
28 package org.apache.http.message;
29
30 import java.util.BitSet;
31
32 import org.apache.http.annotation.ThreadingBehavior;
33 import org.apache.http.annotation.Contract;
34 import org.apache.http.util.CharArrayBuffer;
35
36 /**
37 * Low level parser for header field elements. The parsing routines of this class are designed
38 * to produce near zero intermediate garbage and make no intermediate copies of input data.
39 * <p>
40 * This class is immutable and thread safe.
41 *
42 * @since 4.4
43 */
44 @Contract(threading = ThreadingBehavior.IMMUTABLE)
45 public class TokenParser {
46
47 public static BitSet INIT_BITSET(final int ... b) {
48 final BitSet bitset = new BitSet();
49 for (final int aB : b) {
50 bitset.set(aB);
51 }
52 return bitset;
53 }
54
55 /** US-ASCII CR, carriage return (13) */
56 public static final char CR = '\r';
57
58 /** US-ASCII LF, line feed (10) */
59 public static final char LF = '\n';
60
61 /** US-ASCII SP, space (32) */
62 public static final char SP = ' ';
63
64 /** US-ASCII HT, horizontal-tab (9) */
65 public static final char HT = '\t';
66
67 /** Double quote */
68 public static final char DQUOTE = '\"';
69
70 /** Backward slash / escape character */
71 public static final char ESCAPE = '\\';
72
73 public static boolean isWhitespace(final char ch) {
74 return ch == SP || ch == HT || ch == CR || ch == LF;
75 }
76
77 public static final TokenParser INSTANCE = new TokenParser();
78
79 /**
80 * Extracts from the sequence of chars a token terminated with any of the given delimiters
81 * discarding semantically insignificant whitespace characters.
82 *
83 * @param buf buffer with the sequence of chars to be parsed
84 * @param cursor defines the bounds and current position of the buffer
85 * @param delimiters set of delimiting characters. Can be {@code null} if the token
86 * is not delimited by any character.
87 */
88 public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
89 final StringBuilder dst = new StringBuilder();
90 boolean whitespace = false;
91 while (!cursor.atEnd()) {
92 final char current = buf.charAt(cursor.getPos());
93 if (delimiters != null && delimiters.get(current)) {
94 break;
95 } else if (isWhitespace(current)) {
96 skipWhiteSpace(buf, cursor);
97 whitespace = true;
98 } else {
99 if (whitespace && dst.length() > 0) {
100 dst.append(' ');
101 }
102 copyContent(buf, cursor, delimiters, dst);
103 whitespace = false;
104 }
105 }
106 return dst.toString();
107 }
108
109 /**
110 * Extracts from the sequence of chars a value which can be enclosed in quote marks and
111 * terminated with any of the given delimiters discarding semantically insignificant
112 * whitespace characters.
113 *
114 * @param buf buffer with the sequence of chars to be parsed
115 * @param cursor defines the bounds and current position of the buffer
116 * @param delimiters set of delimiting characters. Can be {@code null} if the value
117 * is not delimited by any character.
118 */
119 public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
120 final StringBuilder dst = new StringBuilder();
121 boolean whitespace = false;
122 while (!cursor.atEnd()) {
123 final char current = buf.charAt(cursor.getPos());
124 if (delimiters != null && delimiters.get(current)) {
125 break;
126 } else if (isWhitespace(current)) {
127 skipWhiteSpace(buf, cursor);
128 whitespace = true;
129 } else if (current == DQUOTE) {
130 if (whitespace && dst.length() > 0) {
131 dst.append(' ');
132 }
133 copyQuotedContent(buf, cursor, dst);
134 whitespace = false;
135 } else {
136 if (whitespace && dst.length() > 0) {
137 dst.append(' ');
138 }
139 copyUnquotedContent(buf, cursor, delimiters, dst);
140 whitespace = false;
141 }
142 }
143 return dst.toString();
144 }
145
146 /**
147 * Skips semantically insignificant whitespace characters and moves the cursor to the closest
148 * non-whitespace character.
149 *
150 * @param buf buffer with the sequence of chars to be parsed
151 * @param cursor defines the bounds and current position of the buffer
152 */
153 public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
154 int pos = cursor.getPos();
155 final int indexFrom = cursor.getPos();
156 final int indexTo = cursor.getUpperBound();
157 for (int i = indexFrom; i < indexTo; i++) {
158 final char current = buf.charAt(i);
159 if (!isWhitespace(current)) {
160 break;
161 }
162 pos++;
163 }
164 cursor.updatePos(pos);
165 }
166
167 /**
168 * Transfers content into the destination buffer until a whitespace character or any of
169 * the given delimiters is encountered.
170 *
171 * @param buf buffer with the sequence of chars to be parsed
172 * @param cursor defines the bounds and current position of the buffer
173 * @param delimiters set of delimiting characters. Can be {@code null} if the value
174 * is delimited by a whitespace only.
175 * @param dst destination buffer
176 */
177 public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
178 final StringBuilder dst) {
179 int pos = cursor.getPos();
180 final int indexFrom = cursor.getPos();
181 final int indexTo = cursor.getUpperBound();
182 for (int i = indexFrom; i < indexTo; i++) {
183 final char current = buf.charAt(i);
184 if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
185 break;
186 }
187 pos++;
188 dst.append(current);
189 }
190 cursor.updatePos(pos);
191 }
192
193 /**
194 * Transfers content into the destination buffer until a whitespace character, a quote,
195 * or any of the given delimiters is encountered.
196 *
197 * @param buf buffer with the sequence of chars to be parsed
198 * @param cursor defines the bounds and current position of the buffer
199 * @param delimiters set of delimiting characters. Can be {@code null} if the value
200 * is delimited by a whitespace or a quote only.
201 * @param dst destination buffer
202 */
203 public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
204 final BitSet delimiters, final StringBuilder dst) {
205 int pos = cursor.getPos();
206 final int indexFrom = cursor.getPos();
207 final int indexTo = cursor.getUpperBound();
208 for (int i = indexFrom; i < indexTo; i++) {
209 final char current = buf.charAt(i);
210 if ((delimiters != null && delimiters.get(current))
211 || isWhitespace(current) || current == DQUOTE) {
212 break;
213 }
214 pos++;
215 dst.append(current);
216 }
217 cursor.updatePos(pos);
218 }
219
220 /**
221 * Transfers content enclosed with quote marks into the destination buffer.
222 *
223 * @param buf buffer with the sequence of chars to be parsed
224 * @param cursor defines the bounds and current position of the buffer
225 * @param dst destination buffer
226 */
227 public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
228 final StringBuilder dst) {
229 if (cursor.atEnd()) {
230 return;
231 }
232 int pos = cursor.getPos();
233 int indexFrom = cursor.getPos();
234 final int indexTo = cursor.getUpperBound();
235 char current = buf.charAt(pos);
236 if (current != DQUOTE) {
237 return;
238 }
239 pos++;
240 indexFrom++;
241 boolean escaped = false;
242 for (int i = indexFrom; i < indexTo; i++, pos++) {
243 current = buf.charAt(i);
244 if (escaped) {
245 if (current != DQUOTE && current != ESCAPE) {
246 dst.append(ESCAPE);
247 }
248 dst.append(current);
249 escaped = false;
250 } else {
251 if (current == DQUOTE) {
252 pos++;
253 break;
254 }
255 if (current == ESCAPE) {
256 escaped = true;
257 } else if (current != CR && current != LF) {
258 dst.append(current);
259 }
260 }
261 }
262 cursor.updatePos(pos);
263 }
264
265 }
266