View Javadoc
1   /* 
2    * Licensed under the Apache License, Version 2.0 (the "License");
3    * you may not use this file except in compliance with the License.
4    * You may obtain a copy of the License at
5    *
6    * http://www.apache.org/licenses/LICENSE-2.0
7    *
8    * Unless required by applicable law or agreed to in writing, software
9    * distributed under the License is distributed on an "AS IS" BASIS,
10   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11   * See the License for the specific language governing permissions and
12   * limitations under the License.
13   *
14   */
15  
16  package org.esigate.parser;
17  
18  import java.io.IOException;
19  import java.util.ArrayList;
20  import java.util.Collections;
21  import java.util.List;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.http.HttpResponse;
26  import org.esigate.HttpErrorPage;
27  import org.esigate.impl.DriverRequest;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  
31  public class Parser {
32      private static final Logger LOG = LoggerFactory.getLogger(Parser.class);
33      private final Pattern pattern;
34      private final List<ElementType> elementTypes;
35      private DriverRequest httpRequest;
36      private HttpResponse httpResponse;
37  
38      /**
39       * Creates a Parser with a given regular expression pattern and ElementTypes.
40       * 
41       * @param pattern
42       *            The regular expression Pattern
43       * @param elementTypes
44       *            The element types
45       */
46      public Parser(Pattern pattern, ElementType... elementTypes) {
47          this.pattern = pattern;
48          this.elementTypes = new ArrayList<>(elementTypes.length + 1);
49          Collections.addAll(this.elementTypes, elementTypes);
50          this.elementTypes.add(new UnknownElementType());
51  
52      }
53  
54      /**
55       * Parses all the CharSequence.
56       * 
57       * @param in
58       *            The CharSequence to parse
59       * @param out
60       *            The Writable to write the result to
61       * @throws IOException
62       * @throws HttpErrorPage
63       */
64      public void parse(CharSequence in, Appendable out) throws IOException, HttpErrorPage {
65          ParserContextImpl ctx = new ParserContextImpl(out, httpRequest, httpResponse);
66          Matcher matcher = pattern.matcher(in);
67          int currentPosition = 0;
68          while (matcher.find()) {
69              String tag = matcher.group();
70              ctx.characters(in, currentPosition, matcher.start());
71              currentPosition = matcher.end();
72              if (ctx.isCurrentTagEnd(tag)) {
73                  // check if this is the end tag for current element
74                  LOG.info("Processing end tag {}", tag);
75                  ctx.endElement(tag);
76              } else {
77                  // if not, it is an opening tag for a new element
78                  LOG.info("Processing start tag {}", tag);
79                  ElementType type = null;
80                  for (ElementType t : elementTypes) {
81                      if (t.isStartTag(tag)) {
82                          type = t;
83                          break;
84                      }
85                  }
86                  Element element = type.newInstance();
87                  ctx.startElement(type, element, tag);
88                  if (type.isSelfClosing(tag)) {
89                      ctx.endElement(tag);
90                  }
91  
92              }
93          }
94          // we reached the end of input
95          ctx.characters(in, currentPosition, in.length());
96      }
97  
98      public void setHttpRequest(DriverRequest httpRequest) {
99          this.httpRequest = httpRequest;
100     }
101 
102 }