pcre_scanner.cc

Go to the documentation of this file.
00001 // Copyright (c) 2005, Google Inc.
00002 // All rights reserved.
00003 //
00004 // Redistribution and use in source and binary forms, with or without
00005 // modification, are permitted provided that the following conditions are
00006 // met:
00007 //
00008 //     * Redistributions of source code must retain the above copyright
00009 // notice, this list of conditions and the following disclaimer.
00010 //     * Redistributions in binary form must reproduce the above
00011 // copyright notice, this list of conditions and the following disclaimer
00012 // in the documentation and/or other materials provided with the
00013 // distribution.
00014 //     * Neither the name of Google Inc. nor the names of its
00015 // contributors may be used to endorse or promote products derived from
00016 // this software without specific prior written permission.
00017 //
00018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 //
00030 // Author: Sanjay Ghemawat
00031 
00032 #ifdef HAVE_CONFIG_H
00033 #include "config.h"
00034 #endif
00035 
00036 #include <vector>
00037 #include <assert.h>
00038 
00039 #include "pcrecpp_internal.h"
00040 #include "pcre_scanner.h"
00041 
00042 using std::vector;
00043 
00044 namespace pcrecpp {
00045 
00046 Scanner::Scanner()
00047   : data_(),
00048     input_(data_),
00049     skip_(NULL),
00050     should_skip_(false),
00051     skip_repeat_(false),
00052     save_comments_(false),
00053     comments_(NULL),
00054     comments_offset_(0) {
00055 }
00056 
00057 Scanner::Scanner(const string& in)
00058   : data_(in),
00059     input_(data_),
00060     skip_(NULL),
00061     should_skip_(false),
00062     skip_repeat_(false),
00063     save_comments_(false),
00064     comments_(NULL),
00065     comments_offset_(0) {
00066 }
00067 
00068 Scanner::~Scanner() {
00069   delete skip_;
00070   delete comments_;
00071 }
00072 
00073 void Scanner::SetSkipExpression(const char* re) {
00074   delete skip_;
00075   if (re != NULL) {
00076     skip_ = new RE(re);
00077     should_skip_ = true;
00078     skip_repeat_ = true;
00079     ConsumeSkip();
00080   } else {
00081     skip_ = NULL;
00082     should_skip_ = false;
00083     skip_repeat_ = false;
00084   }
00085 }
00086 
00087 void Scanner::Skip(const char* re) {
00088   delete skip_;
00089   if (re != NULL) {
00090     skip_ = new RE(re);
00091     should_skip_ = true;
00092     skip_repeat_ = false;
00093     ConsumeSkip();
00094   } else {
00095     skip_ = NULL;
00096     should_skip_ = false;
00097     skip_repeat_ = false;
00098   }
00099 }
00100 
00101 void Scanner::DisableSkip() {
00102   assert(skip_ != NULL);
00103   should_skip_ = false;
00104 }
00105 
00106 void Scanner::EnableSkip() {
00107   assert(skip_ != NULL);
00108   should_skip_ = true;
00109   ConsumeSkip();
00110 }
00111 
00112 int Scanner::LineNumber() const {
00113   // TODO: Make it more efficient by keeping track of the last point
00114   // where we computed line numbers and counting newlines since then.
00115   // We could use std:count, but not all systems have it. :-(
00116   int count = 1;
00117   for (const char* p = data_.data(); p < input_.data(); ++p)
00118     if (*p == '\n')
00119       ++count;
00120   return count;
00121 }
00122 
00123 int Scanner::Offset() const {
00124   return input_.data() - data_.c_str();
00125 }
00126 
00127 bool Scanner::LookingAt(const RE& re) const {
00128   int consumed;
00129   return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
00130 }
00131 
00132 
00133 bool Scanner::Consume(const RE& re,
00134                       const Arg& arg0,
00135                       const Arg& arg1,
00136                       const Arg& arg2) {
00137   const bool result = re.Consume(&input_, arg0, arg1, arg2);
00138   if (result && should_skip_) ConsumeSkip();
00139   return result;
00140 }
00141 
00142 // helper function to consume *skip_ and honour save_comments_
00143 void Scanner::ConsumeSkip() {
00144   const char* start_data = input_.data();
00145   while (skip_->Consume(&input_)) {
00146     if (!skip_repeat_) {
00147       // Only one skip allowed.
00148       break;
00149     }
00150   }
00151   if (save_comments_) {
00152     if (comments_ == NULL) {
00153       comments_ = new vector<StringPiece>;
00154     }
00155     // already pointing one past end, so no need to +1
00156     int length = input_.data() - start_data;
00157     if (length > 0) {
00158       comments_->push_back(StringPiece(start_data, length));
00159     }
00160   }
00161 }
00162 
00163 
00164 void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
00165   // short circuit out if we've not yet initialized comments_
00166   // (e.g., when save_comments is false)
00167   if (!comments_) {
00168     return;
00169   }
00170   // TODO: if we guarantee that comments_ will contain StringPieces
00171   // that are ordered by their start, then we can do a binary search
00172   // for the first StringPiece at or past start and then scan for the
00173   // ones contained in the range, quit early (use equal_range or
00174   // lower_bound)
00175   for (vector<StringPiece>::const_iterator it = comments_->begin();
00176        it != comments_->end(); ++it) {
00177     if ((it->data() >= data_.c_str() + start &&
00178          it->data() + it->size() <= data_.c_str() + end)) {
00179       ranges->push_back(*it);
00180     }
00181   }
00182 }
00183 
00184 
00185 void Scanner::GetNextComments(vector<StringPiece> *ranges) {
00186   // short circuit out if we've not yet initialized comments_
00187   // (e.g., when save_comments is false)
00188   if (!comments_) {
00189     return;
00190   }
00191   for (vector<StringPiece>::const_iterator it =
00192          comments_->begin() + comments_offset_;
00193        it != comments_->end(); ++it) {
00194     ranges->push_back(*it);
00195     ++comments_offset_;
00196   }
00197 }
00198 
00199 }   // namespace pcrecpp

Generated on Tue Jul 5 14:11:57 2011 for ROOT_528-00b_version by  doxygen 1.5.1