/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* ex: set filetype=cpp softtabstop=4 shiftwidth=4 tabstop=4 cindent expandtab: */ /* $Id: cmnTokenizer.h,v 1.9 2007/04/26 19:33:57 anton Exp $ Author(s): Ofri Sadowsky Created on: 2003-06-09 (C) Copyright 2003-2007 Johns Hopkins University (JHU), All Rights Reserved. --- begin cisst license - do not edit --- This software is provided "as is" under an open source license, with no warranty. The complete license can be found in license.txt and http://www.cisst.org/cisst/license.txt. --- end cisst license --- */ /*! \file cmnTokenizer.h \brief Break strings into tokens. */ #ifndef _cmnTokenizer_h #define _cmnTokenizer_h #include #include #include // Always the last cisst include #include /*! cmnTokenizer provides a convenient interface for parsing a string into a set of tokens. The parsing uses several sets of control characters: Delimiters: Separate tokens. A sequence of delimiters which is not quoted or escaped is ignored, and a new token begins after it. Quote markers: Enclose parts of, or complete, tokens. Anything between a pair of identical quote markers is included in a token. Escape markers: Quote the character immediately following them. The default value for delimiters is whitespace (space, tab, CR); for quote markers is the set of double and single quotation marks (",'); for escape markers is backslash (\). But the user can override them by calling the appropriate method. A cmnTokenizer object maintains an internal copy of the tokenized text, and can return a pointer to an array of pointers, after the fashion of argv. \note It is important to note that the stored tokens have the life span of the tokenizer. If the tokenizer is destroyed, the user cannot access any of the tokens. 
*/ class CISST_EXPORT cmnTokenizer { public: cmnTokenizer(); ~cmnTokenizer(); void SetDelimiters(const char * delimiters) { Delimiters = delimiters; } void SetQuoteMarkers(const char * markers) { QuoteMarkers = markers; } void SetEscapeMarkers(const char * markers) { EscapeMarkers = markers; } const char * GetDelimiters() const { return Delimiters; } const char * GetQuoteMarkers() const { return QuoteMarkers; } const char * GetEscapeMarkers() const { return EscapeMarkers; } static const char * GetDefaultDelimiters() { return DefaultDelimiters; } static const char * GetDefaultQuoteMarkers() { return DefaultQuoteMarkers; } static const char * GetDefaultEscapeMarkers() { return DefaultEscapeMarkers; } /*! Parse the input string and store the tokens internally. If there is a syntax error (e.g., unclosed quotes) throw an exception. \param string the text to be parsed. */ void Parse(const char * string) throw(std::runtime_error); void Parse(const std::string& string) throw(std::runtime_error) { Parse(string.c_str()); } /*! Return the number of tokens stored. */ int GetNumTokens() const { return Tokens.size(); } /*! Return the array of tokens in an argv fashion. \note This parsing returns exactly the tokens in the input string. For an argv-style set of argument, one needs to have the "name of the program" argument in index 0, and the arguments starting at index 1. Use the method GetArgvTokens() for that. */ const char * const * GetTokensArray() const { if (Tokens.empty()) return NULL; return &(Tokens[0]); } /*! This method will fill the input vector with the tokens, but first set the 0-index element to NULL, to follow the argv convention, where argv[0] contains the "name of the program". 
*/ void GetArgvTokens(std::vector & argvTokens) const; private: const char * Delimiters; const char * QuoteMarkers; const char * EscapeMarkers; static const char * const DefaultDelimiters; static const char * const DefaultQuoteMarkers; static const char * const DefaultEscapeMarkers; std::vector StringBuffer; std::vector Tokens; }; #endif // _cmnTokenizer_h // **************************************************************************** // Change History // **************************************************************************** // // $Log: cmnTokenizer.h,v $ // Revision 1.9 2007/04/26 19:33:57 anton // All files in libraries: Applied new license text, separate copyright and // updated dates, added standard header where missing. // // Revision 1.8 2006/11/20 20:33:19 anton // Licensing: Applied new license to cisstCommon, cisstVector, cisstNumerical, // cisstInteractive, cisstImage and cisstOSAbstraction. // // Revision 1.7 2005/09/26 15:41:46 anton // cisst: Added modelines for emacs and vi. // // Revision 1.6 2005/09/23 23:59:49 anton // cmnTokenizer: Use cmnThrow for all exceptions. // // Revision 1.5 2005/07/21 01:58:37 alamora // added CISST_EXPORT for shared libraries // // Revision 1.4 2005/05/19 19:29:00 anton // cisst libs: Added the license to cisstCommon and cisstVector // // Revision 1.3 2005/04/26 03:25:12 anton // cmnTokenizer: Added Parse(std::string) and used NULL instead of "0" for // pointers. // // Revision 1.2 2005/04/25 15:40:34 ofri // cmnTokenizer: Added methods GetNumTokens() and GetArgvTokens() // // Revision 1.1 2005/04/22 23:50:24 ofri // Added class cmnTokenizer to the repository. It can be use to break strings // (commands, paths, etc.) into tokens. // // // ****************************************************************************