MSPUBParser.h
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* libmspub
3  * Version: MPL 1.1 / GPLv2+ / LGPLv2+
4  *
5  * The contents of this file are subject to the Mozilla Public License Version
6  * 1.1 (the "License"); you may not use this file except in compliance with
7  * the License or as specified alternatively below. You may obtain a copy of
8  * the License at http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the
13  * License.
14  *
15  * Major Contributor(s):
16  * Copyright (C) 2012 Brennan Vincent <brennanv@email.arizona.edu>
17  * Copyright (C) 2012 Fridrich Strba <fridrich.strba@bluewin.ch>
18  *
19  * All Rights Reserved.
20  *
21  * For minor contributions see the git repository.
22  *
23  * Alternatively, the contents of this file may be used under the terms of
24  * either the GNU General Public License Version 2 or later (the "GPLv2+"), or
25  * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
26  * in which case the provisions of the GPLv2+ or the LGPLv2+ are applicable
27  * instead of those above.
28  */
29 
30 #ifndef __MSPUBPARSER_H__
31 #define __MSPUBPARSER_H__
32 
33 #include <map>
34 #include <set>
35 #include <vector>
36 #include <memory>
37 
38 #include <boost/shared_ptr.hpp>
39 #include <boost/optional.hpp>
40 
41 #include <libwpd/libwpd.h>
42 #include <libwpg/libwpg.h>
43 
44 #include "MSPUBTypes.h"
45 #include "Fill.h"
46 #include "Coordinate.h"
47 #include "PolygonUtils.h"
48 
49 class WPXInputStream;
50 
51 namespace libmspub
52 {
53 class MSPUBCollector;
55 {
56  unsigned seqNum;
57 public:
58  FindBySeqNum(unsigned sn) : seqNum(sn) { }
60  {
61  return ref.seqNum == seqNum;
62  }
63 };
64 
66 {
67  unsigned seqNum;
68 public:
69  FindByParentSeqNum(unsigned sn) : seqNum(sn) { }
71  {
72  return ref.parentSeqNum == seqNum;
73  }
74 };
75 
// Value bundle returned by MSPUBParser::extractFOPTValues(): two maps, both
// keyed by unsigned short.
76 struct FOPTValues
77 {
// Entries whose value is a single unsigned integer.
78  std::map<unsigned short, unsigned> m_scalarValues;
// Entries whose value is a variable-length byte buffer.
79  std::map<unsigned short, std::vector<unsigned char> > m_complexValues;
// NOTE(review): listing line 80 is absent from this extract. The index further
// down this page records `FOPTValues()` as defined at MSPUBParser.h:80, so the
// empty braces below are presumably that constructor's body -- confirm against
// the real header.
81  {
82  }
83 };
84 
86 {
87 public:
88  explicit MSPUBParser(WPXInputStream *input, MSPUBCollector *collector);
89  virtual ~MSPUBParser();
90  virtual bool parse();
91 protected:
92  virtual unsigned getColorIndexByQuillEntry(unsigned entry);
93 
95  {
96  TextSpanReference(unsigned short f, unsigned short l, const CharacterStyle &cs) : first(f), last(l), charStyle(cs) { }
97  unsigned short first;
98  unsigned short last;
100  };
101 
103  {
104  TextParagraphReference(unsigned short f, unsigned short l, const ParagraphStyle &ps) : first(f), last(l), paraStyle(ps) { }
105  unsigned short first;
106  unsigned short last;
108  };
109 
110  typedef std::vector<ContentChunkReference>::const_iterator ccr_iterator_t;
111 
112  MSPUBParser();
113  MSPUBParser(const MSPUBParser &);
115  virtual bool parseContents(WPXInputStream *input);
116  bool parseQuill(WPXInputStream *input);
117  bool parseEscher(WPXInputStream *input);
118  bool parseEscherDelay(WPXInputStream *input);
119 
120  MSPUBBlockInfo parseBlock(WPXInputStream *input, bool skipHierarchicalData = false);
121  EscherContainerInfo parseEscherContainer(WPXInputStream *input);
122 
123  bool parseContentChunkReference(WPXInputStream *input, MSPUBBlockInfo block);
124  QuillChunkReference parseQuillChunkReference(WPXInputStream *input);
125  bool parseDocumentChunk(WPXInputStream *input, const ContentChunkReference &chunk);
126  bool parsePageChunk(WPXInputStream *input, const ContentChunkReference &chunk);
127  bool parsePaletteChunk(WPXInputStream *input, const ContentChunkReference &chunk);
128  bool parsePageShapeList(WPXInputStream *input, MSPUBBlockInfo block, unsigned pageSeqNum);
129  bool parseShape(WPXInputStream *input, const ContentChunkReference &chunk);
130  bool parseBorderArtChunk(WPXInputStream *input,
131  const ContentChunkReference &chunk);
132  bool parseFontChunk(WPXInputStream *input,
133  const ContentChunkReference &chunk);
134  void parsePaletteEntry(WPXInputStream *input, MSPUBBlockInfo block);
135  void parseColors(WPXInputStream *input, const QuillChunkReference &chunk);
136  void parseFonts(WPXInputStream *input, const QuillChunkReference &chunk);
137  void parseDefaultStyle(WPXInputStream *input, const QuillChunkReference &chunk);
138  void parseShapeGroup(WPXInputStream *input, const EscherContainerInfo &spgr, Coordinate parentCoordinateSystem, Coordinate parentGroupAbsoluteCoord);
139  void skipBlock(WPXInputStream *input, MSPUBBlockInfo block);
140  void parseEscherShape(WPXInputStream *input, const EscherContainerInfo &sp, Coordinate &parentCoordinateSystem, Coordinate &parentGroupAbsoluteCoord);
141  bool findEscherContainer(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, unsigned short type);
142  bool findEscherContainerWithTypeInSet(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, std::set<unsigned short> types);
143  std::map<unsigned short, unsigned> extractEscherValues(WPXInputStream *input, const EscherContainerInfo &record);
144  FOPTValues extractFOPTValues(WPXInputStream *input,
145  const libmspub::EscherContainerInfo &record);
146  std::vector<TextSpanReference> parseCharacterStyles(WPXInputStream *input, const QuillChunkReference &chunk);
147  std::vector<TextParagraphReference> parseParagraphStyles(WPXInputStream *input, const QuillChunkReference &chunk);
148  std::vector<Calculation> parseGuides(const std::vector<unsigned char>
149  &guideData);
150  std::vector<Vertex> parseVertices(const std::vector<unsigned char>
151  &vertexData);
152  std::vector<unsigned> parseTableCellDefinitions(WPXInputStream *input,
153  const QuillChunkReference &chunk);
154  std::vector<unsigned short> parseSegments(
155  const std::vector<unsigned char> &segmentData);
157  const std::vector<unsigned char> &vertexData,
158  const std::vector<unsigned char> &segmentData,
159  const std::vector<unsigned char> &guideData,
160  unsigned geoWidth, unsigned geoHeight);
161  int getColorIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
162  unsigned getFontIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
163  CharacterStyle getCharacterStyle(WPXInputStream *input);
164  ParagraphStyle getParagraphStyle(WPXInputStream *input);
165  boost::shared_ptr<Fill> getNewFill(const std::map<unsigned short, unsigned> &foptValues, bool &skipIfNotBg, std::map<unsigned short, std::vector<unsigned char> > &foptVal);
166 
167  WPXInputStream *m_input;
169  std::vector<MSPUBBlockInfo> m_blockInfo;
170  std::vector<ContentChunkReference> m_contentChunks;
171  std::vector<unsigned> m_cellsChunkIndices;
172  std::vector<unsigned> m_pageChunkIndices;
173  std::vector<unsigned> m_shapeChunkIndices;
174  std::vector<unsigned> m_paletteChunkIndices;
175  std::vector<unsigned> m_borderArtChunkIndices;
176  std::vector<unsigned> m_fontChunkIndices;
177  std::vector<unsigned> m_unknownChunkIndices;
178  boost::optional<unsigned> m_documentChunkIndex;
181  std::vector<int> m_alternateShapeSeqNums;
182  std::vector<int> m_escherDelayIndices;
183 
184  static short getBlockDataLength(unsigned type);
185  static bool isBlockDataString(unsigned type);
186  static PageType getPageTypeBySeqNum(unsigned seqNum);
187  static unsigned getEscherElementTailLength(unsigned short type);
188  static unsigned getEscherElementAdditionalHeaderLength(unsigned short type);
189  static ImgType imgTypeByBlipType(unsigned short type);
190  static int getStartOffset(ImgType type, unsigned short initial);
191  static bool lineExistsByFlagPointer(unsigned *flags,
192  unsigned *geomFlags = NULL);
193 };
194 
195 } // namespace libmspub
196 
197 #endif // __MSPUBPARSER_H__
198 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */
virtual unsigned getColorIndexByQuillEntry(unsigned entry)
Definition: MSPUBParser.cpp:81
static int getStartOffset(ImgType type, unsigned short initial)
Definition: MSPUBParser.cpp:207
bool findEscherContainer(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, unsigned short type)
Definition: MSPUBParser.cpp:2162
std::vector< Vertex > parseVertices(const std::vector< unsigned char > &vertexData)
Definition: MSPUBParser.cpp:2068
void parseEscherShape(WPXInputStream *input, const EscherContainerInfo &sp, Coordinate &parentCoordinateSystem, Coordinate &parentGroupAbsoluteCoord)
Definition: MSPUBParser.cpp:1460
unsigned seqNum
Definition: MSPUBTypes.h:94
std::vector< int > m_escherDelayIndices
Definition: MSPUBParser.h:182
static bool isBlockDataString(unsigned type)
Definition: MSPUBParser.cpp:2329
bool parseEscher(WPXInputStream *input)
Definition: MSPUBParser.cpp:1391
unsigned getFontIndex(WPXInputStream *input, const MSPUBBlockInfo &info)
Definition: MSPUBParser.cpp:1353
void parsePaletteEntry(WPXInputStream *input, MSPUBBlockInfo block)
Definition: MSPUBParser.cpp:2440
Definition: PolygonUtils.h:110
bool operator()(const libmspub::ContentChunkReference &ref)
Definition: MSPUBParser.h:70
std::vector< unsigned > m_borderArtChunkIndices
Definition: MSPUBParser.h:175
ParagraphStyle getParagraphStyle(WPXInputStream *input)
Definition: MSPUBParser.cpp:1163
Definition: MSPUBTypes.h:74
EscherContainerInfo parseEscherContainer(WPXInputStream *input)
Definition: MSPUBParser.cpp:2338
void parseFonts(WPXInputStream *input, const QuillChunkReference &chunk)
Definition: MSPUBParser.cpp:1043
FOPTValues()
Definition: MSPUBParser.h:80
std::vector< unsigned short > parseSegments(const std::vector< unsigned char > &segmentData)
Definition: MSPUBParser.cpp:2035
unsigned short first
Definition: MSPUBParser.h:105
MSPUBCollector * m_collector
Definition: MSPUBParser.h:168
QuillChunkReference parseQuillChunkReference(WPXInputStream *input)
Definition: MSPUBParser.cpp:833
bool parseContentChunkReference(WPXInputStream *input, MSPUBBlockInfo block)
Definition: MSPUBParser.cpp:2238
std::vector< unsigned > m_fontChunkIndices
Definition: MSPUBParser.h:176
std::vector< int > m_alternateShapeSeqNums
Definition: MSPUBParser.h:181
bool parseEscherDelay(WPXInputStream *input)
Definition: MSPUBParser.cpp:248
void parseShapeGroup(WPXInputStream *input, const EscherContainerInfo &spgr, Coordinate parentCoordinateSystem, Coordinate parentGroupAbsoluteCoord)
Definition: MSPUBParser.cpp:1437
unsigned m_lastAddedImage
Definition: MSPUBParser.h:180
ParagraphStyle paraStyle
Definition: MSPUBParser.h:107
Definition: MSPUBTypes.h:86
std::vector< ContentChunkReference >::const_iterator ccr_iterator_t
Definition: MSPUBParser.h:110
TextParagraphReference(unsigned short f, unsigned short l, const ParagraphStyle &ps)
Definition: MSPUBParser.h:104
static unsigned getEscherElementAdditionalHeaderLength(unsigned short type)
Definition: MSPUBParser.cpp:2136
CharacterStyle charStyle
Definition: MSPUBParser.h:99
Definition: Coordinate.h:34
std::vector< unsigned > m_shapeChunkIndices
Definition: MSPUBParser.h:173
static PageType getPageTypeBySeqNum(unsigned seqNum)
Definition: MSPUBParser.cpp:2399
void skipBlock(WPXInputStream *input, MSPUBBlockInfo block)
Definition: MSPUBParser.cpp:2333
bool parseFontChunk(WPXInputStream *input, const ContentChunkReference &chunk)
Definition: MSPUBParser.cpp:477
static bool lineExistsByFlagPointer(unsigned *flags, unsigned *geomFlags=NULL)
Definition: MSPUBParser.cpp:71
unsigned parentSeqNum
Definition: MSPUBTypes.h:95
CharacterStyle getCharacterStyle(WPXInputStream *input)
Definition: MSPUBParser.cpp:1293
virtual bool parse()
Definition: MSPUBParser.cpp:129
unsigned short last
Definition: MSPUBParser.h:98
PageType
Definition: MSPUBTypes.h:192
static short getBlockDataLength(unsigned type)
Definition: MSPUBParser.cpp:86
unsigned short first
Definition: MSPUBParser.h:97
ImgType
Definition: MSPUBTypes.h:199
Definition: MSPUBParser.h:54
WPXInputStream * m_input
Definition: MSPUBParser.h:167
bool parseShape(WPXInputStream *input, const ContentChunkReference &chunk)
Definition: MSPUBParser.cpp:650
void parseColors(WPXInputStream *input, const QuillChunkReference &chunk)
Definition: MSPUBParser.cpp:1089
FindBySeqNum(unsigned sn)
Definition: MSPUBParser.h:58
static unsigned getEscherElementTailLength(unsigned short type)
Definition: MSPUBParser.cpp:2124
unsigned short last
Definition: MSPUBParser.h:106
Definition: MSPUBTypes.h:66
bool findEscherContainerWithTypeInSet(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, std::set< unsigned short > types)
Definition: MSPUBParser.cpp:2147
std::vector< TextSpanReference > parseCharacterStyles(WPXInputStream *input, const QuillChunkReference &chunk)
Definition: MSPUBParser.cpp:1136
std::map< unsigned short, unsigned > m_scalarValues
Definition: MSPUBParser.h:78
std::vector< TextParagraphReference > parseParagraphStyles(WPXInputStream *input, const QuillChunkReference &chunk)
Definition: MSPUBParser.cpp:1108
std::map< unsigned short, std::vector< unsigned char > > m_complexValues
Definition: MSPUBParser.h:79
Definition: MSPUBTypes.h:149
DynamicCustomShape getDynamicCustomShape(const std::vector< unsigned char > &vertexData, const std::vector< unsigned char > &segmentData, const std::vector< unsigned char > &guideData, unsigned geoWidth, unsigned geoHeight)
Definition: MSPUBParser.cpp:2023
bool parsePaletteChunk(WPXInputStream *input, const ContentChunkReference &chunk)
Definition: MSPUBParser.cpp:2413
bool parseQuill(WPXInputStream *input)
Definition: MSPUBParser.cpp:876
Definition: MSPUBParser.h:76
bool operator()(const libmspub::ContentChunkReference &ref)
Definition: MSPUBParser.h:59
Definition: MSPUBTypes.h:98
bool parseDocumentChunk(WPXInputStream *input, const ContentChunkReference &chunk)
Definition: MSPUBParser.cpp:433
TextSpanReference(unsigned short f, unsigned short l, const CharacterStyle &cs)
Definition: MSPUBParser.h:96
std::map< unsigned short, unsigned > extractEscherValues(WPXInputStream *input, const EscherContainerInfo &record)
Definition: MSPUBParser.cpp:2224
FindByParentSeqNum(unsigned sn)
Definition: MSPUBParser.h:69
std::vector< unsigned > m_cellsChunkIndices
Definition: MSPUBParser.h:171
unsigned seqNum
Definition: MSPUBParser.h:67
Definition: MSPUBTypes.h:108
std::vector< unsigned > parseTableCellDefinitions(WPXInputStream *input, const QuillChunkReference &chunk)
Definition: MSPUBParser.cpp:858
MSPUBBlockInfo parseBlock(WPXInputStream *input, bool skipHierarchicalData=false)
Definition: MSPUBParser.cpp:2349
boost::shared_ptr< Fill > getNewFill(const std::map< unsigned short, unsigned > &foptValues, bool &skipIfNotBg, std::map< unsigned short, std::vector< unsigned char > > &foptVal)
Definition: MSPUBParser.cpp:1854
bool parsePageChunk(WPXInputStream *input, const ContentChunkReference &chunk)
Definition: MSPUBParser.cpp:594
boost::optional< unsigned > m_documentChunkIndex
Definition: MSPUBParser.h:178
virtual bool parseContents(WPXInputStream *input)
Definition: MSPUBParser.cpp:327
bool parseBorderArtChunk(WPXInputStream *input, const ContentChunkReference &chunk)
Definition: MSPUBParser.cpp:530
Definition: MSPUBCollector.h:66
Definition: MSPUBParser.h:85
std::vector< ContentChunkReference > m_contentChunks
Definition: MSPUBParser.h:170
static ImgType imgTypeByBlipType(unsigned short type)
Definition: MSPUBParser.cpp:183
std::vector< unsigned > m_pageChunkIndices
Definition: MSPUBParser.h:172
std::vector< unsigned > m_paletteChunkIndices
Definition: MSPUBParser.h:174
void parseDefaultStyle(WPXInputStream *input, const QuillChunkReference &chunk)
Definition: MSPUBParser.cpp:1061
bool parsePageShapeList(WPXInputStream *input, MSPUBBlockInfo block, unsigned pageSeqNum)
Definition: MSPUBParser.cpp:636
virtual ~MSPUBParser()
Definition: MSPUBParser.cpp:67
Definition: MSPUBParser.h:94
int m_lastSeenSeqNum
Definition: MSPUBParser.h:179
int getColorIndex(WPXInputStream *input, const MSPUBBlockInfo &info)
Definition: MSPUBParser.cpp:1374
MSPUBParser & operator=(const MSPUBParser &)
std::vector< MSPUBBlockInfo > m_blockInfo
Definition: MSPUBParser.h:169
std::vector< unsigned > m_unknownChunkIndices
Definition: MSPUBParser.h:177
std::vector< Calculation > parseGuides(const std::vector< unsigned char > &guideData)
Definition: MSPUBParser.cpp:2058
FOPTValues extractFOPTValues(WPXInputStream *input, const libmspub::EscherContainerInfo &record)
Definition: MSPUBParser.cpp:2178
unsigned seqNum
Definition: MSPUBParser.h:56
Definition: MSPUBParser.h:65

Generated for libmspub by doxygen 1.8.6