"""Parser for .vsd (Visio binary) files using the OLE2/Compound Binary format.
Based on analysis of the libvisio C++ parser from LibreOffice.
Uses olefile to read the OLE2 structured storage.
The .vsd binary format stores data in streams within an OLE2 container.
The main stream is "VisioDocument" which contains a pointer-based tree
of records (chunks). Each chunk has a header with type, id, data length, etc.
Author: Daniel Nylander <daniel@danielnylander.se>
"""
from __future__ import annotations
import struct
from dataclasses import dataclass, field
from io import BytesIO
from pathlib import Path
# Record type constants (from libvisio VSDDocumentStructure.h).
# Within this module the values are compared against a chunk header's
# ``chunk_type`` (see ``VsdParser._handle_chunk``) or against a stream
# pointer's ``ptr_type`` (see ``VsdParser._handle_stream_pointer``); many of
# them are declared for completeness and are not acted on in this file's
# visible code.
VSD_FOREIGN_DATA = 0x0C
VSD_OLE_LIST = 0x0D
VSD_TEXT = 0x0E
VSD_TRAILER_STREAM = 0x14
VSD_PAGE = 0x15
VSD_COLORS = 0x16
VSD_FONT_LIST = 0x18
VSD_FONT_IX = 0x19
VSD_STYLES = 0x1A
VSD_STENCILS = 0x1D
VSD_STENCIL_PAGE = 0x1E
VSD_OLE_DATA = 0x1F
VSD_PAGES = 0x27
VSD_NAME_LIST = 0x2C
VSD_NAME = 0x2D
VSD_NAME_LIST2 = 0x32
VSD_NAME2 = 0x33
VSD_NAMEIDX123 = 0x34
VSD_PAGE_SHEET = 0x46
# The three shape record types below plus VSD_SHAPE_FOREIGN start a new
# VsdShape in VsdParser._handle_chunk (VSD_SHAPE_GUIDE does not).
VSD_SHAPE_GROUP = 0x47
VSD_SHAPE_SHAPE = 0x48
VSD_SHAPE_GUIDE = 0x4D
VSD_SHAPE_FOREIGN = 0x4E
# NOTE: listed out of numeric order (0x4A follows 0x4E).
VSD_STYLE_SHEET = 0x4A
VSD_SCRATCH_LIST = 0x64
VSD_SHAPE_LIST = 0x65
VSD_FIELD_LIST = 0x66
VSD_PROP_LIST = 0x68
VSD_CHAR_LIST = 0x69
VSD_PARA_LIST = 0x6A
VSD_TABS_DATA_LIST = 0x6B
VSD_GEOM_LIST = 0x6C
VSD_CUST_PROPS_LIST = 0x6D
VSD_ACT_ID_LIST = 0x6E
VSD_LAYER_LIST = 0x6F
VSD_CTRL_LIST = 0x70
VSD_C_PNTS_LIST = 0x71
VSD_CONNECT_LIST = 0x72
VSD_HYPER_LNK_LIST = 0x73
VSD_SMART_TAG_LIST = 0x76
VSD_SHAPE_ID = 0x83
VSD_EVENT = 0x84
VSD_LINE = 0x85
VSD_FILL_AND_SHADOW = 0x86
VSD_TEXT_BLOCK = 0x87
VSD_TABS_DATA_1 = 0x88
VSD_GEOMETRY = 0x89
VSD_MOVE_TO = 0x8A
VSD_LINE_TO = 0x8B
VSD_ARC_TO = 0x8C
VSD_INFINITE_LINE = 0x8D
VSD_ELLIPSE = 0x8F
VSD_ELLIPTICAL_ARC_TO = 0x90
VSD_PAGE_PROPS = 0x92
VSD_STYLE_PROPS = 0x93
VSD_CHAR_IX = 0x94
VSD_PARA_IX = 0x95
VSD_FOREIGN_DATA_TYPE = 0x98
VSD_CONNECTION_POINTS = 0x99
VSD_XFORM_DATA = 0x9B
VSD_TEXT_XFORM = 0x9C
VSD_XFORM_1D = 0x9D
VSD_SCRATCH = 0x9E
VSD_PROTECTION = 0xA0
VSD_TEXT_FIELD = 0xA1
VSD_MISC = 0xA4
VSD_SPLINE_START = 0xA5
VSD_SPLINE_KNOT = 0xA6
VSD_LAYER_MEMBERSHIP = 0xA7
VSD_LAYER = 0xA8
VSD_ACT_ID = 0xA9
VSD_CONTROL = 0xAA
VSD_USER_DEFINED_CELLS = 0xB4
VSD_CUSTOM_PROPS = 0xB6
VSD_RULER_GRID = 0xB7
VSD_CONN_PTS_ALT = 0xBA
VSD_DOC_PROPS = 0xBC
VSD_IMAGE = 0xBD
VSD_GROUP = 0xBE
VSD_LAYOUT = 0xBF
VSD_PAGE_LAYOUT_IX = 0xC0
VSD_POLYLINE_TO = 0xC1
VSD_NURBS_TO = 0xC3
VSD_HYPERLINK = 0xC4
VSD_REVIEWER = 0xC5
VSD_ANNOTATION = 0xC6
VSD_SMART_TAG_DEF = 0xC7
VSD_PRINT_PROPS = 0xC8
VSD_NAMEIDX = 0xC9
VSD_SHAPE_DATA = 0xD1
VSD_FONTFACE = 0xD7
VSD_FONTFACES = 0xD8
# Lookup sets used by _parse_chunk_header to work out how many trailer bytes
# follow a chunk's data.
#
# _TRAILER_TYPES: chunk types that receive 4 extra trailer bytes unless the
# trailer computed so far is already 4 or 12.
_TRAILER_TYPES = {0x64, 0x65, 0x66, 0x69, 0x6A, 0x6B, 0x6F, 0x71,
                  0x92, 0xA9, 0xB4, 0xB6, 0xB9, 0xC7}
# _LIST_TRAILER_TYPES: chunk types that receive an 8-byte trailer even when the
# header's list flag is zero.
_LIST_TRAILER_TYPES = {0x71, 0x70, 0x6B, 0x6A, 0x69, 0x66, 0x65, 0x2C}
# _NO_TRAILER_TYPES: chunk types whose trailer is forced back to 0 after all
# other rules have been applied.
_NO_TRAILER_TYPES = {0x1F, 0xC9, 0x2D, 0xD1}
@dataclass
class XForm:
    """Shape transform values, populated by ``VsdParser._read_xform_data``.

    The numeric fields hold the doubles exactly as read from the record;
    the parser applies no unit conversion to them.
    """
    pin_x: float = 0.0
    pin_y: float = 0.0
    width: float = 0.0
    height: float = 0.0
    loc_pin_x: float = 0.0
    loc_pin_y: float = 0.0
    angle: float = 0.0
    # Flip flags are True when the corresponding record byte is non-zero.
    flip_x: bool = False
    flip_y: bool = False
[docs]
@dataclass
class TextXForm:
    """Text block transform — positions text independently of shape.

    Populated by ``VsdParser._read_text_xform``; every field holds the double
    read from the record, unconverted.
    """
    txt_pin_x: float = 0.0
    txt_pin_y: float = 0.0
    txt_width: float = 0.0
    txt_height: float = 0.0
    txt_loc_pin_x: float = 0.0
    txt_loc_pin_y: float = 0.0
    txt_angle: float = 0.0
@dataclass
class GeomRow:
    """One row of a geometry section.

    ``row_type`` is the label set by the reader that created the row
    (e.g. "MoveTo", "LineTo", "NURBSTo"). Which of ``a``..``d`` are filled
    depends on the row type; unused fields keep their defaults.
    """
    row_type: str = ""
    x: float = 0.0
    y: float = 0.0
    a: float = 0.0
    b: float = 0.0
    c: float = 0.0
    d: float = 0.0
    # For NURBS/Polyline: extra data
    knot_last: float = 0.0
    degree: int = 0
    x_type: int = 0
    y_type: int = 0
    # PolylineTo rows store (x, y) tuples; NURBSTo rows store
    # (x, y, knot, weight) tuples.
    points: list = field(default_factory=list)
@dataclass
class GeomSection:
    """A geometry section: three visibility flags plus its rows.

    ``VsdParser._read_geometry`` derives the flags from bits 0, 1 and 2 of
    the record's first byte.
    """
    no_fill: bool = False
    no_line: bool = False
    no_show: bool = False
    rows: list = field(default_factory=list)  # GeomRow instances, in read order
@dataclass
class CharFormat:
    """Character formatting run, populated by ``VsdParser._read_char_ix``."""
    char_count: int = 0
    font_id: int = 0
    # Colour components as individual 0-255 byte values.
    color_r: int = 0
    color_g: int = 0
    color_b: int = 0
    # Derived from bits 0, 1 and 2 of the record's font-modifier byte.
    bold: bool = False
    italic: bool = False
    underline: bool = False
    font_size: float = 12.0
[docs]
@dataclass
class ForeignData:
    """Embedded image/OLE data.

    ``data_type`` and ``img_format`` are set by
    ``VsdParser._read_foreign_data_type``; ``data`` receives the raw chunk
    payload in ``VsdParser._read_foreign_data``.
    """
    data_type: str = "" # "img", "ole", "metafile"
    img_format: str = "" # "png", "jpg", "bmp", "emf", "wmf"
    data: bytes = b""
@dataclass
class ConnectionPoint:
    """A connection point's coordinates as read by ``_read_connection_points``."""
    x: float = 0.0
    y: float = 0.0
[docs]
@dataclass
class VsdShape:
    """Everything the parser collects for a single shape.

    A new instance is created by ``VsdParser._handle_chunk`` whenever a
    shape-start chunk is seen; subsequent chunks fill in the fields until
    the shape is flushed onto the current page.
    """
    shape_id: int = 0  # record id of the shape-start chunk
    shape_type: str = "Shape"  # "Shape", "Group" or "Foreign"
    # The next three values are read by _read_shape_header as unsigned 32-bit
    # integers, so the -1 defaults only survive when the header is too short.
    parent: int = 0
    master_page: int = -1
    master_shape: int = -1
    xform: XForm = field(default_factory=XForm)
    text_xform: TextXForm | None = None
    xform_1d: XForm1D | None = None
    text: str = ""
    geometry: list = field(default_factory=list)  # GeomSection instances
    char_formats: list = field(default_factory=list)  # CharFormat instances
    para_formats: list = field(default_factory=list)
    line_weight: float = 0.01
    line_color: str = "#000000"  # "#RRGGBB"
    line_pattern: int = 1
    fill_foreground: str = ""  # "#RRGGBB" once a fill record has been read
    fill_background: str = ""
    fill_pattern: int = 1
    shadow_offset_x: float = 0.0
    shadow_offset_y: float = 0.0
    shadow_color: str = ""
    shadow_pattern: int = 0
    children: list = field(default_factory=list)
    sub_shapes: list = field(default_factory=list)
    layer_member: str = ""
    foreign_data: ForeignData | None = None
    connection_points: list = field(default_factory=list)  # ConnectionPoint instances
    text_block_bg: str = ""  # "#RRGGBB"; left empty when the colour bytes are all zero
    text_block_margin_left: float = 0.0
    text_block_margin_right: float = 0.0
    text_block_margin_top: float = 0.0
    text_block_margin_bottom: float = 0.0
    text_block_valign: int = 0 # 0=top, 1=middle, 2=bottom
[docs]
@dataclass
class VsdPage:
    """A page (or stencil page) and the shapes flushed onto it."""
    page_id: int = 0
    name: str = ""
    # Defaults are replaced by _read_page_props when a page-props chunk is read.
    width: float = 8.5
    height: float = 11.0
    shapes: list = field(default_factory=list)  # VsdShape instances
    # Set to True when the page pointer's format bit 0 is clear.
    background: bool = False
[docs]
@dataclass
class VsdDocument:
    """Top-level parse result returned by ``VsdParser.parse``."""
    pages: list = field(default_factory=list)  # VsdPage instances
    colors: list = field(default_factory=list)  # "#RRGGBB" strings from the colour table
    fonts: dict = field(default_factory=dict)
    names: dict = field(default_factory=dict) # id -> name string
    stencil_pages: list = field(default_factory=list) # master shapes
# Binary reading helpers
def _read_u8(data, offset):
if offset >= len(data): return 0, offset + 1
return data[offset], offset + 1
def _read_u16(data, offset):
if offset + 2 > len(data): return 0, offset + 2
return struct.unpack_from('<H', data, offset)[0], offset + 2
def _read_u32(data, offset):
if offset + 4 > len(data): return 0, offset + 4
return struct.unpack_from('<I', data, offset)[0], offset + 4
def _read_s32(data, offset):
if offset + 4 > len(data): return 0, offset + 4
return struct.unpack_from('<i', data, offset)[0], offset + 4
def _read_double(data, offset):
if offset + 8 > len(data): return 0.0, offset + 8
return struct.unpack_from('<d', data, offset)[0], offset + 8
def _read_s16(data, offset):
if offset + 2 > len(data): return 0, offset + 2
return struct.unpack_from('<h', data, offset)[0], offset + 2
@dataclass
class ChunkHeader:
    """Decoded 19-byte chunk header, as produced by ``_parse_chunk_header``."""
    chunk_type: int = 0  # u32 record type (compared against the VSD_* constants)
    record_id: int = 0  # u32
    list_flag: int = 0  # u32; non-zero values enlarge the trailer
    data_length: int = 0  # u32 number of payload bytes following the header
    level: int = 0  # u16
    unknown: int = 0  # u8; only used as input to the trailer rules
    # Computed, not read: number of bytes to skip after the payload.
    trailer: int = 0
def _parse_chunk_header(data, offset):
    """Decode the chunk header that starts at or after *offset*.

    Zero bytes preceding the header are skipped first. Returns
    ``(header, offset_of_payload)``, or ``(None, offset)`` when fewer than
    19 bytes remain after the skipped zeros.

    The header's ``trailer`` field is not stored in the data; it is derived
    from the other header fields by the rules below.
    """
    size = len(data)
    while offset < size and data[offset] == 0:
        offset += 1
    if offset + 19 > size:
        return None, offset

    hdr = ChunkHeader()
    hdr.chunk_type, offset = _read_u32(data, offset)
    hdr.record_id, offset = _read_u32(data, offset)
    hdr.list_flag, offset = _read_u32(data, offset)
    hdr.data_length, offset = _read_u32(data, offset)
    hdr.level, offset = _read_u16(data, offset)
    hdr.unknown, offset = _read_u8(data, offset)

    kind = hdr.chunk_type
    is_list = hdr.list_flag != 0

    # Rule 1: list chunks (by flag or by type) start with an 8-byte trailer.
    trailer = 8 if (is_list or kind in _LIST_TRAILER_TYPES) else 0

    # Rule 2: certain flag/level/unknown combinations add 4 bytes.
    level2_match = hdr.level == 2 and (
        hdr.unknown == 0x55 or (hdr.unknown == 0x54 and kind == 0xAA)
    )
    level3_match = hdr.level == 3 and hdr.unknown not in (0x50, 0x54)
    if is_list or level2_match or level3_match:
        trailer += 4

    # Rule 3: listed types get 4 more unless the trailer is already 4 or 12.
    if kind in _TRAILER_TYPES and trailer not in (12, 4):
        trailer += 4

    # Rule 4: some types never carry a trailer.
    if kind in _NO_TRAILER_TYPES:
        trailer = 0

    hdr.trailer = trailer
    return hdr, offset
@dataclass
class Pointer:
    """An 18-byte stream pointer record, as decoded by ``_read_pointer``."""
    ptr_type: int = 0  # u32 stream type (compared against the VSD_* constants)
    offset: int = 0  # u32 start of the stream within the full document data
    length: int = 0  # u32 stream length in bytes
    # u16 format word: bit 1 (value 2) marks a compressed stream, bit 0 is
    # used for background-page detection, and the bits above bit 3 select
    # how the stream content is parsed.
    fmt: int = 0
def _read_pointer(data, offset):
    """Decode an 18-byte pointer record at *offset*.

    Layout: u32 type, 4 skipped bytes, u32 offset, u32 length, u16 format.
    Returns ``(Pointer, offset_after_record)``.
    """
    kind, cursor = _read_u32(data, offset)
    # The four bytes after the type are not used.
    start, cursor = _read_u32(data, cursor + 4)
    size, cursor = _read_u32(data, cursor)
    fmt, cursor = _read_u16(data, cursor)
    return Pointer(ptr_type=kind, offset=start, length=size, fmt=fmt), cursor
class VsdParser:
"""Parse a .vsd binary Visio file."""
def __init__(self, data: bytes) -> None:
    """Keep the raw file bytes and start with an empty document.

    Args:
        data: Complete contents of the .vsd file.
    """
    self.data = data
    self.doc = VsdDocument()
    # Parsing cursor: the page/shape/geometry section currently being filled.
    self._current_page = self._current_shape = self._current_geom = None
    self._page_is_background = False
    self._current_level = 0
    self._shape_stack: list[VsdShape] = []  # for nested groups
def parse(self) -> VsdDocument:
try:
import olefile
except ImportError:
raise ImportError(
"olefile is required for .vsd parsing. "
"Install it with: pip install olefile"
)
if not olefile.isOleFile(self.data):
raise ValueError("Not a valid OLE2/Compound Binary file")
ole = olefile.OleFileIO(BytesIO(self.data))
try:
self._parse_ole(ole)
finally:
ole.close()
return self.doc
def _parse_ole(self, ole):
if ole.exists("VisioDocument"):
stream_data = ole.openstream("VisioDocument").read()
self._parse_visio_document(stream_data)
def _parse_visio_document(self, data):
if len(data) < 0x36:
return
# Try pointer-based parsing first
try:
trailer_ptr, _ = _read_pointer(data, 0x24)
if 0 < trailer_ptr.offset < len(data):
compressed = (trailer_ptr.fmt & 2) == 2
if compressed:
trailer_data = self._decompress_stream(data, trailer_ptr.offset, trailer_ptr.length)
else:
end = min(trailer_ptr.offset + trailer_ptr.length, len(data))
trailer_data = data[trailer_ptr.offset:end]
if trailer_data and len(trailer_data) > 8:
self._parse_pointer_tree(data, trailer_data, compressed)
if self.doc.pages:
return
except Exception:
pass
# Fallback: linear chunk scanning
self._parse_chunks_linear(data, 0x36)
def _decompress_stream(self, data, offset, length):
import zlib
end = min(offset + length, len(data))
compressed_data = data[offset:end]
if len(compressed_data) < 4:
return compressed_data
try:
return zlib.decompress(compressed_data[4:], -15)
except zlib.error:
try:
return zlib.decompress(compressed_data[4:])
except zlib.error:
return compressed_data
def _parse_pointer_tree(self, full_data, trailer_data, compressed):
    """Walk the top-level pointer list in the trailer stream.

    The trailer holds a list of stream pointers followed by an optional
    ordering list. Name lists are processed first, then name indexes, then
    font faces; the remaining pointers are processed in the order given by
    the ordering list, followed by any that the list did not mention.

    Args:
        full_data: The whole "VisioDocument" stream; pointers refer into it.
        trailer_data: The (already decompressed) trailer stream.
        compressed: Whether the trailer was stored compressed, in which case
            all positions are shifted by four bytes.
    """
    shift = 4 if compressed else 0
    size = len(trailer_data)
    if size < shift + 4:
        return
    offset_val, _ = _read_u32(trailer_data, shift)
    seek_pos = offset_val + shift - 4
    # A stored offset below 4 in an uncompressed stream gives a negative
    # position; reading there would wrap around to the end of the buffer
    # (or raise), so treat it as malformed.
    if seek_pos < 0 or seek_pos + 12 > size:
        return
    list_size, pos = _read_u32(trailer_data, seek_pos)
    pointer_count, pos = _read_s32(trailer_data, pos)
    pos += 4

    # Separate pointers by type for ordered processing
    font_faces = {}
    name_lists = {}
    name_idx = {}
    other_ptrs = {}
    for i in range(max(0, pointer_count)):
        if pos + 18 > size:
            break
        ptr, pos = _read_pointer(trailer_data, pos)
        if ptr.ptr_type == 0:
            continue
        if ptr.ptr_type == VSD_FONTFACES:
            font_faces[i] = ptr
        elif ptr.ptr_type == VSD_NAME_LIST2:
            name_lists[i] = ptr
        elif ptr.ptr_type in (VSD_NAMEIDX, VSD_NAMEIDX123):
            name_idx[i] = ptr
        else:
            other_ptrs[i] = ptr

    # Read ordering list
    pointer_order = []
    if list_size > 1:
        for _ in range(list_size):
            # list_size comes from the file; stop as soon as the data runs
            # out instead of spinning through a bogus multi-billion count.
            if pos + 4 > size:
                break
            val, pos = _read_u32(trailer_data, pos)
            pointer_order.append(val)

    # Process name lists first, then name indexes, then fonts, then others
    for ptr in name_lists.values():
        self._handle_stream_pointer(full_data, ptr)
    for ptr in name_idx.values():
        self._handle_stream_pointer(full_data, ptr)
    for ptr in font_faces.values():
        self._handle_stream_pointer(full_data, ptr)
    for idx in pointer_order:
        if idx in other_ptrs:
            self._handle_stream_pointer(full_data, other_ptrs.pop(idx))
    # Anything the ordering list did not mention, in original list order.
    for ptr in other_ptrs.values():
        self._handle_stream_pointer(full_data, ptr)
def _handle_stream_pointer(self, full_data, ptr):
    """Load the stream a pointer refers to and parse it according to its type.

    Pointers whose offset lies outside *full_data*, or whose stream is empty,
    are ignored. Page and stencil-page pointers open a new ``VsdPage`` and
    flush the shape in progress before and after parsing the stream.

    Args:
        full_data: The whole "VisioDocument" stream.
        ptr: The ``Pointer`` to follow.
    """
    total = len(full_data)
    if ptr.offset >= total:
        return

    compressed = (ptr.fmt & 2) == 2
    if compressed:
        stream_data = self._decompress_stream(full_data, ptr.offset, ptr.length)
    else:
        stream_data = full_data[ptr.offset:min(ptr.offset + ptr.length, total)]
    if not stream_data:
        return

    kind = ptr.ptr_type
    fmt_high = ptr.fmt >> 4
    # These format values mark a stream that directly contains chunks.
    holds_chunks = fmt_high in (0xD, 0xC, 0x8)

    if kind == VSD_PAGES:
        self._parse_pages_stream(full_data, stream_data, compressed)
        return

    if kind == VSD_PAGE:
        self._flush_shape()
        # Pointer has no record_id field, so this normally yields 0.
        page = VsdPage(page_id=getattr(ptr, 'record_id', 0))
        # Check if background page (Format bit 0 == 0 means background)
        is_background = not (ptr.fmt & 0x1)
        if is_background:
            page.background = True
        self._page_is_background = is_background
        self._current_page = page
        self.doc.pages.append(page)
        if holds_chunks:
            self._parse_chunks_stream(stream_data)
        else:
            self._parse_blob(stream_data, compressed)
            if fmt_high == 0x5:
                self._parse_sub_pointers(full_data, stream_data, compressed)
        self._flush_shape()
        return

    if kind == VSD_STENCILS:
        self._parse_stencils_stream(full_data, stream_data, compressed)
        return

    if kind == VSD_STENCIL_PAGE:
        self._flush_shape()
        page = VsdPage(page_id=0)
        self._current_page = page
        self.doc.stencil_pages.append(page)
        if holds_chunks:
            self._parse_chunks_stream(stream_data)
        else:
            self._parse_blob(stream_data, compressed)
        self._flush_shape()
        # Stencil shapes must not leak onto a later document page.
        self._current_page = None
        return

    if kind == VSD_COLORS:
        self._parse_colors_stream(stream_data)
        return

    # Any other stream type: parse by format only.
    if holds_chunks:
        self._parse_chunks_stream(stream_data)
    elif fmt_high in (0x4, 0x5, 0x0):
        self._parse_blob(stream_data, compressed)
def _parse_colors_stream(self, stream_data):
    """Append every colour in the colour table to ``self.doc.colors``.

    The stream is read as consecutive 4-byte entries (red, green, blue and a
    fourth byte that is ignored); each becomes a "#RRGGBB" string. Trailing
    bytes that do not form a full entry are ignored.
    """
    palette = self.doc.colors
    # range() stops before any start index that lacks a full 4-byte entry.
    for start in range(0, len(stream_data) - 3, 4):
        r, g, b = stream_data[start:start + 3]
        palette.append(f"#{r:02X}{g:02X}{b:02X}")
def _parse_stencils_stream(self, full_data, stream_data, compressed):
    """Parse stencils container for master shapes.

    Reads the pointer list inside *stream_data* and follows every pointer of
    type ``VSD_STENCIL_PAGE``; other pointers are ignored.

    Args:
        full_data: The whole "VisioDocument" stream; pointers refer into it.
        stream_data: The (already decompressed) stencils stream.
        compressed: Whether the stream was stored compressed, in which case
            all positions are shifted by four bytes.
    """
    shift = 4 if compressed else 0
    size = len(stream_data)
    if size < shift + 4:
        return
    offset_val, _ = _read_u32(stream_data, shift)
    seek_pos = offset_val + shift - 4
    # A negative position would make the reads below wrap around to the end
    # of the buffer (or raise), so treat it as a malformed stream.
    if seek_pos < 0 or seek_pos + 12 > size:
        return
    # Layout at seek_pos: u32 list size (unused here), s32 pointer count,
    # 4 skipped bytes, then the pointer records.
    pointer_count, pos = _read_s32(stream_data, seek_pos + 4)
    pos += 4
    for _ in range(max(0, pointer_count)):
        if pos + 18 > size:
            break
        ptr, pos = _read_pointer(stream_data, pos)
        if ptr.ptr_type == VSD_STENCIL_PAGE:
            self._handle_stream_pointer(full_data, ptr)
def _parse_pages_stream(self, full_data, stream_data, compressed):
    """Follow every page pointer listed in the pages container stream.

    Reads the pointer list inside *stream_data* and passes each pointer of
    type ``VSD_PAGE`` to ``_handle_page_pointer``; other pointers are ignored.

    Args:
        full_data: The whole "VisioDocument" stream; pointers refer into it.
        stream_data: The (already decompressed) pages stream.
        compressed: Whether the stream was stored compressed, in which case
            all positions are shifted by four bytes.
    """
    shift = 4 if compressed else 0
    size = len(stream_data)
    if size < shift + 4:
        return
    offset_val, _ = _read_u32(stream_data, shift)
    seek_pos = offset_val + shift - 4
    # A negative position would make the reads below wrap around to the end
    # of the buffer (or raise), so treat it as a malformed stream.
    if seek_pos < 0 or seek_pos + 12 > size:
        return
    # Layout at seek_pos: u32 list size (unused here), s32 pointer count,
    # 4 skipped bytes, then the pointer records.
    pointer_count, pos = _read_s32(stream_data, seek_pos + 4)
    pos += 4
    for _ in range(max(0, pointer_count)):
        if pos + 18 > size:
            break
        ptr, pos = _read_pointer(stream_data, pos)
        if ptr.ptr_type == VSD_PAGE:
            self._handle_page_pointer(full_data, ptr)
def _handle_page_pointer(self, full_data, ptr):
    """Create a page for *ptr* and parse the page's stream into it.

    Pointers whose offset lies outside *full_data*, or whose stream is empty,
    are ignored. The shape in progress is flushed before the new page is
    opened and again after its stream has been parsed.
    """
    total = len(full_data)
    if ptr.offset >= total:
        return

    compressed = (ptr.fmt & 2) == 2
    if compressed:
        stream_data = self._decompress_stream(full_data, ptr.offset, ptr.length)
    else:
        stream_data = full_data[ptr.offset:min(ptr.offset + ptr.length, total)]
    if not stream_data:
        return

    self._flush_shape()
    # NOTE(review): the page id is taken from the pointer *type*, so pages
    # reached via _parse_pages_stream all share the same id — confirm intent.
    page = VsdPage(page_id=ptr.ptr_type)
    # Background page detection: format bit 0 clear means background.
    is_background = not (ptr.fmt & 0x1)
    if is_background:
        page.background = True
    self._page_is_background = is_background
    self._current_page = page
    self.doc.pages.append(page)

    fmt_high = ptr.fmt >> 4
    if fmt_high in (0xD, 0xC, 0x8):
        self._parse_chunks_stream(stream_data)
    else:
        self._parse_blob(stream_data, compressed)
        if fmt_high == 0x5:
            self._parse_sub_pointers(full_data, stream_data, compressed)
    self._flush_shape()
def _parse_sub_pointers(self, full_data, stream_data, compressed):
    """Follow every non-empty pointer in a nested pointer list.

    Reads the pointer list inside *stream_data* and passes each pointer whose
    type is non-zero to ``_handle_stream_pointer``.

    Args:
        full_data: The whole "VisioDocument" stream; pointers refer into it.
        stream_data: The (already decompressed) stream holding the list.
        compressed: Whether the stream was stored compressed, in which case
            all positions are shifted by four bytes.
    """
    shift = 4 if compressed else 0
    size = len(stream_data)
    if size < shift + 4:
        return
    offset_val, _ = _read_u32(stream_data, shift)
    seek_pos = offset_val + shift - 4
    # A negative position would make the reads below wrap around to the end
    # of the buffer (or raise), so treat it as a malformed stream.
    if seek_pos < 0 or seek_pos + 12 > size:
        return
    # Layout at seek_pos: u32 list size (unused here), s32 pointer count,
    # 4 skipped bytes, then the pointer records.
    pointer_count, pos = _read_s32(stream_data, seek_pos + 4)
    pos += 4
    for _ in range(max(0, pointer_count)):
        if pos + 18 > size:
            break
        ptr, pos = _read_pointer(stream_data, pos)
        if ptr.ptr_type != 0:
            self._handle_stream_pointer(full_data, ptr)
def _parse_blob(self, stream_data, compressed):
shift = 4 if compressed else 0
if len(stream_data) <= shift:
return
self._parse_chunks_stream(stream_data[shift:])
def _parse_chunks_linear(self, data, start_offset):
    """Scan *data* for consecutive chunks, starting at *start_offset*.

    Each chunk is passed to ``_handle_chunk``. Scanning stops at the first
    header that cannot be read or whose payload plus trailer would extend
    past the end of *data*.
    """
    limit = len(data)
    cursor = start_offset
    while cursor < limit - 19:
        hdr, body_start = _parse_chunk_header(data, cursor)
        if hdr is None:
            return
        body_end = body_start + hdr.data_length
        next_chunk = body_end + hdr.trailer
        if next_chunk > limit:
            return
        self._handle_chunk(hdr, data[body_start:body_end])
        cursor = next_chunk
def _parse_chunks_stream(self, data):
    """Parse every chunk in a stream, tolerating a truncated final chunk.

    Unlike ``_parse_chunks_linear``, a chunk that would extend past the end
    of *data* is still handled, with its payload clipped to the available
    bytes. The cursor always advances by at least one byte past the header.
    """
    limit = len(data)
    cursor = 0
    while cursor < limit - 19:
        hdr, body_start = _parse_chunk_header(data, cursor)
        if hdr is None:
            break
        declared_end = body_start + hdr.data_length
        body_end = min(declared_end, limit)
        next_chunk = declared_end + hdr.trailer
        if next_chunk > limit:
            # Trailer (or payload) overruns the stream: stop at the payload.
            next_chunk = body_end
        self._handle_chunk(hdr, data[body_start:body_end])
        cursor = max(body_start + 1, next_chunk)
def _flush_shape(self):
if self._current_shape and self._current_page:
self._current_page.shapes.append(self._current_shape)
self._current_shape = None
self._current_geom = None
def _handle_chunk(self, hdr, data):
    """Dispatch one chunk to the reader that matches its record type.

    Shape-start chunks flush the shape in progress and begin a new one.
    Chunk types without a reader are ignored.

    Args:
        hdr: The chunk's ``ChunkHeader``.
        data: The chunk's payload bytes.
    """
    ct = hdr.chunk_type

    # Shape-start records open a new shape.
    if ct in (VSD_SHAPE_GROUP, VSD_SHAPE_SHAPE, VSD_SHAPE_FOREIGN):
        self._flush_shape()
        shape = VsdShape(shape_id=hdr.record_id)
        type_labels = {VSD_SHAPE_GROUP: "Group", VSD_SHAPE_FOREIGN: "Foreign"}
        if ct in type_labels:
            shape.shape_type = type_labels[ct]
        self._read_shape_header(data, shape)
        self._current_shape = shape
        self._current_geom = None
        return

    # Records whose reader also needs the record id.
    if ct == VSD_FONTFACE:
        self._read_fontface(data, hdr.record_id)
        return
    if ct in (VSD_NAME, VSD_NAME2):
        self._read_name(data, hdr.record_id)
        return

    # Everything else: reader takes only the payload. Names are resolved
    # lazily so only the matching reader is looked up.
    readers = {
        VSD_XFORM_DATA: "_read_xform_data",
        VSD_TEXT_XFORM: "_read_text_xform",
        VSD_XFORM_1D: "_read_xform_1d",
        VSD_TEXT: "_read_text",
        VSD_TEXT_BLOCK: "_read_text_block",
        VSD_PAGE_PROPS: "_read_page_props",
        VSD_GEOMETRY: "_read_geometry",
        VSD_MOVE_TO: "_read_move_to",
        VSD_LINE_TO: "_read_line_to",
        VSD_ARC_TO: "_read_arc_to",
        VSD_ELLIPSE: "_read_ellipse",
        VSD_ELLIPTICAL_ARC_TO: "_read_elliptical_arc_to",
        VSD_NURBS_TO: "_read_nurbs_to",
        VSD_POLYLINE_TO: "_read_polyline_to",
        VSD_SPLINE_START: "_read_spline_start",
        VSD_SPLINE_KNOT: "_read_spline_knot",
        VSD_INFINITE_LINE: "_read_infinite_line",
        VSD_LINE: "_read_line_fmt",
        VSD_FILL_AND_SHADOW: "_read_fill",
        VSD_CHAR_IX: "_read_char_ix",
        VSD_PARA_IX: "_read_para_ix",
        VSD_LAYER_MEMBERSHIP: "_read_layer_membership",
        VSD_CONNECTION_POINTS: "_read_connection_points",
        VSD_FOREIGN_DATA_TYPE: "_read_foreign_data_type",
        VSD_FOREIGN_DATA: "_read_foreign_data",
    }
    reader_name = readers.get(ct)
    if reader_name is not None:
        getattr(self, reader_name)(data)
def _read_shape_header(self, data, shape):
    """Fill ``parent``, ``master_page`` and ``master_shape`` on *shape*.

    The three values are read as consecutive unsigned 32-bit integers from
    the start of *data*; reading stops at the first one that does not fit.

    NOTE(review): the values are unsigned, so a stored 0xFFFFFFFF does not
    equal the dataclass default of -1 — confirm how callers test "no master".
    """
    size = len(data)
    if size < 4:
        return
    shape.parent, cursor = _read_u32(data, 0)
    for attr in ("master_page", "master_shape"):
        if cursor + 4 > size:
            break
        value, cursor = _read_u32(data, cursor)
        setattr(shape, attr, value)
def _read_xform_data(self, data):
    """Read the shape transform into the current shape's ``xform``.

    Each of the seven doubles is preceded by one byte that is skipped. Two
    optional flag bytes follow for the X and Y flips. Does nothing when no
    shape is in progress.
    """
    shape = self._current_shape
    if not shape:
        return
    xf = shape.xform
    size = len(data)
    cursor = 0
    try:
        for attr in ("pin_x", "pin_y", "width", "height",
                     "loc_pin_x", "loc_pin_y", "angle"):
            # +1 skips the byte in front of every double.
            value, cursor = _read_double(data, cursor + 1)
            setattr(xf, attr, value)
        if cursor < size:
            xf.flip_x = data[cursor] != 0
            cursor += 1
        if cursor < size:
            xf.flip_y = data[cursor] != 0
    except (struct.error, IndexError):
        pass
def _read_text_xform(self, data):
    """Parse text block transform (TxtXForm) and attach it to the current shape.

    Each of the seven doubles is preceded by one byte that is skipped. Does
    nothing when no shape is in progress.
    """
    shape = self._current_shape
    if not shape:
        return
    txf = TextXForm()
    cursor = 0
    try:
        for attr in ("txt_pin_x", "txt_pin_y", "txt_width", "txt_height",
                     "txt_loc_pin_x", "txt_loc_pin_y", "txt_angle"):
            # +1 skips the byte in front of every double.
            value, cursor = _read_double(data, cursor + 1)
            setattr(txf, attr, value)
    except (struct.error, IndexError):
        pass
    shape.text_xform = txf
def _read_xform_1d(self, data):
    """Parse 1D connector endpoint transform and attach it to the current shape.

    Reads begin X/Y and end X/Y, each preceded by one skipped byte. Does
    nothing when no shape is in progress.
    """
    shape = self._current_shape
    if not shape:
        return
    xf1d = XForm1D()
    cursor = 0
    try:
        for attr in ("begin_x", "begin_y", "end_x", "end_y"):
            # +1 skips the byte in front of every double.
            value, cursor = _read_double(data, cursor + 1)
            setattr(xf1d, attr, value)
    except (struct.error, IndexError):
        pass
    shape.xform_1d = xf1d
def _read_text(self, data):
if not self._current_shape or len(data) < 8: return
text_data = data[8:]
if not text_data: return
# Try UTF-16LE first (most common in .vsd)
try:
text = text_data.decode('utf-16-le', errors='replace').rstrip('\x00')
# Check if it looks like valid text (not garbled)
if text and not all(c == '\ufffd' for c in text):
self._current_shape.text = text
return
except (UnicodeDecodeError, ValueError):
pass
# Try UTF-8
try:
text = text_data.decode('utf-8', errors='replace').rstrip('\x00')
if text:
self._current_shape.text = text
return
except (UnicodeDecodeError, ValueError):
pass
# Fallback to latin-1
try:
self._current_shape.text = text_data.decode('latin-1', errors='replace').rstrip('\x00')
except Exception:
pass
def _read_text_block(self, data):
    """Parse text block format (margins, vertical alignment, background).

    Reads four margin doubles (left, right, top, bottom), each preceded by a
    skipped byte, then a vertical-alignment byte and an RGB background
    colour. The background is only stored when at least one colour byte is
    non-zero. Does nothing without a current shape or with under 4 bytes.
    """
    shape = self._current_shape
    if not shape or len(data) < 4:
        return
    size = len(data)
    cursor = 0
    try:
        # Text block margins
        for attr in ("text_block_margin_left", "text_block_margin_right",
                     "text_block_margin_top", "text_block_margin_bottom"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(shape, attr, value)
        # Vertical alignment (one skipped byte, then the value)
        cursor += 1
        if cursor < size:
            shape.text_block_valign = data[cursor]
        # Step over the alignment byte and the byte in front of the colour.
        cursor += 2
        # Background colour
        if cursor + 3 <= size:
            r, cursor = _read_u8(data, cursor)
            g, cursor = _read_u8(data, cursor)
            b, cursor = _read_u8(data, cursor)
            if (r, g, b) != (0, 0, 0):
                shape.text_block_bg = f"#{r:02X}{g:02X}{b:02X}"
    except (struct.error, IndexError):
        pass
def _read_page_props(self, data):
    """Read the page width and height into the current page.

    Both values are doubles, each preceded by one skipped byte. Does nothing
    when there is no current page.
    """
    page = self._current_page
    if not page:
        return
    cursor = 0
    try:
        for attr in ("width", "height"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(page, attr, value)
    except (struct.error, IndexError):
        pass
def _read_geometry(self, data):
    """Start a new geometry section on the current shape.

    Bits 0, 1 and 2 of the first payload byte become the section's
    ``no_fill``, ``no_line`` and ``no_show`` flags. The new section becomes
    the target for subsequent geometry rows.
    """
    shape = self._current_shape
    if not shape:
        return
    section = GeomSection()
    if len(data) >= 1:
        flags = data[0]
        section.no_fill, section.no_line, section.no_show = (
            bool(flags & mask) for mask in (1, 2, 4)
        )
    shape.geometry.append(section)
    self._current_geom = section
def _ensure_geom(self):
    """Make sure there is a geometry section to add rows to.

    Returns False when no shape is in progress. Otherwise creates an empty
    section on the current shape if none is active, and returns True.
    """
    shape = self._current_shape
    if not shape:
        return False
    if self._current_geom is None:
        section = GeomSection()
        self._current_geom = section
        shape.geometry.append(section)
    return True
def _read_move_to(self, data):
    """Append a "MoveTo" row (x, y) to the current geometry section.

    Each double is preceded by one skipped byte.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="MoveTo")
    cursor = 0
    try:
        for attr in ("x", "y"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_line_to(self, data):
    """Append a "LineTo" row (x, y) to the current geometry section.

    Each double is preceded by one skipped byte.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="LineTo")
    cursor = 0
    try:
        for attr in ("x", "y"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_arc_to(self, data):
    """Append an "ArcTo" row (x, y, a) to the current geometry section.

    Each double is preceded by one skipped byte.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="ArcTo")
    cursor = 0
    try:
        for attr in ("x", "y", "a"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_ellipse(self, data):
    """Append an "Ellipse" row (x, y, a, b, c, d) to the current geometry section.

    Each double is preceded by one skipped byte.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="Ellipse")
    cursor = 0
    try:
        for attr in ("x", "y", "a", "b", "c", "d"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_elliptical_arc_to(self, data):
    """Append an "EllipticalArcTo" row (x, y, a, b, c, d) to the current section.

    Each double is preceded by one skipped byte.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="EllipticalArcTo")
    cursor = 0
    try:
        for attr in ("x", "y", "a", "b", "c", "d"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_nurbs_to(self, data):
    """Parse NURBSTo geometry row.

    Format: x, y and knotLast doubles (each preceded by a skipped byte), a
    skipped byte, a u16 degree, u8 xType and yType, then repeated groups of
    four doubles (knot, weight, x, y). Each group is stored in
    ``row.points`` as an ``(x, y, knot, weight)`` tuple.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="NURBSTo")
    size = len(data)
    cursor = 0
    try:
        for attr in ("x", "y", "knot_last"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
        row.degree, cursor = _read_u16(data, cursor + 1)
        row.x_type, cursor = _read_u8(data, cursor)
        row.y_type, cursor = _read_u8(data, cursor)
        # Read control points: knot, weight, x, y
        control_points = []
        while cursor + 32 <= size:
            knot, cursor = _read_double(data, cursor)
            weight, cursor = _read_double(data, cursor)
            px, cursor = _read_double(data, cursor)
            py, cursor = _read_double(data, cursor)
            control_points.append((px, py, knot, weight))
        row.points = control_points
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_polyline_to(self, data):
    """Parse PolylineTo geometry row.

    Format: x and y doubles (each preceded by a skipped byte), a skipped
    byte, u8 xType and yType, then repeated pairs of doubles stored in
    ``row.points`` as ``(x, y)`` tuples.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="PolylineTo")
    size = len(data)
    cursor = 0
    try:
        for attr in ("x", "y"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
        row.x_type, cursor = _read_u8(data, cursor + 1)
        row.y_type, cursor = _read_u8(data, cursor)
        # Read points
        vertices = []
        while cursor + 16 <= size:
            px, cursor = _read_double(data, cursor)
            py, cursor = _read_double(data, cursor)
            vertices.append((px, py))
        row.points = vertices
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_spline_start(self, data):
    """Parse SplineStart geometry row.

    Reads x, y, then second knot (``a``), first knot (``b``) and last knot
    (``c``), each preceded by a skipped byte, followed by a skipped byte and
    a one-byte degree when present.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="SplineStart")
    cursor = 0
    try:
        # a = second knot, b = first knot, c = last knot
        for attr in ("x", "y", "a", "b", "c"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
        cursor += 1
        if cursor < len(data):
            row.degree = data[cursor]
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_spline_knot(self, data):
    """Parse SplineKnot geometry row.

    Reads x, y and the knot value (``a``), each preceded by a skipped byte.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="SplineKnot")
    cursor = 0
    try:
        # a = knot value
        for attr in ("x", "y", "a"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_infinite_line(self, data):
    """Parse InfiniteLine geometry row (two points defining a line).

    The first point is stored in ``x``/``y`` and the second in ``a``/``b``;
    each double is preceded by a skipped byte.
    """
    if not self._ensure_geom():
        return
    row = GeomRow(row_type="InfiniteLine")
    cursor = 0
    try:
        for attr in ("x", "y", "a", "b"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(row, attr, value)
    except (struct.error, IndexError):
        pass
    self._current_geom.rows.append(row)
def _read_line_fmt(self, data):
    """Read line weight, colour and pattern into the current shape.

    Layout: skipped byte, weight double, skipped byte, R, G, B, skipped byte,
    then an optional pattern byte. The colour is stored as "#RRGGBB".
    """
    shape = self._current_shape
    if not shape:
        return
    try:
        shape.line_weight, cursor = _read_double(data, 1)
        r, cursor = _read_u8(data, cursor + 1)
        g, cursor = _read_u8(data, cursor)
        b, cursor = _read_u8(data, cursor)
        cursor += 1  # byte after the colour triple is not used
        shape.line_color = f"#{r:02X}{g:02X}{b:02X}"
        if cursor < len(data):
            shape.line_pattern = data[cursor]
    except (struct.error, IndexError):
        pass
def _read_fill(self, data):
    """Read fill colours, fill pattern and shadow settings into the current shape.

    Layout: foreground colour and background colour (each a skipped byte,
    R, G, B and another skipped byte), an optional fill-pattern byte, then —
    if at least 12 more bytes are present — a shadow block (type byte, R, G,
    B, skipped byte, pattern byte) and, if at least 16 bytes follow that,
    the shadow X and Y offsets as doubles, each preceded by a skipped byte.
    Colours are stored as "#RRGGBB".
    """
    shape = self._current_shape
    if not shape:
        return
    size = len(data)

    def read_rgb(pos):
        """Read three colour bytes at *pos*; return (hex string, next pos)."""
        r, pos = _read_u8(data, pos)
        g, pos = _read_u8(data, pos)
        b, pos = _read_u8(data, pos)
        return f"#{r:02X}{g:02X}{b:02X}", pos

    try:
        # Foreground: leading byte skipped, trailing byte skipped.
        shape.fill_foreground, cursor = read_rgb(1)
        cursor += 1
        # Background: same shape as the foreground entry.
        shape.fill_background, cursor = read_rgb(cursor + 1)
        cursor += 1
        if cursor < size:
            shape.fill_pattern = data[cursor]
        cursor += 1
        # Shadow data follows fill
        if cursor + 12 <= size:
            # +1 skips the shadow type byte
            shape.shadow_color, cursor = read_rgb(cursor + 1)
            cursor += 1
            shape.shadow_pattern = data[cursor] if cursor < size else 0
            cursor += 1
            if cursor + 16 <= size:
                shape.shadow_offset_x, cursor = _read_double(data, cursor + 1)
                shape.shadow_offset_y, cursor = _read_double(data, cursor + 1)
    except (struct.error, IndexError):
        pass
def _read_char_ix(self, data):
    """Append a ``CharFormat`` run to the current shape.

    Layout: u32 character count, u16 font id, skipped byte, R, G, B, skipped
    byte, one modifier byte (bit 0 bold, bit 1 italic, bit 2 underline),
    four skipped bytes, then the font size as a double. Does nothing
    without a current shape or with fewer than 12 bytes.
    """
    shape = self._current_shape
    if not shape or len(data) < 12:
        return
    fmt = CharFormat()
    try:
        fmt.char_count, cursor = _read_u32(data, 0)
        fmt.font_id, cursor = _read_u16(data, cursor)
        fmt.color_r, cursor = _read_u8(data, cursor + 1)
        fmt.color_g, cursor = _read_u8(data, cursor)
        fmt.color_b, cursor = _read_u8(data, cursor)
        # +1 skips the alpha byte
        modifiers, cursor = _read_u8(data, cursor + 1)
        fmt.bold, fmt.italic, fmt.underline = (
            bool(modifiers & mask) for mask in (1, 2, 4)
        )
        # +4 skips font mods + scale
        fmt.font_size, cursor = _read_double(data, cursor + 4)
    except (struct.error, IndexError):
        pass
    shape.char_formats.append(fmt)
def _read_para_ix(self, data):
    """Parse paragraph format and append it to the current shape.

    Layout: u32 character count, six doubles (first/left/right indent, line
    spacing, spacing before/after), each preceded by a skipped byte, then a
    skipped byte and the horizontal alignment byte, a skipped byte and the
    bullet byte. Does nothing without a current shape or with under 8 bytes.
    """
    shape = self._current_shape
    if not shape or len(data) < 8:
        return
    pf = ParaFormat()
    size = len(data)
    try:
        pf.char_count, cursor = _read_u32(data, 0)
        for attr in ("indent_first", "indent_left", "indent_right",
                     "spacing_line", "spacing_before", "spacing_after"):
            value, cursor = _read_double(data, cursor + 1)
            setattr(pf, attr, value)
        cursor += 1
        if cursor < size:
            pf.horiz_align = data[cursor]
        # Step over the alignment byte and the byte in front of the bullet.
        cursor += 2
        if cursor < size:
            pf.bullet = data[cursor]
    except (struct.error, IndexError):
        pass
    shape.para_formats.append(pf)
def _read_layer_membership(self, data):
"""Parse layer membership string."""
if not self._current_shape or len(data) < 2: return
try:
text = data.decode('utf-16-le', errors='replace').rstrip('\x00').strip()
self._current_shape.layer_member = text
except Exception:
pass
def _read_connection_points(self, data):
"""Parse connection point data."""
if not self._current_shape or len(data) < 16: return
offset = 0
try:
offset += 1
x, offset = _read_double(data, offset)
offset += 1
y, offset = _read_double(data, offset)
self._current_shape.connection_points.append(ConnectionPoint(x=x, y=y))
except (struct.error, IndexError):
pass
def _read_foreign_data_type(self, data):
"""Parse foreign data type (image format info).

Makes sure the current shape has a ``foreign_data`` object, then, when at
least two bytes remain after the leading values, reads a 16-bit type code
and stores the matching entry of ``fmt_map`` in ``foreign_data.img_format``.
Returns without doing anything when there is no current shape or ``data``
is shorter than 4 bytes.  ``struct.error`` and ``IndexError`` raised while
reading are swallowed.
"""
if not self._current_shape or len(data) < 4: return
if self._current_shape.foreign_data is None:
self._current_shape.foreign_data = ForeignData()
offset = 0
try:
# Up to four 8-byte values are read, each only when enough bytes remain.
# The values themselves are never used below; the reads only advance
# ``offset``.  presumably _read_double returns (value, next_offset) -
# TODO confirm against the helper.
img_off_x, offset = _read_double(data, offset) if len(data) >= 8 else (0, 0)
img_off_y, offset = _read_double(data, offset) if offset + 8 <= len(data) else (0, offset)
img_w, offset = _read_double(data, offset) if offset + 8 <= len(data) else (0, offset)
img_h, offset = _read_double(data, offset) if offset + 8 <= len(data) else (0, offset)
if offset + 2 <= len(data):
img_type, offset = _read_u16(data, offset)
# Type code -> format name; codes missing from this table leave
# ``img_format`` untouched.
fmt_map = {0: "emf", 1: "wmf", 2: "bmp", 3: "ole",
4: "jpg", 5: "png", 6: "gif", 7: "tiff"}
if img_type in fmt_map:
self._current_shape.foreign_data.img_format = fmt_map[img_type]
# Code 3 yields "ole"; any other code that reaches this line yields "img".
# NOTE(review): confirm whether this assignment belongs inside the
# ``if img_type in fmt_map`` branch or directly after it.
self._current_shape.foreign_data.data_type = "ole" if img_type == 3 else "img"
except (struct.error, IndexError):
pass
def _read_foreign_data(self, data):
"""Parse foreign data (actual image bytes)."""
if not self._current_shape: return
if self._current_shape.foreign_data is None:
self._current_shape.foreign_data = ForeignData()
self._current_shape.foreign_data.data = data
def _read_fontface(self, data, record_id):
"""Parse font face name."""
if len(data) < 2: return
try:
# Skip initial bytes, then read UTF-16LE name
name = data.decode('utf-16-le', errors='replace').rstrip('\x00').strip()
if name:
self.doc.fonts[record_id] = name
except Exception:
pass
def _read_name(self, data, record_id):
"""Parse name string (page names, etc)."""
if len(data) < 2: return
try:
name = data.decode('utf-16-le', errors='replace').rstrip('\x00').strip()
if name:
self.doc.names[record_id] = name
except Exception:
pass
def _vsd_shape_to_dict(shape):
"""Convert VsdShape to dict matching _parse_single_shape format."""
cells = {}
xf = shape.xform
cells["PinX"] = {"V": str(xf.pin_x), "F": ""}
cells["PinY"] = {"V": str(xf.pin_y), "F": ""}
cells["Width"] = {"V": str(xf.width), "F": ""}
cells["Height"] = {"V": str(xf.height), "F": ""}
cells["LocPinX"] = {"V": str(xf.loc_pin_x), "F": ""}
cells["LocPinY"] = {"V": str(xf.loc_pin_y), "F": ""}
cells["Angle"] = {"V": str(xf.angle), "F": ""}
if xf.flip_x: cells["FlipX"] = {"V": "1", "F": ""}
if xf.flip_y: cells["FlipY"] = {"V": "1", "F": ""}
cells["LineWeight"] = {"V": str(shape.line_weight), "F": ""}
cells["LineColor"] = {"V": shape.line_color, "F": ""}
cells["LinePattern"] = {"V": str(shape.line_pattern), "F": ""}
if shape.fill_foreground:
cells["FillForegnd"] = {"V": shape.fill_foreground, "F": ""}
if shape.fill_background:
cells["FillBkgnd"] = {"V": shape.fill_background, "F": ""}
cells["FillPattern"] = {"V": str(shape.fill_pattern), "F": ""}
# Text transform
if shape.text_xform:
txf = shape.text_xform
cells["TxtPinX"] = {"V": str(txf.txt_pin_x), "F": ""}
cells["TxtPinY"] = {"V": str(txf.txt_pin_y), "F": ""}
cells["TxtWidth"] = {"V": str(txf.txt_width), "F": ""}
cells["TxtHeight"] = {"V": str(txf.txt_height), "F": ""}
cells["TxtLocPinX"] = {"V": str(txf.txt_loc_pin_x), "F": ""}
cells["TxtLocPinY"] = {"V": str(txf.txt_loc_pin_y), "F": ""}
cells["TxtAngle"] = {"V": str(txf.txt_angle), "F": ""}
# 1D connector endpoints
if shape.xform_1d:
xf1d = shape.xform_1d
cells["BeginX"] = {"V": str(xf1d.begin_x), "F": ""}
cells["BeginY"] = {"V": str(xf1d.begin_y), "F": ""}
cells["EndX"] = {"V": str(xf1d.end_x), "F": ""}
cells["EndY"] = {"V": str(xf1d.end_y), "F": ""}
# Text block
if shape.text_block_margin_left or shape.text_block_margin_right:
cells["LeftMargin"] = {"V": str(shape.text_block_margin_left), "F": ""}
cells["RightMargin"] = {"V": str(shape.text_block_margin_right), "F": ""}
if shape.text_block_margin_top or shape.text_block_margin_bottom:
cells["TopMargin"] = {"V": str(shape.text_block_margin_top), "F": ""}
cells["BottomMargin"] = {"V": str(shape.text_block_margin_bottom), "F": ""}
if shape.text_block_valign:
cells["VerticalAlign"] = {"V": str(shape.text_block_valign), "F": ""}
if shape.text_block_bg:
cells["TextBkgnd"] = {"V": shape.text_block_bg, "F": ""}
# Shadow
if shape.shadow_color:
cells["ShdwForegnd"] = {"V": shape.shadow_color, "F": ""}
if shape.shadow_offset_x:
cells["ShapeShdwOffsetX"] = {"V": str(shape.shadow_offset_x), "F": ""}
if shape.shadow_offset_y:
cells["ShapeShdwOffsetY"] = {"V": str(shape.shadow_offset_y), "F": ""}
if shape.shadow_pattern:
cells["ShdwPattern"] = {"V": str(shape.shadow_pattern), "F": ""}
# Layer membership
if shape.layer_member:
cells["LayerMember"] = {"V": shape.layer_member, "F": ""}
geometry = []
for geom in shape.geometry:
geo_dict = {"no_fill": geom.no_fill, "no_line": geom.no_line,
"no_show": geom.no_show, "ix": "0", "rows": []}
for row in geom.rows:
row_dict = {"type": row.row_type, "ix": "",
"cells": {"X": {"V": str(row.x), "F": ""},
"Y": {"V": str(row.y), "F": ""}}}
if row.row_type in ("ArcTo", "Ellipse", "EllipticalArcTo",
"SplineStart", "SplineKnot", "InfiniteLine"):
row_dict["cells"]["A"] = {"V": str(row.a), "F": ""}
row_dict["cells"]["B"] = {"V": str(row.b), "F": ""}
if row.row_type in ("Ellipse", "EllipticalArcTo", "SplineStart"):
row_dict["cells"]["C"] = {"V": str(row.c), "F": ""}
row_dict["cells"]["D"] = {"V": str(row.d), "F": ""}
if row.row_type == "NURBSTo":
row_dict["cells"]["A"] = {"V": str(row.knot_last), "F": ""}
row_dict["cells"]["B"] = {"V": str(row.degree), "F": ""}
row_dict["cells"]["C"] = {"V": str(row.x_type), "F": ""}
row_dict["cells"]["D"] = {"V": str(row.y_type), "F": ""}
# Encode control points as E cell (NURBS formula)
if row.points:
pts_str = ";".join(f"{p[0]},{p[1]},{p[2]},{p[3]}" for p in row.points)
row_dict["cells"]["E"] = {"V": pts_str, "F": "NURBS(...)"}
if row.row_type == "PolylineTo":
# Encode points as formula
if row.points:
pts_str = ";".join(f"{p[0]},{p[1]}" for p in row.points)
row_dict["cells"]["A"] = {"V": pts_str, "F": "POLYLINE(...)"}
if row.row_type == "SplineStart":
row_dict["cells"]["D"] = {"V": str(row.degree), "F": ""}
geo_dict["rows"].append(row_dict)
geometry.append(geo_dict)
char_formats = {}
for i, cf in enumerate(shape.char_formats):
char_formats[str(i)] = {
"Size": str(cf.font_size / 72.0) if cf.font_size > 0 else "0.1111",
"Color": f"#{cf.color_r:02X}{cf.color_g:02X}{cf.color_b:02X}",
"Style": str((1 if cf.bold else 0) | (2 if cf.italic else 0) | (4 if cf.underline else 0)),
"Font": "",
}
para_formats = {}
for i, pf in enumerate(shape.para_formats):
para_formats[str(i)] = {
"IndFirst": str(pf.indent_first),
"IndLeft": str(pf.indent_left),
"IndRight": str(pf.indent_right),
"SpLine": str(pf.spacing_line),
"SpBefore": str(pf.spacing_before),
"SpAfter": str(pf.spacing_after),
"HorzAlign": str(pf.horiz_align),
"Bullet": str(pf.bullet),
}
# Foreign data (embedded images)
foreign_data_dict = None
if shape.foreign_data and shape.foreign_data.data:
import base64
fd = shape.foreign_data
mime_map = {"png": "image/png", "jpg": "image/jpeg", "bmp": "image/bmp",
"emf": "image/x-emf", "wmf": "image/x-wmf", "gif": "image/gif",
"tiff": "image/tiff"}
mime = mime_map.get(fd.img_format, "application/octet-stream")
foreign_data_dict = {
"type": fd.data_type,
"format": fd.img_format,
"mime": mime,
"data": base64.b64encode(fd.data).decode('ascii'),
}
# Connection points
connections = {}
for i, cp in enumerate(shape.connection_points):
connections[str(i)] = {
"X": {"V": str(cp.x), "F": ""},
"Y": {"V": str(cp.y), "F": ""},
}
return {
"id": str(shape.shape_id), "name": "", "name_u": "",
"type": shape.shape_type,
"master": str(shape.master_page) if shape.master_page >= 0 else "",
"master_shape": str(shape.master_shape) if shape.master_shape >= 0 else "",
"cells": cells, "geometry": geometry,
"text": shape.text,
"text_parts": [{"text": shape.text, "cp": "0", "pp": "0"}] if shape.text else [],
"char_formats": char_formats, "para_formats": para_formats,
"sub_shapes": [_vsd_shape_to_dict(s) for s in shape.sub_shapes],
"controls": {}, "connections": connections, "user": {},
"foreign_data": foreign_data_dict,
"line_style": "", "fill_style": "", "text_style": "",
}
[docs]
def parse_vsd_file(file_path: str | Path) -> VsdDocument:
    """Read a .vsd file from disk and parse it.

    Args:
        file_path: Location of the file to parse.

    Returns:
        The result of ``VsdParser(data).parse()`` for the file's bytes.

    Raises:
        FileNotFoundError: If nothing exists at *file_path*.
    """
    vsd_path = Path(file_path)
    if vsd_path.exists():
        return VsdParser(vsd_path.read_bytes()).parse()
    raise FileNotFoundError(f"File not found: {file_path}")
[docs]
def parse_vsd_to_dicts(file_path: str | Path) -> list[dict]:
    """Parse a .vsd file into page dicts for the VSDX SVG renderer.

    Each returned dict has the keys ``page_width``, ``page_height``,
    ``shapes`` (converted with ``_vsd_shape_to_dict``) and ``name``.
    Background pages are left out, since they are meant to be used as
    backgrounds rather than standalone pages - unless every page is a
    background page, in which case all pages are returned.
    """
    doc = parse_vsd_file(file_path)

    def page_to_dict(page):
        # Shapes are converted first, then wrapped with the page metadata.
        shape_dicts = [_vsd_shape_to_dict(s) for s in page.shapes]
        return {
            "page_width": page.width,
            "page_height": page.height,
            "shapes": shape_dicts,
            "name": page.name,
        }

    foreground = [page_to_dict(page) for page in doc.pages if not page.background]
    if foreground:
        return foreground
    # Nothing but background pages (or no pages at all): include them anyway.
    return [page_to_dict(page) for page in doc.pages]