mupdf
Loading...
Searching...
No Matches
structured-text.h
Go to the documentation of this file.
1// Copyright (C) 2004-2026 Artifex Software, Inc.
2//
3// This file is part of MuPDF.
4//
5// MuPDF is free software: you can redistribute it and/or modify it under the
6// terms of the GNU Affero General Public License as published by the Free
7// Software Foundation, either version 3 of the License, or (at your option)
8// any later version.
9//
10// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13// details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17//
18// Alternative licensing terms are available from the licensor.
19// For commercial licensing, see <https://www.artifex.com/> or contact
20// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21// CA 94129, USA, for further information.
22
23#ifndef MUPDF_FITZ_STRUCTURED_TEXT_H
24#define MUPDF_FITZ_STRUCTURED_TEXT_H
25
26#include "mupdf/fitz/system.h"
27#include "mupdf/fitz/types.h"
28#include "mupdf/fitz/context.h"
29#include "mupdf/fitz/geometry.h"
30#include "mupdf/fitz/font.h"
31#include "mupdf/fitz/image.h"
32#include "mupdf/fitz/output.h"
33#include "mupdf/fitz/device.h"
34#include "mupdf/fitz/pool.h"
35
39typedef struct fz_layout_char
40{
41 float x, advance;
42 const char *p; /* location in source text of character */
45
46typedef struct fz_layout_line
47{
48 float x, y, font_size;
49 const char *p; /* location in source text of start of line */
53
62
68
75
79void fz_add_layout_line(fz_context *ctx, fz_layout_block *block, float x, float y, float h, const char *p);
80
84void fz_add_layout_char(fz_context *ctx, fz_layout_block *block, float x, float w, const char *p);
85
91
97
194enum
195{
218
219 /* An old, deprecated option. */
221};
222
327
328typedef struct
329{
332 int page;
334
343typedef struct
344{
345 int refs;
349
350 /* The following fields are only of use to the routines that
351 * build an fz_stext_page. They change during page construction
352 * and their meaning is subject to change. These values should
353 * not be used by anything outside of the stext device. */
356
357 /* An array of fz_stext_page_details */
360
365
370
371enum
372{
378};
379
380enum
381{
387};
388
389enum
390{
391 /* Indicates that this vector came from a stroked
392 * path. */
394
395 /* Indicates that this vector came from a rectangular
396 * (axis-aligned) path (or path segment). */
398
399 /* Indicates that this vector came from a path
400 * segment, and more segments from this same path are
401 * still to come. */
403};
404
405enum
406{
407 /* Indicates that cell contents cross the right hand edge. */
409 /* Indicates that cell contents cross the bottom edge. */
411 /* Indicates that the cell has a border on the left hand edge. */
413 /* Indicates that the cell has a border on the top edge. */
415 /* Indicates that the cell has content (which may be a space!) */
417};
418
419/* This structure is experimental, and subject to change. */
420typedef struct
421{
422 /* A 2x2 table will be represented as a 3x3 set of
423 * cells. The rightmost column and bottommost row
424 * exist just to give information about borders on
425 * the edges. For such a table w=h=3.
426 */
427 int w;
428 int h;
429 /* Followed by w*h entries. */
430 struct {
431 unsigned int flags;
434
440{
441 int type;
442 int id;
444 union {
447 struct { fz_stext_struct *down; int index; } s;
448 struct { uint32_t flags; uint32_t argb; } v;
450 } u;
452};
453
458
463{
464 uint8_t wmode; /* 0 for horizontal, 1 for vertical */
465 uint8_t flags;
466 fz_point dir; /* normalized direction of baseline */
470};
471
477{
478 int c; /* unicode character value */
479 uint16_t bidi; /* even for LTR, odd for RTL - probably only needs 8 bits? */
480 uint16_t flags;
481 uint32_t argb; /* sRGB hex color (alpha in top 8 bits, then r, then g, then b in low bits) */
484 float size;
487};
488
489enum
490{
494 FZ_STEXT_BOLD = 8, /* Either real or 'fake' bold */
502};
503
515{
516 /* up points to the block that contains this fz_stext_struct. */
518 /* parent points to the struct that has up as one of its children.
519 * parent is useful for doing depth first traversal without having
520 * to store the entire chain of structs in the iterator. */
522
523 /* first_block points to the first child of this node (or NULL
524 * if there are none). */
526 /* last_block points to the last child of this node (or NULL
527 * if there are none). */
529
530 /* We have a set of 'standard' structure types. Every structure
531 * element should correspond to one of these. */
533 /* Documents can use their own non-standard structure types, which
534 * are held as 'raw' strings. */
536};
537
538/* An example to show how fz_stext_blocks and fz_stext_structs interact:
539 *
540 * [fz_stext_page]
541 * |
542 * first_block|
543 * |
544 * \|/
545 * [fz_stext_block:TEXT]<->[fz_stext_block:STRUCT]<->[fz_stext_block:IMG]
546 * u.s.down| /|\
547 * | |
548 * \|/ |up
549 * [fz_stext_struct]<---------.
550 * | | |
551 * first_block| |last_block |
552 * _______________________| | |
553 * | | |
554 * | | |
555 * \|/ \|/ |
556 * [fz_stext_block:...]<->...<->[fz_stext_block:STRUCT] |
557 * | /|\ |
558 * u.s.down| |up |
559 * \|/ | parent|
560 * [fz_stext_struct]--------'
561 * | |
562 * first_block| |last_block
563 * : :
564 */
565
566 typedef struct
567 {
569 float pos;
570 float min;
571 float max;
574
581
582FZ_DATA extern const char *fz_stext_options_usage;
583
594
601
608
613
623
628
629
634
639
650int fz_search_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max);
651
660typedef int (fz_search_callback_fn)(fz_context *ctx, void *opaque, int num_quads, fz_quad *hit_bbox);
661
670typedef int (fz_match_callback_fn)(fz_context *ctx, void *opaque, int num_quads, fz_quad *hit_bbox, int chapter, int page);
671
685int fz_search_stext_page_cb(fz_context *ctx, fz_stext_page *text, const char *needle, fz_search_callback_fn *cb, void *opaque);
686
691int fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads);
692
693enum
694{
698};
699
701
709char *fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf);
710
718char *fz_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area, int crlf);
719
723typedef struct
724{
725 int flags;
726 float scale;
729
731
739
745
762
788
793
799
805
823
845
857int
859
860
877
910fz_device *
911fz_new_stext_device_for_page(fz_context *ctx, fz_stext_page *stext_page, const fz_stext_options *opts, int chapter_num, int page_num, fz_rect mediabox);
912
913
956fz_device *fz_new_ocr_device(fz_context *ctx, fz_device *target, fz_matrix ctm, fz_rect mediabox, int with_list, const char *language,
957 const char *datadir, int (*progress)(fz_context *, void *, int), void *progress_arg);
958
959fz_device *fz_new_ocr_device_with_options(fz_context *ctx, fz_device *target, fz_matrix ctm, fz_rect mediabox, int with_list, const char *language,
960 const char *datadir, int (*progress)(fz_context *, void *, int), void *progress_arg, fz_options *options);
961
963
968
973
978
979typedef struct fz_search fz_search;
980
991
992FZ_DATA extern const char *fz_search_options_usage;
993
995
997
999
1005fz_search *fz_new_search(fz_context *ctx, const char *needle, fz_search_options options);
1006
1007typedef enum
1008{
1009 /* Ran out of stext to search. Please feed me some more. */
1011
1012 /* We have a match. match structure has been populated. */
1014
1015 /* Search complete */
1018
1019typedef struct
1020{
1021 int seq;
1024
1033
1041
1045typedef struct
1046{
1048 union
1049 {
1052 } u;
1054
1067
1081
1107void fz_feed_search(fz_context *ctx, fz_search *search, fz_stext_page *page, int seq);
1108
1113
1124int fz_match_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max, fz_search_options options);
1125
1139int fz_match_stext_page_cb(fz_context *ctx, fz_stext_page *page, const char *needle, fz_match_callback_fn *cb, void *opaque, fz_search_options options);
1140
1141/*
1142 Allocator function to make a new STRUCT stext block to be used in
1143 a given page (and its 'down' structure, initially empty). Not
1144 linked in to the overall page structure yet.
1145*/
1146fz_stext_block *fz_new_stext_struct(fz_context *ctx, fz_stext_page *page, fz_structure standard, const char *raw, int index);
1147
1148/* Iterators for walking over stext pages */
1149
1150/*
1151 Iterator definition. The parts of this are subject to change.
1152*/
1160
1161/*
1162 Create a new iterator, initialised to point at the first block on the page.
1163*/
1165
1166/*
1167 Create a new iterator, initialised to point at the given point on the page.
1168*/
1170
1171/*
1172 Create a new iterator, initialised to point at the first non-struct block on the page
1173 in depth first search order.
1174*/
1176
1177/*
1178 Create a new iterator, initialised to point at the first non-struct block on the page
1179 in depth first search order after the given point. The start point is recorded so that
1180 the DFS search will 'eod' at the end of the level that block is in.
1181*/
1183
1184/*
1185 Create a new iterator, initialised to point at the first non-struct block on the page
1186 in reverse depth first search order.
1187*/
1189
1190/*
1191 Create a new iterator, initialised to point at the first non-struct block on the page
1192 in reverse depth first search order after the given point. The start point is recorded so that
1193 the DFS search will 'eod' at the start of the level that block is in.
1194*/
1196
1197/*
1198 Move to the next block (never moving upwards).
1199
1200 If there is no next block, iterator.block is returned as NULL.
1201*/
1203
1204/*
1205 On a structure block, this moves the iterator down to the first child of
1206 that block.
1207
1208 On any other block, this does nothing.
1209*/
1211
1212/*
1213 Move up to the parent of the current block.
1214
1215 If there is no parent, iterator.block is returned as NULL.
1216*/
1218
1219/*
1220 Move to the next block (in a depth first traversal style).
1221
1222 The iterator never stops on struct blocks, and instead steps into them.
1223 At the end of a set of child blocks, it will move back to the parent and
1224 continue from there. It will never move past the parent given to begin_from,
1225 if used.
1226*/
1228
1229/*
1230 Move to the next block (in a reverse depth first traversal style).
1231
1232 The iterator never stops on struct blocks, and instead steps into them.
1233 At the end of a set of child blocks, it will move back to the parent and
1234 continue from there. It will never move past the parent given to begin_from,
1235 if used.
1236*/
1238
1239/*
1240 Return true if the iterator is at the end of a list of blocks.
1241 (No attempt is made to account for whether there is more data after a
1242 parent block).
1243*/
1245
1246/*
1247 Return true if the iterator is at the end of a depth first traversal
1248 of the stext page. The depth first traversal endpoint will be the
1249 end of the page, or (if begin_from was used) the end of the level
1250 given at init time.
1251*/
1253
1254/*
1255 Return true if the iterator is at the end of a reverse depth first
1256 traversal of the stext page. The reverse depth first traversal endpoint
1257 will be the start of the page, or (if begin_from was used) the start
1258 of the level given at init time.
1259*/
1261
1262/*
1263 Update a given stext page so that the contents within it that fall
1264 within the given rectangle are contained within a structure tag of the
1265 given classification.
1266
1267 The code tries not to change the ordering of content as seen from
1268 a depth first traversal as it does this.
1269
1270 This is an experimental interface. It may be updated or removed in
1271 future with no warning!
1272*/
1273void
1275
1276/*
1277 Remove any prefix of large white rectangular vectors that (almost)
1278 fills the page from the stext.
1279
1280 This is an experimental interface. It may be updated or removed in
1281 future with no warning!
1282*/
1283int
1285
1286typedef struct
1287{
1288 /* The maximum width or height that should be considered for rafting. */
1290 /* If non-zero, make a combined image, rather than just the bbox. */
1293
1294void
1296
1297/*
1298 Flotilla/Raft handling
1299
1300 We call any 2-dimensional area that's covered by (some type of) content
1301 a raft. i.e. it's made up of several distinct objects ("planks") lashed
1302 together into something that covers a large flat area (a "raft").
1303
1304 The set of all such non-overlapping rafts on a page can be called a
1305 "flotilla".
1306
1307 For instance, the borders and/or backgrounds from a table would form a
1308 raft behind the text content. And the boundaries of that raft might
1309 help us distinguish that table from an adjacent table on a different
1310 raft.
1311
1312 While we could theoretically make rafts from anything, images and
1313 vectors seem like the best bet. We could make rafts from mixed images
1314 and vectors, but to start with, I think we'll get best results from
1315 images and vectors separately.
1316*/
1318
1319/*
1320 Construct a flotilla from all the (rectangular) vectors on a page.
1321*/
1324
1325/*
1326 Drop the flotilla.
1327*/
1328void
1330
1331/*
1332 How many rafts in this flotilla?
1333*/
1334int
1336
1337/*
1338 Return the bounds of the ith raft in the flotilla.
1339*/
1340fz_rect
1342
1343/*
1344 Internal debugging function to verify the soundness
1345 of an stext page.
1346
1347 title: optional string to be printed.
1348*/
1349void fz_verify_stext_page(fz_context *ctx, fz_stext_page *page, const char *title);
1350
1351#endif
fz_structure
Definition device.h:194
#define FZ_DATA
Definition export.h:49
void HEAP_TYPE_NAME HEAP_CONTAINER_TYPE v
Definition heap-imp.h:51
struct fz_pool_array fz_pool_array
Definition pool.h:83
struct fz_pool fz_pool
Definition pool.h:34
Definition context.h:886
Definition device.h:290
Definition document.h:1080
Definition font.h:776
Definition structured-text.h:1287
int combine_image
Definition structured-text.h:1291
int max_size
Definition structured-text.h:1289
Definition image.h:351
Definition structured-text.h:55
fz_matrix inv_matrix
Definition structured-text.h:58
fz_layout_line * head
Definition structured-text.h:59
fz_matrix matrix
Definition structured-text.h:57
fz_pool * pool
Definition structured-text.h:56
fz_layout_char ** text_tailp
Definition structured-text.h:60
fz_layout_line ** tailp
Definition structured-text.h:59
Definition structured-text.h:40
float advance
Definition structured-text.h:41
float x
Definition structured-text.h:41
const char * p
Definition structured-text.h:42
struct fz_layout_char * next
Definition structured-text.h:43
Definition structured-text.h:47
fz_layout_char * text
Definition structured-text.h:50
struct fz_layout_line * next
Definition structured-text.h:51
float y
Definition structured-text.h:48
const char * p
Definition structured-text.h:49
float x
Definition structured-text.h:48
float font_size
Definition structured-text.h:48
Definition geometry.h:388
Definition options.h:207
Definition output.h:111
Definition geometry.h:187
Definition geometry.h:782
Definition geometry.h:231
Definition structured-text.h:1035
int end_seq
Definition structured-text.h:1039
fz_stext_position end
Definition structured-text.h:1038
fz_search_quad * quads
Definition structured-text.h:1037
int num_quads
Definition structured-text.h:1036
fz_stext_position begin
Definition structured-text.h:1038
int begin_seq
Definition structured-text.h:1039
Definition structured-text.h:1020
fz_quad quad
Definition structured-text.h:1022
int seq
Definition structured-text.h:1021
Definition structured-text.h:1046
fz_search_reason reason
Definition structured-text.h:1047
fz_search_match * match
Definition structured-text.h:1051
int seq_needed
Definition structured-text.h:1050
Definition structured-text.h:440
fz_stext_grid_positions * ys
Definition structured-text.h:449
struct fz_stext_block::@322304060277022073152002261250044130307126045012::@220256061145316264155376103332135153203121103165 t
fz_stext_line * first_line
Definition structured-text.h:445
fz_rect bbox
Definition structured-text.h:443
int index
Definition structured-text.h:447
fz_stext_struct * down
Definition structured-text.h:447
int type
Definition structured-text.h:441
fz_image * image
Definition structured-text.h:446
union fz_stext_block::@322304060277022073152002261250044130307126045012 u
uint32_t argb
Definition structured-text.h:448
fz_stext_line * last_line
Definition structured-text.h:445
int flags
Definition structured-text.h:445
fz_stext_block * next
Definition structured-text.h:451
int id
Definition structured-text.h:442
fz_stext_grid_positions * xs
Definition structured-text.h:449
struct fz_stext_block::@322304060277022073152002261250044130307126045012::@353165215110012323255201136074242346044261103243 b
fz_stext_block * prev
Definition structured-text.h:451
fz_stext_grid_info * info
Definition structured-text.h:449
struct fz_stext_block::@322304060277022073152002261250044130307126045012::@354102075123340333030051105300263261033003035325 s
fz_matrix transform
Definition structured-text.h:446
struct fz_stext_block::@322304060277022073152002261250044130307126045012::@017261060370142342330120176116270104331032375154 i
Definition structured-text.h:477
fz_stext_char * next
Definition structured-text.h:486
fz_quad quad
Definition structured-text.h:483
uint32_t argb
Definition structured-text.h:481
fz_font * font
Definition structured-text.h:485
int c
Definition structured-text.h:478
float size
Definition structured-text.h:484
fz_point origin
Definition structured-text.h:482
uint16_t flags
Definition structured-text.h:480
uint16_t bidi
Definition structured-text.h:479
Definition structured-text.h:567
float max
Definition structured-text.h:571
int uncertainty
Definition structured-text.h:572
float pos
Definition structured-text.h:569
float min
Definition structured-text.h:570
int reinforcement
Definition structured-text.h:568
Definition structured-text.h:421
unsigned int flags
Definition structured-text.h:431
int h
Definition structured-text.h:428
int w
Definition structured-text.h:427
Definition structured-text.h:576
int len
Definition structured-text.h:577
int max_uncertainty
Definition structured-text.h:578
fz_stext_grid_divider list[FZ_FLEXIBLE_ARRAY]
Definition structured-text.h:579
Definition structured-text.h:463
fz_stext_line * next
Definition structured-text.h:469
fz_stext_char * first_char
Definition structured-text.h:468
uint8_t flags
Definition structured-text.h:465
fz_stext_char * last_char
Definition structured-text.h:468
uint8_t wmode
Definition structured-text.h:464
fz_rect bbox
Definition structured-text.h:467
fz_point dir
Definition structured-text.h:466
fz_stext_line * prev
Definition structured-text.h:469
Definition structured-text.h:724
float scale
Definition structured-text.h:726
fz_rect clip
Definition structured-text.h:727
int flags
Definition structured-text.h:725
Definition structured-text.h:1154
fz_stext_struct * parent
Definition structured-text.h:1156
fz_stext_struct * top
Definition structured-text.h:1158
fz_stext_block * block
Definition structured-text.h:1157
fz_stext_page * page
Definition structured-text.h:1155
Definition structured-text.h:329
int page
Definition structured-text.h:332
fz_rect mediabox
Definition structured-text.h:330
int chapter
Definition structured-text.h:331
Definition structured-text.h:344
fz_stext_block * first_block
Definition structured-text.h:348
fz_pool_array * id_list
Definition structured-text.h:358
fz_pool * pool
Definition structured-text.h:346
int refs
Definition structured-text.h:345
fz_rect mediabox
Definition structured-text.h:347
fz_stext_block * last_block
Definition structured-text.h:354
fz_stext_struct * last_struct
Definition structured-text.h:355
Definition structured-text.h:1026
fz_stext_line * line
Definition structured-text.h:1030
fz_stext_block * block
Definition structured-text.h:1029
fz_stext_struct * parent
Definition structured-text.h:1028
fz_stext_char * ch
Definition structured-text.h:1031
fz_stext_page * page
Definition structured-text.h:1027
Definition structured-text.h:515
fz_stext_block * up
Definition structured-text.h:517
fz_stext_struct * parent
Definition structured-text.h:521
fz_structure standard
Definition structured-text.h:532
fz_stext_block * first_block
Definition structured-text.h:525
char raw[FZ_FLEXIBLE_ARRAY]
Definition structured-text.h:535
fz_stext_block * last_block
Definition structured-text.h:528
fz_search_options
Definition structured-text.h:982
@ FZ_SEARCH_EXACT
Definition structured-text.h:983
@ FZ_SEARCH_IGNORE_CASE
Definition structured-text.h:984
@ FZ_SEARCH_REGEXP
Definition structured-text.h:986
@ FZ_SEARCH_KEEP_PARAGRAPHS
Definition structured-text.h:988
@ FZ_SEARCH_KEEP_HYPHENS
Definition structured-text.h:989
@ FZ_SEARCH_KEEP_LINES
Definition structured-text.h:987
@ FZ_SEARCH_IGNORE_DIACRITICS
Definition structured-text.h:985
fz_stext_options * fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *string)
@ FZ_STEXT_FUZZY_VECTORS
Definition structured-text.h:217
@ FZ_STEXT_CLIP_RECT
Definition structured-text.h:213
@ FZ_STEXT_INHIBIT_SPACES
Definition structured-text.h:199
@ FZ_STEXT_COLLECT_VECTORS
Definition structured-text.h:206
@ FZ_STEXT_TABLE_HUNT
Definition structured-text.h:210
@ FZ_STEXT_COLLECT_STYLES
Definition structured-text.h:211
@ FZ_STEXT_ACCURATE_BBOXES
Definition structured-text.h:205
@ FZ_STEXT_IGNORE_ACTUALTEXT
Definition structured-text.h:207
@ FZ_STEXT_ACCURATE_ASCENDERS
Definition structured-text.h:214
@ FZ_STEXT_PRESERVE_SPANS
Definition structured-text.h:201
@ FZ_STEXT_PRESERVE_WHITESPACE
Definition structured-text.h:197
@ FZ_STEXT_MEDIABOX_CLIP
Definition structured-text.h:220
@ FZ_STEXT_DEHYPHENATE
Definition structured-text.h:200
@ FZ_STEXT_SEGMENT
Definition structured-text.h:208
@ FZ_STEXT_CLIP
Definition structured-text.h:202
@ FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
Definition structured-text.h:203
@ FZ_STEXT_LAZY_VECTORS
Definition structured-text.h:216
@ FZ_STEXT_PRESERVE_LIGATURES
Definition structured-text.h:196
@ FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
Definition structured-text.h:212
@ FZ_STEXT_PARAGRAPH_BREAK
Definition structured-text.h:209
@ FZ_STEXT_COLLECT_STRUCTURE
Definition structured-text.h:204
@ FZ_STEXT_PRESERVE_IMAGES
Definition structured-text.h:198
@ FZ_STEXT_ACCURATE_SIDE_BEARINGS
Definition structured-text.h:215
struct fz_flotilla fz_flotilla
Definition structured-text.h:1317
void fz_init_search_options(fz_context *ctx, fz_search_options *options)
fz_document * fz_open_reflowed_document(fz_context *ctx, fz_document *underdoc, const fz_stext_options *opts)
fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_from_rdfs(fz_stext_page *page, fz_stext_block *block, fz_stext_struct *top)
int fz_is_unicode_hyphen(int c)
FZ_DATA const char * fz_search_options_usage
fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_from(fz_stext_page *page, fz_stext_block *block, fz_stext_struct *top)
fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_from_dfs(fz_stext_page *page, fz_stext_block *block, fz_stext_struct *top)
fz_stext_page_block_iterator fz_stext_page_block_iterator_begin(fz_stext_page *page)
void fz_print_stext_header_as_html(fz_context *ctx, fz_output *out)
fz_stext_line_flags
Definition structured-text.h:455
@ FZ_STEXT_LINE_FLAGS_JOINED
Definition structured-text.h:456
fz_stext_page_block_iterator fz_stext_page_block_iterator_up(fz_stext_page_block_iterator pos)
void fz_print_stext_header_as_xhtml(fz_context *ctx, fz_output *out)
void fz_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
int fz_stext_page_block_iterator_eod_dfs(fz_stext_page_block_iterator pos)
fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_dfs(fz_stext_page *page)
fz_stext_page_block_iterator fz_stext_page_block_iterator_next_rdfs(fz_stext_page_block_iterator pos)
fz_stext_block * fz_find_table_within_bounds(fz_context *ctx, fz_stext_page *page, fz_rect bounds)
int fz_match_callback_fn(fz_context *ctx, void *opaque, int num_quads, fz_quad *hit_bbox, int chapter, int page)
Definition structured-text.h:670
@ FZ_STEXT_BLOCK_GRID
Definition structured-text.h:377
@ FZ_STEXT_BLOCK_TEXT
Definition structured-text.h:373
@ FZ_STEXT_BLOCK_IMAGE
Definition structured-text.h:374
@ FZ_STEXT_BLOCK_VECTOR
Definition structured-text.h:376
@ FZ_STEXT_BLOCK_STRUCT
Definition structured-text.h:375
fz_stext_page * fz_keep_stext_page(fz_context *ctx, fz_stext_page *page)
void fz_drop_layout(fz_context *ctx, fz_layout_block *block)
fz_stext_xml_flags
Definition structured-text.h:618
@ FZ_STEXT_XML_FLAGS_CHARS
Definition structured-text.h:619
@ FZ_STEXT_XML_FLAGS_POINTERS
Definition structured-text.h:620
int fz_stext_remove_page_fill(fz_context *ctx, fz_stext_page *page)
fz_stext_page * fz_new_stext_page(fz_context *ctx, fz_rect mediabox)
int fz_segment_stext_rect(fz_context *ctx, fz_stext_page *page, fz_rect rect)
void fz_init_stext_options(fz_context *ctx, fz_stext_options *opts)
fz_search * fz_new_search(fz_context *ctx, const char *needle, fz_search_options options)
fz_stext_page_block_iterator fz_stext_page_block_iterator_next_dfs(fz_stext_page_block_iterator pos)
fz_stext_page_block_iterator fz_stext_page_block_iterator_down(fz_stext_page_block_iterator pos)
fz_search_options * fz_parse_search_options(fz_context *ctx, fz_search_options *options, const char *args)
@ FZ_STEXT_STRIKEOUT
Definition structured-text.h:491
@ FZ_STEXT_CLIPPED
Definition structured-text.h:497
@ FZ_STEXT_FILLED
Definition structured-text.h:495
@ FZ_STEXT_UNICODE_IS_CID
Definition structured-text.h:498
@ FZ_STEXT_SYNTHETIC_LARGE
Definition structured-text.h:500
@ FZ_STEXT_STROKED
Definition structured-text.h:496
@ FZ_STEXT_UNDERLINE
Definition structured-text.h:492
@ FZ_STEXT_UNICODE_IS_GID
Definition structured-text.h:499
@ FZ_STEXT_BOLD
Definition structured-text.h:494
@ FZ_STEXT_SYNTHETIC
Definition structured-text.h:493
@ FZ_STEXT_HIGHLIGHT
Definition structured-text.h:501
@ FZ_STEXT_GRID_V_CROSSED
Definition structured-text.h:410
@ FZ_STEXT_GRID_H_CROSSED
Definition structured-text.h:408
@ FZ_STEXT_GRID_T_BORDER
Definition structured-text.h:414
@ FZ_STEXT_GRID_L_BORDER
Definition structured-text.h:412
@ FZ_STEXT_GRID_FULL
Definition structured-text.h:416
fz_quad fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *ap, fz_point *bp, int mode)
void fz_drop_search(fz_context *ctx, fz_search *search)
void fz_print_stext_trailer_as_html(fz_context *ctx, fz_output *out)
int fz_segment_stext_page(fz_context *ctx, fz_stext_page *page)
void fz_table_hunt_within_bounds(fz_context *ctx, fz_stext_page *page, fz_rect bounds)
void fz_print_stext_page_as_html(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
fz_rect fz_flotilla_raft_area(fz_context *ctx, fz_flotilla *flot, int i)
fz_search_reason
Definition structured-text.h:1008
@ FZ_SEARCH_MORE_INPUT
Definition structured-text.h:1010
@ FZ_SEARCH_COMPLETE
Definition structured-text.h:1016
@ FZ_SEARCH_MATCH
Definition structured-text.h:1013
void fz_verify_stext_page(fz_context *ctx, fz_stext_page *page, const char *title)
int fz_flotilla_size(fz_context *ctx, fz_flotilla *flot)
int fz_is_unicode_space_equivalent(int c)
fz_search_result fz_search_forwards(fz_context *ctx, fz_search *search)
void fz_drop_stext_page(fz_context *ctx, fz_stext_page *page)
void fz_paragraph_break(fz_context *ctx, fz_stext_page *page)
void fz_drop_flotilla(fz_context *ctx, fz_flotilla *f)
fz_stext_block * fz_new_stext_struct(fz_context *ctx, fz_stext_page *page, fz_structure standard, const char *raw, int index)
char * fz_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area, int crlf)
int fz_search_callback_fn(fz_context *ctx, void *opaque, int num_quads, fz_quad *hit_bbox)
Definition structured-text.h:660
fz_stext_block * fz_find_table_within_grid(fz_context *ctx, fz_stext_page *page, fz_stext_grid_positions *xpos, fz_stext_grid_positions *ypos, float limit)
fz_layout_block * fz_new_layout(fz_context *ctx)
fz_stext_page_details * fz_stext_page_details_for_block(fz_context *ctx, fz_stext_page *page, fz_stext_block *block)
fz_stext_page_block_iterator fz_stext_page_block_iterator_begin_rdfs(fz_stext_page *page)
int fz_stext_page_block_iterator_eod_rdfs(fz_stext_page_block_iterator pos)
void fz_apply_stext_options(fz_context *ctx, fz_stext_options *opts, fz_options *options)
fz_device * fz_new_ocr_device(fz_context *ctx, fz_device *target, fz_matrix ctm, fz_rect mediabox, int with_list, const char *language, const char *datadir, int(*progress)(fz_context *, void *, int), void *progress_arg)
int fz_search_stext_page_cb(fz_context *ctx, fz_stext_page *text, const char *needle, fz_search_callback_fn *cb, void *opaque)
void fz_print_stext_page_as_json(fz_context *ctx, fz_output *out, fz_stext_page *page, float scale)
fz_flotilla * fz_new_flotilla_from_stext_page_vectors(fz_context *ctx, fz_stext_page *page)
int fz_search_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max)
fz_device * fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options *options)
void fz_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page)
void fz_add_layout_line(fz_context *ctx, fz_layout_block *block, float x, float y, float h, const char *p)
FZ_DATA const char * fz_stext_options_usage
void fz_classify_stext_rect(fz_context *ctx, fz_stext_page *page, fz_structure classification, fz_rect rect)
@ FZ_SELECT_WORDS
Definition structured-text.h:696
@ FZ_SELECT_LINES
Definition structured-text.h:697
@ FZ_SELECT_CHARS
Definition structured-text.h:695
int fz_match_stext_page_cb(fz_context *ctx, fz_stext_page *page, const char *needle, fz_match_callback_fn *cb, void *opaque, fz_search_options options)
void fz_print_stext_page_as_xml_with_flags(fz_context *ctx, fz_output *out, fz_stext_page *page, int id, fz_stext_xml_flags flags)
void fz_apply_search_options(fz_context *ctx, fz_search_options *options, fz_options *opts)
int fz_match_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max, fz_search_options options)
@ FZ_STEXT_TEXT_JUSTIFY_RIGHT
Definition structured-text.h:385
@ FZ_STEXT_TEXT_JUSTIFY_UNKNOWN
Definition structured-text.h:382
@ FZ_STEXT_TEXT_JUSTIFY_LEFT
Definition structured-text.h:383
@ FZ_STEXT_TEXT_JUSTIFY_CENTER
Definition structured-text.h:384
@ FZ_STEXT_TEXT_JUSTIFY_FULL
Definition structured-text.h:386
void fz_debug_stext_page(fz_context *ctx, fz_stext_page *page, int id)
struct fz_search fz_search
Definition structured-text.h:979
fz_search_result fz_search_backwards(fz_context *ctx, fz_search *search)
fz_device * fz_new_stext_device_for_page(fz_context *ctx, fz_stext_page *stext_page, const fz_stext_options *opts, int chapter_num, int page_num, fz_rect mediabox)
void fz_stext_raft_images(fz_context *ctx, fz_stext_page *stext, fz_image_raft_options *options)
void fz_feed_search(fz_context *ctx, fz_search *search, fz_stext_page *page, int seq)
char * fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf)
int fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads)
int fz_is_unicode_whitespace(int c)
void fz_print_stext_page_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
void fz_print_stext_trailer_as_xhtml(fz_context *ctx, fz_output *out)
fz_device * fz_new_ocr_device_with_options(fz_context *ctx, fz_device *target, fz_matrix ctm, fz_rect mediabox, int with_list, const char *language, const char *datadir, int(*progress)(fz_context *, void *, int), void *progress_arg, fz_options *options)
fz_stext_page_block_iterator fz_stext_page_block_iterator_next(fz_stext_page_block_iterator pos)
void fz_add_layout_char(fz_context *ctx, fz_layout_block *block, float x, float w, const char *p)
int fz_stext_page_block_iterator_eod(fz_stext_page_block_iterator pos)
@ FZ_STEXT_VECTOR_IS_STROKED
Definition structured-text.h:393
@ FZ_STEXT_VECTOR_CONTINUES
Definition structured-text.h:402
@ FZ_STEXT_VECTOR_IS_RECTANGLE
Definition structured-text.h:397
void fz_table_hunt(fz_context *ctx, fz_stext_page *page)
int fz_propose_table_within_bounds(fz_context *ctx, fz_stext_page *page, fz_rect bounds, fz_stext_grid_positions **xposp, fz_stext_grid_positions **yposp)
#define FZ_FLEXIBLE_ARRAY
Definition system.h:46