mupdf
Loading...
Searching...
No Matches
document.h
Go to the documentation of this file.
1// Copyright (C) 2004-2025 Artifex Software, Inc.
2//
3// This file is part of MuPDF.
4//
5// MuPDF is free software: you can redistribute it and/or modify it under the
6// terms of the GNU Affero General Public License as published by the Free
7// Software Foundation, either version 3 of the License, or (at your option)
8// any later version.
9//
10// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13// details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17//
18// Alternative licensing terms are available from the licensor.
19// For commercial licensing, see <https://www.artifex.com/> or contact
20// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21// CA 94129, USA, for further information.
22
23#ifndef MUPDF_PDF_DOCUMENT_H
24#define MUPDF_PDF_DOCUMENT_H
25
26#include "mupdf/fitz/export.h"
27#include "mupdf/fitz/document.h"
28#include "mupdf/fitz/hash.h"
29#include "mupdf/fitz/stream.h"
30#include "mupdf/fitz/xml.h"
31#include "mupdf/pdf/object.h"
32
33typedef struct pdf_xref pdf_xref;
35
36typedef struct pdf_page pdf_page;
37typedef struct pdf_annot pdf_annot;
38typedef struct pdf_js pdf_js;
40
41enum
42{
45};
46
47typedef struct
48{
49 size_t size;
50 size_t base_size;
51 size_t len;
52 int64_t i;
53 float f;
54 char *scratch;
57
63
64/*
65 Document event structures are mostly opaque to the app. Only the type
66 is visible to the app.
67*/
69
70/*
71 the type of function via which the app receives
72 document events.
73*/
74typedef void (pdf_doc_event_cb)(fz_context *ctx, pdf_document *doc, pdf_doc_event *evt, void *data);
75
76/*
77 the type of function via which the app frees
78 the data provided to the event callback pdf_doc_event_cb.
79*/
80typedef void (pdf_free_doc_event_data_cb)(fz_context *ctx, void *data);
81
83
84/*
85 Callback called when the console is dropped because it
86 is being replaced or the javascript is being disabled
87 by a call to pdf_disable_js().
88*/
89typedef void (pdf_js_console_drop_cb)(pdf_js_console *console, void *user);
90
91/*
92 Callback signalling that a piece of javascript is asking
93 the javascript console to be displayed.
94*/
95typedef void (pdf_js_console_show_cb)(void *user);
96
97/*
98 Callback signalling that a piece of javascript is asking
99 the javascript console to be hidden.
100*/
101typedef void (pdf_js_console_hide_cb)(void *user);
102
103/*
104 Callback signalling that a piece of javascript is asking
105 the javascript console to remove all its contents.
106*/
107typedef void (pdf_js_console_clear_cb)(void *user);
108
109/*
110 Callback signalling that a piece of javascript is appending
111 the given message to the javascript console contents.
112*/
113typedef void (pdf_js_console_write_cb)(void *user, const char *msg);
114
115/*
116 The callback functions relating to a javascript console.
117*/
125
126/*
127 Retrieve the currently set javascript console, or NULL
128 if none is set.
129*/
131
132/*
133 Set a new javascript console.
134
135 console: A set of callback functions informing about
136 what pieces of executed js is trying to do
137 to the js console. The caller transfers ownership of
138 console when calling pdf_js_set_console(). Once it and
139 the corresponding user pointer are no longer needed
140 console->drop() will be called passing both the console
141 and the user pointer.
142
143 user: Opaque data that will be passed unchanged to all
144 js console callbacks when called. The caller ensures
145 that this is valid until either the js console is
146 replaced by calling pdf_js_set_console() again with a
147 new console, or pdf_disable_js() is called. In either
148 case the caller ensures that the user data is freed.
149*/
150void pdf_js_set_console(fz_context *ctx, pdf_document *doc, pdf_js_console *console, void *user);
151
152/*
153 Open a PDF document.
154
155 Open a PDF document by reading its cross reference table, so
156 MuPDF can locate PDF objects inside the file. Upon a broken
157 cross reference table or other parse errors MuPDF will restart
158 parsing the file from the beginning to try to rebuild a
159 (hopefully correct) cross reference table to allow further
160 processing of the file.
161
162 The returned pdf_document should be used when calling most
163 other PDF functions. Note that it wraps the context, so those
164 functions implicitly get access to the global state in
165 context.
166
167 filename: a path to a file as it would be given to open(2).
168*/
169pdf_document *pdf_open_document(fz_context *ctx, const char *filename);
170
171/*
172 Opens a PDF document.
173
174 Same as pdf_open_document, but takes a stream instead of a
175 filename to locate the PDF document to open. Increments the
176 reference count of the stream. See fz_open_file,
177 fz_open_file_w or fz_open_fd for opening a stream, and
178 fz_drop_stream for closing an open stream.
179*/
181
182/*
183 Closes and frees an opened PDF document.
184
185 The resource store in the context associated with pdf_document
186 is emptied.
187*/
189
191
192/*
193 Do a pass through the document to check if it needs
194 any repairs; and trigger a repair if necessary.
195
196 This is a very expensive operation both in terms of memory use
197 and computation, because it needs to parse the entire file to
198 detect any errors.
199
200 The result of the check is saved, so calling this function again
201 after a successful completion is a no-op.
202
203 If this function throws (either because of out of memory (SYSTEM),
204 or other reasons) then the file should be considered suspect.
205
206 Returns non-zero if a repair was triggered during checking, and
207 hence changes to the file may have been lost.
208*/
210
211/*
212 down-cast a fz_document to a pdf_document.
213 Returns NULL if underlying document is not PDF
214*/
216
217/*
218 Down-cast generic fitz objects into pdf specific variants.
219 Returns NULL if the objects are not from a PDF document.
220*/
223
224/*
225 Get a pdf_document handle from an fz_document handle.
226
227 This is superficially similar to pdf_document_from_fz_document
228 (and the older pdf_specifics).
229
230 For fz_documents that are actually pdf_documents, this will return
231 a kept version of the same pointer, just cast differently.
232
233 For fz_documents that have a pdf_document representation internally,
234 then you may get a kept version of a different pointer.
235
236 For fz_documents that have no pdf_document representation internally,
237 this will return NULL.
238
239 Note that this returns a kept pointer that the caller is responsible
240 for freeing, unlike pdf_specifics or pdf_document_from_fz_document.
241*/
243
245
246/*
247 Attempt to authenticate a
248 password.
249
250 Returns 0 for failure, non-zero for success.
251
252 In the non-zero case:
253 bit 0 set => no password required
254 bit 1 set => user password authenticated
255 bit 2 set => owner password authenticated
256*/
257int pdf_authenticate_password(fz_context *ctx, pdf_document *doc, const char *pw);
258
260int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *ptr, size_t size);
261
263
265
267
268/*
269 Get the number of layer configurations defined in this document.
270
271 doc: The document in question.
272*/
274
275/*
276 Configure visibility of individual layers in this document.
277*/
279const char *pdf_layer_name(fz_context *ctx, pdf_document *doc, int layer);
281void pdf_enable_layer(fz_context *ctx, pdf_document *doc, int layer, int enabled);
282
283typedef struct
284{
285 const char *name;
286 const char *creator;
288
289/*
290 Fetch the name (and optionally creator) of the given layer config.
291
292 doc: The document in question.
293
294 config_num: A value in the 0..n-1 range, where n is the
295 value returned from pdf_count_layer_configs.
296
297 info: Pointer to structure to fill in. Pointers within
298 this structure may be set to NULL if no information is
299 available.
300*/
301void pdf_layer_config_info(fz_context *ctx, pdf_document *doc, int config_num, pdf_layer_config *info);
302
303/*
304 Fetch the creator of the given layer config, or NULL if none exists.
305
306 doc: The document in question.
307
308 config_num: A value in the 0..n-1 range, where n is the
309 value returned from pdf_count_layer_configs.
310*/
311const char *pdf_layer_config_creator(fz_context *ctx, pdf_document *doc, int config_num);
312
313/*
314 Fetch the name of the given layer config, or NULL if none exists.
315
316 doc: The document in question.
317
318 config_num: A value in the 0..n-1 range, where n is the
319 value returned from pdf_count_layer_configs.
320
321*/
322const char *pdf_layer_config_name(fz_context *ctx, pdf_document *doc, int config_num);
323
324/*
325 Set the current configuration.
326 This updates the visibility of the optional content groups
327 within the document.
328
329 doc: The document in question.
330
331 config_num: A value in the 0..n-1 range, where n is the
332 value returned from pdf_count_layer_configs.
333*/
334void pdf_select_layer_config(fz_context *ctx, pdf_document *doc, int config_num);
335
336/*
337 Returns the number of entries in the 'UI' for this layer configuration.
338
339 doc: The document in question.
340*/
342
343/*
344 Select a checkbox/radiobox within the 'UI' for this layer
345 configuration.
346
347 Selecting a UI entry that is a radiobox may disable
348 other UI entries.
349
350 doc: The document in question.
351
352 ui: A value in the 0..m-1 range, where m is the value
353 returned by pdf_count_layer_config_ui.
354*/
356
357/*
358 Deselect a checkbox/radiobox within the 'UI' for this layer configuration.
359
360 doc: The document in question.
361
362 ui: A value in the 0..m-1 range, where m is the value
363 returned by pdf_count_layer_config_ui.
364*/
366
367/*
368 Toggle a checkbox/radiobox within the 'UI' for this layer configuration.
369
370 Toggling a UI entry that is a radiobox may disable
371 other UI entries.
372
373 doc: The document in question.
374
375 ui: A value in the 0..m-1 range, where m is the value
376 returned by pdf_count_layer_config_ui.
377*/
379
386
389
398
399/*
400 Get the info for a given entry in the layer config ui.
401
402 doc: The document in question.
403
404 ui: A value in the 0..m-1 range, where m is the value
405 returned by pdf_count_layer_config_ui.
406
407 info: Pointer to a structure to fill in with information
408 about the requested ui entry.
409*/
411
412/*
413 Write the current layer config back into the document as the default state.
414*/
416
417/*
418 Determine whether changes have been made since the
419 document was opened or last saved.
420*/
422
423/*
424 Determine if this PDF has been repaired since opening.
425*/
427
428/* Object that can perform the cryptographic operation necessary for document signing */
430
431/* Unsaved signature fields */
442
443typedef struct
444{
445 int page;
448
449typedef struct
450{
451 int number; /* Page object number */
452 int64_t offset; /* Offset of page object */
453 int64_t index; /* Index into shared hint_shared_ref */
455
456typedef struct
457{
458 int number; /* Object number of first object */
459 int64_t offset; /* Offset of first object */
461
463{
465
467
469 int checked; /* we've checked that we don't need to repair */
471 int bias;
472 int64_t startxref;
473 int64_t file_size;
477
484
485 /* The local_xref is only active, if local_xref_nesting >= 0 */
488
495
500
503 int non_structural_change; /* True if we are modifying the document in a way that does not change the (page) structure */
506
507 /* State indicating which file parsing method we are using */
509 int64_t file_length;
510
512 pdf_obj *linear_obj; /* Linearized object (if used) */
513 pdf_obj **linear_page_refs; /* Page objects for linear loading */
515
516 /* The state for the pdf_progressive_advance parser */
517 int64_t linear_pos;
519
522 int hints_loaded; /* Set to 1 after the hints loading has completed,
523 * whether successful or not! */
524 /* Page n references shared object references:
525 * hint_shared_ref[i]
526 * where
527 * i = s to e-1
528 * s = hint_page[n]->index
529 * e = hint_page[n+1]->index
530 * Shared object reference r accesses objects:
531 * rs to re-1
532 * where
533 * rs = hint_shared[r]->number
534 * re = hint_shared[r]->count + rs
535 * These are guaranteed to lie within the region starting at
536 * hint_shared[r]->offset of length hint_shared[r]->length
537 */
543
545
547
551
555
559
560 struct {
565
569
571
573
575};
576
578
580
581/*
582 Return a deep copied object equivalent to the
583 supplied object, suitable for use within the given document.
584
585 dst: The document in which the returned object is to be used.
586
587 obj: The object to deep copy.
588
589 Note: If grafting multiple objects, you should use a pdf_graft_map
590 to avoid potential duplication of target objects.
591*/
593
594/*
595 Prepare a graft map object to allow objects
596 to be deep copied from one document to the given one, avoiding
597 problems with duplicated child objects.
598
599 dst: The document to copy objects to.
600
601 Note: all the source objects must come from the same document.
602*/
604
607
608/*
609 Return a deep copied object equivalent
610 to the supplied object, suitable for use within the target
611 document of the map.
612
613 map: A map targeted at the document in which the returned
614 object is to be used.
615
616 obj: The object to be copied.
617
618 Note: Copying multiple objects via the same graft map ensures
619 that any shared children are not copied more than once.
620*/
622
623/*
624 Graft a page (and its resources) from the src document to the
625 destination document of the graft. This involves a deep copy
626 of the objects in question.
627
628 map: A map targeted at the document into which the page should
629 be inserted.
630
631 page_to: The position within the destination document at which
632 the page should be inserted (pages numbered from 0, with -1
633 meaning "at the end").
634
635 src: The document from which the page should be copied.
636
637 page_from: The page number which should be copied from the src
638 document (pages numbered from 0, with -1 meaning "at the end").
639*/
640void pdf_graft_page(fz_context *ctx, pdf_document *dst, int page_to, pdf_document *src, int page_from);
641void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf_document *src, int page_from);
642
643/*
644 Create a device that will record the
645 graphical operations given to it into a sequence of
646 pdf operations, together with a set of resources. This
647 sequence/set pair can then be used as the basis for
648 adding a page to the document (see pdf_add_page).
649 Returns a kept reference.
650
651 doc: The document for which these are intended.
652
653 mediabox: The bbox for the created page.
654
655 presources: Pointer to a place to put the created
656 resources dictionary.
657
658 pcontents: Pointer to a place to put the created
659 contents buffer.
660*/
661fz_device *pdf_page_write(fz_context *ctx, pdf_document *doc, fz_rect mediabox, pdf_obj **presources, fz_buffer **pcontents);
662
663/*
664 Create a pdf device. Rendering to the device creates
665 new pdf content. WARNING: this device is work in progress. It doesn't
666 currently support all rendering cases.
667
668 Note that contents must be a stream (dictionary) to be updated (or
669 a reference to a stream). Callers should take care to ensure that it
670 is not an array, and that it is not shared with other objects/pages.
671*/
673
674/*
675 Create a pdf_obj within a document that
676 represents a page, from a previously created resources
677 dictionary and page content stream. This should then be
678 inserted into the document using pdf_insert_page.
679
680 After this call the page exists within the document
681 structure, but is not actually ever displayed as it is
682 not linked into the PDF page tree.
683
684 doc: The document to which to add the page.
685
686 mediabox: The mediabox for the page (should be identical
687 to that used when creating the resources/contents).
688
689 rotate: 0, 90, 180 or 270. The rotation to use for the
690 page.
691
692 resources: The resources dictionary for the new page
693 (typically created by pdf_page_write).
694
695 contents: The page contents for the new page (typically
696 created by pdf_page_write).
697*/
698pdf_obj *pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents);
699
700/*
701 Insert a page previously created by
702 pdf_add_page into the pages tree of the document.
703
704 doc: The document to insert into.
705
706 at: The page number to insert at (pages numbered from 0).
707 0 <= n <= page_count inserts before page n. Negative numbers
708 or INT_MAX are treated as page count, and insert at the end.
709 0 inserts at the start. All existing pages after the
710 insertion point are shuffled up.
711
712 page: The page to insert.
713*/
714void pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page);
715
716/*
717 Delete a page from the page tree of
718 a document. This does not remove the page contents
719 or resources from the file.
720
721 doc: The document to operate on.
722
723 number: The page to remove (numbered from 0)
724*/
725void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number);
726
727/*
728 Delete a range of pages from the
729 page tree of a document. This does not remove the page
730 contents or resources from the file.
731
732 doc: The document to operate on.
733
734 start, end: The range of pages (numbered from 0)
735 (inclusive, exclusive) to remove. If end is negative or
736 greater than the number of pages in the document, it
737 will be taken to be the end of the document.
738*/
739void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end);
740
741/*
742 Get page label (string) from a page number (index).
743*/
744void pdf_page_label(fz_context *ctx, pdf_document *doc, int page, char *buf, size_t size);
745void pdf_page_label_imp(fz_context *ctx, fz_document *doc, int chapter, int page, char *buf, size_t size);
746
755
756void pdf_set_page_labels(fz_context *ctx, pdf_document *doc, int index, pdf_page_label_style style, const char *prefix, int start);
758
761
762/*
763 In calls to fz_save_document, the following options structure can be used
764 to control aspects of the writing process. This structure may grow
765 in the future, and should be zero-filled to allow forwards compatibility.
766*/
767typedef struct
768{
769 int do_incremental; /* Write just the changed objects. */
770 int do_pretty; /* Pretty-print dictionaries and arrays. */
771 int do_ascii; /* ASCII hex encode binary streams. */
772 int do_compress; /* Compress streams. 1 zlib, 2 brotli */
773 int do_compress_images; /* Compress (or leave compressed) image streams. */
774 int do_compress_fonts; /* Compress (or leave compressed) font streams. */
775 int do_decompress; /* Decompress streams (except when compressing images/fonts). */
776 int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */
777 int do_linear; /* Write linearised. */
778 int do_clean; /* Clean content streams. */
779 int do_sanitize; /* Sanitize content streams. */
780 int do_appearance; /* (Re)create appearance streams. */
781 int do_encrypt; /* Encryption method to use: keep, none, rc4-40, etc. */
782 int dont_regenerate_id; /* Don't regenerate ID if set (used for clean) */
783 int permissions; /* Document encryption permissions. */
784 char opwd_utf8[128]; /* Owner password. */
785 char upwd_utf8[128]; /* User password. */
786 int do_snapshot; /* Do not use directly. Use the snapshot functions. */
787 int do_preserve_metadata; /* When cleaning, preserve metadata unchanged. */
788 int do_use_objstms; /* Use objstms if possible */
789 int compression_effort; /* 0 for default. 100 = max, 1 = min. */
790 int do_labels; /* Add labels to each object showing how it can be reached from the Root. */
792
794
795/*
796 Parse option string into a pdf_write_options struct.
797 Matches the command line options to 'mutool clean':
798 g: garbage collect
799 d, i, f: expand all, fonts, images
800 l: linearize
801 a: ascii hex encode
802 z: deflate
803 c: clean content streams
804 s: sanitize content streams
805*/
807
810
811/*
812 Returns true if there are digital signatures waiting to
813 be updated on save.
814*/
816
817/*
818 Write out the document to an output stream with all changes finalised.
819*/
821
822/*
823 Write out the document to a file with all changes finalised.
824*/
825void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *opts);
826
827/*
828 Snapshot the document to a file. This does not cause the
829 incremental xref to be finalized, so the document in memory
830 remains (essentially) unchanged.
831*/
832void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename);
833
834/*
835 Snapshot the document to an output stream. This does not cause
836 the incremental xref to be finalized, so the document in memory
837 remains (essentially) unchanged.
838*/
840
841char *pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts);
842
843/*
844 Return true if the document can be saved incrementally. Applying
845 redactions or having a repaired document make incremental saving
846 impossible.
847*/
849
850/*
851 Write out the journal to an output stream.
852*/
854
855/*
856 Write out the journal to a file.
857*/
858void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename);
859
860/*
861 Read a journal from a filename. Will do nothing if the journal
862 does not match. Will throw on a corrupted journal.
863*/
864void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename);
865
866/*
867 Read a journal from a stream. Will do nothing if the journal
868 does not match. Will throw on a corrupted journal.
869*/
871
872/*
873 Minimize the memory used by a document.
874
875 We walk the in memory xref tables, evicting the PDF objects
876 therein that aren't in use.
877
878 This reduces the current memory use, but any subsequent use
879 of these objects will load them back into memory again.
880*/
882
883/*
884 Map a pdf object representing a structure tag through
885 an optional role_map and convert to an fz_structure.
886*/
888
889/*
890 Run the document structure to a device.
891*/
893
894/*
895 Return the count of the associated files on a document.
896 Note, that this is the count of files associated at the document
897 level and does not necessarily include files associated at other
898 levels.
899*/
901
902/*
903 Return a borrowed pointer to the PDF object that represents a
904 given associated file on a document.
905
906 Indexed from 0 to count-1.
907*/
909
910/*
911 Return the count of the associated files on a given page.
912 Note, that this is the count of files associated at the page
913 level and does not necessarily include files associated at other
914 levels.
915*/
917
918/*
919 Return a borrowed pointer to the PDF object that represents a
920 given associated file on a page.
921
922 Indexed from 0 to count-1.
923*/
925
926
927/*
928 A structure used to create "labels" for numbered objects.
929 The labels are different ways to reach an object from the trailer
930 and page tree, using the "mutool show" syntax.
931
932 Note: Paths involving "Parent", "P", "Prev", and "Last" are ignored,
933 as these are used for cycles in the structures which we don't care about
934 labeling.
935*/
937
938/*
939 Scan the entire object structure to create a directed graph
940 of indirect numbered objects and how they can reach each other.
941*/
943
945
946/*
947 Enumerate all the possible labels for a given numbered object.
948 The callback is invoked with a path for each possible way the object
949 can be reached from the PDF trailer.
950*/
951typedef void (pdf_label_object_fn)(fz_context *ctx, void *arg, const char *label);
952void pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg);
953
954typedef enum
955{
957
958 /* A struct tree is present in the file. */
960
961 /* The struct tree is unrepairably broken. */
963
964 /* A problem was found, but was fixed. */
966
967 /* The Struct tree contains attributes. */
969
970 /* The Struct tree contains Table attributes. */
972
973 /* The Struct tree contains Table cell spanning attributes. */
975
976 /* The Struct tree contains a cycle. */
979
980/*
981 Run a validation pass over the structure tree, and attempt to repair
982 any problems found. Also returns information about the state of the
983 tree.
984
985 Returns a code with bits set as above.
986*/
988
989/*
990 Helper functions to modify what happens when a repair is kicked off.
991 Most of the time the transparent repair magic works fine, but if a repair
992 happens this can invalidate some pointers held to internal structures.
993
994 To cope with this, we allow the document to be put into a state whereby
995 any repair will trigger an exception (FZ_ERROR_REPAIRED) after any repair.
996
997 Code can therefore use this mechanism to safely catch and retry complete
998 operations if a repair occurs.
999
1000 Because this mechanism is so frequently used when altering xref_base, we
1001 build the xref_base store/restore into these functions.
1002
1003 The pattern of code is therefore as follows:
1004
1005 void pdf_do_some_operation(fz_context *ctx, pdf_document *doc, ...)
1006 {
1007 int xref_base; // Variable to store the initial xref_base value
1008 int repaired = 0;
1009
1010 retry_on_repair:
1011 pdf_start_throw_on_repair(ctx, doc, &xref_base);
1012
1013 fz_try(ctx)
1014 {
1015 // Actual operation goes here. This may involve changing
1016 // doc->xref_base. e.g. doc->xref_base = initial
1017 }
1018 fz_always(ctx)
1019 pdf_end_throw_on_repair(ctx, doc, xref_base);
1020 fz_catch(ctx)
1021 {
1022 if (fz_caught(ctx) == FZ_ERROR_REPAIRED)
1023 {
1024 fz_report_error(ctx);
1025 repaired = 1;
1026 // doc->xref_base will always have been reset to be something legal
1027 // here, but if you have been passed in an xref level to operate at
1028 // you may want to check that that level is still valid here!
1029 // e.g. if (initial >= doc->num_xref_sections) return;
1030 goto retry_on_repair;
1031 }
1032 fz_rethrow(ctx);
1033 }
1034
1035 // If we repaired, then we swallowed the exception. There may have been callers above
1036 // us that were wanting to be informed. This call takes care of that if required.
1037 if (repaired)
1038 pdf_maybe_throw_after_repair(ctx, doc);
1039 }
1040*/
1041
1042/*
1043 Prepare for an operation that can't easily be interrupted by a repair, and should
1044 instead be retried.
1045
1046 See above for example code.
1047*/
1048void pdf_start_throw_on_repair(fz_context *ctx, pdf_document *doc, int *xref_base);
1049
1050/*
1051 Mark the end of an operation that can't easily be interrupted by a repair, and
1052 should instead be retried.
1053
1054 See above for example code.
1055*/
1056void pdf_end_throw_on_repair(fz_context *ctx, pdf_document *doc, int xref_base);
1057
1058/*
1059 If a caller of ours is expecting an exception on a repair, give them one.
1060*/
1062
1063#endif
struct pdf_annot pdf_annot
Definition annot.h:32
fz_structure
Definition device.h:194
#define FZ_DATA
Definition export.h:49
fz_permission
Definition document.h:120
struct fz_hash_table fz_hash_table
Definition hash.h:44
struct pdf_journal pdf_journal
Definition object.h:30
struct pdf_obj pdf_obj
Definition object.h:44
struct pdf_crypt pdf_crypt
Definition object.h:29
void pdf_deselect_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui)
int pdf_was_repaired(fz_context *ctx, pdf_document *doc)
pdf_graft_map * pdf_new_graft_map(fz_context *ctx, pdf_document *dst)
int pdf_layer_is_enabled(fz_context *ctx, pdf_document *doc, int layer)
pdf_document * pdf_open_document(fz_context *ctx, const char *filename)
void pdf_start_throw_on_repair(fz_context *ctx, pdf_document *doc, int *xref_base)
pdf_check_structure_result
Definition document.h:955
@ PDF_STRUCT_PRESENT
Definition document.h:959
@ PDF_STRUCT_FIXED
Definition document.h:965
@ PDF_STRUCT_HAS_TABLE_ATTRIBUTES
Definition document.h:971
@ PDF_STRUCT_HAS_TABLE_SPAN_ATTRIBUTES
Definition document.h:974
@ PDF_STRUCT_NOT_PRESENT
Definition document.h:956
@ PDF_STRUCT_HAS_CYCLE
Definition document.h:977
@ PDF_STRUCT_BROKEN
Definition document.h:962
@ PDF_STRUCT_HAS_ATTRIBUTES
Definition document.h:968
void pdf_js_console_show_cb(void *user)
Definition document.h:95
void pdf_minimize_document(fz_context *ctx, pdf_document *doc)
pdf_layer_config_ui_type pdf_layer_config_ui_type_from_string(const char *str)
void pdf_js_console_drop_cb(pdf_js_console *console, void *user)
Definition document.h:89
void pdf_set_layer_config_as_default(fz_context *ctx, pdf_document *doc)
void pdf_label_object_fn(fz_context *ctx, void *arg, const char *label)
Definition document.h:951
pdf_object_labels * pdf_load_object_labels(fz_context *ctx, pdf_document *doc)
fz_outline * pdf_load_outline(fz_context *ctx, pdf_document *doc)
pdf_page_label_style
Definition document.h:747
@ PDF_PAGE_LABEL_DECIMAL
Definition document.h:749
@ PDF_PAGE_LABEL_ALPHA_UC
Definition document.h:752
@ PDF_PAGE_LABEL_ROMAN_UC
Definition document.h:750
@ PDF_PAGE_LABEL_ALPHA_LC
Definition document.h:753
@ PDF_PAGE_LABEL_ROMAN_LC
Definition document.h:751
@ PDF_PAGE_LABEL_NONE
Definition document.h:748
const char * pdf_layer_config_creator(fz_context *ctx, pdf_document *doc, int config_num)
void pdf_page_label_imp(fz_context *ctx, fz_document *doc, int chapter, int page, char *buf, size_t size)
void pdf_invalidate_xfa(fz_context *ctx, pdf_document *doc)
const char * pdf_layer_config_ui_type_to_string(pdf_layer_config_ui_type type)
pdf_obj * pdf_document_associated_file(fz_context *ctx, pdf_document *doc, int idx)
fz_structure pdf_structure_type(fz_context *ctx, pdf_obj *role_map, pdf_obj *tag)
int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc)
void pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page)
void pdf_set_document_language(fz_context *ctx, pdf_document *doc, fz_text_language lang)
int pdf_check_document(fz_context *ctx, pdf_document *doc)
void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, const pdf_write_options *opts)
pdf_document * pdf_create_document(fz_context *ctx)
struct pdf_pkcs7_signer pdf_pkcs7_signer
Definition document.h:429
int pdf_has_unsaved_changes(fz_context *ctx, pdf_document *doc)
void pdf_delete_page_labels(fz_context *ctx, pdf_document *doc, int index)
pdf_write_options * pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
void pdf_js_console_clear_cb(void *user)
Definition document.h:107
void pdf_run_document_structure(fz_context *ctx, pdf_document *doc, fz_device *dev, fz_cookie *cookie)
pdf_obj * pdf_graft_mapped_object(fz_context *ctx, pdf_graft_map *map, pdf_obj *obj)
int pdf_authenticate_password(fz_context *ctx, pdf_document *doc, const char *pw)
void pdf_graft_page(fz_context *ctx, pdf_document *dst, int page_to, pdf_document *src, int page_from)
fz_text_language pdf_document_language(fz_context *ctx, pdf_document *doc)
pdf_obj * pdf_page_associated_file(fz_context *ctx, pdf_page *page, int idx)
pdf_layer_config_ui_type
Definition document.h:381
@ PDF_LAYER_UI_LABEL
Definition document.h:382
@ PDF_LAYER_UI_CHECKBOX
Definition document.h:383
@ PDF_LAYER_UI_RADIOBOX
Definition document.h:384
struct pdf_object_labels pdf_object_labels
Definition document.h:936
const char * pdf_layer_name(fz_context *ctx, pdf_document *doc, int layer)
void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *opts)
int pdf_count_document_associated_files(fz_context *ctx, pdf_document *doc)
int pdf_lookup_metadata(fz_context *ctx, pdf_document *doc, const char *key, char *ptr, size_t size)
void pdf_set_page_labels(fz_context *ctx, pdf_document *doc, int index, pdf_page_label_style style, const char *prefix, int start)
void pdf_free_doc_event_data_cb(fz_context *ctx, void *data)
Definition document.h:80
void pdf_drop_object_labels(fz_context *ctx, pdf_object_labels *g)
@ PDF_LEXBUF_LARGE
Definition document.h:44
@ PDF_LEXBUF_SMALL
Definition document.h:43
void pdf_js_set_console(fz_context *ctx, pdf_document *doc, pdf_js_console *console, void *user)
void pdf_js_console_hide_cb(void *user)
Definition document.h:101
int pdf_count_page_associated_files(fz_context *ctx, pdf_page *page)
void pdf_enable_layer(fz_context *ctx, pdf_document *doc, int layer, int enabled)
pdf_check_structure_result pdf_check_structure_tree(fz_context *ctx, pdf_document *doc)
void pdf_graft_mapped_page(fz_context *ctx, pdf_graft_map *map, int page_to, pdf_document *src, int page_from)
void pdf_write_snapshot(fz_context *ctx, pdf_document *doc, fz_output *out)
pdf_document * pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
int pdf_count_layers(fz_context *ctx, pdf_document *doc)
fz_outline_iterator * pdf_new_outline_iterator(fz_context *ctx, pdf_document *doc)
void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end)
void pdf_label_object(fz_context *ctx, pdf_object_labels *g, int num, pdf_label_object_fn *callback, void *arg)
void pdf_layer_config_info(fz_context *ctx, pdf_document *doc, int config_num, pdf_layer_config *info)
void pdf_delete_page(fz_context *ctx, pdf_document *doc, int number)
int pdf_count_layer_config_ui(fz_context *ctx, pdf_document *doc)
FZ_DATA const pdf_write_options pdf_default_write_options
char * pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const pdf_write_options *opts)
struct pdf_ocg_descriptor pdf_ocg_descriptor
Definition document.h:34
void pdf_select_layer_config(fz_context *ctx, pdf_document *doc, int config_num)
pdf_document * pdf_specifics(fz_context *ctx, fz_document *doc)
int pdf_count_layer_configs(fz_context *ctx, pdf_document *doc)
fz_device * pdf_new_pdf_device(fz_context *ctx, pdf_document *doc, fz_matrix topctm, pdf_obj *resources, fz_buffer *contents)
pdf_document * pdf_open_document_with_stream(fz_context *ctx, fz_stream *file)
void pdf_apply_write_options(fz_context *ctx, pdf_write_options *opts, fz_options *args)
void pdf_end_throw_on_repair(fz_context *ctx, pdf_document *doc, int xref_base)
struct pdf_graft_map pdf_graft_map
Definition document.h:579
void pdf_layer_config_ui_info(fz_context *ctx, pdf_document *doc, int ui, pdf_layer_config_ui *info)
pdf_obj * pdf_graft_object(fz_context *ctx, pdf_document *dst, pdf_obj *obj)
int pdf_has_permission(fz_context *ctx, pdf_document *doc, fz_permission p)
void pdf_toggle_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui)
int pdf_needs_password(fz_context *ctx, pdf_document *doc)
void pdf_save_snapshot(fz_context *ctx, pdf_document *doc, const char *filename)
struct pdf_xref pdf_xref
Definition document.h:33
void pdf_init_write_options(fz_context *ctx, pdf_write_options *opts)
void pdf_save_journal(fz_context *ctx, pdf_document *doc, const char *filename)
void pdf_drop_document(fz_context *ctx, pdf_document *doc)
pdf_document * pdf_keep_document(fz_context *ctx, pdf_document *doc)
const char * pdf_layer_config_name(fz_context *ctx, pdf_document *doc, int config_num)
void pdf_write_journal(fz_context *ctx, pdf_document *doc, fz_output *out)
void pdf_page_label(fz_context *ctx, pdf_document *doc, int page, char *buf, size_t size)
pdf_document * fz_new_pdf_document_from_fz_document(fz_context *ctx, fz_document *ptr)
void pdf_select_layer_config_ui(fz_context *ctx, pdf_document *doc, int ui)
int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc)
struct pdf_js pdf_js
Definition document.h:38
void pdf_read_journal(fz_context *ctx, pdf_document *doc, fz_stream *stm)
pdf_page * pdf_page_from_fz_page(fz_context *ctx, fz_page *ptr)
pdf_graft_map * pdf_keep_graft_map(fz_context *ctx, pdf_graft_map *map)
void pdf_drop_graft_map(fz_context *ctx, pdf_graft_map *map)
pdf_js_console * pdf_js_get_console(fz_context *ctx, pdf_document *doc)
void pdf_doc_event_cb(fz_context *ctx, pdf_document *doc, pdf_doc_event *evt, void *data)
Definition document.h:74
void pdf_js_console_write_cb(void *user, const char *msg)
Definition document.h:113
void pdf_maybe_throw_after_repair(fz_context *ctx, pdf_document *doc)
fz_device * pdf_page_write(fz_context *ctx, pdf_document *doc, fz_rect mediabox, pdf_obj **presources, fz_buffer **pcontents)
pdf_obj * pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents)
void pdf_load_journal(fz_context *ctx, pdf_document *doc, const char *filename)
Definition buffer.h:41
Definition color.h:417
Definition context.h:886
Definition device.h:290
Definition document.h:1080
Definition font.h:776
Definition geometry.h:388
Definition options.h:207
Definition outline.h:246
Definition outline.h:141
Definition output.h:111
Definition document.h:1044
Definition geometry.h:231
Definition stream.h:320
Definition event.h:36
Definition document.h:463
int hint_object_offset
Definition document.h:520
pdf_ocg_descriptor * ocg
Definition document.h:475
fz_hash_table * images
Definition document.h:563
int save_in_progress
Definition document.h:492
int64_t linear_pos
Definition document.h:517
fz_hash_table * fonts
Definition document.h:561
int * hint_shared_ref
Definition document.h:539
pdf_journal * journal
Definition document.h:572
fz_document super
Definition document.h:464
pdf_hint_page * hint_page
Definition document.h:538
int bias
Definition document.h:471
int checked
Definition document.h:469
int * xref_index
Definition document.h:491
int disallow_new_increments
Definition document.h:483
int64_t * hint_obj_offsets
Definition document.h:542
int struct_tree_repaired
Definition document.h:504
int use_page_tree_map
Definition document.h:499
int xref_base
Definition document.h:482
int linear_page1_obj_num
Definition document.h:514
pdf_crypt * crypt
Definition document.h:474
pdf_js * js
Definition document.h:546
pdf_xref * saved_xref_sections
Definition document.h:490
int last_xref_was_old_style
Definition document.h:493
pdf_obj ** fwd_page_map
Definition document.h:498
int version
Definition document.h:468
int struct_tree_result
Definition document.h:505
fz_font ** type3_fonts
Definition document.h:558
int non_structural_change
Definition document.h:503
int resynth_required
Definition document.h:550
int64_t file_size
Definition document.h:473
int hint_object_length
Definition document.h:521
int orphans_max
Definition document.h:566
int hints_loaded
Definition document.h:522
int num_type3_fonts
Definition document.h:556
int has_linearization_object
Definition document.h:494
pdf_obj * linear_obj
Definition document.h:512
pdf_hint_shared * hint_shared
Definition document.h:540
int file_reading_linearly
Definition document.h:508
int linear_page_num
Definition document.h:518
int max_xref_len
Definition document.h:478
pdf_rev_page_map * rev_page_map
Definition document.h:497
fz_colorspace * oi
Definition document.h:476
pdf_xref * local_xref
Definition document.h:486
fz_xml_doc * xfa
Definition document.h:570
pdf_obj ** linear_page_refs
Definition document.h:513
pdf_free_doc_event_data_cb * free_event_data_cb
Definition document.h:553
pdf_xref * xref_sections
Definition document.h:489
pdf_doc_event_cb * event_cb
Definition document.h:552
int64_t startxref
Definition document.h:472
struct pdf_document::@250177372312361340073023357134025002031130354233 resources
int saved_num_xref_sections
Definition document.h:480
fz_stream * file
Definition document.h:466
int is_fdf
Definition document.h:470
int local_xref_nesting
Definition document.h:487
fz_hash_table * colorspaces
Definition document.h:562
void * event_cb_data
Definition document.h:554
int max_type3_fonts
Definition document.h:557
int linear_page_count
Definition document.h:511
int orphans_count
Definition document.h:567
int repair_attempted
Definition document.h:501
int64_t file_length
Definition document.h:509
int recalculate
Definition document.h:548
int num_incremental_sections
Definition document.h:481
int num_xref_sections
Definition document.h:479
pdf_obj ** orphans
Definition document.h:568
int throw_on_repair
Definition document.h:574
int hint_obj_offsets_max
Definition document.h:541
int map_page_count
Definition document.h:496
int repair_in_progress
Definition document.h:502
pdf_lexbuf_large lexbuf
Definition document.h:544
int redacted
Definition document.h:549
Definition document.h:450
int64_t index
Definition document.h:453
int64_t offset
Definition document.h:452
int number
Definition document.h:451
Definition document.h:457
int number
Definition document.h:458
int64_t offset
Definition document.h:459
Definition document.h:118
pdf_js_console_clear_cb * clear
Definition document.h:122
pdf_js_console_drop_cb * drop
Definition document.h:119
pdf_js_console_show_cb * show
Definition document.h:120
pdf_js_console_hide_cb * hide
Definition document.h:121
pdf_js_console_write_cb * write
Definition document.h:123
Definition document.h:391
int locked
Definition document.h:396
const char * text
Definition document.h:392
int depth
Definition document.h:393
pdf_layer_config_ui_type type
Definition document.h:394
int selected
Definition document.h:395
Definition document.h:284
const char * name
Definition document.h:285
const char * creator
Definition document.h:286
Definition document.h:59
pdf_lexbuf base
Definition document.h:60
char buffer[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]
Definition document.h:61
Definition document.h:48
int64_t i
Definition document.h:52
size_t size
Definition document.h:49
char buffer[PDF_LEXBUF_SMALL]
Definition document.h:55
float f
Definition document.h:53
size_t len
Definition document.h:51
char * scratch
Definition document.h:54
size_t base_size
Definition document.h:50
Definition page.h:320
Definition form.h:230
Definition document.h:444
int page
Definition document.h:445
int object
Definition document.h:446
Definition document.h:433
pdf_obj * field
Definition document.h:434
size_t byte_range_end
Definition document.h:436
size_t contents_start
Definition document.h:437
struct pdf_unsaved_sig * next
Definition document.h:440
pdf_pkcs7_signer * signer
Definition document.h:439
size_t contents_end
Definition document.h:438
size_t byte_range_start
Definition document.h:435
Definition document.h:768
int dont_regenerate_id
Definition document.h:782
int do_decompress
Definition document.h:775
int do_linear
Definition document.h:777
int do_appearance
Definition document.h:780
int do_clean
Definition document.h:778
int do_sanitize
Definition document.h:779
int do_compress_fonts
Definition document.h:774
int do_use_objstms
Definition document.h:788
int do_preserve_metadata
Definition document.h:787
int compression_effort
Definition document.h:789
int do_encrypt
Definition document.h:781
int permissions
Definition document.h:783
int do_garbage
Definition document.h:776
int do_labels
Definition document.h:790
int do_compress_images
Definition document.h:773
int do_snapshot
Definition document.h:786
int do_pretty
Definition document.h:770
int do_ascii
Definition document.h:771
char upwd_utf8[128]
Definition document.h:785
char opwd_utf8[128]
Definition document.h:784
int do_incremental
Definition document.h:769
int do_compress
Definition document.h:772
Definition xref.h:86
fz_text_language
Definition text.h:57
fz_xml fz_xml_doc
Definition xml.h:39