mirror of
https://github.com/jmcnamara/libxlsxwriter
synced 2025-03-28 21:13:14 +00:00
Initial image duplicate removal. Same worksheet only.
This commit is contained in:
parent
b0738e2ba9
commit
a4f9e5bad0
1
.indent.pro
vendored
1
.indent.pro
vendored
@ -104,6 +104,7 @@
|
||||
-T lxw_hash_table
|
||||
-T lxw_header_footer_options
|
||||
-T lxw_heading_pair
|
||||
-T lxw_image_md5
|
||||
-T lxw_image_options
|
||||
-T lxw_merged_range
|
||||
-T lxw_object_properties
|
||||
|
@ -11,6 +11,7 @@ env:
|
||||
- NO_VALGRIND=1 CFLAGS='-Werror -m32'
|
||||
- NO_VALGRIND=1 USE_SYSTEM_MINIZIP=1 CFLAGS='-Werror'
|
||||
- NO_VALGRIND=1 USE_DOUBLE_FUNCTION=1 CFLAGS='-Werror'
|
||||
- NO_VALGRIND=1 USE_NO_MD5=1 CFLAGS='-Werror'
|
||||
|
||||
install:
|
||||
- sudo pip install pytest
|
||||
|
@ -175,6 +175,9 @@ enum lxw_custom_property_types {
|
||||
LXW_CUSTOM_DATETIME
|
||||
};
|
||||
|
||||
/* Size of MD5 byte arrays. */
|
||||
#define LXW_MD5_SIZE 16
|
||||
|
||||
/* Excel sheetname max of 31 chars. */
|
||||
#define LXW_SHEETNAME_MAX 31
|
||||
|
||||
|
@ -57,6 +57,7 @@
|
||||
/* Define the tree.h RB structs for the red-black head types. */
|
||||
RB_HEAD(lxw_worksheet_names, lxw_worksheet_name);
|
||||
RB_HEAD(lxw_chartsheet_names, lxw_chartsheet_name);
|
||||
RB_HEAD(lxw_image_md5s, lxw_image_md5);
|
||||
|
||||
/* Define the queue.h structs for the workbook lists. */
|
||||
STAILQ_HEAD(lxw_sheets, lxw_sheet);
|
||||
@ -93,6 +94,14 @@ typedef struct lxw_chartsheet_name {
|
||||
RB_ENTRY (lxw_chartsheet_name) tree_pointers;
|
||||
} lxw_chartsheet_name;
|
||||
|
||||
/* Struct to represent an image MD5/ID pair. */
|
||||
typedef struct lxw_image_md5 {
|
||||
uint32_t id;
|
||||
unsigned char md5[LXW_MD5_SIZE];
|
||||
|
||||
RB_ENTRY (lxw_image_md5) tree_pointers;
|
||||
} lxw_image_md5;
|
||||
|
||||
/* Wrapper around RB_GENERATE_STATIC from tree.h to avoid unused function
|
||||
* warnings and to avoid portability issues with the _unused attribute. */
|
||||
#define LXW_RB_GENERATE_WORKSHEET_NAMES(name, type, field, cmp) \
|
||||
@ -117,6 +126,17 @@ typedef struct lxw_chartsheet_name {
|
||||
/* Add unused struct to allow adding a semicolon */ \
|
||||
struct lxw_rb_generate_charsheet_names{int unused;}
|
||||
|
||||
/*
 * Generate the red-black tree functions for the image MD5 tree from the
 * individual tree.h RB_GENERATE_* macros, passing `static` as the attribute
 * argument of each one. The trailing dummy struct declaration exists only so
 * that an invocation of this macro can be terminated with a semicolon.
 */
#define LXW_RB_GENERATE_IMAGE_MD5S(name, type, field, cmp)  \
    RB_GENERATE_INSERT_COLOR(name, type, field, static)     \
    RB_GENERATE_REMOVE_COLOR(name, type, field, static)     \
    RB_GENERATE_INSERT(name, type, field, cmp, static)      \
    RB_GENERATE_REMOVE(name, type, field, static)           \
    RB_GENERATE_FIND(name, type, field, cmp, static)        \
    RB_GENERATE_NEXT(name, type, field, static)             \
    RB_GENERATE_MINMAX(name, type, field, static)           \
    struct lxw_rb_generate_image_md5s{int unused;}
|
||||
|
||||
/**
|
||||
* @brief Macro to loop over all the worksheets in a workbook.
|
||||
*
|
||||
@ -258,6 +278,7 @@ typedef struct lxw_workbook {
|
||||
struct lxw_chartsheets *chartsheets;
|
||||
struct lxw_worksheet_names *worksheet_names;
|
||||
struct lxw_chartsheet_names *chartsheet_names;
|
||||
struct lxw_image_md5s *image_md5s;
|
||||
struct lxw_charts *charts;
|
||||
struct lxw_charts *ordered_charts;
|
||||
struct lxw_formats *formats;
|
||||
|
@ -62,6 +62,7 @@
|
||||
#define LXW_HEADER_FOOTER_MAX 255
|
||||
#define LXW_MAX_NUMBER_URLS 65530
|
||||
#define LXW_PANE_NAME_LENGTH 12 /* bottomRight + 1 */
|
||||
/* Size in bytes of the chunk buffer used when reading an image file to
 * compute its MD5 digest. The digest does not depend on the chunk size; a
 * larger buffer only reduces the number of read/update iterations. */
#define LXW_IMAGE_BUFFER_SIZE 1024
|
||||
|
||||
/* The Excel 2007 specification says that the maximum number of page
|
||||
* breaks is 1026. However, in practice it is actually 1023. */
|
||||
@ -639,6 +640,8 @@ typedef struct lxw_object_properties {
|
||||
double x_dpi;
|
||||
double y_dpi;
|
||||
lxw_chart *chart;
|
||||
uint8_t is_duplicate;
|
||||
unsigned char md5[LXW_MD5_SIZE];
|
||||
|
||||
STAILQ_ENTRY (lxw_object_properties) list_pointers;
|
||||
} lxw_object_properties;
|
||||
|
@ -265,6 +265,9 @@ _write_image_files(lxw_packager *self)
|
||||
|
||||
STAILQ_FOREACH(object_props, worksheet->image_props, list_pointers) {
|
||||
|
||||
if (object_props->is_duplicate)
|
||||
continue;
|
||||
|
||||
lxw_snprintf(filename, LXW_FILENAME_LENGTH,
|
||||
"xl/media/image%d.%s", index++,
|
||||
object_props->extension);
|
||||
|
@ -17,11 +17,15 @@ STATIC int _worksheet_name_cmp(lxw_worksheet_name *name1,
|
||||
lxw_worksheet_name *name2);
|
||||
STATIC int _chartsheet_name_cmp(lxw_chartsheet_name *name1,
|
||||
lxw_chartsheet_name *name2);
|
||||
STATIC int _image_md5_cmp(lxw_image_md5 *tuple1, lxw_image_md5 *tuple2);
|
||||
|
||||
#ifndef __clang_analyzer__
|
||||
LXW_RB_GENERATE_WORKSHEET_NAMES(lxw_worksheet_names, lxw_worksheet_name,
|
||||
tree_pointers, _worksheet_name_cmp);
|
||||
LXW_RB_GENERATE_CHARTSHEET_NAMES(lxw_chartsheet_names, lxw_chartsheet_name,
|
||||
tree_pointers, _chartsheet_name_cmp);
|
||||
LXW_RB_GENERATE_IMAGE_MD5S(lxw_image_md5s, lxw_image_md5,
|
||||
tree_pointers, _image_md5_cmp);
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -49,6 +53,12 @@ _chartsheet_name_cmp(lxw_chartsheet_name *name1, lxw_chartsheet_name *name2)
|
||||
return lxw_strcasecmp(name1->name, name2->name);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
_image_md5_cmp(lxw_image_md5 *tuple1, lxw_image_md5 *tuple2)
|
||||
{
|
||||
return memcmp(tuple1->md5, tuple2->md5, LXW_MD5_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free workbook properties.
|
||||
*/
|
||||
@ -97,6 +107,8 @@ lxw_workbook_free(lxw_workbook *workbook)
|
||||
struct lxw_worksheet_name *next_worksheet_name;
|
||||
struct lxw_chartsheet_name *chartsheet_name;
|
||||
struct lxw_chartsheet_name *next_chartsheet_name;
|
||||
struct lxw_image_md5 *image_md5;
|
||||
struct lxw_image_md5 *next_image_md5;
|
||||
lxw_chart *chart;
|
||||
lxw_format *format;
|
||||
lxw_defined_name *defined_name;
|
||||
@ -204,6 +216,19 @@ lxw_workbook_free(lxw_workbook *workbook)
|
||||
free(workbook->chartsheet_names);
|
||||
}
|
||||
|
||||
/* Free the image MD5 tree: every node first, then the tree head itself. */
if (workbook->image_md5s) {
    for (image_md5 = RB_MIN(lxw_image_md5s, workbook->image_md5s);
         image_md5; image_md5 = next_image_md5) {

        /* Look up the successor before the current node is unlinked and
         * freed. The tree head argument is workbook->image_md5s; the
         * workbook struct has no "image_md5" member. */
        next_image_md5 =
            RB_NEXT(lxw_image_md5s, workbook->image_md5s, image_md5);
        RB_REMOVE(lxw_image_md5s, workbook->image_md5s, image_md5);
        free(image_md5);
    }

    free(workbook->image_md5s);
}
|
||||
|
||||
lxw_hash_free(workbook->used_xf_formats);
|
||||
lxw_sst_free(workbook->sst);
|
||||
free(workbook->options.tmpdir);
|
||||
@ -895,8 +920,12 @@ _prepare_drawings(lxw_workbook *self)
|
||||
lxw_object_properties *object_props;
|
||||
uint32_t chart_ref_id = 0;
|
||||
uint32_t image_ref_id = 0;
|
||||
uint32_t ref_id = 0;
|
||||
uint32_t drawing_id = 0;
|
||||
uint8_t is_chartsheet;
|
||||
lxw_image_md5 tmp_image_md5;
|
||||
lxw_image_md5 *new_image_md5 = NULL;
|
||||
lxw_image_md5 *found;
|
||||
|
||||
STAILQ_FOREACH(sheet, self->sheets, list_pointers) {
|
||||
if (sheet->is_chartsheet) {
|
||||
@ -925,9 +954,32 @@ _prepare_drawings(lxw_workbook *self)
|
||||
if (object_props->image_type == LXW_IMAGE_BMP)
|
||||
self->has_bmp = LXW_TRUE;
|
||||
|
||||
image_ref_id++;
|
||||
memcpy(tmp_image_md5.md5, object_props->md5, LXW_MD5_SIZE);
|
||||
|
||||
lxw_worksheet_prepare_image(worksheet, image_ref_id, drawing_id,
|
||||
found = RB_FIND(lxw_image_md5s, self->image_md5s, &tmp_image_md5);
|
||||
|
||||
if (found) {
|
||||
ref_id = found->id;
|
||||
object_props->is_duplicate = LXW_TRUE;
|
||||
}
|
||||
else {
|
||||
image_ref_id++;
|
||||
ref_id = image_ref_id;
|
||||
|
||||
#ifndef USE_NO_MD5
|
||||
new_image_md5 = calloc(1, sizeof(lxw_image_md5));
|
||||
#endif
|
||||
if (new_image_md5) {
|
||||
new_image_md5->id = ref_id;
|
||||
memcpy(new_image_md5->md5, object_props->md5,
|
||||
LXW_MD5_SIZE);
|
||||
|
||||
RB_INSERT(lxw_image_md5s, self->image_md5s,
|
||||
new_image_md5);
|
||||
}
|
||||
}
|
||||
|
||||
lxw_worksheet_prepare_image(worksheet, ref_id, drawing_id,
|
||||
object_props);
|
||||
}
|
||||
|
||||
@ -1438,6 +1490,11 @@ workbook_new_opt(const char *filename, lxw_workbook_options *options)
|
||||
GOTO_LABEL_ON_MEM_ERROR(workbook->chartsheet_names, mem_error);
|
||||
RB_INIT(workbook->chartsheet_names);
|
||||
|
||||
/* Add the image MD5 tree. */
|
||||
workbook->image_md5s = calloc(1, sizeof(struct lxw_image_md5s));
|
||||
GOTO_LABEL_ON_MEM_ERROR(workbook->image_md5s, mem_error);
|
||||
RB_INIT(workbook->image_md5s);
|
||||
|
||||
/* Add the charts list. */
|
||||
workbook->charts = calloc(1, sizeof(struct lxw_charts));
|
||||
GOTO_LABEL_ON_MEM_ERROR(workbook->charts, mem_error);
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "xlsxwriter/format.h"
|
||||
#include "xlsxwriter/utility.h"
|
||||
#include "xlsxwriter/relationships.h"
|
||||
#include "xlsxwriter/third_party/md5.h"
|
||||
|
||||
#define LXW_STR_MAX 32767
|
||||
#define LXW_BUFFER_SIZE 4096
|
||||
@ -2541,6 +2542,11 @@ STATIC lxw_error
|
||||
_get_image_properties(lxw_object_properties *image_props)
|
||||
{
|
||||
unsigned char signature[4];
|
||||
#ifndef USE_NO_MD5
|
||||
MD5_CTX context;
|
||||
size_t size_read;
|
||||
char buffer[LXW_IMAGE_BUFFER_SIZE];
|
||||
#endif
|
||||
|
||||
/* Read 4 bytes to look for the file header/signature. */
|
||||
if (fread(signature, 1, 4, image_props->stream) < 4) {
|
||||
@ -2569,6 +2575,20 @@ _get_image_properties(lxw_object_properties *image_props)
|
||||
return LXW_ERROR_IMAGE_DIMENSIONS;
|
||||
}
|
||||
|
||||
#ifndef USE_NO_MD5
|
||||
rewind(image_props->stream);
|
||||
MD5_Init(&context);
|
||||
size_read = fread(buffer, 1, LXW_IMAGE_BUFFER_SIZE, image_props->stream);
|
||||
|
||||
while (size_read) {
|
||||
MD5_Update(&context, buffer, size_read);
|
||||
size_read =
|
||||
fread(buffer, 1, LXW_IMAGE_BUFFER_SIZE, image_props->stream);
|
||||
}
|
||||
|
||||
MD5_Final(image_props->md5, &context);
|
||||
#endif
|
||||
|
||||
return LXW_NO_ERROR;
|
||||
}
|
||||
|
||||
|
22
test/functional/src/test_image48.c
Normal file
22
test/functional/src/test_image48.c
Normal file
@ -0,0 +1,22 @@
|
||||
/*****************************************************************************
|
||||
* Test cases for libxlsxwriter.
|
||||
*
|
||||
* Test to compare output against Excel files.
|
||||
*
|
||||
* Copyright 2014-2019, John McNamara, jmcnamara@cpan.org
|
||||
*
|
||||
*/
|
||||
|
||||
#include "xlsxwriter.h"
|
||||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = workbook_new("test_image48.xlsx");
|
||||
lxw_worksheet *worksheet1 = workbook_add_worksheet(workbook, NULL);
|
||||
lxw_worksheet *worksheet2 = workbook_add_worksheet(workbook, NULL);
|
||||
|
||||
worksheet_insert_image(worksheet1, CELL("E9"), "images/red.png");
|
||||
worksheet_insert_image(worksheet2, CELL("E9"), "images/red.png");
|
||||
|
||||
return workbook_close(workbook);
|
||||
}
|
35
test/functional/src/test_image49.c
Normal file
35
test/functional/src/test_image49.c
Normal file
@ -0,0 +1,35 @@
|
||||
/*****************************************************************************
|
||||
* Test cases for libxlsxwriter.
|
||||
*
|
||||
* Test to compare output against Excel files.
|
||||
*
|
||||
* Copyright 2014-2019, John McNamara, jmcnamara@cpan.org
|
||||
*
|
||||
*/
|
||||
|
||||
#include "xlsxwriter.h"
|
||||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = workbook_new("test_image49.xlsx");
|
||||
lxw_worksheet *worksheet1 = workbook_add_worksheet(workbook, NULL);
|
||||
lxw_worksheet *worksheet2 = workbook_add_worksheet(workbook, NULL);
|
||||
lxw_worksheet *worksheet3 = workbook_add_worksheet(workbook, NULL);
|
||||
|
||||
worksheet_insert_image(worksheet1, CELL("A1"), "images/blue.png");
|
||||
worksheet_insert_image(worksheet1, CELL("B3"), "images/red.jpg");
|
||||
worksheet_insert_image(worksheet1, CELL("D5"), "images/yellow.jpg");
|
||||
worksheet_insert_image(worksheet1, CELL("F9"), "images/grey.png");
|
||||
|
||||
worksheet_insert_image(worksheet2, CELL("A1"), "images/blue.png");
|
||||
worksheet_insert_image(worksheet2, CELL("B3"), "images/red.jpg");
|
||||
worksheet_insert_image(worksheet2, CELL("D5"), "images/yellow.jpg");
|
||||
worksheet_insert_image(worksheet2, CELL("F9"), "images/grey.png");
|
||||
|
||||
worksheet_insert_image(worksheet3, CELL("A1"), "images/blue.png");
|
||||
worksheet_insert_image(worksheet3, CELL("B3"), "images/red.jpg");
|
||||
worksheet_insert_image(worksheet3, CELL("D5"), "images/yellow.jpg");
|
||||
worksheet_insert_image(worksheet3, CELL("F9"), "images/grey.png");
|
||||
|
||||
return workbook_close(workbook);
|
||||
}
|
@ -5,6 +5,8 @@
|
||||
# Copyright 2014-2019, John McNamara, jmcnamara@cpan.org
|
||||
#
|
||||
|
||||
import os
|
||||
import pytest
|
||||
import base_test_class
|
||||
|
||||
class TestCompareXLSXFiles(base_test_class.XLSXBaseTest):
|
||||
@ -125,6 +127,14 @@ class TestCompareXLSXFiles(base_test_class.XLSXBaseTest):
|
||||
def test_image47(self):
|
||||
self.run_exe_test('test_image47')
|
||||
|
||||
# Skipped when the USE_NO_MD5 environment variable is set to a non-empty
# value. The value is converted with bool() so that skipif receives a
# boolean: a bare string condition is evaluated by pytest as a Python
# expression, which raises for values such as "yes".
# NOTE(review): any non-empty value, including "0", now skips the test;
# confirm this matches how the build treats USE_NO_MD5.
@pytest.mark.skipif(bool(os.environ.get('USE_NO_MD5')), reason="compiled without MD5 support")
def test_image48(self):
    self.run_exe_test('test_image48')
|
||||
|
||||
# Skipped when the USE_NO_MD5 environment variable is set to a non-empty
# value. The value is converted with bool() so that skipif receives a
# boolean: a bare string condition is evaluated by pytest as a Python
# expression, which raises for values such as "yes".
# NOTE(review): any non-empty value, including "0", now skips the test;
# confirm this matches how the build treats USE_NO_MD5.
@pytest.mark.skipif(bool(os.environ.get('USE_NO_MD5')), reason="compiled without MD5 support")
def test_image49(self):
    self.run_exe_test('test_image49')
|
||||
|
||||
# Test in-memory image handling.
|
||||
def test_image81(self):
|
||||
self.run_exe_test('test_image81', 'image01.xlsx')
|
||||
|
BIN
test/functional/xlsx_files/image48.xlsx
Normal file
BIN
test/functional/xlsx_files/image48.xlsx
Normal file
Binary file not shown.
BIN
test/functional/xlsx_files/image49.xlsx
Normal file
BIN
test/functional/xlsx_files/image49.xlsx
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user