From f2eecd9be967d8c13b83442830064576ec4e7e4d Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Sat, 14 Mar 2020 10:15:11 +0100 Subject: [PATCH 1/3] Catch ValueError when calling `name2unicode` when a unicode value cannot be parsed --- pdfminer/encodingdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py index 26b0a05b..58998a90 100644 --- a/pdfminer/encodingdb.py +++ b/pdfminer/encodingdb.py @@ -106,7 +106,7 @@ def get_encoding(cls, name, diff=None): elif isinstance(x, PSLiteral): try: cid2unicode[cid] = name2unicode(x.name) - except KeyError as e: + except (KeyError, ValueError) as e: log.debug(str(e)) cid += 1 return cid2unicode From d1e8a3bf7955ac65577242f69fc198be6c578bc7 Mon Sep 17 00:00:00 2001 From: Pieter Marsman Date: Sat, 14 Mar 2020 10:23:30 +0100 Subject: [PATCH 2/3] Add test for catching ValueError and KeyError when font encoding differences are invalid --- tests/test_encodingdb.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_encodingdb.py b/tests/test_encodingdb.py index 2b8d41f1..bd71d783 100644 --- a/tests/test_encodingdb.py +++ b/tests/test_encodingdb.py @@ -6,7 +6,8 @@ """ from nose.tools import assert_raises -from pdfminer.encodingdb import name2unicode +from pdfminer.encodingdb import name2unicode, EncodingDB +from pdfminer.psparser import PSLiteral def test_name2unicode_name_in_agl(): @@ -145,3 +146,12 @@ def test_name2unicode_pua_ogoneksmall(): def test_name2unicode_overflow_error(): assert_raises(KeyError, name2unicode, '226215240241240240240240') + + +def test_get_encoding_with_invalid_differences(): + """Invalid differences should be silently ignored + + Regression test for https://github.com/pdfminer/pdfminer.six/issues/385 + """ + invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')] + EncodingDB.get_encoding('StandardEncoding', invalid_differences) From 66b7dc0fe45a86afc9bbc283685e11c93917d6b0 Mon Sep 17 00:00:00 
2001 From: Pieter Marsman Date: Sat, 14 Mar 2020 10:28:09 +0100 Subject: [PATCH 3/3] Added line to CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4cf496c..72d64aca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] -Nothing +- Also ignore ValueErrors when converting font encoding differences ([#389](https://github.com/pdfminer/pdfminer.six/pull/389)) ## [20200124] - 2020-01-24