From dc3628eaa1e1549f77e5c04e35c743d6a5b07b0f Mon Sep 17 00:00:00 2001
From: "Salvador E. Tropea" <salvador@inti.gob.ar>
Date: Thu, 28 Sep 2023 11:18:49 -0300
Subject: [PATCH] [BoM][Added] `parse_value` can be used to disable the *Value*
 parser

- Better explanation about what affects group_fields
- Added warning about extra information in the value

See #494
---
 CHANGELOG.md                              |  2 ++
 docs/samples/generic_plot.kibot.yaml      | 10 ++++++++++
 docs/source/configuration/outputs/bom.rst |  9 +++++++++
 kibot/bom/bom.py                          |  4 ++--
 kibot/bom/units.py                        | 17 +++++++++++++++--
 kibot/misc.py                             |  1 +
 kibot/out_bom.py                          | 10 ++++++++++
 7 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4c36345..4b1bde5b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - BoM:
   - Support for ${field} expansion. (#471)
   - LCSC links (SchrodingersGat/KiBoM#190)
+  - `parse_value` can be used to disable the *Value* parser. (See #494)
+    Also added a warning about using extra data in the *Value* field.
 - iBoM:
   - `forced_name` option to force the name displayed at the top left corner
     (#470)
diff --git a/docs/samples/generic_plot.kibot.yaml b/docs/samples/generic_plot.kibot.yaml
index 17e9c7a0..57caba80 100644
--- a/docs/samples/generic_plot.kibot.yaml
+++ b/docs/samples/generic_plot.kibot.yaml
@@ -447,6 +447,11 @@ outputs:
       # [list(string)] List of fields used for sorting individual components into groups.
       # Components which match (comparing *all* fields) will be grouped together.
       # Field names are case-insensitive.
+      # For empty fields the behavior is defined by the `group_fields_fallbacks`, `merge_blank_fields` and
+      # `merge_both_blank` options.
+      # Note that for resistors, capacitors and inductors the *Value* field is parsed and qualifiers, like
+      # tolerance, are discarded. Please use a separate field and disable `merge_blank_fields` if this
+      # information is important. You can also disable `parse_value`.
       # If empty: ['Part', 'Part Lib', 'Value', 'Footprint', 'Footprint Lib',
       #            'Voltage', 'Tolerance', 'Current', 'Power'] is used
       group_fields: ['part', 'part lib', 'value', 'footprint', 'footprint lib', 'voltage', 'tolerance', 'current', 'power']
@@ -520,6 +525,11 @@ outputs:
       number: 1
       # [string='%f-%i%I%v.%x'] filename for the output (%i=bom). Affected by global options
       output: '%f-%i%I%v.%x'
+      # [boolean=true] Parse the `Value` field so things like *1k* and *1000* are interpreted as equal.
+      # Note that this implies that *1k 1%* is the same as *1k 5%*. If you really need to group using the
+      # extra information, split it into separate fields, add the fields to `group_fields` and disable
+      # `merge_blank_fields`
+      parse_value: true
       # [string|list(string)='_none'] Name of the filter to transform fields before applying other filters.
       # This option is for simple cases, consider using a full variant for complex cases
       pre_transform: '_none'
diff --git a/docs/source/configuration/outputs/bom.rst b/docs/source/configuration/outputs/bom.rst
index bd705852..aef86554 100644
--- a/docs/source/configuration/outputs/bom.rst
+++ b/docs/source/configuration/outputs/bom.rst
@@ -75,6 +75,11 @@ Parameters:
       -  **group_fields** :index:`: <pair: output - bom - options; group_fields>` [list(string)] List of fields used for sorting individual components into groups.
          Components which match (comparing *all* fields) will be grouped together.
          Field names are case-insensitive.
+         For empty fields the behavior is defined by the `group_fields_fallbacks`, `merge_blank_fields` and
+         `merge_both_blank` options.
+         Note that for resistors, capacitors and inductors the *Value* field is parsed and qualifiers, like
+         tolerance, are discarded. Please use a separate field and disable `merge_blank_fields` if this
+         information is important. You can also disable `parse_value`.
          If empty: ['Part', 'Part Lib', 'Value', 'Footprint', 'Footprint Lib',
          'Voltage', 'Tolerance', 'Current', 'Power'] is used.
 
@@ -287,6 +292,10 @@ Parameters:
       -  ``no_distributors`` :index:`: <pair: output - bom - options; no_distributors>` [string|list(string)] Exclude this distributors list. They are removed after computing `distributors`.
 
       -  ``normalize_locale`` :index:`: <pair: output - bom - options; normalize_locale>` [boolean=false] When normalizing values use the locale decimal point.
+      -  ``parse_value`` :index:`: <pair: output - bom - options; parse_value>` [boolean=true] Parse the `Value` field so things like *1k* and *1000* are interpreted as equal.
+         Note that this implies that *1k 1%* is the same as *1k 5%*. If you really need to group using the
+         extra information, split it into separate fields, add the fields to `group_fields` and disable
+         `merge_blank_fields`.
       -  ``pre_transform`` :index:`: <pair: output - bom - options; pre_transform>` [string|list(string)='_none'] Name of the filter to transform fields before applying other filters.
          This option is for simple cases, consider using a full variant for complex cases.
 
diff --git a/kibot/bom/bom.py b/kibot/bom/bom.py
index 00f56281..54e96221 100644
--- a/kibot/bom/bom.py
+++ b/kibot/bom/bom.py
@@ -431,8 +431,8 @@ def group_components(cfg, components):
         if not c.included:  # Skip components marked as excluded from BoM
             continue
         # Cache the value used to sort
-        if c.ref_prefix in RLC_PREFIX and c.value.lower() not in DNF:
-            c.value_sort = comp_match(c.value, c.ref_prefix, c.ref)
+        if cfg.parse_value and c.ref_prefix in RLC_PREFIX and c.value.lower() not in DNF:
+            c.value_sort = comp_match(c.value, c.ref_prefix, c.ref, warn_extra=True)
         else:
             c.value_sort = None
         # Try to add the component to an existing group
diff --git a/kibot/bom/units.py b/kibot/bom/units.py
index a761c428..bc98dab3 100644
--- a/kibot/bom/units.py
+++ b/kibot/bom/units.py
@@ -18,7 +18,7 @@ import re
 import locale
 from math import log10
 from .. import log
-from ..misc import W_BADVAL1, W_BADVAL2, W_BADVAL3, W_BADVAL4
+from ..misc import W_BADVAL1, W_BADVAL2, W_BADVAL3, W_BADVAL4, W_EXTRAINVAL
 from .electro_grammar import parse
 
 logger = log.get_logger()
@@ -53,6 +53,8 @@ match = None
 decimal_point = None
 # Parser cache
 parser_cache = {}
+# Flag to indicate we already warned about extra data
+warn_extra_issued = False
 
 
 def get_decima_point():
@@ -163,7 +165,16 @@ def value_from_grammar(r):
     return parsed
 
 
-def comp_match(component, ref_prefix, ref=None, relax_severity=False, stronger=False):
+def check_extra_data(r, v):
+    """ Warn, only the first time, when the parsed value `r` contains extra data. `v` is the text shown in the message """
+    global warn_extra_issued
+    extra_keys = ('tolerance', 'characteristic', 'voltage_rating', 'power_rating', 'size')
+    if not warn_extra_issued and any(key in r for key in extra_keys):
+        logger.warning(W_EXTRAINVAL+f'Avoid adding extra information in the component value, use separated fields ({v})')
+        warn_extra_issued = True
+
+
+def comp_match(component, ref_prefix, ref=None, relax_severity=False, stronger=False, warn_extra=False):
     """
     Return a normalized value and units for a given component value string
     Also tries to separate extra data, i.e. tolerance, using a complex parser
@@ -216,6 +227,8 @@ def comp_match(component, ref_prefix, ref=None, relax_severity=False, stronger=F
         # Failed with the regex, try with the parser
         result = parse(ref_prefix[0]+' '+with_commas, with_extra=True, stronger=stronger)
         if result:
+            if warn_extra:
+                check_extra_data(result, original)
             result = value_from_grammar(result)
             if result and result.get_extra('discarded'):
                 discarded = " ".join(list(map(lambda x: '`'+x+'`', result.get_extra('discarded'))))
diff --git a/kibot/misc.py b/kibot/misc.py
index 53c83610..afee7ce1 100644
--- a/kibot/misc.py
+++ b/kibot/misc.py
@@ -285,6 +285,7 @@ W_FLDCOLLISION = '(W129) '
 W_NEWGROUP = '(W130) '
 W_NOTINBOM = '(W131) '
 W_MISSDIR = '(W132) '
+W_EXTRAINVAL = '(W133) '
 # Somehow arbitrary, the colors are real, but can be different
 PCB_MAT_COLORS = {'fr1': "937042", 'fr2': "949d70", 'fr3': "adacb4", 'fr4': "332B16", 'fr5': "6cc290"}
 PCB_FINISH_COLORS = {'hal': "8b898c", 'hasl': "8b898c", 'imag': "8b898c", 'enig': "cfb96e", 'enepig': "cfb96e",
diff --git a/kibot/out_bom.py b/kibot/out_bom.py
index dc1e59be..22c34cbf 100644
--- a/kibot/out_bom.py
+++ b/kibot/out_bom.py
@@ -507,6 +507,11 @@ class BoMOptions(BaseOptions):
             """ *[list(string)] List of fields used for sorting individual components into groups.
                 Components which match (comparing *all* fields) will be grouped together.
                 Field names are case-insensitive.
+                For empty fields the behavior is defined by the `group_fields_fallbacks`, `merge_blank_fields` and
+                `merge_both_blank` options.
+                Note that for resistors, capacitors and inductors the *Value* field is parsed and qualifiers, like
+                tolerance, are discarded. Please use a separate field and disable `merge_blank_fields` if this
+                information is important. You can also disable `parse_value`.
                 If empty: ['Part', 'Part Lib', 'Value', 'Footprint', 'Footprint Lib',
                 .          'Voltage', 'Tolerance', 'Current', 'Power'] is used """
             self.group_fields_fallbacks = Optionable
@@ -523,6 +528,11 @@ class BoMOptions(BaseOptions):
                 - ['sw', 'switch']
                 - ['zener', 'zenersmall']
                 - ['d', 'diode', 'd_small'] """
+            self.parse_value = True
+            """ Parse the `Value` field so things like *1k* and *1000* are interpreted as equal.
+                Note that this implies that *1k 1%* is the same as *1k 5%*. If you really need to group using the
+                extra information, split it into separate fields, add the fields to `group_fields` and disable
+                `merge_blank_fields` """
             self.no_conflict = NoConflict
             """ [list(string)] List of fields where we tolerate conflicts.
                 Use it to avoid undesired warnings.