diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py
index 62c2b7d6..5a354f14 100644
--- a/bin/import_lingua_libre.py
+++ b/bin/import_lingua_libre.py
@@ -11,6 +11,7 @@ import sys
 sys.path.insert(1, os.path.join(sys.path[0], '..'))
 
 import csv
+import re
 import sox
 import zipfile
 import subprocess
@@ -74,7 +75,8 @@ def _maybe_convert_sets(target_dir, extracted_data):
     glob_dir = os.path.join(ogg_root_dir, '**/*.ogg')
     for record in glob(glob_dir, recursive=True):
         record_file = record.replace(ogg_root_dir + os.path.sep, '')
-        samples.append((record_file, os.path.splitext(os.path.basename(record_file))[0]))
+        if record_filter(record_file):
+            samples.append((record_file, os.path.splitext(os.path.basename(record_file))[0]))
 
     # Keep track of how many samples are good vs. problematic
     counter = {'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'total_time': 0}
@@ -177,12 +179,23 @@ def handle_args():
     parser.add_argument('--english-name', type=str, required=True, help='Enligh name of the language')
     parser.add_argument('--filter_alphabet', help='Exclude samples with characters not in provided alphabet')
     parser.add_argument('--normalize', action='store_true', help='Converts diacritic characters to their base ones')
+    parser.add_argument('--bogus-records', type=argparse.FileType('r'), required=False, help='Text file listing well-known bogus record to skip from importing, from https://lingualibre.fr/wiki/LinguaLibre:Misleading_items')
     return parser.parse_args()
 
 if __name__ == "__main__":
     CLI_ARGS = handle_args()
     ALPHABET = Alphabet(CLI_ARGS.filter_alphabet) if CLI_ARGS.filter_alphabet else None
 
+    # --bogus-records is optional (None if omitted); blank lines are skipped since '' matches every path.
+    bogus_regexes = [re.compile(line.strip()) for line in CLI_ARGS.bogus_records or [] if line.strip()]
+
+    def record_filter(path):
+        """Return False (and print the path) if it matches a bogus-record regex, else True."""
+        if any(regex.match(path) for regex in bogus_regexes):
+            print('Reject', path)
+            return False
+        return True
+
     def label_filter(label):
         if CLI_ARGS.normalize:
             label = unicodedata.normalize("NFKD", label.strip()) \