{"id":1588,"date":"2021-06-21T16:39:31","date_gmt":"2021-06-21T08:39:31","guid":{"rendered":"http:\/\/www.yatenglg.cn\/?p=1588"},"modified":"2022-10-18T16:34:39","modified_gmt":"2022-10-18T08:34:39","slug":"voc%e6%a0%bc%e5%bc%8f%e6%95%b0%e6%8d%ae%e9%9b%86xml%e6%96%87%e4%bb%b6%e6%93%8d%e4%bd%9c","status":"publish","type":"post","link":"http:\/\/www.yatenglg.cn\/blog\/?p=1588","title":{"rendered":"voc\u683c\u5f0f\u6570\u636e\u96c6xml\u6587\u4ef6\u64cd\u4f5c"},"content":{"rendered":"\n<h2>1. \u4ecb\u7ecd<\/h2>\n\n\n\n<p>\u5305\u62ec\uff1a\u79fb\u52a8\uff0c\u590d\u5236\uff0c\u68c0\u67e5\u7a7a\u6587\u4ef6\u3001\u65e0\u76ee\u6807\u6587\u4ef6\uff0c\u66ff\u6362\u7c7b\u522b\u540d\u7b49\u3002\u53ef\u901a\u8fc7\u7ee7\u627fBaseOp\u7c7b\uff0c\u8f7b\u677e\u6269\u5c55\u5176\u4ed6\u529f\u80fd\u3002<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"alignleft size-large\"><img width=\"600\" height=\"396\" src=\"http:\/\/www.yatenglg.cn\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-U0UB50.png\" alt=\"\" class=\"wp-image-1598\" srcset=\"http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-U0UB50.png 600w, http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-U0UB50-300x198.png 300w\" sizes=\"(max-width: 600px) 100vw, 600px\" \/><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"alignleft size-large\"><img width=\"636\" height=\"146\" src=\"http:\/\/www.yatenglg.cn\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-IM0R50.png\" alt=\"\" class=\"wp-image-1608\" srcset=\"http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-IM0R50.png 636w, http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-IM0R50-300x69.png 300w\" sizes=\"(max-width: 636px) 100vw, 636px\" \/><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"alignleft size-large\"><img width=\"654\" height=\"164\" src=\"http:\/\/www.yatenglg.cn\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-UJ7D50.png\" alt=\"\" class=\"wp-image-1611\" srcset=\"http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-UJ7D50.png 654w, http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-UJ7D50-300x75.png 300w\" sizes=\"(max-width: 654px) 100vw, 654px\" \/><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"alignleft size-large\"><img width=\"582\" height=\"146\" src=\"http:\/\/www.yatenglg.cn\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-BBZS50.png\" alt=\"\" class=\"wp-image-1610\" srcset=\"http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-BBZS50.png 582w, http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-BBZS50-300x75.png 300w\" sizes=\"(max-width: 582px) 100vw, 582px\" \/><\/figure><\/div>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"alignleft size-large is-resized\"><img src=\"http:\/\/www.yatenglg.cn\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-AMNV50.png\" alt=\"\" class=\"wp-image-1609\" width=\"562\" height=\"148\" srcset=\"http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-AMNV50.png 562w, http:\/\/www.yatenglg.cn\/blog\/wp-content\/uploads\/2021\/06\/gnome-shell-screenshot-AMNV50-300x79.png 300w\" sizes=\"(max-width: 562px) 100vw, 562px\" \/><\/figure><\/div>\n\n\n\n<h2>2. \u4f8b\u5b50<\/h2>\n\n\n\n<pre class=\"wp-block-code\"><code># \u4f8b\u5b50-\u590d\u5236, \u5c06\/my_data\u4e0b\u6240\u6709\u7684xml\u6587\u4ef6\uff08\u5305\u62ec\u5b50\u6587\u4ef6\u5939\u4e0b\u7684\uff09\u590d\u5236\u5230 my_data_new\u76ee\u5f55\u4e0b\npython VocXMLOps.py --root \/home\/my_data --recursion --num_processes 5 copy --to_root \/home\/my_data_new\/\n\n# \u4f8b\u5b50-\u66ff\u6362\u7c7b\u522b\u540d, \u5c06\/my_data\u5f53\u524d\u76ee\u5f55\u4e0b\u7684xml\u6587\u4ef6\u4e2d\u7684 'Person'\u7c7b\u522b\u540d\u66ff\u6362\u4e3a'\u4eba'\npython VocXMLOps.py --root \/home\/my_data replace --old_name Person --new_name \u4eba<\/code><\/pre>\n\n\n\n<h2>3. \u4ee3\u7801<\/h2>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism undefined-numbers lang-python\" data-file=\"VocXMLOps.py\" data-lang=\"Python\"><code>import xml.etree.ElementTree as ET\nimport os\nimport shutil\nfrom multiprocessing import Pool\n\n\nclass BaseOp(object):\n    def __init__(self):\n        &quot;&quot;&quot;\n        \u64cd\u4f5c\u7c7b\u57fa\u7c7b\n        &quot;&quot;&quot;\n        pass\n\n    def run(self, file_path):\n        raise NotImplementedError\n\n\nclass IsInvaild(BaseOp):\n    def __init__(self, remove=False):\n        &quot;&quot;&quot;\n        \u67e5\u627e\u65e0\u6548\u6216\u65e0\u76ee\u6807\u7684xml\u6587\u4ef6\n        &quot;&quot;&quot;\n        super(IsInvaild, self).__init__()\n        self.remove = remove\n\n    def run(self, file_path):\n        try:\n            tree = ET.parse(file_path)\n            objs = tree.findall(&#39;object&#39;)\n            if len(objs) &lt; 1:\n                if self.remove:\n                    os.remove(file_path)\n                    print(&#39;Found and remove no object xml file : {}&#39;.format(file_path))\n                else:\n                    print(&#39;Found no object xml file : {}&#39;.format(file_path))\n\n        except:\n            if self.remove:\n                try:\n                    os.remove(file_path)\n                    print(&#39;Found and remove invaild xml file : {}&#39;.format(file_path))\n                except:\n                    print(&#39;Found invaild xml file : {}, but remove failed.&#39;.format(file_path))\n            else:\n                print(&#39;Found invaild xml file : {}&#39;.format(file_path))\n\n\nclass DelObject(BaseOp):\n    def __init__(self, del_object_names:list):\n        super(DelObject, self).__init__()\n        self.del_object_names = del_object_names\n\n    def run(self, file_path):\n        try:\n            tree = ET.parse(file_path)\n            root = tree.getroot()\n            objs = tree.findall(&#39;object&#39;)\n            for obj in objs:\n                name = obj.find(&#39;name&#39;).text\n                if name in self.del_object_names:\n                    root.remove(obj)\n\n            tree.write(file_path)\n            print(&quot;Del objs: {} from {}&quot;.format(self.del_object_names, file_path))\n        except:\n            pass\n\n\nclass MoveOp(BaseOp):\n    def __init__(self, to_root):\n        &quot;&quot;&quot;\n        \u79fb\u52a8\u64cd\u4f5c\n        :param to_root:\n        &quot;&quot;&quot;\n        super(MoveOp, self).__init__()\n        self.to_root = to_root\n\n    def run(self, file_path):\n        try:\n            to_path = os.path.join(self.to_root, os.path.split(file_path)[-1])\n            shutil.move(file_path, os.path.join(self.to_root, os.path.split(file_path)[-1]))\n            print(&quot;Move {} to {}&quot;.format(file_path, to_path))\n        except:\n            pass\n\n\nclass CopyOp(BaseOp):\n    def __init__(self, to_root):\n        &quot;&quot;&quot;\n        \u590d\u5236\u64cd\u4f5c\n        :param to_root:\n        &quot;&quot;&quot;\n        super(CopyOp, self).__init__()\n        self.to_root = to_root\n\n    def run(self, file_path):\n        try:\n            to_path = os.path.join(self.to_root, os.path.split(file_path)[-1])\n            shutil.copy(file_path, to_path)\n            print(&quot;Copy {} to {}&quot;.format(file_path, to_path))\n        except:\n            pass\n\n\nclass ReplaceNameOp(BaseOp):\n    def __init__(self, old_name, new_name):\n        &quot;&quot;&quot;\n        xml\u7c7b\u522b\u540d\u79f0\u91cd\u547d\u540d\u64cd\u4f5c\n        :param old_name:\n        :param new_name:\n        &quot;&quot;&quot;\n        super(ReplaceNameOp, self).__init__()\n        self.old_name = old_name\n        self.new_name = new_name\n\n    def run(self, file_path):\n        try:\n            tree = ET.parse(file_path)\n            objs = tree.findall(&#39;object&#39;)\n            for obj in objs:\n                name = obj.find(&#39;name&#39;).text\n                if name == self.old_name:\n                    obj.find(&#39;name&#39;).text = self.new_name\n            tree.write(file_path)\n            print(&quot;Replace {}&quot;.format(file_path))\n        except:\n            pass\n\n\nclass VOCParser(object):\n    def __init__(self, root:str, recursion=False, num_processes=10):\n        &quot;&quot;&quot;\n\n        :param root:        \u6839\u76ee\u5f55\n        :param recursion:   \u662f\u5426\u9012\u5f52\u5904\u7406\u6240\u6709\u5b50\u6587\u4ef6\u5939\n        &quot;&quot;&quot;\n        self.root = root\n        self.ops = []\n        self.xmls = []\n        self.jpgs = []\n\n        self.pool = Pool(processes=num_processes)\n\n        self.find_xml(self.root, recursion)\n\n    def register(self, op:BaseOp):\n        self.ops.append(op)\n\n    def find_xml(self, root, recursion):\n        fs = os.listdir(root)\n        for f in fs:\n            f = os.path.join(root, f)\n            if os.path.isdir(f) and recursion:\n                self.find_xml(f, recursion)\n            if os.path.isfile(f):\n                if f.endswith(&#39;.xml&#39;):\n                    self.xmls.append(f)\n                else:\n                    pass\n\n    def run(self):\n        for xml in self.xmls:\n            for op in self.ops:\n                self.pool.apply_async(op.run, (xml,))\n\n        self.pool.close()\n        self.pool.join()\n\n\nif __name__ == &#39;__main__&#39;:\n    import argparse\n\n    parse = argparse.ArgumentParser(description=&#39;VOC\u683c\u5f0f\u6570\u636e\u96c6,xml\u6587\u4ef6\u64cd\u4f5c.&#39;)\n    parse.add_argument(&#39;--root&#39;, required=True, help=&#39;\u6587\u4ef6\u6839\u76ee\u5f55.&#39;)\n    parse.add_argument(&#39;--recursion&#39;, action=&#39;store_true&#39;, help=&#39;\u5426\u5b9a\u9012\u5f52\u5904\u7406\u6240\u6709\u5b50\u6587\u4ef6\u5939\u4e2d\u7684xml\u6587\u4ef6.&#39;)\n    parse.add_argument(&#39;--num_processes&#39;, default=10, type=int, help=&#39;\u8fdb\u7a0b\u6570.&#39;)\n\n    subparser = parse.add_subparsers(dest=&#39;subparser_name&#39;, description=&#39;\u5b50\u547d\u4ee4&#39;)\n    invaild = subparser.add_parser(&#39;invaild&#39;, help=&#39;\u67e5\u627e\u65e0\u6548\u6216\u65e0\u76ee\u6807\u7684xml\u6587\u4ef6&#39;, description=&#39;\u67e5\u627e\u65e0\u6548\u6216\u65e0\u76ee\u6807\u7684xml\u6587\u4ef6&#39;)\n    invaild.add_argument(&#39;--remove&#39;, action=&#39;store_true&#39;, help=&#39;\u79fb\u9664\u65e0\u6548\u6587\u4ef6&#39;)\n\n    replace = subparser.add_parser(&#39;replace&#39;, help=&#39;\u66ff\u6362xml\u7c7b\u522b\u540d&#39;, description=&#39;\u66ff\u6362xml\u7c7b\u522b\u540d&#39;)\n    replace.add_argument(&#39;--old_name&#39;, required=True, help=&#39;\u65e7\u7c7b\u522b\u540d&#39;)\n    replace.add_argument(&#39;--new_name&#39;, required=True, help=&#39;\u65b0\u7c7b\u522b\u540d&#39;)\n\n    delobjects = subparser.add_parser(&#39;delobjects&#39;, help=&#39;\u5220\u9664xml\u7c7b\u522b.\u53ef\u540c\u65f6\u5220\u9664\u591a\u4e2a\u7c7b\u522b\uff0c\u591a\u4e2a\u7c7b\u522b\u95f4\u7528\u7a7a\u683c\u9694\u5f00&#39;, description=&#39;\u5220\u9664xml\u7c7b\u522b&#39;)\n    delobjects.add_argument(&#39;--del_objects&#39;, nargs=&#39;+&#39;, help=&#39;\u5220\u9664\u7684\u7c7b\u522b\u540d&#39;)\n\n    move = subparser.add_parser(&#39;move&#39;, help=&#39;\u79fb\u52a8xml\u6587\u4ef6&#39;, description=&#39;\u79fb\u52a8xml\u6587\u4ef6&#39;)\n    move.add_argument(&#39;--to_root&#39;, required=True, help=&#39;\u76ee\u6807\u6587\u4ef6\u5939.&#39;)\n\n    copy = subparser.add_parser(&#39;copy&#39;, help=&#39;\u590d\u5236xml\u6587\u4ef6&#39;, description=&#39;\u590d\u5236xml\u6587\u4ef6&#39;)\n    copy.add_argument(&#39;--to_root&#39;, required=True, help=&#39;\u76ee\u6807\u6587\u4ef6\u5939.&#39;)\n\n    args = parse.parse_args()\n\n    vocparser = VOCParser(args.root, args.recursion, args.num_processes)\n\n    if args.subparser_name == &#39;invaild&#39;:\n        vocparser.register(IsInvaild(args.remove))\n\n    elif args.subparser_name == &#39;replace&#39;:\n        vocparser.register(ReplaceNameOp(args.old_name, args.new_name))\n\n    elif args.subparser_name == &#39;move&#39;:\n        vocparser.register(MoveOp(args.to_root))\n\n    elif args.subparser_name == &#39;copy&#39;:\n        vocparser.register(CopyOp(args.to_root))\n\n    elif args.subparser_name == &#39;delobjects&#39;:\n        vocparser.register(DelObject(args.del_objects))\n\n    vocparser.run()<\/code><\/pre><\/div>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>VOC\u683c\u5f0f\u6570\u636e\uff0cxml\u6587\u4ef6\u64cd\u4f5c\u96c6\u5408\u3002<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":[],"categories":[8,5],"tags":[],"_links":{"self":[{"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=\/wp\/v2\/posts\/1588"}],"collection":[{"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1588"}],"version-history":[{"count":47,"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=\/wp\/v2\/posts\/1588\/revisions"}],"predecessor-version":[{"id":1642,"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=\/wp\/v2\/posts\/1588\/revisions\/1642"}],"wp:attachment":[{"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1588"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1588"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.yatenglg.cn\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1588"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}